RE: [PATCH net-next] cxgb4: replace ntoh{s,l} and hton{s,l} calls with the generic byteorder

2015-05-19 Thread Hariprasad S
Ignore this. Needs some correction. Using incorrect byte-ordering at 2 places. 
Will send a V2 for the same. 

Thanks,
Hari

From: Hariprasad Shenai [haripra...@chelsio.com]
Sent: Tuesday, May 19, 2015 3:43 PM
To: netdev@vger.kernel.org
Cc: da...@davemloft.net; Casey Leedom; Nirranjan Kirubaharan; Hariprasad S
Subject: [PATCH net-next] cxgb4: replace ntoh{s,l} and hton{s,l} calls with the 
generic byteorder

replace ntoh{s,l} and hton{s,l} calls with the generic byteorder in
cxgb4/t4_hw.c file

Signed-off-by: Hariprasad Shenai haripra...@chelsio.com
---
 drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 403 -
 1 file changed, 219 insertions(+), 184 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c 
b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index c626252..e16f406 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -214,8 +214,8 @@ static void fw_asrt(struct adapter *adap, u32 mbox_addr)
get_mbox_rpl(adap, (__be64 *)asrt, sizeof(asrt) / 8, mbox_addr);
dev_alert(adap-pdev_dev,
  FW assertion at %.16s:%u, val0 %#x, val1 %#x\n,
- asrt.u.assert.filename_0_7, ntohl(asrt.u.assert.line),
- ntohl(asrt.u.assert.x), ntohl(asrt.u.assert.y));
+ asrt.u.assert.filename_0_7, be32_to_cpu(asrt.u.assert.line),
+ be32_to_cpu(asrt.u.assert.x), be32_to_cpu(asrt.u.assert.y));
 }

 static void dump_mbox(struct adapter *adap, int mbox, u32 data_reg)
@@ -378,7 +378,7 @@ int t4_mc_read(struct adapter *adap, int idx, u32 addr, 
__be32 *data, u64 *ecc)
 #define MC_DATA(i) MC_BIST_STATUS_REG(mc_bist_status_rdata, i)

for (i = 15; i = 0; i--)
-   *data++ = htonl(t4_read_reg(adap, MC_DATA(i)));
+   *data++ = cpu_to_be32(t4_read_reg(adap, MC_DATA(i)));
if (ecc)
*ecc = t4_read_reg64(adap, MC_DATA(16));
 #undef MC_DATA
@@ -435,7 +435,7 @@ int t4_edc_read(struct adapter *adap, int idx, u32 addr, 
__be32 *data, u64 *ecc)
 #define EDC_DATA(i) (EDC_BIST_STATUS_REG(edc_bist_status_rdata, i))

for (i = 15; i = 0; i--)
-   *data++ = htonl(t4_read_reg(adap, EDC_DATA(i)));
+   *data++ = cpu_to_be32(t4_read_reg(adap, EDC_DATA(i)));
if (ecc)
*ecc = t4_read_reg64(adap, EDC_DATA(16));
 #undef EDC_DATA
@@ -1618,7 +1618,7 @@ int t4_read_flash(struct adapter *adapter, unsigned int 
addr,
if (ret)
return ret;
if (byte_oriented)
-   *data = (__force __u32) (htonl(*data));
+   *data = (__force __u32)(cpu_to_be32(*data));
}
return 0;
 }
@@ -1979,7 +1979,7 @@ int t4_load_fw(struct adapter *adap, const u8 *fw_data, 
unsigned int size)
FW image size not multiple of 512 bytes\n);
return -EINVAL;
}
-   if (ntohs(hdr-len512) * 512 != size) {
+   if ((unsigned int)be16_to_cpu(hdr-len512) * 512 != size) {
dev_err(adap-pdev_dev,
FW image size differs from size in FW header\n);
return -EINVAL;
@@ -1993,7 +1993,7 @@ int t4_load_fw(struct adapter *adap, const u8 *fw_data, 
unsigned int size)
return -EINVAL;

for (csum = 0, i = 0; i  size / sizeof(csum); i++)
-   csum += ntohl(p[i]);
+   csum += be32_to_cpu(p[i]);

if (csum != 0x) {
dev_err(adap-pdev_dev,
@@ -2012,7 +2012,7 @@ int t4_load_fw(struct adapter *adap, const u8 *fw_data, 
unsigned int size)
 * first page with a bad version.
 */
memcpy(first_page, fw_data, SF_PAGE_SIZE);
-   ((struct fw_hdr *)first_page)-fw_ver = htonl(0x);
+   ((struct fw_hdr *)first_page)-fw_ver = cpu_to_be32(0x);
ret = t4_write_flash(adap, fw_img_start, SF_PAGE_SIZE, first_page);
if (ret)
goto out;
@@ -2107,19 +2107,22 @@ int t4_link_start(struct adapter *adap, unsigned int 
mbox, unsigned int port,
fc |= FW_PORT_CAP_FC_TX;

memset(c, 0, sizeof(c));
-   c.op_to_portid = htonl(FW_CMD_OP_V(FW_PORT_CMD) | FW_CMD_REQUEST_F |
-  FW_CMD_EXEC_F | FW_PORT_CMD_PORTID_V(port));
-   c.action_to_len16 = htonl(FW_PORT_CMD_ACTION_V(FW_PORT_ACTION_L1_CFG) |
- FW_LEN16(c));
+   c.op_to_portid = cpu_to_be32(FW_CMD_OP_V(FW_PORT_CMD) |
+FW_CMD_REQUEST_F | FW_CMD_EXEC_F |
+FW_PORT_CMD_PORTID_V(port));
+   c.action_to_len16 =
+   cpu_to_be32(FW_PORT_CMD_ACTION_V(FW_PORT_ACTION_L1_CFG) |
+   FW_LEN16(c));

if (!(lc-supported  FW_PORT_CAP_ANEG)) {
-   c.u.l1cfg.rcap = htonl((lc-supported  ADVERT_MASK) | fc);
+  

pull-request: mac80211 2015-05-19

2015-05-19 Thread Johannes Berg
Hi Dave,

And one more - for the current cycle (and stable as well).

Let me know if there's any problem.

johannes

The following changes since commit ff419b3f95ab7a97c5f72876b53f12a249dacc2a:

  mac80211: fix 90 kernel-doc warnings (2015-05-04 12:56:13 +0200)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211.git 
tags/mac80211-for-davem-2015-05-19

for you to fetch changes up to 47b4e1fc4972cc43a19121bc2608a60aef3bf216:

  mac80211: move WEP tailroom size check (2015-05-11 14:51:29 +0200)


This has just a single fix, for a WEP tailroom check
problem that leads to dropped frames.


Janusz Dziedzic (1):
  mac80211: move WEP tailroom size check

 net/mac80211/wep.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)


--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Experimental new bonding driver mode=batman

2015-05-19 Thread Michal Kubecek
On Tue, May 19, 2015 at 02:09:43AM -0400, Patrick Simmons wrote:
 
 I've written a new mode for the kernel bonding driver.  It's similar to
 the round-robin mode, but it keeps statistics on TCP resends so as to
 favor slave devices with more bandwidth when choosing where to send
 packets.  I've tested it on two laptops using WiFi/Ethernet and it seems
 to work okay, but it should be considered experimental.

A description of how is the mode supposed to work would be definitely
helpful.

Below are some comments from a quick look over the patch, I didn't think
too much about the logic of the operation.

 +struct batnode
 +{
 + struct batnode* next_batnode;
 + char slvname[IFNAMSIZ];
 + unsigned long jiffiestamp;
 + u32 tcp_seq_num;
 + u16 port_id;
 +};
 +
 +struct slave_congestion_node
 +{
 + struct slave_congestion_node* next_slave;
 + char this_slave[IFNAMSIZ];
 + u32 congestion_counter;
 +};

Referencing the slave by its name doesn't seem very efficient,
especially if you are going to do it in the xmit handler.

 +#define BATTABLE_SIZE 256
 +static struct batnode battable[BATTABLE_SIZE];
 +static struct slave_congestion_node* slavelist_head_ptr = NULL;

Why one global list rather than one list per bond? And why do you need
your own list in addition to already existing slave structures?

 +static struct slave_congestion_node* get_slave_congestion_node(char* slvname)
 +{
 + //printk(In get_slave_congestion_node: 0x%zx\n,the_slave);
 + struct slave_congestion_node* cur_slave = slavelist_head_ptr;
 + while(cur_slave)
 + {
 +  if(strcmp(cur_slave-this_slave,slvname)==0)
 +  {
 +   //printk(Found a match: 0x%zx\n,cur_slave);
 +   return cur_slave;
 +  }
 +
 +  if(!cur_slave-next_slave)
 +   break;
 +  cur_slave = cur_slave-next_slave;
 + } 
 +
 + //Create new slave node
 + if(!cur_slave) //special case head
 + {
 +  cur_slave = (struct slave_congestion_node*)(kmalloc(sizeof(struct 
 slave_congestion_node),GFP_ATOMIC));
 +  slavelist_head_ptr = cur_slave;
 + }
 + else
 + {
 +  cur_slave-next_slave = (struct 
 slave_congestion_node*)(kmalloc(sizeof(struct 
 slave_congestion_node),GFP_ATOMIC));
 +  cur_slave = cur_slave-next_slave;
 + }

You don't handle kmalloc() failure.

 +
 + //Initialize new slave node
 + cur_slave-next_slave = NULL;
 + strlcpy(cur_slave-this_slave,slvname,IFNAMSIZ);
 + cur_slave-congestion_counter = 1;
 + 
 + //printk(Created new congestion node: 0x%zx\n,cur_slave);
 + return cur_slave;
 +}
 +
 +static void slave_congestion_increment(char* the_slave)
 +{
 + get_slave_congestion_node(the_slave)-congestion_counter++;
 +}
 +
 +static void slave_congestion_decrement(char* the_slave)
 +{
 + get_slave_congestion_node(the_slave)-congestion_counter--;
 +}

Should these three really be in a header file?

 +static DEFINE_SPINLOCK(batlock);

A lot of effort has been done recently to get rid of per-bond spinlocks.
Adding a global one kind of goes against this work.

 @@ -1082,6 +1088,69 @@ static bool bond_should_deliver_exact_ma
   return false;
  }
  
 +static void batman_normalize_congestion(struct bonding* bond, int* 
 congestion)
 +{
 + static long unsigned int jiffiestamp = 0;
 + long unsigned int jiffie_temp = jiffies;
 + struct slave* slave;
 + struct list_head* i;
 + if(jiffie_temp!=jiffiestamp  jiffie_temp%1000==0) //every thousand 
 packets, normalize congestion

Looks more like every 1000 jiffies, not packets. And what if this
function doesn't get called in that one jiffy when jiffies is a multiple
of 1000? (Similar problem later in bond_xmit_batman().)

 +  printk(KERN_DEBUG batman: congestion normalization has 
 occurred.\n);

Use pr_debug() or netdev_dbg().

 +static int batman_timer_invoke(void* bond_)
 +{
 + struct bonding* bond = (struct bonding*)(bond_);
 + struct slave* slave;
 + struct list_head* i;
 +
 + while(!kthread_should_stop())
 + {
 +  unsigned long batflags;
 +  long unsigned int jiffiestamp = jiffies;
 +  
 +  msleep(10*1000);
 +  rcu_read_lock();
 +  spin_lock_irqsave(batlock,batflags);
 +  
 +  long unsigned int jiffie_temp = jiffies;
 +  if(!(jiffie_temp - jiffiestamp  5 * HZ))
 +   bond_for_each_slave_rcu(bond,slave,i)
 +   {
 +struct slave_congestion_node* congest_current = 
 get_slave_congestion_node(slave-dev-name);
 +if(congest_current-congestion_counter = 2)
 + congest_current-congestion_counter/=2;
 +   }
 +
 +  spin_unlock_irqrestore(batlock,batflags);
 +  rcu_read_unlock();
 + }
 +}

This seems to only run something periodically, why do you need a (per
bond) kthread instead of a simple timer?

 @@ 

Re: [Xen-devel] [RFC PATCH 12/13] xen-netfront: implement TX persistent grants

2015-05-19 Thread Joao Martins

On 18 May 2015, at 17:55, David Vrabel david.vra...@citrix.com wrote:
 On 12/05/15 18:18, Joao Martins wrote:
 Instead of grant/revoking the buffer related to the skb, it will use
 an already granted page and memcpy  to it. The grants will be mapped
 by xen-netback and reused overtime, but only unmapped when the vif
 disconnects, as opposed to every packet.
 
 This only happens if the backend supports persistent grants since it
 would, otherwise, introduce the overhead of a memcpy on top of the
 grant map.
 [...]
 --- a/drivers/net/xen-netfront.c
 +++ b/drivers/net/xen-netfront.c
 [...]
 @@ -1610,7 +1622,10 @@ static int xennet_init_queue(struct netfront_queue 
 *queue)
  for (i = 0; i  NET_TX_RING_SIZE; i++) {
  skb_entry_set_link(queue-tx_skbs[i], i+1);
  queue-grant_tx[i].ref = GRANT_INVALID_REF;
 -queue-grant_tx[i].page = NULL;
 +if (queue-info-feature_persistent)
 +queue-grant_tx[i].page = alloc_page(GFP_NOIO);
 
 Need to check for alloc failure here and unwind correctly?
Sorry, I overlooked this check. I will fix that.

 Why NOIO?
May be I am misusing NOIO where I meant __GFP_WAIT.
Tough given we are under rtnl_lock() perhaps GFP_ATOMIC should be used 
instead.--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] vhost: support upto 509 memory regions

2015-05-19 Thread Igor Mammedov
On Mon, 18 May 2015 19:22:34 +0300
Andrey Korolyov and...@xdel.ru wrote:

 On Wed, Feb 18, 2015 at 7:27 AM, Michael S. Tsirkin m...@redhat.com wrote:
  On Tue, Feb 17, 2015 at 04:53:45PM -0800, Eric Northup wrote:
  On Tue, Feb 17, 2015 at 4:32 AM, Michael S. Tsirkin m...@redhat.com 
  wrote:
   On Tue, Feb 17, 2015 at 11:59:48AM +0100, Paolo Bonzini wrote:
  
  
   On 17/02/2015 10:02, Michael S. Tsirkin wrote:
 Increasing VHOST_MEMORY_MAX_NREGIONS from 65 to 509
 to match KVM_USER_MEM_SLOTS fixes issue for vhost-net.

 Signed-off-by: Igor Mammedov imamm...@redhat.com
   
This scares me a bit: each region is 32byte, we are talking
a 16K allocation that userspace can trigger.
  
   What's bad with a 16K allocation?
  
   It fails when memory is fragmented.
  
How does kvm handle this issue?
  
   It doesn't.
  
   Paolo
  
   I'm guessing kvm doesn't do memory scans on data path,
   vhost does.
  
   qemu is just doing things that kernel didn't expect it to need.
  
   Instead, I suggest reducing number of GPA-HVA mappings:
  
   you have GPA 1,5,7
   map them at HVA 11,15,17
   then you can have 1 slot: 1-11
  
   To avoid libc reusing the memory holes, reserve them with MAP_NORESERVE
   or something like this.
 
  This works beautifully when host virtual address bits are more
  plentiful than guest physical address bits.  Not all architectures
  have that property, though.
 
  AFAIK this is pretty much a requirement for both kvm and vhost,
  as we require each guest page to also be mapped in qemu memory.
 
   We can discuss smarter lookup algorithms but I'd rather
   userspace didn't do things that we then have to
   work around in kernel.
  
  
   --
   MST
   --
   To unsubscribe from this list: send the line unsubscribe kvm in
   the body of a message to majord...@vger.kernel.org
   More majordomo info at  http://vger.kernel.org/majordomo-info.html
  --
  To unsubscribe from this list: send the line unsubscribe netdev in
  the body of a message to majord...@vger.kernel.org
  More majordomo info at  http://vger.kernel.org/majordomo-info.html
 
 
 Hello,
 
 any chance of getting the proposed patch in the mainline? Though it
 seems that most users will not suffer from relatively slot number
 ceiling (they can decrease slot 'granularity' for larger VMs and
 vice-versa), fine slot size, 256M or even 128M, with the large number
 of slots can be useful for a certain kind of tasks for an
 orchestration systems. I`ve made a backport series of all seemingly
 interesting memslot-related improvements to a 3.10 branch, is it worth
 to be tested with straighforward patch like one from above, with
 simulated fragmentation of allocations in host?

I'm almost done with approach suggested by Paolo,
i.e. replace linear search with faster/scalable lookup alg.
Hope to post it soon.

 --
 To unsubscribe from this list: send the line unsubscribe kvm in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2] net: fix a double free issue for neighbour entry

2015-05-19 Thread Eric Dumazet
On Wed, 2015-05-20 at 09:25 +0800, Ying Xue wrote:
 Calling __ipv4_neigh_lookup_noref() inside rcu_read_lock_bh() can
 guarantee that its searched neighbour entry is not freed before RCU
 grace period, but it cannot ensure that its obtained neighbour will
 be freed shortly. Exactly saying, it cannot prevent neigh_destroy()
 from being executed on another context at the same time. For example,
 if ip_finish_output2() continues to deliver a SKB with a neighbour
 entry whose refcount is zero, neigh_add_timer() may be called in
 neigh_resolve_output() subsequently. As a result, neigh_add_timer()
 takes refcount on the neighbour that already had a refcount of zero.
 When the neighbour refcount is put before the timer's handler is
 exited, neigh_destroy() will be called again, meaning crash happens
 at the moment.
 
 To prevent the issue from occurring, we must check whether the refcount
 of a neighbour searched by __ipv4_neigh_lookup_noref() is decremented
 to zero or not. If it's zero, we should create a new one.
 
 However, as reading neigh's refcount is unsafe through atomic_read()
 like it doesn't imply any memory barrier and the cost is too expensive
 if we enforce a proper implicit or explicit memory barrier on it,
 another checking of identifying whether neigh's dead flag is set or not
 is involved into __neigh_event_send() to further prevent neigh_add_timer()
 from holding a neigh's refcount that already hit zero, thereby avoiding
 what the issue cannot absolutely happen.
 
 Reported-by: Joern Engel jo...@logfs.org
 Cc: Eric Dumazet eduma...@google.com
 Signed-off-by: Ying Xue ying@windriver.com
 ---
  v2:
   - As Eric pointed that identifying whether neigh's refcnt is zero
 through atomic_read() is unsafe, another condition checking of
 verifying neigh's dead flag is set is involved into
 __neigh_event_send() to further prevent neigh_add_timer() from
 holding a neigh's refcnt that already hit zero.
   - Now the patch is created based on net tree considering it's
 a very fatal issue.
 
  net/core/neighbour.c |3 +++
  net/ipv4/ip_output.c |2 +-
  2 files changed, 4 insertions(+), 1 deletion(-)
 
 diff --git a/net/core/neighbour.c b/net/core/neighbour.c
 index 3de6542..c7a675c 100644
 --- a/net/core/neighbour.c
 +++ b/net/core/neighbour.c
 @@ -958,6 +958,9 @@ int __neigh_event_send(struct neighbour *neigh, struct 
 sk_buff *skb)
   if (neigh-nud_state  (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
   goto out_unlock_bh;
  
 + if (neigh-dead)
 + goto out_unlock_bh;
 +
   if (!(neigh-nud_state  (NUD_STALE | NUD_INCOMPLETE))) {
   if (NEIGH_VAR(neigh-parms, MCAST_PROBES) +
   NEIGH_VAR(neigh-parms, APP_PROBES)) {
 diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
 index c65b93a..5889774 100644
 --- a/net/ipv4/ip_output.c
 +++ b/net/ipv4/ip_output.c
 @@ -200,7 +200,7 @@ static inline int ip_finish_output2(struct sock *sk, 
 struct sk_buff *skb)
   rcu_read_lock_bh();
   nexthop = (__force u32) rt_nexthop(rt, ip_hdr(skb)-daddr);
   neigh = __ipv4_neigh_lookup_noref(dev, nexthop);
 - if (unlikely(!neigh))
 + if (unlikely(!neigh || !atomic_read(neigh-refcnt)))
   neigh = __neigh_create(arp_tbl, nexthop, dev, false);
   if (!IS_ERR(neigh)) {
   int res = dst_neigh_output(dst, neigh, skb);

Sorry, this atomic_read() makes no sense to me.

When rcu is used, this is perfectly fine to use an object which refcount
is 0. If you believe the opposite, then point me to relevant
Documentation/RCU/ file.

refcount should only protect the object itself, not the use of it by a
rcu reader. This has nothing to do with rcu but standard refcounting of
objects.

The only forbidden thing would be to try to take a reference on it with
atomic_inc() instead of the more careful atomic_inc_not_zero(), if the
caller needs to exit the rcu_read_lock() section safely (as explained in
Documentation/RCU/rcuref.txt around line 52)

So far, dst_neigh_output() will not try to take a refcnt.


By the time you check atomic_read() the result is meaningless if another
cpu decrements the refcount to 0.

So what is the point, trying to reduce a race window but not properly
remove it ?

I repeat again : using atomic_read() in a rcu lookup is absolutely
useless and is a clear sign you missed a fundamental issue. 

So now, we are back to the patch I initially sent, but you did not
address Julian Anastasov feedback.

Really I am not very happy...


--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 net-next 0/4] rocker: transaction fixes

2015-05-19 Thread Simon Horman
Hi,

this series addresses what appear to be errors in the handling of
prepare and then commit transactions in the rocker driver.

In all cases the problem is that data structures visible outside of
the transaction are modified during the prepare phase.

In the case of the first two patches this results in the kernel reporting a
BUG. I have noted test-cases in the change logs.

The third patch is also a bug fix, as noted by  Toshiaki Makita,
however I have not been able to reliably reproduce the problem and
thus have not provided a test case.

The last patch is a correctness fix that does not fix a bug
that manifests as far as I can tell.


Changes: v2-v3
* rocker: do not make neighbour entry changes when preparing transactions
  - Correct inverted logic
  - Added ack from Scott Feldman

Changes: v1-v2
* rocker: do not make neighbour entry changes when preparing transactions
  - Revised changelog to reflect information from Toshiaki Makita
that there is a bug that can manifest
  - Update address and ttl regardless of the value of the transaction state
* All other patches
  - Added acks from Scott Feldman


Simon Horman (4):
  rocker: do not delete fdb entries in rocker_port_fdb_flush() when
preparing transactions
  rocker: do not modify fdb table in rocker_port_fdb() when preparing
transactions
  rocker: do not make neighbour entry changes when preparing
transactions
  rocker: make rocker_port_internal_vlan_id_{get,put}()
non-transactional

 drivers/net/ethernet/rocker/rocker.c | 46 +++-
 1 file changed, 24 insertions(+), 22 deletions(-)

-- 
2.1.4

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: iptables: ensure number of counters is 0 in do_replace()

2015-05-19 Thread David Miller
From: Dave Jones da...@codemonkey.org.uk
Date: Tue, 19 May 2015 19:42:52 -0400

 What's the preferred format, 8 separate patches, or 1 all-in-one
 diff for all instances of this bug ?

Personally, I think you could do this in one patch.
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Xen-devel] [RFC 12/23] xen: Extend page_to_mfn to take an offset in the page

2015-05-19 Thread Julien Grall
Hi David,

On 19/05/15 14:57, David Vrabel wrote:
 On 14/05/15 18:00, Julien Grall wrote:
 With 64KB page granularity support in Linux, a page will be split accross
 multiple MFN (Xen is using 4KB page granularity). Thoses MFNs may not be
 contiguous.

 With the offset in the page, the helper will be able to know which MFN
 the driver needs to retrieve.
 
 I think a gnttab_grant_foreign_access_ref()-like helper that takes a
 page would be better.

 You will probably want this helper able to return/fill a set of refs for
 64 KiB pages.

I will see what I can do.

Although, I think this patch is still valid to avoid wrong usage with
64KB page granularity by the caller.

The developer may think that MFN are contiguous which is not always true.

Regards,

-- 
Julien Grall
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH V6 05/10] audit: log creation and deletion of namespace instances

2015-05-19 Thread Richard Guy Briggs
On 15/05/16, Paul Moore wrote:
 On Sat, May 16, 2015 at 10:46 AM, Eric W. Biederman
 ebied...@xmission.com wrote:
  Paul Moore p...@paul-moore.com writes:
  On Sat, May 16, 2015 at 5:46 AM, Daniel J Walsh dwa...@redhat.com wrote:
  On 05/15/2015 05:05 PM, Paul Moore wrote:
  On Thursday, May 14, 2015 11:23:09 PM Andy Lutomirski wrote:
  On Thu, May 14, 2015 at 7:32 PM, Richard Guy Briggs r...@redhat.com 
  wrote:
  On 15/05/14, Paul Moore wrote:
  * Look at our existing audit records to determine which records should
  have
  namespace and container ID tokens added.  We may only want to add the
  additional fields in the case where the namespace/container ID tokens 
  are
  not the init namespace.
  If we have a record that ties a set of namespace IDs with a container
  ID, then I expect we only need to list the containerID along with auid
  and sessionID.
  The problem here is that the kernel has no concept of a container, 
  and I
  don't think it makes any sense to add one just for audit.  Container 
  is a
  marketing term used by some userspace tools.
 
  I can imagine that both audit could benefit from a concept of a
  namespace *path* that understands nesting (e.g. root/2/5/1 or
  something along those lines).  Mapping these to containers belongs
  in userspace, I think.
  It might be helpful to climb up a few levels in this thread ...
 
  I think we all agree that containers are not a kernel construct.  I 
  further
  believe that the kernel has no business generating container IDs, those 
  should
  come from userspace and will likely be different depending on how you 
  define
  container.  However, what is less clear to me at this point is how the
  kernel should handle the setting, reporting, and general management of 
  this
  container ID token.
 
  Wouldn't the easiest thing be to just treat add a containerid to the
  process context like auid.
 
  I believe so.  At least that was the point I was trying to get across
  when I first jumped into this thread.
 
  It sounds nice but containers are not just a per process construct.
  Sometimes you might know anamespace but not which process instigated
  action to happen on that namespace.
 
 From an auditing perspective I'm not sure we will ever hit those
 cases; did you have a particular example in mind?

The example that immediately came to mind when I first read Eric's
comment was a packet coming in off a network in a particular network
namespace.  That could narrow it down to a subset of containers based on
which network namespace it inhabits, but since it isn't associated with
a particular task yet (other than a kernel thread) it will not be
possible to select the precise nsproxy, let alone the container.

 paul moore

- RGB

--
Richard Guy Briggs rbri...@redhat.com
Senior Software Engineer, Kernel Security, AMER ENG Base Operating Systems, Red 
Hat
Remote, Ottawa, Canada
Voice: +1.647.777.2635, Internal: (81) 32635, Alt: +1.613.693.0684x3545
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH 06/13] xen-netback: copy buffer on xenvif_start_xmit()

2015-05-19 Thread Wei Liu
On Tue, May 12, 2015 at 07:18:30PM +0200, Joao Martins wrote:
 By introducing persistent grants we speed up the RX thread with the
 decreased copy cost, that leads to a throughput decrease of 20%.
 It is observed that the rx_queue stays mostly at 10% of its capacity,
 as opposed to full capacity when using grant copy. And a finer measure
 with lock_stat (below with pkt_size 64, burst 1) shows much higher wait
 queue contention on queue-wq, which hints that the RX kthread is
 waits/wake_up more often, that is actually doing work.
 
 Without persistent grants:
 
 class namecon-bouncescontentions   waittime-min   waittime-max
 waittime-total   waittime-avgacq-bounces   acquisitions   holdtime-min
 holdtime-max holdtime-total   holdtime-avg
 --
 queue-wq:   792792   0.36  24.36
 1140.30   1.44   42081002671   0.00
 46.75  538164.02   0.54
 --
 queue-wq326  [8115949f] __wake_up+0x2f/0x80
 queue-wq410  [811592bf] finish_wait+0x4f/0xa0
 queue-wq 56  [811593eb] prepare_to_wait+0x2b/0xb0
 --
 queue-wq202  [811593eb] prepare_to_wait+0x2b/0xb0
 queue-wq467  [8115949f] __wake_up+0x2f/0x80
 queue-wq123  [811592bf] finish_wait+0x4f/0xa0
 
 With persistent grants:
 
 queue-wq:   61834  61836   0.32  30.12
 99710.27   1.61 2414001125308   0.00
 75.61 1106578.82   0.98
 --
 queue-wq 5079[8115949f] __wake_up+0x2f/0x80
 queue-wq56280[811592bf] finish_wait+0x4f/0xa0
 queue-wq  479[811593eb] prepare_to_wait+0x2b/0xb0
 --
 queue-wq 1005[811592bf] finish_wait+0x4f/0xa0
 queue-wq56761[8115949f] __wake_up+0x2f/0x80
 queue-wq 4072[811593eb] prepare_to_wait+0x2b/0xb0
 
 Also, with persistent grants, we don't require batching grant copy ops
 (besides the initial copy+map) which makes me believe that deferring
 the skb to the RX kthread just adds up unnecessary overhead (for this
 particular case). This patch proposes copying the buffer on
 xenvif_start_xmit(), which lets us both remove the contention on
 queue-wq and lock on rx_queue. Here, an alternative to
 xenvif_rx_action routine is added namely xenvif_rx_map() that maps
 and copies the buffer to the guest. This is only used when persistent
 grants are used, since it would otherwise mean an hypercall per
 packet.
 
 Improvements are up to a factor of 2.14 with a single queue getting us
 from 1.04 Mpps to 1.7 Mpps (burst 1, pkt_size 64) and 1.5 to 2.6 Mpps
 (burst 2, pkt_size 64) compared to using the kthread. Maximum with grant
 copy is 1.2 Mpps, irrespective of the burst. All of this, measured on
 an Intel(R) Xeon(R) CPU E5-1650 v2 @ 3.50GHz.
 
 Signed-off-by: Joao Martins joao.mart...@neclab.eu
 ---
  drivers/net/xen-netback/common.h|  2 ++
  drivers/net/xen-netback/interface.c | 11 +---
  drivers/net/xen-netback/netback.c   | 52 
 +
  3 files changed, 51 insertions(+), 14 deletions(-)
 
 diff --git a/drivers/net/xen-netback/common.h 
 b/drivers/net/xen-netback/common.h
 index 23deb6a..f3ece12 100644
 --- a/drivers/net/xen-netback/common.h
 +++ b/drivers/net/xen-netback/common.h
 @@ -363,6 +363,8 @@ void xenvif_kick_thread(struct xenvif_queue *queue);
  
  int xenvif_dealloc_kthread(void *data);
  
 +int xenvif_rx_map(struct xenvif_queue *queue, struct sk_buff *skb);
 +
  void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb);
  
  /* Determine whether the needed number of slots (req) are available,
 diff --git a/drivers/net/xen-netback/interface.c 
 b/drivers/net/xen-netback/interface.c
 index 1103568..dfe2b7b 100644
 --- a/drivers/net/xen-netback/interface.c
 +++ b/drivers/net/xen-netback/interface.c
 @@ -109,7 +109,8 @@ static irqreturn_t xenvif_rx_interrupt(int irq, void 
 *dev_id)
  {
   struct xenvif_queue *queue = dev_id;
  
 - xenvif_kick_thread(queue);
 + if (!queue-vif-persistent_grants)
 + xenvif_kick_thread(queue);
  
   return IRQ_HANDLED;
  }
 @@ -168,8 +169,12 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct 
 net_device *dev)
   cb = XENVIF_RX_CB(skb);
   cb-expires = jiffies + vif-drain_timeout;
  
 - xenvif_rx_queue_tail(queue, skb);
 - xenvif_kick_thread(queue);
 + if (!queue-vif-persistent_grants) {
 + xenvif_rx_queue_tail(queue, skb);
 + xenvif_kick_thread(queue);
 + } else if (xenvif_rx_map(queue, skb)) {
 + return NETDEV_TX_BUSY;
 + }
  

We now have two different functions for guest RX, one is xenvif_rx_map,
the other is xenvif_rx_action. They look very similar. Can we only have
one?

   return 

Re: [RFC PATCH 02/13] xen-netback: xenbus feature persistent support

2015-05-19 Thread Wei Liu
On Tue, May 12, 2015 at 07:18:26PM +0200, Joao Martins wrote:
 Checks for feature-persistent that indicates persistent grants
 support. Adds max_persistent_grants module param that specifies the max
 number of persistent grants, which if set to zero disables persistent
 grants.
 
 Signed-off-by: Joao Martins joao.mart...@neclab.eu

This patch needs to be moved later. The feature needs to be implemented
first.

Also you need to patch netif.h to document this new feature. To do this,
you need to patch the master netif.h in Xen tree then sync the change to
Linux. Of course I don't mind if we discuss wording in the Linux
copy then you devise a patch for Xen.

Wei.
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH net-next V4 00/12] net/mlx5: ConnectX-4 100G Ethernet driver

2015-05-19 Thread David Miller
From: Amir Vadai am...@mellanox.com
Date: Tue, 19 May 2015 12:25:12 +0300

 On Sun, May 17, 2015 at 8:05 PM, David Miller da...@davemloft.net wrote:
 From: Amir Vadai am...@mellanox.com
 Date: Sun, 17 May 2015 16:02:11 +0300

 We didn't get a response yet regarding your comment about the irq
 renaming [3].

 Well then, please hold off on resubmissions of this series until you
 do get a response and that issue is firmly resolved.
 Hi,
 
 I don't mean to push you, I only want to understand what is expected
 from me and what are the next steps:
 How will the issue be resolved? Do you plan to answer my question [1]
 from last week, and just too busy right now or something like that?

I have not seen any response to me explaining why it's ok to change
the IRQ name strings in the context where this will occur.

Once you explain that, we can make forward progress, but only at that
point.
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v4 1/2] Renesas Ethernet AVB driver proper

2015-05-19 Thread David Miller
From: Sergei Shtylyov sergei.shtyl...@cogentembedded.com
Date: Sun, 17 May 2015 23:52:42 +0300

 +/* Packet transmit function for Ethernet AVB */
 +static netdev_tx_t ravb_start_xmit(struct sk_buff *skb, struct net_device 
 *ndev)
 +{
 ...
 + spin_lock_irqsave(priv-lock, flags);
 + if (priv-cur_tx[q] - priv-dirty_tx[q]  priv-num_tx_ring[q]) {
 + if (!ravb_tx_free(ndev, q)) {
 + netif_warn(priv, tx_queued, ndev, TX FD exhausted.\n);
 + netif_stop_queue(ndev);
 + spin_unlock_irqrestore(priv-lock, flags);
 + return NETDEV_TX_BUSY;
 + }

This is not an acceptable way to manage your transmit queue state.

When your transmit queue fills up and you cannot accept the next
arbitrary packet into your TX queue, you must stop the queue
before returning from this function.

Therefore, only error conditions should cause NETDEV_TX_BUSY to
ever be returned from this routine.

I know what argument you are going to make to me in response to this
email.  You are going to tell me that you have these two queues, one
for tstamp packets and one for the rest, and therefore that model of
TX queue management can't be made to work.

That's not acceptable.

Instead, you should classify packets to the different queues using
-ndo_select_queue() and therefore have multiple TX queues, which you
can management the fullness of independantly.
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Intel-wired-lan] [PATCH] pci: Limit VPD reads for all Intel Ethernet devices

2015-05-19 Thread Rustad, Mark D
 On May 19, 2015, at 10:50 AM, Alexander Duyck alexander.h.du...@redhat.com 
 wrote:
 
 These two patches are very much related.

They are only related because I saw an opportunity to do this while working on 
the other issue. That is the only relationship.

snip material on the other patch

 Artificially limiting the size of the VPD does nothing but cut off possibly 
 useful data, you would be better of providing all of the data on only the 
 first function than providing only partial data on all functions and adding 
 extra lock overhead.
 This limit only limits the maximum that the OS will read to what is 
 architecturally possible in these devices. Yes, PCIe architecturally 
 provides for the possibility of more, but these devices do not. More 
 repeating data can be read, however slowly, but there is no possibility of 
 useful content beyond the first 1K. If this limit were set to 0x100, which 
 is more in line what the actual usage is, it would be an artificial limit, 
 but at 1K it is not. Oh and it does include devices made by others that 
 incorporate Intel Ethernet silicon, not just Intel-built devices.
 
 As per section 3.4.4 of the X540 datasheet the upper addressable range for 
 the VPD section is 0xFFF which means the upper limit for the hardware is 
 0x1000, not 0x400.

Ok. I have no problem changing it to that. I had been looking at other specs, 
but 0x1000 really is a hard limit.

snip more material mostly relating to the other patch

--
Mark Rustad, Networking Division, Intel Corporation



signature.asc
Description: Message signed with OpenPGP using GPGMail


Re: [PATCH] net-next: ethtool: Added port speed macros.

2015-05-19 Thread David Miller
From: Ben Hutchings b...@decadent.org.uk
Date: Tue, 19 May 2015 18:00:35 +0100

 On Mon, 2015-05-18 at 11:46 -0400, David Miller wrote:
 From: Parav Pandit parav.pan...@avagotech.com
 Date: Mon, 18 May 2015 16:31:47 +0530
 
  Signed-off-by: Parav Pandit parav.pan...@avagotech.com
 
 I thought we had decided that we weren't going to keep adding
 convenience macros for new speeds, and were simply going to use
 the appropriate constants in the future.
 
 Ben?
 
 That's what I thought, but you accepted commit dcf972a334dd ethtool,
 net/mlx4_en: Add 100M, 20G, 56G speeds ethtool reporting support not so
 long ago.

Grumble... my bad.

Given that I should probably accept this patch set as well, hopefully
we won't get very many more of these any time soon.

Sorry for creating this mess.
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] net-next: ethtool: Added port speed macros.

2015-05-19 Thread David Miller
From: Parav Pandit parav.pan...@avagotech.com
Date: Tue, 19 May 2015 10:15:49 +0530

 On Mon, May 18, 2015 at 9:16 PM, David Miller da...@davemloft.net wrote:
 From: Parav Pandit parav.pan...@avagotech.com
 Date: Mon, 18 May 2015 16:31:47 +0530

 Signed-off-by: Parav Pandit parav.pan...@avagotech.com

 I thought we had decided that we weren't going to keep adding
 convenience macros for new speeds, and were simply going to use
 the appropriate constants in the future.

 
 I added them to accommodate any speed/PHY interface specific
 configuration to make use of it.
 If convenience macros are not needed, I will provide the code cleanup
 patch to remove existing convenience macros. With that we will have
 unified code for old and new constants.
 I will anyway let Ben comment on it.

You cannot remove the existing macros, because they are exported to
userspace and you'll break application compilation if you do that.
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: pull-request: mac80211-next 2015-05-19

2015-05-19 Thread David Miller
From: Johannes Berg johan...@sipsolutions.net
Date: Tue, 19 May 2015 09:20:39 +0200

 Hi Dave,
 My last pull request introduced two bugs, and we found two more that
 were present earlier, so I have four bug fixes now.
 
 Please pull.

Pulled, thanks Johannes.
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH net v3 2/2] ipv6: fix ECMP route replacement

2015-05-19 Thread David Miller
From: Michal Kubecek mkube...@suse.cz
Date: Mon, 18 May 2015 20:54:00 +0200 (CEST)

 When replacing an IPv6 multipath route with ip route replace, i.e.
 NLM_F_CREATE | NLM_F_REPLACE, fib6_add_rt2node() replaces only first
 matching route without fixing its siblings, resulting in corrupted
 siblings linked list; removing one of the siblings can then end in an
 infinite loop.
 
 IPv6 ECMP implementation is a bit different from IPv4 so that route
 replacement cannot work in exactly the same way. This should be a
 reasonable approximation:
 
 1. If the new route is ECMP-able and there is a matching ECMP-able one
 already, replace it and all its siblings (if any).
 
 2. If the new route is ECMP-able and no matching ECMP-able route exists,
 replace first matching non-ECMP-able (if any) or just add the new one.
 
 3. If the new route is not ECMP-able, replace first matching
 non-ECMP-able route (if any) or add the new route.
 
 We also need to remove the NLM_F_REPLACE flag after replacing old
 route(s) by first nexthop of an ECMP route so that each subsequent
 nexthop does not replace previous one.
 
 Fixes: 51ebd3181572 (ipv6: add support of equal cost multipath (ECMP))
 Signed-off-by: Michal Kubecek mkube...@suse.cz

Can I get some reviews please from interested parties?

As far as I can tell, this faithfully implements the policy we decided
upon at the end of the previous thread for this patch.  But I could be
wrong :-)

Thanks.
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH net-next 3/4] rocker: do not make neighbour entry changes when preparing transactions

2015-05-19 Thread Simon Horman
On Tue, May 19, 2015 at 09:11:29AM -0700, Scott Feldman wrote:
 On Mon, May 18, 2015 at 11:24 PM, Simon Horman
 simon.hor...@netronome.com wrote:
  rocker_port_ipv4_nh() and in turn rocker_port_ipv4_neigh() may be
  be called with trans == SWITCHDEV_TRANS_PREPARE and then
  trans == SWITCHDEV_TRANS_COMMIT from switchdev_port_obj_set() via
  fib_table_insert().
 
  Although I have been unable to find a scenario where a bug manifests
  I do see some incorrect behaviour when adding a fib entry with a gateway.
 
  The first time that rocker_port_ipv4_nh() is called, with
  trans == SWITCHDEV_TRANS_PREPARE, _rocker_neigh_add()
  adds a new entry to the neigh table.
 
  And the second time  rocker_port_ipv4_nh() is called, with
  trans == SWITCHDEV_TRANS_COMMIT, that entry is found and
  _rocker_neigh_update() is called.
 
  Assuming the transaction runs to completion this appears to be harmless.
  I suspect it would be not correct if the transaction was aborted.
 
  This problem does not appear to affect deletion as my analysis is
  that deletion is always performed with trans == SWITCHDEV_TRANS_NONE.
 
  For completeness _rocker_neigh_{add,del,prepare} are updated not
  to manipulate fib table entries if trans == SWITCHDEV_TRANS_PREPARE.
 
  Fixes: c4f20321d968 (rocker: support prepare-commit transaction model)
  Signed-off-by: Simon Horman simon.hor...@netronome.com
  ---
   drivers/net/ethernet/rocker/rocker.c | 16 
   1 file changed, 12 insertions(+), 4 deletions(-)
 
 
 [cut]
 
  @@ -2928,8 +2933,11 @@ static void _rocker_neigh_del(struct rocker_port 
  *rocker_port,
   }
 
   static void _rocker_neigh_update(struct rocker_neigh_tbl_entry *entry,
  +enum switchdev_trans trans,
   u8 *eth_dst, bool ttl_check)
   {
  +   if (trans == SWITCHDEV_TRANS_PREPARE)
  +   return;
 
 On this one, let move the trans==PREPARE test to the else branch, in
 case someone down the line is dependent on eth_dst and ttl_check being
 copied over.
 
 Everything else in the patch looks good.

Thanks Scott, I'll adjust things as you suggest.
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH v3 for-next 05/13] IB/cm: Reference count ib_cm_ids

2015-05-19 Thread Hefty, Sean
 I find Haggai's argument compelling, it is a very small amount of code
 and data to add a sharing count, and a very large amount to duplicate
 the whole service id map into cma.c.

I get wanting to share the listen list, but we end up with this:

 drivers/infiniband/core/cm.c   | 129 +-

that's more code than what the ib_cm module uses to track listens, and the 
result is a solution that ends up being split in a weird fashion between the 
ib_cm, iw_cm (TBD), and rdma_cm.

I wonder if the existing ib_cm interface is even what we want.  Currently, the 
rdma_cm pushes the private data (i.e. IP address) comparison into the ib_cm.  
This is only used by the rdma_cm.  Should that instead be moved out of the 
ib_cm and handled in the rdma_cm?  And then update the ib_cm to support 
multiple listens on the same service id.

For example, the ib_cm could simply queue all listen requests on the same 
service id.  When a REQ arrives, it just calls each listener back until one 
'claims' the REQ.  The destruction of a listener could then be synced with the 
callback.  From what I can tell, the current proposal requires that the ib_cm 
user be prepared to receive a REQ callback for a listen that it has already 
destroyed.  If needed, a flag can be added to the ib_cm_listen to indicate if 
the service id is shared/exclusive.

- Sean 
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Default XPS settings in mlx4 driver

2015-05-19 Thread Tom Herbert
Hi Ido,

I'm looking at your patch net/mlx4_en: Configure the XPS queue
mapping on driver load. We're testing a 40 CPU system and it looks
like XPS is being configured by default with forty queues 0-39 where
each xps_cpus is (1  i). The problem is that this does not easily
align with RX queues and the TX completion interrupt for TX queue z
happens in interrupt for (z % num_rx_queues). So it looks like, the
default XPS doesn't respect the RX interrupt affinities, so we have TX
queues that are bound to a CPU on one numa node but have their TX
completions happen on another which is suboptimal. It would be nice if
the default XPS could take where the completion interrupt happens into
account somehow.

Looks like there's a few other drivers that are setting XPS, they
might have similar issue.

Thanks,
Tom
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html



Re: Default XPS settings in mlx4 driver

2015-05-19 Thread David Miller
From: Tom Herbert t...@herbertland.com
Date: Tue, 19 May 2015 13:38:35 -0700

 Looks like there's a few other drivers that are setting XPS, they
 might have similar issue.

We really need generic infrastructure for such configuration attempts
so that the default policy across all such drivers is consistent.
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] net/ipv6/udp: Fix ipv6 multicast socket filter regression

2015-05-19 Thread David Miller
From: Henning Rogge hro...@gmail.com
Date: Mon, 18 May 2015 21:08:49 +0200

 Commit 5cf3d46192fc (udp: Simplify__udp*_lib_mcast_deliver)
 simplified the filter for incoming IPv6 multicast but removed
 the check of the local socket address and the UDP destination
 address.
 
 This patch restores the filter to prevent sockets bound to a IPv6
 multicast IP to receive other UDP traffic link unicast.
 
 Signed-off-by: Henning Rogge hro...@gmail.com
 Fixes: 5cf3d46192fc (udp: Simplify__udp*_lib_mcast_deliver)

Applied and queued up for -stable, thanks.
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH net-next v3] tcp: add rfc3168, section 6.1.1.1. fallback

2015-05-19 Thread David Miller
From: Daniel Borkmann dan...@iogearbox.net
Date: Tue, 19 May 2015 21:33:42 +0200

 This work as a follow-up of commit f7b3bec6f516 (net: allow setting ecn
 via routing table) and adds RFC3168 section 6.1.1.1. fallback for outgoing
 ECN connections. In other words, this work adds a retry with a non-ECN
 setup SYN packet, as suggested from the RFC on the first timeout:
 
   [...] A host that receives no reply to an ECN-setup SYN within the
   normal SYN retransmission timeout interval MAY resend the SYN and
   any subsequent SYN retransmissions with CWR and ECE cleared. [...]
 ...
 Reference: https://www.ietf.org/proceedings/92/slides/slides-92-iccrg-1.pdf
 Reference: https://www.ietf.org/proceedings/89/slides/slides-89-tsvarea-1.pdf
 Signed-off-by: Daniel Borkmann dan...@iogearbox.net
 Signed-off-by: Florian Westphal f...@strlen.de
 Signed-off-by: Mirja Kühlewind mirja.kuehlew...@tik.ee.ethz.ch
 Signed-off-by: Brian Trammell tramm...@tik.ee.ethz.ch
 Cc: Eric Dumazet eduma...@google.com
 Cc: Dave Täht dave.t...@gmail.com
 ---
  v2 - v3:
   - Very sorry. Typo happened in Dave's name since v1, getting it right
 this time, no bad intentions. ;)
  v1 - v2:
   - Added suggestion from Eric to let ecn_flags be cleared eventually in
 tcp_ecn_rcv_synack(), thanks!
   - Rest as is.

Applied, thanks everyone.
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] ip: remove unused function prototype

2015-05-19 Thread David Miller
From: Andy Zhou az...@nicira.com
Date: Tue, 19 May 2015 12:41:47 -0700

 ip_do_nat() function was removed prior to kernel 3.4. Remove the
 unnecessary function prototype as well.
 
 Reported-by: Florian Westphal f...@strlen.de
 Signed-off-by: Andy Zhou az...@nicira.com

Applied, thank you.
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH net-next] tcp: Return error instead of partial read for saved syn headers

2015-05-19 Thread David Miller
From: Eric B Munson emun...@akamai.com
Date: Mon, 18 May 2015 14:35:58 -0400

 Currently the getsockopt() requesting the cached contents of the syn
 packet headers will fail silently if the caller uses a buffer that is
 too small to contain the requested data.  Rather than fail silently and
 discard the headers, getsockopt() should return an error and report the
 required size to hold the data.
 
 Signed-off-by: Eric B Munson emun...@akamai.com

Applied, thanks Eric.
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] be2net: make hwmon interface optional

2015-05-19 Thread David Miller
From: Arnd Bergmann a...@arndb.de
Date: Mon, 18 May 2015 23:06:45 +0200

 The hwmon interface in the be2net driver causes a link error when
 be2net is built-in while the hwmon subsystem is a loadable module:
 
 drivers/built-in.o: In function `be_probe':
 drivers/net/ethernet/emulex/benet/be_main.c:5761: undefined reference to 
 `devm_hwmon_device_register_with_groups'
 
 This adds a new Kconfig symbol, following the example of multiple
 other drivers that have the same problem. The new CONFIG_BE2NET_HWMON
 will not be available when (BE2NET=y  HWMON=m) to avoid this
 problem.
 
 We have to also mark be_hwmon_show_temp as 'static' to ensure the
 compiler can optimize out all the unused code.
 
 Signed-off-by: Arnd Bergmann a...@arndb.de
 Fixes: 29e9122b3a (be2net: Export board temperature using hwmon-sysfs 
 interface.)

Applied, thanks.
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH net 1/2] tcp: only undo on partial ACKs in CA_Loss

2015-05-19 Thread David Miller
From: Yuchung Cheng ych...@google.com
Date: Mon, 18 May 2015 12:31:44 -0700

 Undo based on TCP timestamps should only happen on ACKs that advance
 SND_UNA, according to the Eifel algorithm in RFC 3522:
 
 Section 3.2:
 
   (4) If the value of the Timestamp Echo Reply field of the
   acceptable ACK's Timestamps option is smaller than the
   value of RetransmitTS, then proceed to step (5),
 
 Section Terminology:
We use the term 'acceptable ACK' as defined in [RFC793].  That is an
ACK that acknowledges previously unacknowledged data.
 
 This is because upon receiving an out-of-order packet, the receiver
 returns the last timestamp that advances RCV_NXT, not the current
 timestamp of the packet in the DUPACK. Without checking the flag,
 the DUPACK will cause tcp_packet_delayed() to return true and
 tcp_try_undo_loss() will revert cwnd reduction.
 
 Note that we check the condition in CA_Recovery already by only
 calling tcp_try_undo_partial() if FLAG_SND_UNA_ADVANCED is set or
 tcp_try_undo_recovery() if snd_una crosses high_seq.
 
 Signed-off-by: Yuchung Cheng ych...@google.com
 Signed-off-by: Neal Cardwell ncardw...@google.com

Applied.
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH net 2/2] tcp: don't over-send F-RTO probes

2015-05-19 Thread David Miller
From: Yuchung Cheng ych...@google.com
Date: Mon, 18 May 2015 12:31:45 -0700

 After sending the new data packets to probe (step 2), F-RTO may
 incorrectly send more probes if the next ACK advances SND_UNA and
 does not sack new packet. However F-RTO RFC 5682 probes at most
 once. This bug may cause sender to always send new data instead of
 repairing holes, inducing longer HoL blocking on the receiver for
 the application.
 
 Signed-off-by: Yuchung Cheng ych...@google.com
 Signed-off-by: Neal Cardwell ncardw...@google.com

Applied.
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] irda: irda-usb: use msecs_to_jiffies for conversions

2015-05-19 Thread David Miller
From: Joe Perches j...@perches.com
Date: Tue, 19 May 2015 07:32:15 -0700

 On Tue, 2015-05-19 at 12:51 +0200, Nicholas Mc Guire wrote:
 Converting milliseconds to jiffies by val * HZ / 1000 is technically
 is not a clean solution as it does not handle all corner cases correctly.
 By changing the conversion to use msecs_to_jiffies(val) conversion is
 correct in all cases.
 
 in the current code: 
   mod_timer(self-rx_defer_timer, jiffies + (10 * HZ / 1000));
 for HZ  100 (e.g. CONFIG_HZ == 64|32 in alpha) this effectively results 
 in no delay at all.
 []
 diff --git a/drivers/net/irda/irda-usb.c b/drivers/net/irda/irda-usb.c
 []
 @@ -848,7 +848,9 @@ static void irda_usb_receive(struct urb *urb)
   * Jean II */
  self-rx_defer_timer.function = irda_usb_rx_defer_expired;
  self-rx_defer_timer.data = (unsigned long) urb;
 -mod_timer(self-rx_defer_timer, jiffies + (10 * HZ / 1000));
 +mod_timer(self-rx_defer_timer,
 +  jiffies + (msecs_to_jiffies(10)));
 
 The unnecessary () around msecs_to_jiffies could be removed.

Agreed.
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: pull-request: mac80211 2015-05-19

2015-05-19 Thread David Miller
From: Johannes Berg johan...@sipsolutions.net
Date: Tue, 19 May 2015 09:22:34 +0200

 And one more - for the current cycle (and stable as well).
 
 Let me know if there's any problem.

Also pulled, thanks Johannes.
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v4 for-next 05/12] IB/cm: Share listening CM IDs

2015-05-19 Thread Jason Gunthorpe
On Tue, May 19, 2015 at 12:35:45PM -0600, Jason Gunthorpe wrote:
 On Sun, May 17, 2015 at 08:51:01AM +0300, Haggai Eran wrote:
  @@ -212,6 +212,8 @@ struct cm_id_private {
  spinlock_t lock;/* Do not acquire inside cm.lock */
  struct completion comp;
  atomic_t refcount;
  +   /* Number of clients sharing this ib_cm_id. Only valid for listeners. */
  +   atomic_t sharecount;
 
 No need for this atomic, hold the lock
 
 The use of the atomic looks racy:
 
  +   if (!atomic_dec_and_test(cm_id_priv-sharecount)) {
  +   /* The id is still shared. */
  +   return;
  +   }
 
 Might race with this:
 
  +   if (atomic_inc_return(cm_id_priv-sharecount) == 1) {
  +   /* This ID is already being destroyed */
  +   atomic_dec(cm_id_priv-sharecount);
  +   goto new_id;
  +   }
  +
 
 Resulting in use-after-free of cm_id_priv-sharecount

Actually, there is something else odd here.. I mentioned the above
because there wasn't obvious ref'ing on the cm_id_priv. Looking closer
the cm.lock should prevent use-after-free, but there is still no ref.

The more I look at this, the more I think it is sketchy. Don't try and
merge sharecount and refcount together, after cm_find_listen is called
you have to increment the refcount before dropping cm.lock.

Decrement the refcount when destroying a shared listen.

I also don't see how the 'goto new_id' can work, if cm_find_listen
succeeds then __ib_cm_listen is guarenteed to fail.

Fix the locking to make that impossible, associate sharecount with the
cm.lock and, rework how cm_destroy_id grabs the cm_id_priv-lock spinlock:

case IB_CM_LISTEN:
spin_lock_irq(cm.lock);
if (cm_id_priv-sharecount != 0) {
 cm_id_prv-sharecount--;
 // paired with in in ib_cm_id_create_and_listen
 atomic_dec(cm_id_priv-refcount);
 spin_unlock_irq(cm.lock);
 return;
}
rb_erase(cm_id_priv-service_node, cm.listen_service_table);
spin_unlock_irq(cm.lock);

spin_lock_irq(cm_id_priv-lock);
cm_id-state = IB_CM_IDLE;
spin_unlock_irq(cm_id_priv-lock);
break;

Now that condition is eliminated, the unneeded atomic is gone, and
refcount still acts like a proper kref should.

Jason
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH net v2] switchdev: don't abort hardware ipv4 fib offload on failure to program fib entry in hardware

2015-05-19 Thread David Miller
From: Andy Gospodarek go...@cumulusnetworks.com
Date: Tue, 19 May 2015 15:47:32 -0400

 Are you actually saying that if users complain loudly enough about
 the current behavior (not the change Roopa has proposed) that you
 would be open to considering a change the current behavior?

I am saying that we have a contract with users not to break existing
behavior.  Full stop.
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCHv2 net-next 0/2] Remove dead code and replace byte-oder functions

2015-05-19 Thread David Miller
From: Hariprasad Shenai haripra...@chelsio.com
Date: Tue, 19 May 2015 18:20:42 +0530

 This series removes dead fn t4_read_edc and t4_read_mc, also replaces
 ntoh{s,l} and hton{s,l} calls with the generic byteorder.
 
 PATCH 2/2 was sent as a single PATCH, but had some byte-ordering issues
 in t4_read_edc and t4_read_mc function. Found that t4_read_edc and
 t4_read_mc is unused, so PATCH 1/2 is added to remove it.
 
 This patch series is created against net-next tree and includes
 patches on cxgb4 driver.
 
 We have included all the maintainers of respective drivers. Kindly review
 the change and let us know in case of any review comments.

Series applied, thanks.
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH net-next 0/4] sfc: add MCDI tracing

2015-05-19 Thread David Miller
From: Edward Cree ec...@solarflare.com
Date: Tue, 19 May 2015 18:33:55 +0100

 This patchset adds support for logging Management-Controller-to-Driver 
 Interface interactions between the sfc driver and a bound device, to aid in 
 debugging.
 Solarflare has a tool to decode the resulting traces and will look to 
 open-source this if there is any external interest, but the protocol is 
 already detailed in drivers/net/ethernet/sfc/mcdi_pcol.h.

Please properly format your email message body contents to ~80
columns.

Thanks.
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Kernel crash while using tc script

2015-05-19 Thread Daniel Borkmann

On 05/19/2015 10:11 PM, Vijay Subramanian wrote:

Hi,

It seems latest net-next kernel crashes while unloading modules.
Please see simple script below to reproduce the crash.

===

#!/bin/bash

while true; do

# modules will be loaded automatically

tc qdisc add dev eth1 root handle 1: prio

tc filter add dev eth1 parent 1: u32 match u32 0 0  flowid 1


tc qdisc del dev eth1 root

rmmod cls_u32

rmmod sch_prio


done

=

It seems there is some refcounting or locking issue issue. I am unable
to easily post the dump but sometimes it points to crashes in various
functions in prio_class_ops  (sch_prio.c), such as prio_walk(),
prio_dump_class etc. Probably, sch_prio call back functions are
invoked when they should not be.

I bisected this down to following commit:

commit 78fd1d0ab072d4d9b5f0b7c14a1516665170b565
Author: Thomas Graf tg...@suug.ch
Date:   Tue Oct 21 22:05:38 2014 +0200
netlink: Re-add locking to netlink_lookup() and seq walker

If there are suggestions for me to try or you need more info, let me know.


Hmm, seems rather like the synchronize_net() removal in netlink_release()
must have uncovered a bug elsewhere.
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Intel-wired-lan] [PATCH] pci: Limit VPD reads for all Intel Ethernet devices

2015-05-19 Thread Rustad, Mark D
 On May 19, 2015, at 2:17 PM, Alexander Duyck alexander.du...@gmail.com 
 wrote:
 
 Any chance you could point me toward the software in question?  Just 
 wondering because it seems like what you are fixing with this is an 
 implementation issue in the application since you really shouldn't be 
 accessing areas outside the scope of the VPD data structure, and doing so is 
 undefined in terms of what happens if you do.

I don't have it, but if you dump VPD via sysfs you will see it comes out as 32k 
in size. The kernel just blindly provides access to the full 32K space provided 
by the spec. I'm sure that we agree that the kernel should not go parse it and 
find the actual size. If it is read via stdio, say fread, the read access would 
be whatever buffer size it chooses to use.

If you looked at the quirks, you might have noticed that Broadcom limited the 
VPD access for some devices for functional reasons. That is what gave me the 
idea for limiting access to what was possibly there. With the existing Intel 
Ethernet quirk, it seemed like a simple thing to do.

--
Mark Rustad, Networking Division, Intel Corporation



signature.asc
Description: Message signed with OpenPGP using GPGMail


Re: [PATCH] net-next: ethtool: Added port speed macros.

2015-05-19 Thread David Miller
From: David Miller da...@davemloft.net
Date: Tue, 19 May 2015 14:52:02 -0400 (EDT)

 Given that I should probably accept this patch set as well, hopefully
 we won't get very many more of these any time soon.

And that's what I've just done.

Applied to net-next, thanks.
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH net v2] switchdev: don't abort hardware ipv4 fib offload on failure to program fib entry in hardware

2015-05-19 Thread David Miller
From: roopa ro...@cumulusnetworks.com
Date: Mon, 18 May 2015 22:58:54 -0700

 from where I see, with the limitations on these boxes, this requires
 every app, every `ip route` cmd running on the box to explicitly
 specify offload when running on this hardware.

Users know what they are doing, and will ask for a policy change.

The default should be what happens right now.
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH net-next 4/4] rocker: make rocker_port_internal_vlan_id_{get,put}() non-transactional

2015-05-19 Thread Scott Feldman
On Mon, May 18, 2015 at 11:24 PM, Simon Horman
simon.hor...@netronome.com wrote:
 The motivation for this is that rocker_port_internal_vlan_id_{get,put} appear
 to only partially implement the transaction model: memory allocation
 and freeing is transactional, but hash and bitmap manipulation is not.

 The latter could be fixed, however, as it is not currently exercised
 due to trans always being SWITCHDEV_TRANS_NONE it seems cleaner
 to make rocker_port_internal_vlan_id_get non-transactional.

 Found by inspection.
 I do not believe that this change should have any run-time effect.

 Fixes: c4f20321d968 (rocker: support prepare-commit transaction model)
 Signed-off-by: Simon Horman simon.hor...@netronome.com

This works for now.  Once we switch over to using bridge's PVID, we
don't need any of this internal vlan stuff anyway, so eventually most
(all?) of this internal vlan code goes away.

Acked-by: Scott Feldman sfel...@gmail.com
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Intel-wired-lan] [PATCH] pci: Use a bus-global mutex to protect VPD operations

2015-05-19 Thread Alexander Duyck



On 05/18/2015 05:00 PM, Mark D Rustad wrote:

Some devices have a problem with concurrent VPD access to different
functions of the same physical device, so move the protecting mutex
from the pci_vpd structure to the pci_bus structure. There are a
number of reports on support sites for a variety of devices from
various vendors getting the vpd r/w failed message. This is likely
to at least fix some of them. Thanks to Shannon Nelson for helping
to come up with this approach.

Signed-off-by: Mark Rustad mark.d.rus...@intel.com
Acked-by: Shannon Nelson shannon.nel...@intel.com
Acked-by: Jeff Kirsher jeffrey.t.kirs...@intel.com


Instead of moving the mutex lock around you would be much better served 
by simply removing the duplicate VPD entries for a given device in a 
PCIe quirk.  Then you can save yourself the extra pain and effort of 
having to deal with serialized VPD accesses for a multifunction device.


The logic for the quirk should be fairly simple.
  1.  Scan for any other devices with VPD that share the same bus and 
device number.

  2.  If bdf is equal to us keep searching.
  3.  If bdf is less than our bdf we release our VPD area and set VPD 
pointer to NULL.


- Alex

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next 4/4] sfc: Initialise MCDI buffers to 0 on declaration.

2015-05-19 Thread Edward Cree
From: Jon Cooper jcoo...@solarflare.com

Signed-off-by: Edward Cree ec...@solarflare.com
---
 drivers/net/ethernet/sfc/ef10.c | 24 +---
 drivers/net/ethernet/sfc/mcdi.c |  8 
 drivers/net/ethernet/sfc/mcdi.h |  8 +---
 drivers/net/ethernet/sfc/ptp.c  |  6 +++---
 4 files changed, 25 insertions(+), 21 deletions(-)

diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index 2e8e5de..ed0ec88 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -350,7 +350,7 @@ static int efx_ef10_probe_vf(struct efx_nic *efx 
__attribute__ ((unused)))
 
 static int efx_ef10_free_vis(struct efx_nic *efx)
 {
-   MCDI_DECLARE_BUF_OUT_OR_ERR(outbuf, 0);
+   MCDI_DECLARE_BUF_ERR(outbuf);
size_t outlen;
int rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FREE_VIS, NULL, 0,
outbuf, sizeof(outbuf), outlen);
@@ -421,7 +421,7 @@ static int efx_ef10_alloc_piobufs(struct efx_nic *efx, 
unsigned int n)
 static int efx_ef10_link_piobufs(struct efx_nic *efx)
 {
struct efx_ef10_nic_data *nic_data = efx-nic_data;
-   MCDI_DECLARE_BUF(inbuf,
+   _MCDI_DECLARE_BUF(inbuf,
 max(MC_CMD_LINK_PIOBUF_IN_LEN,
 MC_CMD_UNLINK_PIOBUF_IN_LEN));
struct efx_channel *channel;
@@ -432,6 +432,8 @@ static int efx_ef10_link_piobufs(struct efx_nic *efx)
BUILD_BUG_ON(MC_CMD_LINK_PIOBUF_OUT_LEN != 0);
BUILD_BUG_ON(MC_CMD_UNLINK_PIOBUF_OUT_LEN != 0);
 
+   memset(inbuf, 0, sizeof(inbuf));
+
/* Link a buffer to each VI in the write-combining mapping */
for (index = 0; index  nic_data-n_piobufs; ++index) {
MCDI_SET_DWORD(inbuf, LINK_PIOBUF_IN_PIOBUF_HANDLE,
@@ -1315,17 +1317,17 @@ static void efx_ef10_tx_init(struct efx_tx_queue 
*tx_queue)
 {
MCDI_DECLARE_BUF(inbuf, MC_CMD_INIT_TXQ_IN_LEN(EFX_MAX_DMAQ_SIZE * 8 /
   EFX_BUF_SIZE));
-   MCDI_DECLARE_BUF(outbuf, MC_CMD_INIT_TXQ_OUT_LEN);
bool csum_offload = tx_queue-queue  EFX_TXQ_TYPE_OFFLOAD;
size_t entries = tx_queue-txd.buf.len / EFX_BUF_SIZE;
struct efx_channel *channel = tx_queue-channel;
struct efx_nic *efx = tx_queue-efx;
struct efx_ef10_nic_data *nic_data = efx-nic_data;
-   size_t inlen, outlen;
+   size_t inlen;
dma_addr_t dma_addr;
efx_qword_t *txd;
int rc;
int i;
+   BUILD_BUG_ON(MC_CMD_INIT_TXQ_OUT_LEN != 0);
 
MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_SIZE, tx_queue-ptr_mask + 1);
MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_TARGET_EVQ, channel-channel);
@@ -1350,7 +1352,7 @@ static void efx_ef10_tx_init(struct efx_tx_queue 
*tx_queue)
inlen = MC_CMD_INIT_TXQ_IN_LEN(entries);
 
rc = efx_mcdi_rpc(efx, MC_CMD_INIT_TXQ, inbuf, inlen,
- outbuf, sizeof(outbuf), outlen);
+ NULL, 0, NULL);
if (rc)
goto fail;
 
@@ -1383,7 +1385,7 @@ fail:
 static void efx_ef10_tx_fini(struct efx_tx_queue *tx_queue)
 {
MCDI_DECLARE_BUF(inbuf, MC_CMD_FINI_TXQ_IN_LEN);
-   MCDI_DECLARE_BUF(outbuf, MC_CMD_FINI_TXQ_OUT_LEN);
+   MCDI_DECLARE_BUF_ERR(outbuf);
struct efx_nic *efx = tx_queue-efx;
size_t outlen;
int rc;
@@ -1690,15 +1692,15 @@ static void efx_ef10_rx_init(struct efx_rx_queue 
*rx_queue)
MCDI_DECLARE_BUF(inbuf,
 MC_CMD_INIT_RXQ_IN_LEN(EFX_MAX_DMAQ_SIZE * 8 /
EFX_BUF_SIZE));
-   MCDI_DECLARE_BUF(outbuf, MC_CMD_INIT_RXQ_OUT_LEN);
struct efx_channel *channel = efx_rx_queue_channel(rx_queue);
size_t entries = rx_queue-rxd.buf.len / EFX_BUF_SIZE;
struct efx_nic *efx = rx_queue-efx;
struct efx_ef10_nic_data *nic_data = efx-nic_data;
-   size_t inlen, outlen;
+   size_t inlen;
dma_addr_t dma_addr;
int rc;
int i;
+   BUILD_BUG_ON(MC_CMD_INIT_RXQ_OUT_LEN != 0);
 
rx_queue-scatter_n = 0;
rx_queue-scatter_len = 0;
@@ -1727,7 +1729,7 @@ static void efx_ef10_rx_init(struct efx_rx_queue 
*rx_queue)
inlen = MC_CMD_INIT_RXQ_IN_LEN(entries);
 
rc = efx_mcdi_rpc(efx, MC_CMD_INIT_RXQ, inbuf, inlen,
- outbuf, sizeof(outbuf), outlen);
+ NULL, 0, NULL);
if (rc)
netdev_WARN(efx-net_dev, failed to initialise RXQ %d\n,
efx_rx_queue_index(rx_queue));
@@ -1736,7 +1738,7 @@ static void efx_ef10_rx_init(struct efx_rx_queue 
*rx_queue)
 static void efx_ef10_rx_fini(struct efx_rx_queue *rx_queue)
 {
MCDI_DECLARE_BUF(inbuf, MC_CMD_FINI_RXQ_IN_LEN);
-   MCDI_DECLARE_BUF(outbuf, MC_CMD_FINI_RXQ_OUT_LEN);
+   MCDI_DECLARE_BUF_ERR(outbuf);
struct efx_nic *efx = rx_queue-efx;
size_t outlen;
int rc;
@@ -1898,7