Re: ipv6 sysctl

2017-03-02 Thread Ani Sinha
Hey netdev guys,

Any feedback on this? :-)

thanks
ani


On Tue, Feb 28, 2017 at 11:22 AM, Ani Sinha  wrote:
> Hi guys,
>
> Commit a79ca223e029 ('ipv6: fix bad free of addrconf_init_net')
> introduced in linux 3.9 tries to fix an issue involving free-ing
> statically allocated memory. Additionally, it subtly changes behavior
> of how certain ipv6 sysctl values are inherited from the default net
> namespace to the child namespaces.   Before a79ca223e029, the default
> namespace would directly modify the values in statically allocated
> struct ipv6_devconf for example and all child namespaces would inherit
> these values upon creation (their own private copy was initialized
> using the statically allocated ipv6_devconf). After this change, any
> sysctl value changes in the default net namespace are not seen by any new
> child namespaces that are created afterwards. This is because all
> network namespaces, including the default namespace, have their own
> private copy of struct ipv6_devconf which is initialized with certain
> fixed values. This is in contrast to what we have in ipv4 where child
> namespaces continue to inherit values from the default namespace upon
> creation.
>
> I see that there was a previous discussion here :
> https://patchwork.kernel.org/patch/4639391/
>
> Was the above inconsistency between ipv4 and ipv6 sysctl
> initialization intentional or was it an unintended effect of the above
> change ? It would be nice to have a symmetric behavior between ipv4
> and ipv6. Please share your thoughts on this.
>
> thanks,
> ani


[PATCH 1/1] rds: remove unnecessary returned value check

2017-03-02 Thread Zhu Yanjun
The function rds_trans_register always returns 0. As such, it is not
necessary to check the returned value.

Cc: Joe Jin 
Cc: Junxiao Bi 
Signed-off-by: Zhu Yanjun 
---
 net/rds/ib.c| 6 +-
 net/rds/rds.h   | 2 +-
 net/rds/tcp.c   | 6 +-
 net/rds/transport.c | 4 +---
 4 files changed, 4 insertions(+), 14 deletions(-)

diff --git a/net/rds/ib.c b/net/rds/ib.c
index 0f557b2..7a64c8d 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -438,16 +438,12 @@ int rds_ib_init(void)
if (ret)
goto out_sysctl;
 
-   ret = rds_trans_register(_ib_transport);
-   if (ret)
-   goto out_recv;
+   rds_trans_register(_ib_transport);
 
rds_info_register_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info);
 
goto out;
 
-out_recv:
-   rds_ib_recv_exit();
 out_sysctl:
rds_ib_sysctl_exit();
 out_ibreg:
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 966d2ee..39518ef 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -903,7 +903,7 @@ void rds_connect_path_complete(struct rds_conn_path *conn, 
int curr);
 void rds_connect_complete(struct rds_connection *conn);
 
 /* transport.c */
-int rds_trans_register(struct rds_transport *trans);
+void rds_trans_register(struct rds_transport *trans);
 void rds_trans_unregister(struct rds_transport *trans);
 struct rds_transport *rds_trans_get_preferred(struct net *net, __be32 addr);
 void rds_trans_put(struct rds_transport *trans);
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 5438f67..a973d3b 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -652,16 +652,12 @@ static int rds_tcp_init(void)
if (ret)
goto out_pernet;
 
-   ret = rds_trans_register(_tcp_transport);
-   if (ret)
-   goto out_recv;
+   rds_trans_register(_tcp_transport);
 
rds_info_register_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info);
 
goto out;
 
-out_recv:
-   rds_tcp_recv_exit();
 out_pernet:
unregister_pernet_subsys(_tcp_net_ops);
 out_notifier:
diff --git a/net/rds/transport.c b/net/rds/transport.c
index 2ffd3e3..0b188dd 100644
--- a/net/rds/transport.c
+++ b/net/rds/transport.c
@@ -40,7 +40,7 @@
 static struct rds_transport *transports[RDS_TRANS_COUNT];
 static DECLARE_RWSEM(rds_trans_sem);
 
-int rds_trans_register(struct rds_transport *trans)
+void rds_trans_register(struct rds_transport *trans)
 {
BUG_ON(strlen(trans->t_name) + 1 > TRANSNAMSIZ);
 
@@ -55,8 +55,6 @@ int rds_trans_register(struct rds_transport *trans)
}
 
up_write(_trans_sem);
-
-   return 0;
 }
 EXPORT_SYMBOL_GPL(rds_trans_register);
 
-- 
2.7.4



BUG due to "xen-netback: protect resource cleaning on XenBus disconnect"

2017-03-02 Thread Juergen Gross
With commits f16f1df65 and 9a6cdf52b we get in our Xen testing:

[  174.512861] switch: port 2(vif3.0) entered disabled state
[  174.522735] BUG: sleeping function called from invalid context at
/home/build/linux-linus/mm/vmalloc.c:1441
[  174.523451] in_atomic(): 1, irqs_disabled(): 0, pid: 28, name: xenwatch
[  174.524131] CPU: 1 PID: 28 Comm: xenwatch Tainted: GW
4.10.0upstream-11073-g4977ab6-dirty #1
[  174.524819] Hardware name: MSI MS-7680/H61M-P23 (MS-7680), BIOS V17.0
03/14/2011
[  174.525517] Call Trace:
[  174.526217]  show_stack+0x23/0x60
[  174.526899]  dump_stack+0x5b/0x88
[  174.527562]  ___might_sleep+0xde/0x130
[  174.528208]  __might_sleep+0x35/0xa0
[  174.528840]  ? _raw_spin_unlock_irqrestore+0x13/0x20
[  174.529463]  ? __wake_up+0x40/0x50
[  174.530089]  remove_vm_area+0x20/0x90
[  174.530724]  __vunmap+0x1d/0xc0
[  174.531346]  ? delete_object_full+0x13/0x20
[  174.531973]  vfree+0x40/0x80
[  174.532594]  set_backend_state+0x18a/0xa90
[  174.533221]  ? dwc_scan_descriptors+0x24d/0x430
[  174.533850]  ? kfree+0x5b/0xc0
[  174.534476]  ? xenbus_read+0x3d/0x50
[  174.535101]  ? xenbus_read+0x3d/0x50
[  174.535718]  ? xenbus_gather+0x31/0x90
[  174.536332]  ? ___might_sleep+0xf6/0x130
[  174.536945]  frontend_changed+0x6b/0xd0
[  174.537565]  xenbus_otherend_changed+0x7d/0x80
[  174.538185]  frontend_changed+0x12/0x20
[  174.538803]  xenwatch_thread+0x74/0x110
[  174.539417]  ? woken_wake_function+0x20/0x20
[  174.540049]  kthread+0xe5/0x120
[  174.540663]  ? xenbus_printf+0x50/0x50
[  174.541278]  ? __kthread_init_worker+0x40/0x40
[  174.541898]  ret_from_fork+0x21/0x2c
[  174.548635] switch: port 2(vif3.0) entered disabled state

I believe calling vfree() when holding a spin_lock isn't a good idea.

Boris, this is the dumpdata failure:
FAILURE 4.10.0upstream-11073-g4977ab6-dirty(x86_64)
4.10.0upstream-11073-g4977ab6-dirty(i386)\: 2017-03-02 (tst007)


Juergen


[PATCH net] sctp: change to save MSG_MORE flag into assoc

2017-03-02 Thread Xin Long
David Laight noticed the support for MSG_MORE with datamsg->force_delay
didn't really work as we expected, as the first msg with MSG_MORE set
would always block the following chunks' dequeuing.

This patch rewrites it by saving the MSG_MORE flag into assoc as
David Laight suggested.

asoc->force_delay is used to save MSG_MORE flag before a msg is sent.
Once this msg is queued, asoc->force_delay is set back to 0, so that
it will not affect other places flushing out queue.

asoc->force_delay works as a 'local param' here as the msg sending is
under protection of sock lock.  It would make sctp's MSG_MORE work as
tcp's.

Fixes: 4ea0c32f5f42 ("sctp: add support for MSG_MORE")
Signed-off-by: Xin Long 
---
 include/net/sctp/structs.h | 2 +-
 net/sctp/output.c  | 2 +-
 net/sctp/socket.c  | 3 ++-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index a244db5..3378c02 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -497,7 +497,6 @@ struct sctp_datamsg {
/* Did the messenge fail to send? */
int send_error;
u8 send_failed:1,
-  force_delay:1,
   can_delay;   /* should this message be Nagle delayed */
 };
 
@@ -1876,6 +1875,7 @@ struct sctp_association {
 
__u8 need_ecne:1,   /* Need to send an ECNE Chunk? */
 temp:1,/* Is it a temporary association? */
+force_delay:1,
 prsctp_enable:1,
 reconf_enable:1;
 
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 85406d5..5f6acac 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -705,7 +705,7 @@ static sctp_xmit_t sctp_packet_can_append_data(struct 
sctp_packet *packet,
 */
 
if ((sctp_sk(asoc->base.sk)->nodelay || inflight == 0) &&
-   !chunk->msg->force_delay)
+   !asoc->force_delay)
/* Nothing unacked */
return SCTP_XMIT_OK;
 
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 465a9c8..d62cf9e 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1964,7 +1964,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr 
*msg, size_t msg_len)
err = PTR_ERR(datamsg);
goto out_free;
}
-   datamsg->force_delay = !!(msg->msg_flags & MSG_MORE);
+   asoc->force_delay = !!(msg->msg_flags & MSG_MORE);
 
/* Now send the (possibly) fragmented message. */
list_for_each_entry(chunk, >chunks, frag_list) {
@@ -1982,6 +1982,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr 
*msg, size_t msg_len)
 * breaks.
 */
err = sctp_primitive_SEND(net, asoc, datamsg);
+   asoc->force_delay = 0;
/* Did the lower layer accept the chunk? */
if (err) {
sctp_datamsg_free(datamsg);
-- 
2.1.0



[PATCH] can: m_can: support transmit frame in CAN FD format

2017-03-02 Thread Wenyou Yang
Add support to transmit the frame in the CAN FD format and with
the bit rate switching.

Tested on SAMA5D2 Xplained board.

Signed-off-by: Wenyou Yang 
---
The testing is based on
[RESEND PATCH 1/1] can: m_can: fix bitrate setup on latest silicon
http://lkml.iu.edu/hypermail/linux/kernel/1702.1/05347.html

 drivers/net/can/m_can/m_can.c | 21 +
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c
index 195f15edb32e..9ef9b337d25b 100644
--- a/drivers/net/can/m_can/m_can.c
+++ b/drivers/net/can/m_can/m_can.c
@@ -266,8 +266,12 @@ enum m_can_mram_cfg {
 
 /* Tx Buffer Element */
 /* R0 */
+#define TX_BUF_ESI BIT(31)
 #define TX_BUF_XTD BIT(30)
 #define TX_BUF_RTR BIT(29)
+#define TX_BUF_EFC BIT(23)
+#define TX_BUF_EDL BIT(21)
+#define TX_BUF_BRS BIT(20)
 
 /* address offset and element number for each FIFO/Buffer in the Message RAM */
 struct mram_cfg {
@@ -884,7 +888,7 @@ static void m_can_chip_config(struct net_device *dev)
}
 
if (priv->can.ctrlmode & CAN_CTRLMODE_FD)
-   cccr |= CCCR_CME_CANFD_BRS << CCCR_CME_SHIFT;
+   cccr |= (CCCR_CME_CANFD_BRS | CCCR_CME_CANFD) << CCCR_CME_SHIFT;
 
m_can_write(priv, M_CAN_CCCR, cccr);
m_can_write(priv, M_CAN_TEST, test);
@@ -1047,6 +1051,7 @@ static netdev_tx_t m_can_start_xmit(struct sk_buff *skb,
struct canfd_frame *cf = (struct canfd_frame *)skb->data;
u32 id, cccr;
int i;
+   u32 dlc;
 
if (can_dropped_invalid_skb(dev, skb))
return NETDEV_TX_OK;
@@ -1065,7 +1070,6 @@ static netdev_tx_t m_can_start_xmit(struct sk_buff *skb,
 
/* message ram configuration */
m_can_fifo_write(priv, 0, M_CAN_FIFO_ID, id);
-   m_can_fifo_write(priv, 0, M_CAN_FIFO_DLC, can_len2dlc(cf->len) << 16);
 
for (i = 0; i < cf->len; i += 4)
m_can_fifo_write(priv, 0, M_CAN_FIFO_DATA(i / 4),
@@ -1073,20 +1077,29 @@ static netdev_tx_t m_can_start_xmit(struct sk_buff *skb,
 
can_put_echo_skb(skb, dev, 0);
 
+   dlc = can_len2dlc(cf->len) << 16;
+
if (priv->can.ctrlmode & CAN_CTRLMODE_FD) {
cccr = m_can_read(priv, M_CAN_CCCR);
cccr &= ~(CCCR_CMR_MASK << CCCR_CMR_SHIFT);
if (can_is_canfd_skb(skb)) {
-   if (cf->flags & CANFD_BRS)
+   dlc |= TX_BUF_EDL;
+   if (cf->flags & CANFD_ESI)
+   dlc |= TX_BUF_ESI;
+   if (cf->flags & CANFD_BRS) {
+   dlc |= TX_BUF_BRS;
cccr |= CCCR_CMR_CANFD_BRS << CCCR_CMR_SHIFT;
-   else
+   } else {
cccr |= CCCR_CMR_CANFD << CCCR_CMR_SHIFT;
+   }
} else {
cccr |= CCCR_CMR_CAN << CCCR_CMR_SHIFT;
}
m_can_write(priv, M_CAN_CCCR, cccr);
}
 
+   m_can_fifo_write(priv, 0, M_CAN_FIFO_DLC, dlc);
+
/* enable first TX buffer to start transfer  */
m_can_write(priv, M_CAN_TXBTIE, 0x1);
m_can_write(priv, M_CAN_TXBAR, 0x1);
-- 
2.11.0



Re: net/sctp: use-after-free in sctp_association_put

2017-03-02 Thread Xin Long
On Fri, Mar 3, 2017 at 3:21 AM, Dmitry Vyukov  wrote:
> On Thu, Mar 2, 2017 at 9:06 AM, Xin Long  wrote:
>> On Thu, Mar 2, 2017 at 3:18 AM, Dmitry Vyukov  wrote:
>>> Hello,
>>>
>>> I've got the following report while running syzkaller fuzzer on
>>> linux-next/8813198236a044b76e251dcae937b180dd527999:
>>>
>>> BUG: KASAN: use-after-free in sctp_association_destroy
>>> net/sctp/associola.c:416 [inline] at addr 8801c0fa415c
>>> BUG: KASAN: use-after-free in sctp_association_put+0x294/0x300
>>> net/sctp/associola.c:881 at addr 8801c0fa415c
>>> Read of size 1 by task syz-executor1/10956
>>> CPU: 1 PID: 10956 Comm: syz-executor1 Not tainted 4.10.0-rc7-next-20170213 
>>> #1
>>> Hardware name: Google Google Compute Engine/Google Compute Engine,
>>> BIOS Google 01/01/2011
>>> Call Trace:
>>>  
>>>  __dump_stack lib/dump_stack.c:15 [inline]
>>>  dump_stack+0x2ee/0x3ef lib/dump_stack.c:51
>>>  kasan_object_err+0x1c/0x70 mm/kasan/report.c:162
>>>  print_address_description mm/kasan/report.c:200 [inline]
>>>  kasan_report_error mm/kasan/report.c:289 [inline]
>>>  kasan_report.part.2+0x1e5/0x4b0 mm/kasan/report.c:311
>>>  kasan_report mm/kasan/report.c:329 [inline]
>>>  __asan_report_load1_noabort+0x29/0x30 mm/kasan/report.c:329
>>>  sctp_association_destroy net/sctp/associola.c:416 [inline]
>>>  sctp_association_put+0x294/0x300 net/sctp/associola.c:881
>>>  sctp_generate_timeout_event+0x115/0x360 net/sctp/sm_sideeffect.c:317
>>>  sctp_generate_t1_init_event+0x1a/0x20 net/sctp/sm_sideeffect.c:329
>>>  call_timer_fn+0x241/0x820 kernel/time/timer.c:1308
>>>  expire_timers kernel/time/timer.c:1348 [inline]
>>>  __run_timers+0x9e7/0xe90 kernel/time/timer.c:1642
>>>  run_timer_softirq+0x21/0x80 kernel/time/timer.c:1655
>>>  __do_softirq+0x31f/0xbe7 kernel/softirq.c:284
>>>  invoke_softirq kernel/softirq.c:364 [inline]
>>>  irq_exit+0x1cc/0x200 kernel/softirq.c:405
>>>  exiting_irq arch/x86/include/asm/apic.h:658 [inline]
>>>  smp_apic_timer_interrupt+0x76/0xa0 arch/x86/kernel/apic/apic.c:962
>>>  apic_timer_interrupt+0x93/0xa0 arch/x86/entry/entry_64.S:707
>>> RIP: 0010:arch_local_irq_enable arch/x86/include/asm/paravirt.h:788 [inline]
>>> RIP: 0010:__raw_spin_unlock_irq include/linux/spinlock_api_smp.h:168 
>>> [inline]
>>> RIP: 0010:_raw_spin_unlock_irq+0x56/0x70 kernel/locking/spinlock.c:199
>>> RSP: 0018:8801c280f178 EFLAGS: 0286 ORIG_RAX: ff10
>>> RAX: dc00 RBX: 8801dbf24a00 RCX: 0006
>>> RDX: 10a18d03 RSI: 8801d71c88e0 RDI: 850c6818
>>> RBP: 8801c280f180 R08: 0002 R09: 
>>> R10: 0006 R11:  R12: 8801c0f3a4c0
>>> R13: 110038501e38 R14: 8801d71c80c0 R15: 8801d71c80c0
>>>  
>>>  finish_lock_switch kernel/sched/sched.h:1248 [inline]
>>>  finish_task_switch+0x1c2/0x720 kernel/sched/core.c:2792
>>>  context_switch kernel/sched/core.c:2928 [inline]
>>>  __schedule+0x893/0x2290 kernel/sched/core.c:3468
>>>  preempt_schedule_common+0x35/0x60 kernel/sched/core.c:3579
>>>  _cond_resched+0x17/0x20 kernel/sched/core.c:4977
>>>  slab_pre_alloc_hook mm/slab.h:427 [inline]
>>>  slab_alloc mm/slab.c:3390 [inline]
>>>  __do_kmalloc mm/slab.c:3730 [inline]
>>>  __kmalloc_track_caller+0x26a/0x690 mm/slab.c:3747
>>>  kstrdup+0x39/0x70 mm/util.c:54
>>>  snd_timer_instance_new+0xfc/0x5d0 sound/core/timer.c:110
>>>  snd_timer_open+0x878/0x1740 sound/core/timer.c:290
>>>  snd_timer_user_tselect sound/core/timer.c:1621 [inline]
>>>  __snd_timer_user_ioctl sound/core/timer.c:1901 [inline]
>>>  snd_timer_user_ioctl+0x9b1/0x34a0 sound/core/timer.c:1931
>>>  vfs_ioctl fs/ioctl.c:43 [inline]
>>>  do_vfs_ioctl+0x1bf/0x1790 fs/ioctl.c:683
>>>  SYSC_ioctl fs/ioctl.c:698 [inline]
>>>  SyS_ioctl+0x8f/0xc0 fs/ioctl.c:689
>>>  entry_SYSCALL_64_fastpath+0x1f/0xc2
>>> RIP: 0033:0x44fb59
>>> RSP: 002b:7f0dc184db58 EFLAGS: 0212 ORIG_RAX: 0010
>>> RAX: ffda RBX: 40345410 RCX: 0044fb59
>>> RDX: 20001000 RSI: 40345410 RDI: 0005
>>> RBP: 0005 R08:  R09: 
>>> R10:  R11: 0212 R12: 00708000
>>> R13: 00a5fc57 R14: 7f0dc184e9c0 R15: 
>>> Object at 8801c0fa4140, in cache kmalloc-4096 size: 4096
>>> Allocated:
>>> PID = 10965
>>>  save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:57
>>>  save_stack+0x43/0xd0 mm/kasan/kasan.c:504
>>>  set_track mm/kasan/kasan.c:516 [inline]
>>>  kasan_kmalloc+0xaa/0xd0 mm/kasan/kasan.c:607
>>>  kmem_cache_alloc_trace+0x10b/0x670 mm/slab.c:3634
>>>  kmalloc include/linux/slab.h:490 [inline]
>>>  kzalloc include/linux/slab.h:663 [inline]
>>>  sctp_association_new+0x114/0x2120 net/sctp/associola.c:306
>>>  sctp_sendmsg+0x1585/0x38f0 net/sctp/socket.c:1835
>>>  inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:761
>>>  sock_sendmsg_nosec 

[PATCH net 0/2] nfp: RX and XDP buffer fixes

2017-03-02 Thread Jakub Kicinski
Hi!

Two trivial fixes for code introduced with XDP support.  First
one corrects the buffer size we populate a register with.  The
register is designed to be used for scatter transfers which 
the driver (and most FWs) don't support so it's not critical.
The other one for DMA direction is mostly cosmetic, DMA API
doesn't seem to care today about the precise direction in sync
calls.

Jakub Kicinski (2):
  nfp: don't tell FW about the reserved buffer space
  nfp: correct DMA direction in XDP DMA sync

 drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

-- 
2.11.0



Re: [PATCH net] tcp: fix potential double free issue for fastopen_req

2017-03-02 Thread David Miller
From: Wei Wang 
Date: Wed,  1 Mar 2017 13:29:48 -0800

> From: Wei Wang 
> 
> tp->fastopen_req could potentially be double freed if a malicious
> user does the following:
> 1. Enable TCP_FASTOPEN_CONNECT sockopt and do a connect() on the socket.
> 2. Call connect() with AF_UNSPEC to disconnect the socket.
> 3. Make this socket a listening socket by calling listen().
> 4. Accept incoming connections and generate child sockets. All child
>sockets will get a copy of the pointer of fastopen_req.
> 5. Call close() on all sockets. fastopen_req will get freed multiple
>times.
> 
> Fixes: 19f6d3f3c842 ("net/tcp-fastopen: Add new API support")
> Reported-by: Andrey Konovalov 
> Signed-off-by: Wei Wang 
> Signed-off-by: Eric Dumazet 

Applied, and queued up for -stable.


Re: [PATCH net] rxrpc: Fix potential NULL-pointer exception

2017-03-02 Thread David Howells
David Howells  wrote:

> Fix a potential NULL-pointer exception in rxrpc_do_sendmsg().  The call
> state check that I added should have gone into the else-body of the
> if-statement where we actually have a call to check.
> 
> Found by CoverityScan CID#1414316 ("Dereference after null check").
> 
> Fixes: 540b1c48c37a ("rxrpc: Fix deadlock between call creation and 
> sendmsg/recvmsg")
> Reported-by: Colin Ian King 
> Signed-off-by: David Howells 

Please ignore this - there's another patch interposed that I haven't sent
upstream yet.  Will rebase on net/master.

David


[net 0/2][pull request] Intel Wired LAN Driver Updates 2017-03-02

2017-03-02 Thread Jeff Kirsher
This series contains fixes to ixgbe only.

Paolo fixes the driver so that you can actually update the RSS key value
via ethtool.

Alex fixes an issue on architectures that have a cache line size larger
than 64 Bytes, where the amount of headroom for the frame starts
shrinking.  To take this into account, Alex adds one small check so that
we compare the max_frame to the amount of actual data we can store, so
we will automatically enable 3K receive buffers as soon as the maximum
frame size we can handle drops below the standard Ethernet MTU.

The following are changes since commit 9f674e48c13dcbc31ac903433727837795b81efe:
  xen-netback: Use GFP_ATOMIC to allocate hash
and are available in the git repository at:
  git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/net-queue 10GbE

Alexander Duyck (1):
  ixgbe: Limit use of 2K buffers on architectures with 256B or larger
cache lines

Paolo Abeni (1):
  ixgbe: update the rss key on h/w, when ethtool ask for it

 drivers/net/ethernet/intel/ixgbe/ixgbe.h |  3 ++-
 drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c |  4 +++-
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c| 22 ++
 3 files changed, 23 insertions(+), 6 deletions(-)

-- 
2.12.0



[net 2/2] ixgbe: Limit use of 2K buffers on architectures with 256B or larger cache lines

2017-03-02 Thread Jeff Kirsher
From: Alexander Duyck 

On architectures that have a cache line size larger than 64 Bytes we start
running into issues where the amount of headroom for the frame starts
shrinking.

The size of skb_shared_info on a system with a 64B L1 cache line size is
320.  This increases to 384 with a 128B cache line, and 512 with a 256B
cache line.

In addition the NET_SKB_PAD value increases as well consistent with the
cache line size.  As a result when we get to a 256B cache line as seen on
the s390 we end up with 768 bytes used by padding and shared info leaving us
with only 1280 bytes to use for data storage.  On architectures such as
this we should default to using 3K Rx buffers out of a 8K page instead of
trying to do 1.5K buffers out of a 4K page.

To take all of this into account I have added one small check so that we
compare the max_frame to the amount of actual data we can store.  This was
already occurring for igb, but I had overlooked it for ixgbe as it doesn't
have strict limits for 82599 once we enable jumbo frames.  By adding this
check we will automatically enable 3K Rx buffers as soon as the maximum
frame size we can handle drops below the standard Ethernet MTU.

I also went through and fixed one small typo that I found where I had left
an IGB in a variable name due to a copy/paste error.

Signed-off-by: Alexander Duyck 
Tested-by: Andrew Bowers 
Signed-off-by: Jeff Kirsher 
---
 drivers/net/ethernet/intel/ixgbe/ixgbe.h  | 2 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h 
b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index 7a951b116821..b1ecc2627a5a 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -96,7 +96,7 @@
 #define IXGBE_MAX_FRAME_BUILD_SKB \
(SKB_WITH_OVERHEAD(IXGBE_RXBUFFER_2K) - IXGBE_SKB_PAD)
 #else
-#define IGB_MAX_FRAME_BUILD_SKB IXGBE_RXBUFFER_2K
+#define IXGBE_MAX_FRAME_BUILD_SKB IXGBE_RXBUFFER_2K
 #endif
 
 /*
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c 
b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 67ab13fd163c..a7a430a7be2c 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -3972,7 +3972,8 @@ static void ixgbe_set_rx_buffer_len(struct ixgbe_adapter 
*adapter)
if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED)
set_bit(__IXGBE_RX_3K_BUFFER, _ring->state);
 
-   if (max_frame > (ETH_FRAME_LEN + ETH_FCS_LEN))
+   if ((max_frame > (ETH_FRAME_LEN + ETH_FCS_LEN)) ||
+   (max_frame > IXGBE_MAX_FRAME_BUILD_SKB))
set_bit(__IXGBE_RX_3K_BUFFER, _ring->state);
 #endif
}
-- 
2.12.0



[net 1/2] ixgbe: update the rss key on h/w, when ethtool ask for it

2017-03-02 Thread Jeff Kirsher
From: Paolo Abeni 

Currently ixgbe_set_rxfh() updates the rss_key copy in the driver
memory, but does not push the new value into the h/w. This commit
adds a new helper for the latter operation and calls it in
ixgbe_set_rxfh(), so that the h/w rss key value can be really
updated via ethtool.

Signed-off-by: Paolo Abeni 
Tested-by: Andrew Bowers 
Signed-off-by: Jeff Kirsher 
---
 drivers/net/ethernet/intel/ixgbe/ixgbe.h |  1 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c |  4 +++-
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c| 19 ---
 3 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h 
b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index a2cc43d2..7a951b116821 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -929,6 +929,7 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb,
  struct ixgbe_adapter *adapter,
  struct ixgbe_ring *tx_ring);
 u32 ixgbe_rss_indir_tbl_entries(struct ixgbe_adapter *adapter);
+void ixgbe_store_key(struct ixgbe_adapter *adapter);
 void ixgbe_store_reta(struct ixgbe_adapter *adapter);
 s32 ixgbe_negotiate_fc(struct ixgbe_hw *hw, u32 adv_reg, u32 lp_reg,
   u32 adv_sym, u32 adv_asm, u32 lp_sym, u32 lp_asm);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c 
b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
index a7574c7b12af..90fa5bf23d1b 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
@@ -2998,8 +2998,10 @@ static int ixgbe_set_rxfh(struct net_device *netdev, 
const u32 *indir,
}
 
/* Fill out the rss hash key */
-   if (key)
+   if (key) {
memcpy(adapter->rss_key, key, ixgbe_get_rxfh_key_size(netdev));
+   ixgbe_store_key(adapter);
+   }
 
ixgbe_store_reta(adapter);
 
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c 
b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 060cdce8058f..67ab13fd163c 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -3474,6 +3474,21 @@ u32 ixgbe_rss_indir_tbl_entries(struct ixgbe_adapter 
*adapter)
 }
 
 /**
+ * ixgbe_store_key - Write the RSS key to HW
+ * @adapter: device handle
+ *
+ * Write the RSS key stored in adapter.rss_key to HW.
+ */
+void ixgbe_store_key(struct ixgbe_adapter *adapter)
+{
+   struct ixgbe_hw *hw = >hw;
+   int i;
+
+   for (i = 0; i < 10; i++)
+   IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), adapter->rss_key[i]);
+}
+
+/**
  * ixgbe_store_reta - Write the RETA table to HW
  * @adapter: device handle
  *
@@ -3538,7 +3553,6 @@ static void ixgbe_store_vfreta(struct ixgbe_adapter 
*adapter)
 
 static void ixgbe_setup_reta(struct ixgbe_adapter *adapter)
 {
-   struct ixgbe_hw *hw = >hw;
u32 i, j;
u32 reta_entries = ixgbe_rss_indir_tbl_entries(adapter);
u16 rss_i = adapter->ring_feature[RING_F_RSS].indices;
@@ -3551,8 +3565,7 @@ static void ixgbe_setup_reta(struct ixgbe_adapter 
*adapter)
rss_i = 4;
 
/* Fill out hash function seeds */
-   for (i = 0; i < 10; i++)
-   IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), adapter->rss_key[i]);
+   ixgbe_store_key(adapter);
 
/* Fill out redirection table */
memset(adapter->rss_indir_tbl, 0, sizeof(adapter->rss_indir_tbl));
-- 
2.12.0



Re: [PATCH net] xen-netback: Use GFP_ATOMIC to allocate hash

2017-03-02 Thread David Miller
From: Anoob Soman 
Date: Thu, 2 Mar 2017 10:50:20 +

> Allocation of new_hash, inside xenvif_new_hash(), always happens
> in softirq context, so use GFP_ATOMIC instead of GFP_KERNEL for new
> hash allocation.
> 
> Signed-off-by: Anoob Soman 

Applied.


Re: [Patch net] bonding: use ETH_MAX_MTU as max mtu

2017-03-02 Thread David Miller
From: Cong Wang 
Date: Thu,  2 Mar 2017 12:24:36 -0800

> This restores the ability of setting bond device's mtu to 9000.
> 
> Fixes: 91572088e3fd ("net: use core MTU range checking in core net infra")
> Reported-by: daz...@gmail.com
> Reported-by: Brad Campbell 
> Cc: Jarod Wilson 
> Signed-off-by: Cong Wang 

Applied and queued up for -stable, thanks.


Re: [PATCH-v4-RESEND 1/4] vsock: track pkt owner vsock

2017-03-02 Thread Peng Tao
On Fri, Mar 3, 2017 at 5:13 AM, David Miller  wrote:
> From: Peng Tao 
> Date: Wed,  1 Mar 2017 11:56:24 +0800
>
>> So that we can cancel a queued pkt later if necessary.
>>
>> Reviewed-by: Stefan Hajnoczi 
>> Signed-off-by: Peng Tao 
>> ---
>>  include/linux/virtio_vsock.h| 2 ++
>>  net/vmw_vsock/virtio_transport_common.c | 7 +++
>>  2 files changed, 9 insertions(+)
>>
>> diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h
>> index 9638bfe..193ad3a 100644
>> --- a/include/linux/virtio_vsock.h
>> +++ b/include/linux/virtio_vsock.h
>> @@ -48,6 +48,7 @@ struct virtio_vsock_pkt {
>>   struct virtio_vsock_hdr hdr;
>>   struct work_struct work;
>>   struct list_head list;
>> + void *cancel_token; /* only used for cancellation */
>
> The type here is fixed, you only store vhost_sock object pointers
> here, so don't use "void *" please.
It used to be "struct vhost_sock *" but no refcount is held. Stefan
suggested to use "void *cancel_token" to make the code harder to
misuse.

Quoting Stefan:
"This field is just an opaque token used for cancellation rather than
a struct vsock_sock pointer that we are allowed to dereference.  You
could change this field to void *cancel_token to make the code harder
to misuse."

Ref:
https://www.mail-archive.com/netdev@vger.kernel.org/msg142550.html

Cheers,
Tao


Re: [PATCH v5 06/10] seccomp,landlock: Handle Landlock events per process hierarchy

2017-03-02 Thread Andy Lutomirski
On Thu, Mar 2, 2017 at 4:48 PM, Mickaël Salaün  wrote:
>
> On 02/03/2017 17:36, Andy Lutomirski wrote:
>> On Wed, Mar 1, 2017 at 3:28 PM, Mickaël Salaün  wrote:
>>>
>>>
>>> On 01/03/2017 23:20, Andy Lutomirski wrote:
 On Wed, Mar 1, 2017 at 2:14 PM, Mickaël Salaün  wrote:
>
> On 28/02/2017 21:01, Andy Lutomirski wrote:
>> On Tue, Feb 21, 2017 at 5:26 PM, Mickaël Salaün  wrote:
> This design makes it possible for a process to add more constraints to
> its children on the fly. I think it is a good feature to have and a
> safer default inheritance mechanism, but it could be guarded by an
> option flag if we want both mechanism to be available. The same design
> could be used by seccomp filter too.
>

 Then let's do it right.

 Currently each task has an array of seccomp filter layers.  When a
 task forks, the child inherits the layers.  All the layers are
 presently immutable.  With Landlock, a layer can logically be a
 syscall filter layer or a Landlock layer.  This fits in to the
 existing model just fine.

 If we want to have an interface to allow modification of an existing
 layer, let's make it so that, when a layer is added, you have to
 specify a flag to make the layer modifiable (by current, presumably,
 although I can imagine other policies down the road).  Then have a
 separate API that modifies a layer.

 IOW, I think your patch is bad for three reasons, all fixable:

 1. The default is wrong.  A layer should be immutable to avoid an easy
 attack in which you try to sandbox *yourself* and then you just modify
 the layer to weaken it.
>>>
>>> This is not possible, there is only an operation for now:
>>> SECCOMP_ADD_LANDLOCK_RULE. You can only add more rules to the list (as
>>> for seccomp filter). There is no way to weaken a sandbox. The question
>>> is: how do we want to handle the rules *tree* (from the kernel point of
>>> view)?
>>>
>>
>> Fair enough.  But I still think that immutability (like regular
>> seccomp) should be the default.  For security, simplicity is
>> important.  I guess there could be two ways to relax immutability:
>> allowing making the layer stricter and allowing any change at all.
>>
>> As a default, though, programs should be able to expect that:
>>
>> seccomp(SECCOMP_ADD_WHATEVER, ...);
>> fork();
>>
>> [parent gets compromised]
>> [in parent]seccomp(anything whatsoever);
>>
>> will not affect the child.  If the parent wants to relax that, that's
>> fine, but I think it should be explicit.
>
> Good point. However the term "immutability" doesn't fit right because
> the process is still allowed to add more rules to itself (as for
> seccomp). The difference lies in the way a rule may be "appended" (by
> the current process) or "inserted" (by a parent process).
>
> I think three or four kinds of operations (through the seccomp syscall)
> make sense:
> * append a rule (for the current process and its future children)

Sure, but this operation should *never* affect existing children,
existing seccomp layers, existing nodes, etc.  It should affect
current and future children only.  Or it could simply not exist for
Landlock and instead you'd have to add a layer (see below) and then
program that layer.

> * add a node (insert point), from which the inserted rules will be tied
> * insert a rule in the node, which will be inherited by future children

I would advocate calling this a "seccomp layer" and making creation
and manipulation of them generic.

> * (maybe a "lock" command to make a layer immutable for the current
> process and its children)

Hmm, maybe.

>
> Doing so, a process is only allowed to insert a rule if a node was
> previously added. To forbid itself to insert new rules to one of its
> children, a process just need to not add a node before forking. Like
> this, there is no need for special rule flags nor default behavior,
> everything is explicit.

This is still slightly too complicated.  If you really want an
operation that adds a layer (please don't call it a node in the ABI)
and adds a rule to that layer in a single operation, it should be a
separate operation.  Please make everything explicit.

(I don't like exposing the word "node" to userspace because it means
nothing.  Having more than one layer of filter makes sense to me,
which is why I like "layer".  I'm sure that other good choices exist.)

>
> For this series, I will stick to the same behavior as seccomp filter:
> only append rules to the current process (and its future children).
>
>
 2. The API that adds a layer should be different from the API that
 modifies a layer.
>>>
>>> Right, but it doesn't apply now because we can only add rules.
>>
>> That's not what the code appears to do, though.  Sometimes it makes a
>> new layer without modifying tasks that share the layer and sometimes
>> it modifies the 

Re: [Patch net] bonding: use ETH_MAX_MTU as max mtu

2017-03-02 Thread Brad Campbell

On 03/03/17 06:43, David Miller wrote:

From: Cong Wang 
Date: Thu,  2 Mar 2017 12:24:36 -0800


This restores the ability of setting bond device's mtu to 9000.

Fixes: 91572088e3fd ("net: use core MTU range checking in core net infra")
Reported-by: daz...@gmail.com
Reported-by: Brad Campbell 
Cc: Jarod Wilson 
Signed-off-by: Cong Wang 


Applied and queued up for -stable, thanks.



Thanks very much for this. I've tested it against 4.10.1 and it does the 
job nicely.


Regards,
Brad


Re: [kernel-hardening] [PATCH v5 06/10] seccomp,landlock: Handle Landlock events per process hierarchy

2017-03-02 Thread Mickaël Salaün


On 02/03/2017 11:22, Djalal Harouni wrote:
> On Wed, Feb 22, 2017 at 2:26 AM, Mickaël Salaün  wrote:
>> The seccomp(2) syscall can be used to apply a Landlock rule to the
>> current process. As with a seccomp filter, the Landlock rule is enforced
>> for all its future children. An inherited rule tree can be updated
>> (append-only) by the owner of inherited Landlock nodes (e.g. a parent
>> process that creates a new rule). However, an intermediate task, which
>> did not create a rule, will not be able to update its children's rules.
>>
>> Landlock rules can be tied to a Landlock event. When such an event is
>> triggered, a tree of rules can be evaluated. This kind of tree is
>> created with a first node.  This node references a list of rules and an
>> optional parent node. Each rule returns a 32-bit value which can
>> interrupt the evaluation with a non-zero value. If every rule returned
>> zero, the evaluation continues with the rule list of the parent node,
>> until the end of the tree.
>>
>> Changes since v4:
>> * merge manager and seccomp patches
>> * return -EFAULT in seccomp(2) when user_bpf_fd is null to easily check
>>   if Landlock is supported
>> * only allow a process with the global CAP_SYS_ADMIN to use Landlock
>>   (will be lifted in the future)
>> * add an early check to exit as soon as possible if the current process
>>   does not have Landlock rules
>>
>> Changes since v3:
>> * remove the hard link with seccomp (suggested by Andy Lutomirski and
>>   Kees Cook):
>>   * remove the cookie which could imply multiple evaluation of Landlock
>> rules
>>   * remove the origin field in struct landlock_data
>> * remove documentation fix (merged upstream)
>> * rename the new seccomp command to SECCOMP_ADD_LANDLOCK_RULE
>> * internal renaming
>> * split commit
>> * new design to be able to inherit on the fly the parent rules
>>
>> Changes since v2:
>> * Landlock programs can now be run without seccomp filter but for any
>>   syscall (from the process) or interruption
>> * move Landlock related functions and structs into security/landlock/*
>>   (to manage cgroups as well)
>> * fix seccomp filter handling: run Landlock programs for each of their
>>   legitimate seccomp filter
>> * properly clean up all seccomp results
>> * cosmetic changes to ease the understanding
>> * fix some ifdef
>>
>> Signed-off-by: Mickaël Salaün 
>> Cc: Alexei Starovoitov 
>> Cc: Andrew Morton 
>> Cc: Andy Lutomirski 
>> Cc: James Morris 
>> Cc: Kees Cook 
>> Cc: Serge E. Hallyn 
>> Cc: Will Drewry 
>> ---
>>  include/linux/seccomp.h  |   8 ++
>>  include/uapi/linux/seccomp.h |   1 +
>>  kernel/fork.c|  14 +-
>>  kernel/seccomp.c |   8 ++
>>  security/landlock/Makefile   |   2 +-
>>  security/landlock/hooks.c|  42 +-
>>  security/landlock/manager.c  | 321 
>> +++
>>  7 files changed, 392 insertions(+), 4 deletions(-)
>>  create mode 100644 security/landlock/manager.c
>>
>> diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
>> index e25aee2cdfc0..9a38de3c0e72 100644
>> --- a/include/linux/seccomp.h
>> +++ b/include/linux/seccomp.h
>> @@ -10,6 +10,10 @@
>>  #include 
>>  #include 
>>
>> +#if defined(CONFIG_SECCOMP_FILTER) && defined(CONFIG_SECURITY_LANDLOCK)
>> +struct landlock_events;
>> +#endif /* CONFIG_SECCOMP_FILTER && CONFIG_SECURITY_LANDLOCK */
>> +
>>  struct seccomp_filter;
>>  /**
>>   * struct seccomp - the state of a seccomp'ed process
>> @@ -18,6 +22,7 @@ struct seccomp_filter;
>>   * system calls available to a process.
>>   * @filter: must always point to a valid seccomp-filter or NULL as it is
>>   *  accessed without locking during system call entry.
>> + * @landlock_events: contains an array of Landlock rules.
>>   *
>>   *  @filter must only be accessed from the context of current as 
>> there
>>   *  is no read locking.
>> @@ -25,6 +30,9 @@ struct seccomp_filter;
>>  struct seccomp {
>> int mode;
>> struct seccomp_filter *filter;
>> +#if defined(CONFIG_SECCOMP_FILTER) && defined(CONFIG_SECURITY_LANDLOCK)
>> +   struct landlock_events *landlock_events;
>> +#endif /* CONFIG_SECCOMP_FILTER && CONFIG_SECURITY_LANDLOCK */
>>  };
>>
>>  #ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER
>> diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
>> index 0f238a43ff1e..56dd692cddac 100644
>> --- a/include/uapi/linux/seccomp.h
>> +++ b/include/uapi/linux/seccomp.h
>> @@ -13,6 +13,7 @@
>>  /* Valid operations for seccomp syscall. */
>>  #define SECCOMP_SET_MODE_STRICT0
>>  #define SECCOMP_SET_MODE_FILTER1
>> +#define SECCOMP_ADD_LANDLOCK_RULE  2
>>
>>  /* Valid flags for SECCOMP_SET_MODE_FILTER */
>>  #define SECCOMP_FILTER_FLAG_TSYNC  1
>> diff --git 

Re: [PATCH v5 06/10] seccomp,landlock: Handle Landlock events per process hierarchy

2017-03-02 Thread Mickaël Salaün


On 03/03/2017 01:55, Andy Lutomirski wrote:
> On Thu, Mar 2, 2017 at 4:48 PM, Mickaël Salaün  wrote:
>>
>> On 02/03/2017 17:36, Andy Lutomirski wrote:
>>> On Wed, Mar 1, 2017 at 3:28 PM, Mickaël Salaün  wrote:


 On 01/03/2017 23:20, Andy Lutomirski wrote:
> On Wed, Mar 1, 2017 at 2:14 PM, Mickaël Salaün  wrote:
>>
>> On 28/02/2017 21:01, Andy Lutomirski wrote:
>>> On Tue, Feb 21, 2017 at 5:26 PM, Mickaël Salaün  
>>> wrote:
>> This design makes it possible for a process to add more constraints to
>> its children on the fly. I think it is a good feature to have and a
>> safer default inheritance mechanism, but it could be guarded by an
>> option flag if we want both mechanism to be available. The same design
>> could be used by seccomp filter too.
>>
>
> Then let's do it right.
>
> Currently each task has an array of seccomp filter layers.  When a
> task forks, the child inherits the layers.  All the layers are
> presently immutable.  With Landlock, a layer can logically be a
> syscall fitler layer or a Landlock layer.  This fits in to the
> existing model just fine.
>
> If we want to have an interface to allow modification of an existing
> layer, let's make it so that, when a layer is added, you have to
> specify a flag to make the layer modifiable (by current, presumably,
> although I can imagine other policies down the road).  Then have a
> separate API that modifies a layer.
>
> IOW, I think your patch is bad for three reasons, all fixable:
>
> 1. The default is wrong.  A layer should be immutable to avoid an easy
> attack in which you try to sandbox *yourself* and then you just modify
> the layer to weaken it.

 This is not possible, there is only an operation for now:
 SECCOMP_ADD_LANDLOCK_RULE. You can only add more rules to the list (as
 for seccomp filter). There is no way to weaken a sandbox. The question
 is: how do we want to handle the rules *tree* (from the kernel point of
 view)?

>>>
>>> Fair enough.  But I still think that immutability (like regular
>>> seccomp) should be the default.  For security, simplicity is
>>> important.  I guess there could be two ways to relax immutability:
>>> allowing making the layer stricter and allowing any change at all.
>>>
>>> As a default, though, programs should be able to expect that:
>>>
>>> seccomp(SECCOMP_ADD_WHATEVER, ...);
>>> fork();
>>>
>>> [parent gets compromised]
>>> [in parent]seccomp(anything whatsoever);
>>>
>>> will not affect the child,  If the parent wants to relax that, that's
>>> fine, but I think it should be explicit.
>>
>> Good point. However the term "immutability" doesn't fit right because
>> the process is still allowed to add more rules to itself (as for
>> seccomp). The difference lays in the way a rule may be "appended" (by
>> the current process) or "inserted" (by a parent process).
>>
>> I think three or four kind of operations (through the seccomp syscall)
>> make sense:
>> * append a rule (for the current process and its future children)
> 
> Sure, but this operation should *never* affect existing children,
> existing seccomp layers, existing nodes, etc.  It should affect
> current and future children only.  Or it could simply not exist for
> Landlock and instead you'd have to add a layer (see below) and then
> program that layer.
> 
>> * add a node (insert point), from which the inserted rules will be tied
>> * insert a rule in the node, which will be inherited by futures children
> 
> I would advocate calling this a "seccomp layer" and making creation
> and manipulation of them generic.
> 
>> * (maybe a "lock" command to make a layer immutable for the current
>> process and its children)
> 
> Hmm, maybe.
> 
>>
>> Doing so, a process is only allowed to insert a rule if a node was
>> previously added. To forbid itself to insert new rules to one of its
>> children, a process just need to not add a node before forking. Like
>> this, there is no need for special rule flags nor default behavior,
>> everything is explicit.
> 
> This is still slightly too complicated.  If you really want an
> operation that adds a layer (please don't call it a node in the ABI)
> and adds a rule to that layer in a single operation, it should be a
> separate operation.  Please make everything explicit.
> 
> (I don't like exposing the word "node" to userspace because it means
> nothing.  Having more than one layer of filter makes sense to me,
> which is why I like "layer".  I'm sure that other good choices exist.)

I keep that for a future discussion, I'm now convinced the simple
inheritance (seccomp-like) doesn't block future extension.

> 
>>
>> For this series, I will stick to the same behavior as seccomp filter:
>> only append rules to the current process (and its future children).
>>
>>
> 2. The API that 

Re: [PATCH v5 06/10] seccomp,landlock: Handle Landlock events per process hierarchy

2017-03-02 Thread Mickaël Salaün

On 02/03/2017 17:36, Andy Lutomirski wrote:
> On Wed, Mar 1, 2017 at 3:28 PM, Mickaël Salaün  wrote:
>>
>>
>> On 01/03/2017 23:20, Andy Lutomirski wrote:
>>> On Wed, Mar 1, 2017 at 2:14 PM, Mickaël Salaün  wrote:

 On 28/02/2017 21:01, Andy Lutomirski wrote:
> On Tue, Feb 21, 2017 at 5:26 PM, Mickaël Salaün  wrote:
 This design makes it possible for a process to add more constraints to
 its children on the fly. I think it is a good feature to have and a
 safer default inheritance mechanism, but it could be guarded by an
 option flag if we want both mechanism to be available. The same design
 could be used by seccomp filter too.

>>>
>>> Then let's do it right.
>>>
>>> Currently each task has an array of seccomp filter layers.  When a
>>> task forks, the child inherits the layers.  All the layers are
>>> presently immutable.  With Landlock, a layer can logically be a
>>> syscall fitler layer or a Landlock layer.  This fits in to the
>>> existing model just fine.
>>>
>>> If we want to have an interface to allow modification of an existing
>>> layer, let's make it so that, when a layer is added, you have to
>>> specify a flag to make the layer modifiable (by current, presumably,
>>> although I can imagine other policies down the road).  Then have a
>>> separate API that modifies a layer.
>>>
>>> IOW, I think your patch is bad for three reasons, all fixable:
>>>
>>> 1. The default is wrong.  A layer should be immutable to avoid an easy
>>> attack in which you try to sandbox *yourself* and then you just modify
>>> the layer to weaken it.
>>
>> This is not possible, there is only an operation for now:
>> SECCOMP_ADD_LANDLOCK_RULE. You can only add more rules to the list (as
>> for seccomp filter). There is no way to weaken a sandbox. The question
>> is: how do we want to handle the rules *tree* (from the kernel point of
>> view)?
>>
> 
> Fair enough.  But I still think that immutability (like regular
> seccomp) should be the default.  For security, simplicity is
> important.  I guess there could be two ways to relax immutability:
> allowing making the layer stricter and allowing any change at all.
> 
> As a default, though, programs should be able to expect that:
> 
> seccomp(SECCOMP_ADD_WHATEVER, ...);
> fork();
> 
> [parent gets compromised]
> [in parent]seccomp(anything whatsoever);
> 
> will not affect the child,  If the parent wants to relax that, that's
> fine, but I think it should be explicit.

Good point. However the term "immutability" doesn't fit right because
the process is still allowed to add more rules to itself (as for
seccomp). The difference lies in the way a rule may be "appended" (by
the current process) or "inserted" (by a parent process).

I think three or four kinds of operations (through the seccomp syscall)
make sense:
* append a rule (for the current process and its future children)
* add a node (insert point), from which the inserted rules will be tied
* insert a rule in the node, which will be inherited by future children
* (maybe a "lock" command to make a layer immutable for the current
process and its children)

Doing so, a process is only allowed to insert a rule if a node was
previously added. To forbid itself to insert new rules to one of its
children, a process just needs to not add a node before forking. Like
this, there is no need for special rule flags nor default behavior,
everything is explicit.

For this series, I will stick to the same behavior as seccomp filter:
only append rules to the current process (and its future children).


>>> 2. The API that adds a layer should be different from the API that
>>> modifies a layer.
>>
>> Right, but it doesn't apply now because we can only add rules.
> 
> That's not what the code appears to do, though.  Sometimes it makes a
> new layer without modifying tasks that share the layer and sometimes
> it modifies the layer.
> 
> Both operations are probably okay, but they're not the same operation
> and they shouldn't pretend to be.

It should be OK with my previous proposal. The other details could be
discussed in a separate future patch series.


>>> 3. The whole modification mechanism should be a separate patch to be
>>> reviewed on its own merits.
>>
>> For a rule *replacement*, sure!
> 
> And for modification of policy for non-current tasks.  That's a big
> departure from normal seccomp and should be reviewed as such.

Agreed



signature.asc
Description: OpenPGP digital signature


Re: [PATCH] netvsc: fix use-after-free in netvsc_change_mtu()

2017-03-02 Thread David Miller
From: Dexuan Cui 
Date: Thu, 2 Mar 2017 13:00:53 +

> 'nvdev' is freed in rndis_filter_device_remove -> netvsc_device_remove ->
> free_netvsc_device, so we mustn't access it, before it's re-created in
> rndis_filter_device_add -> netvsc_device_add.
> 
> Signed-off-by: Dexuan Cui 

Applied.


Re: [PATCH net v4 0/2] net: ethernet: bgmac: bug fixes

2017-03-02 Thread Jon Mason
On Thu, Mar 02, 2017 at 12:56:05PM -0800, David Miller wrote:
> From: David Miller 
> Date: Thu, 02 Mar 2017 12:50:15 -0800 (PST)
> 
> > From: Jon Mason 
> > Date: Tue, 28 Feb 2017 13:41:49 -0500
> > 
> >> Changes in v4:
> >> * Added the udelays from the previous code (per David Miller)
> >> 
> >> Changes in v3:
> >> * Reworked the init sequence patch to only remove the device reset if
> >>   the device is actually in reset.  Given that this code doesn't bear
> >>   much resemblance to the original code, I'm changing the author of the
> >>   patch.  This was tested on NS2 SVK.
> >> 
> >> Changes in v2:
> >> * Reworked the first match to make it more obvious what portions of the
> >>   register were being preserved (Per Rafal Mileki)
> >> * Style change to reorder the function variables in patch 2 (per Sergei
> >>   Shtylyov)
> >> 
> >> Bug fixes for bgmac driver
> > 
> > Series applied.
> 
> Actually, this doesn't even compile.  Reverted...
> 
> [davem@kkuri net]$ make -s -j4
> drivers/net/ethernet/broadcom/bgmac.c: In function ‘bgmac_set_mac_address’:
> drivers/net/ethernet/broadcom/bgmac.c:1233:23: error: ‘struct bgmac’ has no 
> member named ‘mac_addr’; did you mean ‘phyaddr’?
>   ether_addr_copy(bgmac->mac_addr, sa->sa_data);
>^~
> drivers/net/ethernet/broadcom/bgmac.c:1234:38: error: ‘struct bgmac’ has no 
> member named ‘mac_addr’; did you mean ‘phyaddr’?
>   bgmac_write_mac_address(bgmac, bgmac->mac_addr);
>   ^~

Well this is embarrassing.  I didn't rebase, even though I acked the
patch which changed it out from under me.  Sorry, I should've known
better.

Rebased, compiled, and tested patch coming shortly.  I appreciate your
patience.

Thanks,
Jon


Re: [PATCH 24/26] ocfs2: reduce stack size with KASAN

2017-03-02 Thread Joe Perches
On Thu, 2017-03-02 at 23:22 +0100, Arnd Bergmann wrote:
> On Thu, Mar 2, 2017 at 6:46 PM, Joe Perches  wrote:
> > On Thu, 2017-03-02 at 17:38 +0100, Arnd Bergmann wrote:
> > > The internal logging infrastructure in ocfs2 causes special warning code 
> > > to be
> > > used with KASAN, which produces rather large stack frames:
> > > fs/ocfs2/super.c: In function 'ocfs2_fill_super':
> > > fs/ocfs2/super.c:1219:1: error: the frame size of 3264 bytes is larger 
> > > than 3072 bytes [-Werror=frame-larger-than=]
> > 
> > At least by default it doesn't seem to.
> > 
> > gcc 6.2 allyesconfig, CONFIG_KASAN=y
> > with either CONFIG_KASAN_INLINE or CONFIG_KASAN_OUTLINE
> > 
> > gcc doesn't emit a stack warning
> 
> The warning is disabled until patch 26/26. which picks the 3072 default.
> The 3264 number was with gcc-7, which is worse than gcc-6 since it enables
> an extra check.
> 
> > > By simply passing the mask by value instead of reference, we can avoid the
> > > problem completely.
> > 
> > Any idea why that's so?
> 
> With KASAN, every time we inline the function, the compiler has to allocate
> space for another copy of the variable plus a redzone to detect whether
> passing it by reference into another function causes an overflow at runtime.

These logging functions aren't inlined.
You're referring to the stack frame?

> > >  On 64-bit architectures, this is also more efficient,
> > 
> > Efficient true, but the same overall stack no?
> 
> Here is what I see with CONFIG_FRAME_WARN=300 and x86_64-linux-gcc-6.3.1:
> 
> before:
[]
> fs/ocfs2/super.c:1219:1: error: the frame size of 552 bytes is larger
> than 300 bytes [-Werror=frame-larger-than=]
> 
> after:
> fs/ocfs2/super.c: In function 'ocfs2_fill_super':
> fs/ocfs2/super.c:1219:1: error: the frame size of 472 bytes is larger
> than 300 bytes [-Werror=frame-larger-than=]
> 
> and with gcc-7.0.1 (including -fsanitize-address-use-after-scope), before:
[]
> fs/ocfs2/super.c:1219:1: error: the frame size of 3264 bytes is larger
> than 300 bytes [-Werror=frame-larger-than=]
> 
> after:
> fs/ocfs2/super.c: In function 'ocfs2_fill_super':
> fs/ocfs2/super.c:1219:1: error: the frame size of 704 bytes is larger
> than 300 bytes [-Werror=frame-larger-than=]

Still doesn't make sense to me.

None of the logging functions are inlined as they are all
EXPORT_SYMBOL.

This just changes a pointer to a u64, which is the same
size on x86-64 (and is of course larger on x86-32).

Perhaps KASAN has the odd behavior and working around
KASAN's behavior may not be the proper thing to do.

Maybe if CONFIG_KASAN is set, the minimum stack should
be increased via THREAD_SIZE_ORDER or some such.



[PATCH net 2/2] nfp: correct DMA direction in XDP DMA sync

2017-03-02 Thread Jakub Kicinski
dma_sync_single_for_*() takes the direction in which the buffer
was mapped, not the direction of the sync.  We should sync XDP
buffers bidirectionally.

Fixes: ecd63a0217d5 ("nfp: add XDP support in the driver")
Signed-off-by: Jakub Kicinski 
---
 drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c 
b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 00a83218857a..9179a99563af 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -1498,7 +1498,7 @@ nfp_net_tx_xdp_buf(struct nfp_net *nn, struct 
nfp_net_rx_ring *rx_ring,
txbuf->real_len = pkt_len;
 
dma_sync_single_for_device(&nn->pdev->dev, rxbuf->dma_addr + pkt_off,
-  pkt_len, DMA_TO_DEVICE);
+  pkt_len, DMA_BIDIRECTIONAL);
 
/* Build TX descriptor */
txd = &tx_ring->txds[wr_idx];
@@ -1611,7 +1611,7 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, 
int budget)
 
dma_sync_single_for_cpu(&nn->pdev->dev,
rxbuf->dma_addr + pkt_off,
-   pkt_len, DMA_FROM_DEVICE);
+   pkt_len, DMA_BIDIRECTIONAL);
act = nfp_net_run_xdp(xdp_prog, rxbuf->frag + data_off,
  pkt_len);
switch (act) {
-- 
2.11.0



[PATCH net 1/2] nfp: don't tell FW about the reserved buffer space

2017-03-02 Thread Jakub Kicinski
Since commit c0f031bc8866 ("nfp_net: use alloc_frag() and build_skb()")
we are allocating buffers which have to hold both the data and skb to
be created in place by build_skb().

FW should only be told about the buffer space it can DMA to, that
is without the build_skb() headroom and tailroom.  Note: firmware
applications should validate the buffers against both MTU and
free list buffer size so oversized packets would not pass through
the NIC anyway.

Fixes: c0f031bc8866 ("nfp: use alloc_frag() and build_skb()")
Signed-off-by: Jakub Kicinski 
---
 drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c 
b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 074259cc8e06..00a83218857a 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -2198,7 +2198,8 @@ static int __nfp_net_set_config_and_enable(struct nfp_net 
*nn)
nfp_net_write_mac_addr(nn);
 
nn_writel(nn, NFP_NET_CFG_MTU, nn->netdev->mtu);
-   nn_writel(nn, NFP_NET_CFG_FLBUFSZ, nn->fl_bufsz);
+   nn_writel(nn, NFP_NET_CFG_FLBUFSZ,
+ nn->fl_bufsz - NFP_NET_RX_BUF_NON_DATA);
 
/* Enable device */
new_ctrl |= NFP_NET_CFG_CTRL_ENABLE;
-- 
2.11.0



Re: [PATCH 24/26] ocfs2: reduce stack size with KASAN

2017-03-02 Thread Joe Perches
On Thu, 2017-03-02 at 23:59 +0100, Arnd Bergmann wrote:
> KASAN decides that passing a pointer to _m into an extern function
> (_mlog_printk) is potentially dangerous, as that function might
> keep a reference to that pointer after it goes out of scope,
> or it might not know the correct length of the stack object pointed to.
> 
> We can see from looking at the __mlog_printk() function definition
> that it's actually safe, but the compiler cannot see that when looking
> at another source file.

OK, thanks.

btw:

changing __mlog_printk can save ~11% (90+KB) of object text size
by removing __func__ and __LINE__ and using vsprintf pointer extension
%pS, __builtin_return_address(0) as it is already used in dlmmaster.

(defconfig x86-64, with ocfs2)

$ size fs/ocfs2/built-in.o*
   text    data     bss     dec     hex filename
 759791  111373  105688  976852   ee7d4 fs/ocfs2/built-in.o.new
 852959  111373  105688 1070020  1053c4 fs/ocfs2/built-in.o.old

It's nearly the same output.

---

 fs/ocfs2/cluster/masklog.c | 8 
 fs/ocfs2/cluster/masklog.h | 8 +++-
 2 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/fs/ocfs2/cluster/masklog.c b/fs/ocfs2/cluster/masklog.c
index d331c2386b94..a3f080f37108 100644
--- a/fs/ocfs2/cluster/masklog.c
+++ b/fs/ocfs2/cluster/masklog.c
@@ -64,8 +64,7 @@ static ssize_t mlog_mask_store(u64 mask, const char *buf, 
size_t count)
return count;
 }
 
-void __mlog_printk(const u64 *mask, const char *func, int line,
-  const char *fmt, ...)
+void __mlog_printk(const u64 *mask, const char *fmt, ...)
 {
struct va_format vaf;
va_list args;
@@ -90,9 +89,10 @@ void __mlog_printk(const u64 *mask, const char *func, int 
line,
vaf.fmt = fmt;
vaf.va = &args;
 
-   printk("%s(%s,%u,%u):%s:%d %s%pV",
+   printk("%s(%s,%u,%u):%pS %s%pV",
   level, current->comm, task_pid_nr(current),
-  raw_smp_processor_id(), func, line, prefix, &vaf);
+  raw_smp_processor_id(), __builtin_return_address(0),
+  prefix, &vaf);
 
va_end(args);
 }
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h
index 3c16da69605d..56ba5baf625b 100644
--- a/fs/ocfs2/cluster/masklog.h
+++ b/fs/ocfs2/cluster/masklog.h
@@ -162,9 +162,8 @@ extern struct mlog_bits mlog_and_bits, mlog_not_bits;
 
 #endif
 
-__printf(4, 5) __nocapture(2)
-void __mlog_printk(const u64 *m, const char *func, int line,
-  const char *fmt, ...);
+__printf(2, 3) __nocapture(2)
+void __mlog_printk(const u64 *m, const char *fmt, ...);
 
 /*
  * Testing before the __mlog_printk call lets the compiler eliminate the
@@ -174,8 +173,7 @@ void __mlog_printk(const u64 *m, const char *func, int line,
 do {   \
u64 _m = MLOG_MASK_PREFIX | (mask); \
if (_m & ML_ALLOWED_BITS)   \
-   __mlog_printk(&_m, __func__, __LINE__, fmt, \
- ##__VA_ARGS__);   \
+   __mlog_printk(&_m, fmt, ##__VA_ARGS__); \
 } while (0)
 
 #define mlog_errno(st) ({  \


Re: [PATCH 02/26] rewrite READ_ONCE/WRITE_ONCE

2017-03-02 Thread Christian Borntraeger
On 03/02/2017 06:55 PM, Arnd Bergmann wrote:
> On Thu, Mar 2, 2017 at 5:51 PM, Christian Borntraeger
>  wrote:
>> On 03/02/2017 05:38 PM, Arnd Bergmann wrote:
>>>
>>> This attempts a rewrite of the two macros, using a simpler implementation
>>> for the most common case of having a naturally aligned 1, 2, 4, or (on
>>> 64-bit architectures) 8  byte object that can be accessed with a single
>>> instruction.  For these, we go back to a volatile pointer dereference
>>> that we had with the ACCESS_ONCE macro.
>>
>> We had changed that back then because gcc 4.6 and 4.7 had a bug that could
>> removed the volatile statement on aggregate types like the following one
>>
>> union ipte_control {
>> unsigned long val;
>> struct {
>> unsigned long k  : 1;
>> unsigned long kh : 31;
>> unsigned long kg : 32;
>> };
>> };
>>
>> See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58145
>>
>> If I see that right, your __ALIGNED_WORD(x)
>> macro would say that for above structure  sizeof(x) == sizeof(long)) is true,
>> so it would fall back to the old volatile cast and might reintroduce the
>> old compiler bug?

Oh dear, I should double check my sentences in emails before sending...anyway
the full story is referenced in 

commit 60815cf2e05057db5b78e398d9734c493560b11e
Merge tag 'for-linus' of 
git://git.kernel.org/pub/scm/linux/kernel/git/borntraeger/linux
which has a pointer to
http://marc.info/?i=54611D86.4040306%40de.ibm.com
which contains the full story.

> 
> Ah, right, that's the missing piece. For some reason I didn't find
> the reference in the source or the git log.
> 
>> Could you maybe you fence your simple macro for anything older than 4.9? 
>> After
>> all there was no kasan support anyway on these older gcc version.
> 
> Yes, that should work, thanks!



Re: [Patch net v2] ipv6: ignore null_entry in inet6_rtm_getroute() too

2017-03-02 Thread David Miller
From: Cong Wang 
Date: Wed,  1 Mar 2017 20:48:39 -0800

> Like commit 1f17e2f2c8a8 ("net: ipv6: ignore null_entry on route dumps"),
> we need to ignore null entry in inet6_rtm_getroute() too.
> 
> Return -ENETUNREACH here to sync with IPv4 behavior, as suggested by David.
> 
> Fixes: a1a22c1206 ("net: ipv6: Keep nexthop of multipath route on admin down")
> Reported-by: Dmitry Vyukov 
> Cc: David Ahern 
> Signed-off-by: Cong Wang 

Applied, thanks.


[PATCH net] rxrpc: Fix potential NULL-pointer exception

2017-03-02 Thread David Howells
Fix a potential NULL-pointer exception in rxrpc_do_sendmsg().  The call
state check that I added should have gone into the else-body of the
if-statement where we actually have a call to check.

Found by CoverityScan CID#1414316 ("Dereference after null check").

Fixes: 540b1c48c37a ("rxrpc: Fix deadlock between call creation and 
sendmsg/recvmsg")
Reported-by: Colin Ian King 
Signed-off-by: David Howells 
---

 net/rxrpc/sendmsg.c |   15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 75c5179227f2..344c2a60627b 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -518,6 +518,13 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr 
*msg, size_t len)
ret = -EBADSLT;
if (cmd != RXRPC_CMD_SEND_DATA)
goto error_release_sock;
+   call = rxrpc_new_client_call_for_sendmsg(rx, msg, user_call_ID,
+exclusive);
+   /* The socket is now unlocked... */
+   if (IS_ERR(call))
+   return PTR_ERR(call);
+   /* ... and we have the call lock. */
+   } else {
switch (READ_ONCE(call->state)) {
case RXRPC_CALL_UNINITIALISED:
case RXRPC_CALL_CLIENT_AWAIT_CONN:
@@ -529,13 +536,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr 
*msg, size_t len)
default:
break;
}
-   call = rxrpc_new_client_call_for_sendmsg(rx, msg, user_call_ID,
-exclusive);
-   /* The socket is now unlocked... */
-   if (IS_ERR(call))
-   return PTR_ERR(call);
-   /* ... and we have the call lock. */
-   } else {
+
ret = mutex_lock_interruptible(&call->user_mutex);
release_sock(&rx->sk);
if (ret < 0) {



[PATCH net] rxrpc: Fix potential NULL-pointer exception

2017-03-02 Thread David Howells
Fix a potential NULL-pointer exception in rxrpc_do_sendmsg().  The call
state check that I added should have gone into the else-body of the
if-statement where we actually have a call to check.

Found by CoverityScan CID#1414316 ("Dereference after null check").

Fixes: 540b1c48c37a ("rxrpc: Fix deadlock between call creation and 
sendmsg/recvmsg")
Reported-by: Colin Ian King 
Signed-off-by: David Howells 
---

 net/rxrpc/sendmsg.c |   15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 31c1538c1a8d..27685d8cba1a 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -517,13 +517,6 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr 
*msg, size_t len)
ret = -EBADSLT;
if (cmd != RXRPC_CMD_SEND_DATA)
goto error_release_sock;
-   ret = -EBUSY;
-   if (call->state == RXRPC_CALL_UNINITIALISED ||
-   call->state == RXRPC_CALL_CLIENT_AWAIT_CONN ||
-   call->state == RXRPC_CALL_SERVER_PREALLOC ||
-   call->state == RXRPC_CALL_SERVER_SECURING ||
-   call->state == RXRPC_CALL_SERVER_ACCEPTING)
-   goto error_release_sock;
call = rxrpc_new_client_call_for_sendmsg(rx, msg, user_call_ID,
 exclusive);
/* The socket is now unlocked... */
@@ -531,6 +524,14 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr 
*msg, size_t len)
return PTR_ERR(call);
/* ... and we have the call lock. */
} else {
+   ret = -EBUSY;
+   if (call->state == RXRPC_CALL_UNINITIALISED ||
+   call->state == RXRPC_CALL_CLIENT_AWAIT_CONN ||
+   call->state == RXRPC_CALL_SERVER_PREALLOC ||
+   call->state == RXRPC_CALL_SERVER_SECURING ||
+   call->state == RXRPC_CALL_SERVER_ACCEPTING)
+   goto error_release_sock;
+
ret = mutex_lock_interruptible(&call->user_mutex);
release_sock(&rx->sk);
if (ret < 0) {



[PATCH net v5 1/2] net: ethernet: bgmac: init sequence bug

2017-03-02 Thread Jon Mason
Fix a bug in the 'bgmac' driver init sequence, which blindly writes the
init sequence where it should preserve most bits other than the ones it
is deliberately manipulating.

The code now checks to see if the adapter needs to be brought out of
reset (whereas before it was doing an IDM write to bring it out of
reset regardless of whether it was in reset or not).  Also, removed
unnecessary usleeps (as there is already a read present to flush the
IDM writes).

Signed-off-by: Zac Schroff 
Signed-off-by: Jon Mason 
Fixes: f6a95a24957 ("net: ethernet: bgmac: Add platform device support")
---
 drivers/net/ethernet/broadcom/bgmac-platform.c | 27 ++++++++++++++++++---------
 drivers/net/ethernet/broadcom/bgmac.h  | 16 ++++++++++++++++
 2 files changed, 34 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bgmac-platform.c 
b/drivers/net/ethernet/broadcom/bgmac-platform.c
index 7b1af95..da1b8b2 100644
--- a/drivers/net/ethernet/broadcom/bgmac-platform.c
+++ b/drivers/net/ethernet/broadcom/bgmac-platform.c
@@ -51,8 +51,7 @@ static void platform_bgmac_idm_write(struct bgmac *bgmac, u16 
offset, u32 value)
 
 static bool platform_bgmac_clk_enabled(struct bgmac *bgmac)
 {
-   if ((bgmac_idm_read(bgmac, BCMA_IOCTL) &
-(BCMA_IOCTL_CLK | BCMA_IOCTL_FGC)) != BCMA_IOCTL_CLK)
+   if ((bgmac_idm_read(bgmac, BCMA_IOCTL) & BGMAC_CLK_EN) != BGMAC_CLK_EN)
return false;
if (bgmac_idm_read(bgmac, BCMA_RESET_CTL) & BCMA_RESET_CTL_RESET)
return false;
@@ -61,15 +60,25 @@ static bool platform_bgmac_clk_enabled(struct bgmac *bgmac)
 
 static void platform_bgmac_clk_enable(struct bgmac *bgmac, u32 flags)
 {
-   bgmac_idm_write(bgmac, BCMA_IOCTL,
-   (BCMA_IOCTL_CLK | BCMA_IOCTL_FGC | flags));
-   bgmac_idm_read(bgmac, BCMA_IOCTL);
+   u32 val;
 
-   bgmac_idm_write(bgmac, BCMA_RESET_CTL, 0);
-   bgmac_idm_read(bgmac, BCMA_RESET_CTL);
-   udelay(1);
+   /* The Reset Control register only contains a single bit to show if the
+* controller is currently in reset.  Do a sanity check here, just in
+* case the bootloader happened to leave the device in reset.
+*/
+   val = bgmac_idm_read(bgmac, BCMA_RESET_CTL);
+   if (val) {
+   bgmac_idm_write(bgmac, BCMA_RESET_CTL, 0);
+   bgmac_idm_read(bgmac, BCMA_RESET_CTL);
+   udelay(1);
+   }
 
-   bgmac_idm_write(bgmac, BCMA_IOCTL, (BCMA_IOCTL_CLK | flags));
+   val = bgmac_idm_read(bgmac, BCMA_IOCTL);
+   /* Some bits of BCMA_IOCTL set by HW/ATF and should not change */
+   val |= flags & ~(BGMAC_AWCACHE | BGMAC_ARCACHE | BGMAC_AWUSER |
+BGMAC_ARUSER);
+   val |= BGMAC_CLK_EN;
+   bgmac_idm_write(bgmac, BCMA_IOCTL, val);
bgmac_idm_read(bgmac, BCMA_IOCTL);
udelay(1);
 }
diff --git a/drivers/net/ethernet/broadcom/bgmac.h 
b/drivers/net/ethernet/broadcom/bgmac.h
index 248727d..6d1c6ff 100644
--- a/drivers/net/ethernet/broadcom/bgmac.h
+++ b/drivers/net/ethernet/broadcom/bgmac.h
@@ -213,6 +213,22 @@
 /* BCMA GMAC core specific IO Control (BCMA_IOCTL) flags */
 #define BGMAC_BCMA_IOCTL_SW_CLKEN  0x0004  /* PHY Clock 
Enable */
 #define BGMAC_BCMA_IOCTL_SW_RESET  0x0008  /* PHY Reset */
+/* The IOCTL values appear to be different in NS, NSP, and NS2, and do not 
match
+ * the values directly above
+ */
+#define BGMAC_CLK_EN   BIT(0)
+#define BGMAC_RESERVED_0   BIT(1)
+#define BGMAC_SOURCE_SYNC_MODE_EN  BIT(2)
+#define BGMAC_DEST_SYNC_MODE_ENBIT(3)
+#define BGMAC_TX_CLK_OUT_INVERT_EN BIT(4)
+#define BGMAC_DIRECT_GMII_MODE BIT(5)
+#define BGMAC_CLK_250_SEL  BIT(6)
+#define BGMAC_AWCACHE  (0xf << 7)
+#define BGMAC_RESERVED_1   (0x1f << 11)
+#define BGMAC_ARCACHE  (0xf << 16)
+#define BGMAC_AWUSER   (0x3f << 20)
+#define BGMAC_ARUSER   (0x3f << 26)
+#define BGMAC_RESERVED BIT(31)
 
 /* BCMA GMAC core specific IO status (BCMA_IOST) flags */
 #define BGMAC_BCMA_IOST_ATTACHED   0x0800
-- 
2.7.4



[PATCH net v5 2/2] net: ethernet: bgmac: mac address change bug

2017-03-02 Thread Jon Mason
From: Hari Vyas 

ndo_set_mac_address() passes struct sockaddr * as 2nd parameter to
bgmac_set_mac_address() but code assumed u8 *.  This caused two bytes
to be chopped and the wrong MAC address to be configured.

Signed-off-by: Hari Vyas 
Signed-off-by: Jon Mason 
Fixes: 4e209001b86 ("bgmac: write mac address to hardware in 
ndo_set_mac_address")
---
 drivers/net/ethernet/broadcom/bgmac.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bgmac.c 
b/drivers/net/ethernet/broadcom/bgmac.c
index 4150467..fd66fca 100644
--- a/drivers/net/ethernet/broadcom/bgmac.c
+++ b/drivers/net/ethernet/broadcom/bgmac.c
@@ -1223,12 +1223,16 @@ static netdev_tx_t bgmac_start_xmit(struct sk_buff *skb,
 static int bgmac_set_mac_address(struct net_device *net_dev, void *addr)
 {
struct bgmac *bgmac = netdev_priv(net_dev);
+   struct sockaddr *sa = addr;
int ret;
 
ret = eth_prepare_mac_addr_change(net_dev, addr);
if (ret < 0)
return ret;
-   bgmac_write_mac_address(bgmac, (u8 *)addr);
+
+   ether_addr_copy(net_dev->dev_addr, sa->sa_data);
+   bgmac_write_mac_address(bgmac, net_dev->dev_addr);
+
eth_commit_mac_addr_change(net_dev, addr);
return 0;
 }
-- 
2.7.4



Re: [PATCH 24/26] ocfs2: reduce stack size with KASAN

2017-03-02 Thread Arnd Bergmann
On Thu, Mar 2, 2017 at 11:40 PM, Joe Perches  wrote:
> On Thu, 2017-03-02 at 23:22 +0100, Arnd Bergmann wrote:
>> On Thu, Mar 2, 2017 at 6:46 PM, Joe Perches  wrote:
>> > On Thu, 2017-03-02 at 17:38 +0100, Arnd Bergmann wrote:
>> > > The internal logging infrastructure in ocfs2 causes special warning code 
>> > > to be
>> > > used with KASAN, which produces rather large stack frames:
>> > > fs/ocfs2/super.c: In function 'ocfs2_fill_super':
>> > > fs/ocfs2/super.c:1219:1: error: the frame size of 3264 bytes is larger 
>> > > than 3072 bytes [-Werror=frame-larger-than=]
>> >
>> > At least by default it doesn't seem to.
>> >
>> > gcc 6.2 allyesconfig, CONFIG_KASAN=y
>> > with either CONFIG_KASAN_INLINE or CONFIG_KASAN_OUTLINE
>> >
>> > gcc doesn't emit a stack warning
>>
>> The warning is disabled until patch 26/26, which picks the 3072 default.
>> The 3264 number was with gcc-7, which is worse than gcc-6 since it enables
>> an extra check.
>>
>> > > By simply passing the mask by value instead of reference, we can avoid 
>> > > the
>> > > problem completely.
>> >
>> > Any idea why that's so?
>>
>> With KASAN, every time we inline the function, the compiler has to allocate
>> space for another copy of the variable plus a redzone to detect whether
>> passing it by reference into another function causes an overflow at runtime.
>
> These logging functions aren't inlined.

Sorry, my mistake. In this case mlog() is a macro, not an inline function.
The effect is the same though.

> You're referring to the stack frame?

The stack frame of the function that calls mlog(), yes.
>
> Still doesn't make sense to me.
>
> None of the logging functions are inlined as they are all
> EXPORT_SYMBOL.

mlog() is placed in the calling function.

> This just changes a pointer to a u64, which is the same
> size on x86-64 (and is of course larger on x86-32).

KASAN decides that passing a pointer to _m into an extern function
(__mlog_printk) is potentially dangerous, as that function might
keep a reference to that pointer after it goes out of scope,
or it might not know the correct length of the stack object pointed to.

We can see from looking at the __mlog_printk() function definition
that it's actually safe, but the compiler cannot see that when looking
at another source file.

> Perhaps KASAN has the odd behavior and working around
> KASAN's behavior may not be the proper thing to do.

Turning off KASAN fixes the problem, but the entire purpose of
KASAN is to identify code that is potentially dangerous.

> Maybe if CONFIG_KASAN is set, the minimum stack should
> be increased via THREAD_SIZE_ORDER or some such.

This is what happened in 3f181b4d8652 ("lib/Kconfig.debug:
disable -Wframe-larger-than warnings with KASAN=y").

I'm trying to revert that patch so we actually get warnings
again about functions that are still dangerous. I picked 3072
as an arbitrary limit, as there are only a handful of files
that use larger stack frames in the worst case, but we can
only use that limit after fixing up all the warnings it shows.

   Arnd


[PATCH net v5 0/2] net: ethernet: bgmac: bug fixes

2017-03-02 Thread Jon Mason
Changes in v5:
* Rebased to the latest code and fixed up a compile error due to the
  mac_addr struct going away (found by David Miller)

Changes in v4:
* Added the udelays from the previous code (per David Miller)

Changes in v3:
* Reworked the init sequence patch to only remove the device reset if
  the device is actually in reset.  Given that this code doesn't bear
  much resemblance to the original code, I'm changing the author of the
  patch.  This was tested on NS2 SVK.

Changes in v2:
* Reworked the first patch to make it more obvious what portions of the
  register were being preserved (Per Rafal Mileki)
* Style change to reorder the function variables in patch 2 (per Sergei
  Shtylyov)


Bug fixes for bgmac driver


Hari Vyas (1):
  net: ethernet: bgmac: mac address change bug

Jon Mason (1):
  net: ethernet: bgmac: init sequence bug

 drivers/net/ethernet/broadcom/bgmac-platform.c | 27 ++++++++++++++++++---------
 drivers/net/ethernet/broadcom/bgmac.c  |  6 +++++-
 drivers/net/ethernet/broadcom/bgmac.h  | 16 ++++++++++++++++
 3 files changed, 39 insertions(+), 10 deletions(-)

-- 
2.7.4



[PATCH] xen-netback: fix race condition on XenBus disconnect

2017-03-02 Thread Igor Druzhinin
In some cases during XenBus disconnect event handling and subsequent
queue resource release there may be some TX handlers active on
other processors. Use RCU in order to synchronize with them.

Signed-off-by: Igor Druzhinin 
---
 drivers/net/xen-netback/interface.c | 13 ++++++++-----
 drivers/net/xen-netback/xenbus.c| 17 +++++++----------
 2 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/drivers/net/xen-netback/interface.c 
b/drivers/net/xen-netback/interface.c
index a2d32676..32e2cc6 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -164,7 +164,7 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct 
net_device *dev)
 {
struct xenvif *vif = netdev_priv(dev);
struct xenvif_queue *queue = NULL;
-   unsigned int num_queues = vif->num_queues;
+   unsigned int num_queues = rcu_dereference(vif)->num_queues;
u16 index;
struct xenvif_rx_cb *cb;
 
@@ -221,18 +221,21 @@ static struct net_device_stats *xenvif_get_stats(struct 
net_device *dev)
 {
struct xenvif *vif = netdev_priv(dev);
struct xenvif_queue *queue = NULL;
+   unsigned int num_queues;
u64 rx_bytes = 0;
u64 rx_packets = 0;
u64 tx_bytes = 0;
u64 tx_packets = 0;
unsigned int index;
 
-   spin_lock(&vif->lock);
-   if (vif->queues == NULL)
+   rcu_read_lock();
+
+   num_queues = rcu_dereference(vif)->num_queues;
+   if (num_queues < 1)
goto out;
 
/* Aggregate tx and rx stats from each queue */
-   for (index = 0; index < vif->num_queues; ++index) {
+   for (index = 0; index < num_queues; ++index) {
queue = &vif->queues[index];
rx_bytes += queue->stats.rx_bytes;
rx_packets += queue->stats.rx_packets;
@@ -241,7 +244,7 @@ static struct net_device_stats *xenvif_get_stats(struct 
net_device *dev)
}
 
 out:
-   spin_unlock(&vif->lock);
+   rcu_read_unlock();
 
vif->dev->stats.rx_bytes = rx_bytes;
vif->dev->stats.rx_packets = rx_packets;
diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
index d2d7cd9..76efb01 100644
--- a/drivers/net/xen-netback/xenbus.c
+++ b/drivers/net/xen-netback/xenbus.c
@@ -495,26 +495,23 @@ static void backend_disconnect(struct backend_info *be)
struct xenvif *vif = be->vif;
 
if (vif) {
+   unsigned int num_queues = vif->num_queues;
unsigned int queue_index;
-   struct xenvif_queue *queues;
 
xen_unregister_watchers(vif);
 #ifdef CONFIG_DEBUG_FS
xenvif_debugfs_delif(vif);
 #endif /* CONFIG_DEBUG_FS */
xenvif_disconnect_data(vif);
-   for (queue_index = 0;
-queue_index < vif->num_queues;
-++queue_index)
-   xenvif_deinit_queue(&vif->queues[queue_index]);
 
-   spin_lock(&vif->lock);
-   queues = vif->queues;
vif->num_queues = 0;
-   vif->queues = NULL;
-   spin_unlock(&vif->lock);
+   synchronize_net();
 
-   vfree(queues);
+   for (queue_index = 0; queue_index < num_queues; ++queue_index)
+   xenvif_deinit_queue(&vif->queues[queue_index]);
+
+   vfree(vif->queues);
+   vif->queues = NULL;
 
xenvif_disconnect_ctrl(vif);
}
-- 
1.8.3.1



Re: [PATCH v2] net: pch_gbe: Fix TX RX descriptor accesses for big endian systems

2017-03-02 Thread David Miller
From: Hassan Naveed 
Date: Tue, 28 Feb 2017 18:13:22 -0800

> Fix pch_gbe driver for ethernet operations for a big endian CPU.
> Values written to and read from transmit and receive descriptors
> in the pch_gbe driver are byte swapped from the perspective of a
> big endian CPU, since the ethernet controller always operates in
> little endian mode. Rectify this by appropriately byte swapping
> these descriptor field values in the driver software.
> 
> Signed-off-by: Hassan Naveed 
> Reviewed-by: Paul Burton 
> Reviewed-by: Matt Redfearn 
> Cc: Paul Burton 
> Cc: Matt Redfearn 
> Cc: David S. Miller 
> Cc: Florian Westphal 
> Cc: françois romieu 
> ---
> Changes in v2: Additionally changed transmit and receive descriptors field
> types to __le{16,32}. Ran sparse with endianness checking enabled and no
> new warnings were generated.

This doesn't apply cleanly to the 'net' tree, please respin.


Re: pull-request: wireless-drivers 2017-03-02

2017-03-02 Thread David Miller
From: Kalle Valo 
Date: Thu, 02 Mar 2017 11:15:42 +0200

> only one patch this time, the new version of ath10k patch we reverted
> earlier. I was supposed to send you this earlier but it got delayed
> because I'm on vacation, so sorry about that. I'm hoping this is ok to
> take also even the merge window is closed as the changes to the first
> version of the patch are minimal.
> 
> Please let me know if there are any problems.

Pulled, thanks.


Re: [PATCH 24/26] ocfs2: reduce stack size with KASAN

2017-03-02 Thread Arnd Bergmann
On Thu, Mar 2, 2017 at 6:46 PM, Joe Perches  wrote:
> On Thu, 2017-03-02 at 17:38 +0100, Arnd Bergmann wrote:
>> The internal logging infrastructure in ocfs2 causes special warning code to 
>> be
>> used with KASAN, which produces rather large stack frames:
>
>> fs/ocfs2/super.c: In function 'ocfs2_fill_super':
>> fs/ocfs2/super.c:1219:1: error: the frame size of 3264 bytes is larger than 
>> 3072 bytes [-Werror=frame-larger-than=]
>
> At least by default it doesn't seem to.
>
> gcc 6.2 allyesconfig, CONFIG_KASAN=y
> with either CONFIG_KASAN_INLINE or CONFIG_KASAN_OUTLINE
>
> gcc doesn't emit a stack warning

The warning is disabled until patch 26/26, which picks the 3072 default.
The 3264 number was with gcc-7, which is worse than gcc-6 since it enables
an extra check.

>> By simply passing the mask by value instead of reference, we can avoid the
>> problem completely.
>
> Any idea why that's so?

With KASAN, every time we inline the function, the compiler has to allocate
space for another copy of the variable plus a redzone to detect whether
passing it by reference into another function causes an overflow at runtime.

>>  On 64-bit architectures, this is also more efficient,
>
> Efficient true, but the same overall stack no?

Here is what I see with CONFIG_FRAME_WARN=300 and x86_64-linux-gcc-6.3.1:

before:
fs/ocfs2/super.c: In function 'ocfs2_parse_options.isra.3':
fs/ocfs2/super.c:1508:1: error: the frame size of 352 bytes is larger
than 300 bytes [-Werror=frame-larger-than=]
fs/ocfs2/super.c: In function 'ocfs2_enable_quotas':
fs/ocfs2/super.c:974:1: error: the frame size of 344 bytes is larger
than 300 bytes [-Werror=frame-larger-than=]
fs/ocfs2/super.c: In function 'ocfs2_fill_super':
fs/ocfs2/super.c:1219:1: error: the frame size of 552 bytes is larger
than 300 bytes [-Werror=frame-larger-than=]

after:
fs/ocfs2/super.c: In function 'ocfs2_fill_super':
fs/ocfs2/super.c:1219:1: error: the frame size of 472 bytes is larger
than 300 bytes [-Werror=frame-larger-than=]

and with gcc-7.0.1 (including -fsanitize-address-use-after-scope), before:
fs/ocfs2/super.c: In function 'ocfs2_check_volume':
fs/ocfs2/super.c:2512:1: error: the frame size of 768 bytes is larger
than 300 bytes [-Werror=frame-larger-than=]
fs/ocfs2/super.c: In function 'ocfs2_statfs':
fs/ocfs2/super.c:1717:1: error: the frame size of 320 bytes is larger
than 300 bytes [-Werror=frame-larger-than=]
fs/ocfs2/super.c: In function 'ocfs2_parse_options.isra.3':
fs/ocfs2/super.c:1508:1: error: the frame size of 464 bytes is larger
than 300 bytes [-Werror=frame-larger-than=]
fs/ocfs2/super.c: In function 'ocfs2_enable_quotas':
fs/ocfs2/super.c:974:1: error: the frame size of 320 bytes is larger
than 300 bytes [-Werror=frame-larger-than=]
fs/ocfs2/super.c: In function 'ocfs2_remount':
fs/ocfs2/super.c:752:1: error: the frame size of 568 bytes is larger
than 300 bytes [-Werror=frame-larger-than=]
fs/ocfs2/super.c: In function 'ocfs2_initialize_super.isra.8':
fs/ocfs2/super.c:2339:1: error: the frame size of 1712 bytes is larger
than 300 bytes [-Werror=frame-larger-than=]
fs/ocfs2/super.c: In function 'ocfs2_fill_super':
fs/ocfs2/super.c:1219:1: error: the frame size of 3264 bytes is larger
than 300 bytes [-Werror=frame-larger-than=]

after:
fs/ocfs2/super.c: In function 'ocfs2_fill_super':
fs/ocfs2/super.c:1219:1: error: the frame size of 704 bytes is larger
than 300 bytes [-Werror=frame-larger-than=]

 Arnd


Re: pull-request: mac80211 2017-03-02

2017-03-02 Thread David Miller
From: Johannes Berg 
Date: Thu,  2 Mar 2017 11:34:42 +0100

> As I mentioned in my other pull request, here's the change
> for the average.h to make the precision easier to use.
> 
> Please pull and let me know if there's any problem.

Pulled, thanks Johannes.


Re: [Patch net] bonding: use ETH_MAX_MTU as max mtu

2017-03-02 Thread Jay Vosburgh
Cong Wang  wrote:

>This restores the ability of setting bond device's mtu to 9000.
>
>Fixes: 91572088e3fd ("net: use core MTU range checking in core net infra")
>Reported-by: daz...@gmail.com
>Reported-by: Brad Campbell 
>Cc: Jarod Wilson 
>Signed-off-by: Cong Wang 
>---
> drivers/net/bonding/bond_main.c | 1 +
> 1 file changed, 1 insertion(+)
>
>diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
>index 6321f12..8a4ba8b 100644
>--- a/drivers/net/bonding/bond_main.c
>+++ b/drivers/net/bonding/bond_main.c
>@@ -4179,6 +4179,7 @@ void bond_setup(struct net_device *bond_dev)
> 
>   /* Initialize the device entry points */
>   ether_setup(bond_dev);
>+  bond_dev->max_mtu = ETH_MAX_MTU;
>   bond_dev->netdev_ops = &bond_netdev_ops;
>   bond_dev->ethtool_ops = &bond_ethtool_ops;

Signed-off-by: Jay Vosburgh 

-J

---
-Jay Vosburgh, jay.vosbu...@canonical.com


Re: [Bug 194749] New: kernel bonding does not work in a network nameservice in versions above 3.10.0-229.20.1

2017-03-02 Thread Dan Geist
- On Mar 2, 2017, at 3:11 PM, Cong Wang xiyou.wangc...@gmail.com wrote

> On Thu, Mar 2, 2017 at 10:32 AM, Stephen Hemminger
>  wrote:
>>
>>
>> Begin forwarded message:
>>
>> Date: Wed, 01 Mar 2017 21:08:01 +
>> From: bugzilla-dae...@bugzilla.kernel.org
>> To: step...@networkplumber.org
>> Subject: [Bug 194749] New: kernel bonding does not work in a network 
>> nameservice
>> in versions above 3.10.0-229.20.1
>>
>>
>> https://bugzilla.kernel.org/show_bug.cgi?id=194749
>>
>> Bug ID: 194749
>>Summary: kernel bonding does not work in a network nameservice
>> in versions above 3.10.0-229.20.1
>>Product: Networking
>>Version: 2.5
>> Kernel Version: > 3.10.0-229.20.1
>>   Hardware: x86-64
>> OS: Linux
>>   Tree: Mainline
>> Status: NEW
>>   Severity: blocking
>>   Priority: P1
>>  Component: Other
>>   Assignee: step...@networkplumber.org
>>   Reporter: d...@polter.net
>> Regression: No
>>
>> bond interface is being used in active/standby mode with two physical NICs
>> inside a network nameservice to provide switchpath redundancy.
>>
>> netns is instantiated post-boot with the following:
>>
>> ip netns add vntp
>> ip link set p4p1 netns vntp
>> ip link set p4p2 netns vntp
>> ip link set bond0 netns vntp
>> ip netns exec vntp ip link set lo up
>> ip netns exec vntp ip link set p4p1 up
>> ip netns exec vntp ip link set p4p2 up
>> ip netns exec vntp ip link set bond0 up
>> ip netns exec vntp ifenslave bond0 p4p1 p4p2
> 
> This is due to the following commit:
> 
> commit f9399814927ad9bb995a6e109c2a5f9d8a848209
> Author: Weilong Chen 
> Date:   Wed Jan 22 17:16:30 2014 +0800
> 
>bonding: Don't allow bond devices to change network namespaces.
> 
>Like bridge, bonding as netdevice doesn't cross netns boundaries.
> 
>Bonding ports and bonding itself live in same netns.
> 
>Signed-off-by: Weilong Chen 
>Signed-off-by: David S. Miller 
> 
> 
> NETIF_F_NETNS_LOCAL was introduced for loopback device which
> is created for each netns, it is not clear why we need to add it to bond
> and bridge...

Thank you for tracking this down. Without digging through the code to figure it 
out, does this imply that the existence of a bond interface is not possible AT 
ALL within a netns or simply that it may not be "migrated" between the global 
scope and a netns?

In order for the CentOS network stack to init the interfaces, I've been 
creating them with the standard system configuration files:

[root@01 network-scripts]# cat ifcfg-p4p1 
TYPE=Ethernet
BOOTPROTO=none
DEVICE=p4p1
ONBOOT=yes
MASTER=bond0
SLAVE=yes
UUID=4e6c40ee-cc05-4f88-a851-b3185dbdcd0f
NAME=p4p1

[root@01 network-scripts]# cat ifcfg-p4p2
TYPE=Ethernet
BOOTPROTO=none
DEVICE=p4p2
ONBOOT=yes
MASTER=bond0
SLAVE=yes
UUID=15135328-12b8-4fe3-8940-db02b77b94d9
NAME=p4p2

[root@01 network-scripts]# cat ifcfg-bond0 
DEVICE=bond0
TYPE=Bond
BONDING_MASTER=yes
NAME=bond0
UUID=97674993-0e50-4a00-a210-c66d75481a84
ONBOOT=yes
BONDING_OPTS="updelay=0 resend_igmp=1 use_carrier=1 miimon=100 downdelay=0 
xmit_hash_policy=0 primary_reselect=0 fail_over_mac=0 arp_validate=0 
mode=active-backup lacp_rate=0 arp_interval=0 ad_select=0"
IPV6INIT=no


Perhaps by not instantiating the bond interface until after the netns is 
created, this situation can be avoided?

Thanks.
Dan

-- 
Dan Geist dan(@)polter.net



Re: [PATCH] drivers: net: ethernet: remove incorrect __exit markups

2017-03-02 Thread David Miller
From: Dmitry Torokhov 
Date: Wed, 1 Mar 2017 17:24:47 -0800

> Even if bus is not hot-pluggable, devices can be unbound from the
> driver via sysfs, so we should not be using __exit annotations on
> remove() methods. The only exception is drivers registered with
> platform_driver_probe() which specifically disables sysfs bind/unbind
> attributes.
> 
> Signed-off-by: Dmitry Torokhov 

Applied, thank you.


Re: [PATCH 1/2] dccp: Unlock sock before calling sk_free()

2017-03-02 Thread David Miller
From: Arnaldo Carvalho de Melo 
Date: Wed,  1 Mar 2017 16:35:07 -0300

> From: Arnaldo Carvalho de Melo 
> 
> The code where sk_clone() came from created a new socket and locked it,
> but then, on the error path didn't unlock it.
> 
> This problem stayed there for a long while, till b0691c8ee7c2 ("net:
> Unlock sock before calling sk_free()") fixed it, but unfortunately the
> callers of sk_clone() (now sk_clone_locked()) were not audited and the
> one in dccp_create_openreq_child() remained.
> 
> Now in the age of the syzkaller fuzzer, this was finally uncovered, as
> reported by Dmitry:
 ...
> Fix it just like was done by b0691c8ee7c2 ("net: Unlock sock before calling
> sk_free()").
> 
> Reported-by: Dmitry Vyukov 
> Cc: Cong Wang 
> Cc: Eric Dumazet 
> Cc: Gerrit Renker 
> Cc: Thomas Gleixner 
> Link: http://lkml.kernel.org/r/20170301153510.ge15...@kernel.org
> Signed-off-by: Arnaldo Carvalho de Melo 

Applied and queued up for -stable.


Re: [PATCH 2/2] net: Introduce sk_clone_lock() error path routine

2017-03-02 Thread David Miller
From: Arnaldo Carvalho de Melo 
Date: Wed,  1 Mar 2017 16:35:08 -0300

> From: Arnaldo Carvalho de Melo 
> 
> When handling problems in cloning a socket with the sk_clone_locked()
> function we need to perform several steps that were open coded in it and
> its callers, so introduce a routine to avoid this duplication:
> sk_free_unlock_clone().
> 
> Cc: Cong Wang 
> Cc: Dmitry Vyukov 
> Cc: Eric Dumazet 
> Cc: Gerrit Renker 
> Cc: Thomas Gleixner 
> Link: http://lkml.kernel.org/n/net-ui6laqkotycunhtmqryl9...@git.kernel.org
> Signed-off-by: Arnaldo Carvalho de Melo 

Applied.


Re: [PATCH net v1 0/3] amd-xgbe: AMD XGBE driver fixes 2017-02-28

2017-03-02 Thread David Miller
From: Tom Lendacky 
Date: Thu, 2 Mar 2017 15:32:29 -0600

> On 3/2/2017 3:02 PM, David Miller wrote:
>> From: Tom Lendacky 
>> Date: Tue, 28 Feb 2017 15:02:42 -0600
>>
>>> This patch series addresses some issues in the AMD XGBE driver.
>>>
>>> The following fixes are included in this driver update series:
>>>
>>> - Stop the PHY before disabling and releasing device interrupts so that
>>>   MDIO requests issued by the device can be properly handled
>>> - Set the MDIO communication mode on device startup, not just device
>>>   probe
>>> - Do not overwrite SFP settings when mod_absent is detected
>>>
>>> This patch series is based on net.
>>
>> Series applied, thanks.
> 
> Thanks David!
> 
> Could you queue these fixes up against 4.10 stable. Nothing earlier than
> that is needed.

Ok, done.


Re: [PATCH net v1 0/3] amd-xgbe: AMD XGBE driver fixes 2017-02-28

2017-03-02 Thread Tom Lendacky

On 3/2/2017 3:02 PM, David Miller wrote:

From: Tom Lendacky 
Date: Tue, 28 Feb 2017 15:02:42 -0600


This patch series addresses some issues in the AMD XGBE driver.

The following fixes are included in this driver update series:

- Stop the PHY before disabling and releasing device interrupts so that
  MDIO requests issued by the device can be properly handled
- Set the MDIO communication mode on device startup, not just device
  probe
- Do not overwrite SFP settings when mod_absent is detected

This patch series is based on net.


Series applied, thanks.


Thanks David!

Could you queue these fixes up against 4.10 stable? Nothing earlier than
that is needed.

Thanks,
Tom





Re: [PATCH 02/26] rewrite READ_ONCE/WRITE_ONCE

2017-03-02 Thread Arnd Bergmann
On Thu, Mar 2, 2017 at 8:00 PM, Christian Borntraeger
 wrote:
> On 03/02/2017 06:55 PM, Arnd Bergmann wrote:
>> On Thu, Mar 2, 2017 at 5:51 PM, Christian Borntraeger
>>  wrote:
>>> On 03/02/2017 05:38 PM, Arnd Bergmann wrote:

 This attempts a rewrite of the two macros, using a simpler implementation
 for the most common case of having a naturally aligned 1, 2, 4, or (on
 64-bit architectures) 8  byte object that can be accessed with a single
 instruction.  For these, we go back to a volatile pointer dereference
 that we had with the ACCESS_ONCE macro.
>>>
>>> We had changed that back then because gcc 4.6 and 4.7 had a bug that could
>>> remove the volatile statement on aggregate types like the following one:
>>>
>>> union ipte_control {
>>> unsigned long val;
>>> struct {
>>> unsigned long k  : 1;
>>> unsigned long kh : 31;
>>> unsigned long kg : 32;
>>> };
>>> };
>>>
>>> See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58145
>>>
>>> If I see that right, your __ALIGNED_WORD(x)
>>> macro would say that for above structure  sizeof(x) == sizeof(long)) is 
>>> true,
>>> so it would fall back to the old volatile cast and might reintroduce the
>>> old compiler bug?
>
> Oh dear, I should double check my sentences in emails before sending...anyway
> the full story is referenced in
>
> commit 60815cf2e05057db5b78e398d9734c493560b11e
> Merge tag 'for-linus' of 
> git://git.kernel.org/pub/scm/linux/kernel/git/borntraeger/linux
> which has a pointer to
> http://marc.info/?i=54611D86.4040306%40de.ibm.com
> which contains the full story.

Ok, got it. So I guess the behavior of forcing aligned accesses on aligned
data is accidental, and allowing non-power-of-two arguments is also not
the main purpose. Maybe we could just bail out on new compilers if we get
either of those? That might catch code that accidentally does something
that is inherently non-atomic or that causes a trap when the intention was
to have a simple atomic access.

 Arnd


Re: [net/bpf] 3051bf36c2 BUG: unable to handle kernel paging request at 0000a7cf

2017-03-02 Thread Daniel Borkmann

On 03/02/2017 09:23 PM, Fengguang Wu wrote:
[...]

I confirm that the below patch provided by Daniel fixes the above
issues on mainline kernel, too. Where should this patch be sent to?


If nobody objects, I could send it to -net tree via Dave due to being
BPF related, but I don't mind sending it elsewhere too (f.e. Linus
directly?) in order to stop your bot from continuing to send such mails.

The issue seems only related to i386 and doesn't trigger each time with
Fengguang's kernel config and qemu image when I try to reproduce it.
set_memory_ro()/set_memory_rw() on i386 seems to work in general, but
when it's used/reproduced, from time to time (perhaps some corner-case?)
it looks like that memory area can have issues much later on after being
fed back to the allocator which then causes a GPF from random locations.
Gut feeling, it might be an issue in set_memory_*() that my commit
uncovered. Still looking into it, but mean-time I could just send the
below, sure.

Thanks,
Daniel


It'd be very noisy if all these Oops hit the upcoming RC1 kernel.

Daniel thinks there may be deeper problem in i386 set_memory_rw().
However that could take much longer time to debug.

Thanks,
Fengguang
---

Re: [bpf] 9d876e79df:  BUG: unable to handle kernel paging request at 653a8346


On Tue, Feb 28, 2017 at 04:39:36PM +0100, Daniel Borkmann wrote:


I have a rough feeling what it is, but I didn't have cycles to work on
it yet (due to travel, sorry about that). The issue is likely shut down
by just doing:

---
arch/x86/Kconfig |2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

--- linux.orig/arch/x86/Kconfig 2017-03-03 03:44:35.962022996 +0800
+++ linux/arch/x86/Kconfig 2017-03-03 03:44:35.962022996 +0800
@@ -54,7 +54,7 @@ config X86
 select ARCH_HAS_KCOV if X86_64
 select ARCH_HAS_MMIO_FLUSH
 select ARCH_HAS_PMEM_API if X86_64
-select ARCH_HAS_SET_MEMORY
+select ARCH_HAS_SET_MEMORY if X86_64
 select ARCH_HAS_SG_CHAIN
 select ARCH_HAS_STRICT_KERNEL_RWX
 select ARCH_HAS_STRICT_MODULE_RWX




Re: [PATCH] Net: openvswitch: actions: fixed a brace coding style warning

2017-03-02 Thread David Miller
From: Peter Downs 
Date: Wed,  1 Mar 2017 01:01:17 -0800

> Fixed a brace coding style warning reported by checkpatch.pl
> 
> Signed-off-by: Peter Downs 

Applied.


Re: [PATCH 0/2] pull request for net: batman-adv 2017-03-01

2017-03-02 Thread David Miller
From: Simon Wunderlich 
Date: Wed,  1 Mar 2017 16:53:31 +0100

> here are two bugfixes which we would like to see integrated into net.
> 
> Please pull or let me know of any problem!

Pulled, thanks Simon.


Re: [PATCH net] cxgb4: update latest firmware version supported

2017-03-02 Thread David Miller
From: Ganesh Goudar 
Date: Wed,  1 Mar 2017 11:18:53 +0530

> Change t4fw_version.h to update latest firmware version
> number to 1.16.33.0.
> 
> Signed-off-by: Ganesh Goudar 

Applied.


Re: [PATCH net v4 0/2] net: ethernet: bgmac: bug fixes

2017-03-02 Thread David Miller
From: David Miller 
Date: Thu, 02 Mar 2017 12:50:15 -0800 (PST)

> From: Jon Mason 
> Date: Tue, 28 Feb 2017 13:41:49 -0500
> 
>> Changes in v4:
>> * Added the udelays from the previous code (per David Miller)
>> 
>> Changes in v3:
>> * Reworked the init sequence patch to only remove the device reset if
>>   the device is actually in reset.  Given that this code doesn't bear
>>   much resemblance to the original code, I'm changing the author of the
>>   patch.  This was tested on NS2 SVK.
>> 
>> Changes in v2:
>> * Reworked the first match to make it more obvious what portions of the
>>   register were being preserved (Per Rafal Mileki)
>> * Style change to reorder the function variables in patch 2 (per Sergei
>>   Shtylyov)
>> 
>> Bug fixes for bgmac driver
> 
> Series applied.

Actually, this doesn't even compile.  Reverted...

[davem@kkuri net]$ make -s -j4
drivers/net/ethernet/broadcom/bgmac.c: In function ‘bgmac_set_mac_address’:
drivers/net/ethernet/broadcom/bgmac.c:1233:23: error: ‘struct bgmac’ has no 
member named ‘mac_addr’; did you mean ‘phyaddr’?
  ether_addr_copy(bgmac->mac_addr, sa->sa_data);
   ^~
drivers/net/ethernet/broadcom/bgmac.c:1234:38: error: ‘struct bgmac’ has no 
member named ‘mac_addr’; did you mean ‘phyaddr’?
  bgmac_write_mac_address(bgmac, bgmac->mac_addr);
  ^~


Re: [PATCH-v4-RESEND 1/4] vsock: track pkt owner vsock

2017-03-02 Thread David Miller
From: Peng Tao 
Date: Wed,  1 Mar 2017 11:56:24 +0800

> So that we can cancel a queued pkt later if necessary.
> 
> Reviewed-by: Stefan Hajnoczi 
> Signed-off-by: Peng Tao 
> ---
>  include/linux/virtio_vsock.h| 2 ++
>  net/vmw_vsock/virtio_transport_common.c | 7 +++
>  2 files changed, 9 insertions(+)
> 
> diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h
> index 9638bfe..193ad3a 100644
> --- a/include/linux/virtio_vsock.h
> +++ b/include/linux/virtio_vsock.h
> @@ -48,6 +48,7 @@ struct virtio_vsock_pkt {
>   struct virtio_vsock_hdr hdr;
>   struct work_struct work;
>   struct list_head list;
> + void *cancel_token; /* only used for cancellation */

The type here is fixed, you only store vhost_sock object pointers
here, so don't use "void *" please.


Re: [PATCH net v1 0/3] amd-xgbe: AMD XGBE driver fixes 2017-02-28

2017-03-02 Thread David Miller
From: Tom Lendacky 
Date: Tue, 28 Feb 2017 15:02:42 -0600

> This patch series addresses some issues in the AMD XGBE driver.
> 
> The following fixes are included in this driver update series:
> 
> - Stop the PHY before disabling and releasing device interrupts so that
>   MDIO requests issued by the device can be properly handled
> - Set the MDIO communication mode on device startup, not just device
>   probe
> - Do not overwrite SFP settings when mod_absent is detected
> 
> This patch series is based on net.

Series applied, thanks.


Re: [PATCH] drivers: net: xgene: Fix crash on DT systems

2017-03-02 Thread David Miller
From: Alban Bedel 
Date: Tue, 28 Feb 2017 18:08:55 +0100

> On DT systems the driver requires a clock, but the probe just prints a
> warning and continues, leading to a crash when resetting the device.
> To fix this crash and properly handle probe deferrals, only ignore the
> missing clock if DT isn't used or if the clock doesn't exist.
> 
> Signed-off-by: Alban Bedel 

Applied, thanks.


Re: [PATCH net v4 0/2] net: ethernet: bgmac: bug fixes

2017-03-02 Thread David Miller
From: Jon Mason 
Date: Tue, 28 Feb 2017 13:41:49 -0500

> Changes in v4:
> * Added the udelays from the previous code (per David Miller)
> 
> Changes in v3:
> * Reworked the init sequence patch to only remove the device reset if
>   the device is actually in reset.  Given that this code doesn't bear
>   much resemblance to the original code, I'm changing the author of the
>   patch.  This was tested on NS2 SVK.
> 
> Changes in v2:
> * Reworked the first match to make it more obvious what portions of the
>   register were being preserved (Per Rafal Mileki)
> * Style change to reorder the function variables in patch 2 (per Sergei
>   Shtylyov)
> 
> Bug fixes for bgmac driver

Series applied.


Re: [Patch net v3] ipv6: check for ip6_null_entry in __ip6_del_rt_siblings()

2017-03-02 Thread David Miller
From: David Ahern 
Date: Wed, 1 Mar 2017 15:03:45 -0800

> On 2/27/17 4:07 PM, Cong Wang wrote:
>> Andrey reported a NULL pointer deref bug in ipv6_route_ioctl()
>> -> ip6_route_del() -> __ip6_del_rt_siblings() code path. This is
>> because ip6_null_entry is returned in this path since ip6_null_entry
>> is kinda default for a ipv6 route table root node. Quote from
>> David Ahern:
>> 
>>  ip6_null_entry is the root of all ipv6 fib tables making it integrated
>>  into the table ...
>> 
>> We should ignore any attempt of trying to delete it, like we do in
>> __ip6_del_rt() path and several others.
>> 
>> Reported-by: Andrey Konovalov 
>> Fixes: 0ae8133586ad ("net: ipv6: Allow shorthand delete of all nexthops in 
>> multipath route")
>> Cc: David Ahern 
>> Cc: Eric Dumazet 
>> Signed-off-by: Cong Wang 
>> ---
>>  net/ipv6/route.c | 14 +-
>>  1 file changed, 9 insertions(+), 5 deletions(-)
> 
> 
> Acked-by: David Ahern 

Applied, thanks.


Re: [PATCH 02/26] rewrite READ_ONCE/WRITE_ONCE

2017-03-02 Thread Christian Borntraeger
On 03/02/2017 05:38 PM, Arnd Bergmann wrote:
> When CONFIG_KASAN is enabled, the READ_ONCE/WRITE_ONCE macros cause
> rather large kernel stacks, e.g.:
> 
> mm/vmscan.c: In function 'shrink_page_list':
> mm/vmscan.c:1333:1: error: the frame size of 3456 bytes is larger than 3072 
> bytes [-Werror=frame-larger-than=]
> block/cfq-iosched.c: In function 'cfqg_stats_add_aux':
> block/cfq-iosched.c:750:1: error: the frame size of 4048 bytes is larger than 
> 3072 bytes [-Werror=frame-larger-than=]
> fs/btrfs/disk-io.c: In function 'open_ctree':
> fs/btrfs/disk-io.c:3314:1: error: the frame size of 3136 bytes is larger than 
> 3072 bytes [-Werror=frame-larger-than=]
> fs/btrfs/relocation.c: In function 'build_backref_tree':
> fs/btrfs/relocation.c:1193:1: error: the frame size of 4336 bytes is larger 
> than 3072 bytes [-Werror=frame-larger-than=]
> fs/fscache/stats.c: In function 'fscache_stats_show':
> fs/fscache/stats.c:287:1: error: the frame size of 6512 bytes is larger than 
> 3072 bytes [-Werror=frame-larger-than=]
> fs/jbd2/commit.c: In function 'jbd2_journal_commit_transaction':
> fs/jbd2/commit.c:1139:1: error: the frame size of 3760 bytes is larger than 
> 3072 bytes [-Werror=frame-larger-than=]
> 
> This attempts a rewrite of the two macros, using a simpler implementation
> for the most common case of having a naturally aligned 1, 2, 4, or (on
> 64-bit architectures) 8  byte object that can be accessed with a single
> instruction.  For these, we go back to a volatile pointer dereference
> that we had with the ACCESS_ONCE macro.

We had changed that back then because gcc 4.6 and 4.7 had a bug that could
remove the volatile statement on aggregate types like the following one:

union ipte_control {
unsigned long val;
struct {
unsigned long k  : 1;
unsigned long kh : 31;
unsigned long kg : 32;
};
};

See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58145

If I see that right, your __ALIGNED_WORD(x)
macro would say that for above structure  sizeof(x) == sizeof(long)) is true,
so it would fall back to the old volatile cast and might reintroduce the 
old compiler bug?

Could you maybe fence your simple macro for anything older than 4.9? After
all, there was no KASAN support anyway on these older gcc versions.

Christian



[PATCH] fjes: Do not load fjes driver if system does not have extended socket device.

2017-03-02 Thread Yasuaki Ishimatsu

The fjes driver is used only by FUJITSU servers and almost all
servers in the world never use it. But currently if ACPI PNP0C02
is defined in the ACPI table, the following message is always shown:

 "FUJITSU Extended Socket Network Device Driver - version 1.2
  - Copyright (c) 2015 FUJITSU LIMITED"

The message confuses users because there is no reason for it to be
shown on other vendors' servers.

To avoid the confusion, the patch adds a check for whether the server
has an extended socket device.

Signed-off-by: Yasuaki Ishimatsu 
CC: Taku Izumi 
---
 drivers/net/fjes/fjes_main.c | 52 +++-
 1 file changed, 47 insertions(+), 5 deletions(-)

diff --git a/drivers/net/fjes/fjes_main.c b/drivers/net/fjes/fjes_main.c
index b77e4ecf..fe58c01 100644
--- a/drivers/net/fjes/fjes_main.c
+++ b/drivers/net/fjes/fjes_main.c
@@ -45,6 +45,8 @@
 MODULE_LICENSE("GPL");
 MODULE_VERSION(DRV_VERSION);

+#define ACPI_MOTHERBOARD_RESOURCE_HID "PNP0C02"
+
 static int fjes_request_irq(struct fjes_adapter *);
 static void fjes_free_irq(struct fjes_adapter *);

@@ -79,7 +81,7 @@
 static int fjes_poll(struct napi_struct *, int);

 static const struct acpi_device_id fjes_acpi_ids[] = {
-   {"PNP0C02", 0},
+   {ACPI_MOTHERBOARD_RESOURCE_HID, 0},
{"", 0},
 };
 MODULE_DEVICE_TABLE(acpi, fjes_acpi_ids);
@@ -116,18 +118,17 @@
},
 };

-static int fjes_acpi_add(struct acpi_device *device)
+static bool is_extended_socket_device(struct acpi_device *device)
 {
struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL};
char str_buf[sizeof(FJES_ACPI_SYMBOL) + 1];
-   struct platform_device *plat_dev;
union acpi_object *str;
acpi_status status;
int result;

status = acpi_evaluate_object(device->handle, "_STR", NULL, &buffer);
if (ACPI_FAILURE(status))
-   return -ENODEV;
+   return false;

str = buffer.pointer;
result = utf16s_to_utf8s((wchar_t *)str->string.pointer,
@@ -137,10 +138,21 @@ static int fjes_acpi_add(struct acpi_device *device)

if (strncmp(FJES_ACPI_SYMBOL, str_buf, strlen(FJES_ACPI_SYMBOL)) != 0) {
kfree(buffer.pointer);
-   return -ENODEV;
+   return false;
}
kfree(buffer.pointer);

+   return true;
+}
+
+static int fjes_acpi_add(struct acpi_device *device)
+{
+   struct platform_device *plat_dev;
+   acpi_status status;
+
+   if (!is_extended_socket_device(device))
+   return -ENODEV;
+
status = acpi_walk_resources(device->handle, METHOD_NAME__CRS,
 fjes_get_acpi_resource, fjes_resource);
if (ACPI_FAILURE(status))
@@ -1476,10 +1488,40 @@ static void fjes_watch_unshare_task(struct work_struct 
*work)
}
 }

+static acpi_status
+acpi_find_extended_socket_device(acpi_handle obj_handle, u32 level,
+void *context, void **return_value)
+{
+   struct acpi_device *device;
+   bool *found = context;
+   int result;
+
+   result = acpi_bus_get_device(obj_handle, &device);
+   if (result)
+   return AE_OK;
+
+   if (strcmp(acpi_device_hid(device), ACPI_MOTHERBOARD_RESOURCE_HID))
+   return AE_OK;
+
+   if (!is_extended_socket_device(device))
+   return AE_OK;
+
+   *found = true;
+   return AE_CTRL_TERMINATE;
+}
+
 /* fjes_init_module - Driver Registration Routine */
 static int __init fjes_init_module(void)
 {
int result;
+   bool found = false;
+
+   acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX,
+   acpi_find_extended_socket_device, NULL, &found,
+   NULL);
+
+   if (!found)
+   return -ENODEV;

pr_info("%s - version %s - %s\n",
fjes_driver_string, fjes_driver_version, fjes_copyright);
--
1.8.3.1



[PATCH net 1/2] xen-netback: keep a local pointer for vif in backend_disconnect()

2017-03-02 Thread Paul Durrant
This patch replaces use of 'be->vif' with 'vif' and hence generally
makes the function look tidier. No semantic change.

Signed-off-by: Paul Durrant 
---
Cc: Wei Liu 
---
 drivers/net/xen-netback/xenbus.c | 32 ++--
 1 file changed, 18 insertions(+), 14 deletions(-)

diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
index bb854f9..d82ddc9 100644
--- a/drivers/net/xen-netback/xenbus.c
+++ b/drivers/net/xen-netback/xenbus.c
@@ -492,24 +492,28 @@ static int backend_create_xenvif(struct backend_info *be)
 
 static void backend_disconnect(struct backend_info *be)
 {
-   if (be->vif) {
+   struct xenvif *vif = be->vif;
+
+   if (vif) {
unsigned int queue_index;
 
-   xen_unregister_watchers(be->vif);
+   xen_unregister_watchers(vif);
 #ifdef CONFIG_DEBUG_FS
-   xenvif_debugfs_delif(be->vif);
+   xenvif_debugfs_delif(vif);
 #endif /* CONFIG_DEBUG_FS */
-   xenvif_disconnect_data(be->vif);
-   for (queue_index = 0; queue_index < be->vif->num_queues; 
++queue_index)
-   xenvif_deinit_queue(&be->vif->queues[queue_index]);
-
-   spin_lock(&be->vif->lock);
-   vfree(be->vif->queues);
-   be->vif->num_queues = 0;
-   be->vif->queues = NULL;
-   spin_unlock(&be->vif->lock);
-
-   xenvif_disconnect_ctrl(be->vif);
+   xenvif_disconnect_data(vif);
+   for (queue_index = 0;
+queue_index < vif->num_queues;
+++queue_index)
+   xenvif_deinit_queue(&vif->queues[queue_index]);
+
+   spin_lock(&vif->lock);
+   vfree(vif->queues);
+   vif->num_queues = 0;
+   vif->queues = NULL;
+   spin_unlock(&vif->lock);
+
+   xenvif_disconnect_ctrl(vif);
}
 }
 
-- 
2.1.4



[Patch net] bonding: use ETH_MAX_MTU as max mtu

2017-03-02 Thread Cong Wang
This restores the ability of setting bond device's mtu to 9000.

Fixes: 91572088e3fd ("net: use core MTU range checking in core net infra")
Reported-by: daz...@gmail.com
Reported-by: Brad Campbell 
Cc: Jarod Wilson 
Signed-off-by: Cong Wang 
---
 drivers/net/bonding/bond_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 6321f12..8a4ba8b 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -4179,6 +4179,7 @@ void bond_setup(struct net_device *bond_dev)
 
/* Initialize the device entry points */
ether_setup(bond_dev);
+   bond_dev->max_mtu = ETH_MAX_MTU;
bond_dev->netdev_ops = &bond_netdev_ops;
bond_dev->ethtool_ops = &bond_ethtool_ops;
 
-- 
2.5.5



[PATCH 22/26] drm/i915/gvt: don't overflow the kernel stack with KASAN

2017-03-02 Thread Arnd Bergmann
Enabling CONFIG_KASAN can lead to an instant stack overflow:

drivers/gpu/drm/i915/gvt/handlers.c: In function 'init_generic_mmio_info':
drivers/gpu/drm/i915/gvt/handlers.c:2200:1: error: the frame size of 30464 
bytes is larger than 3072 bytes [-Werror=frame-larger-than=]
drivers/gpu/drm/i915/gvt/handlers.c: In function 'init_broadwell_mmio_info':
drivers/gpu/drm/i915/gvt/handlers.c:2402:1: error: the frame size of 5376 bytes 
is larger than 3072 bytes [-Werror=frame-larger-than=]
drivers/gpu/drm/i915/gvt/handlers.c: In function 'init_skl_mmio_info':
drivers/gpu/drm/i915/gvt/handlers.c:2628:1: error: the frame size of 5296 bytes 
is larger than 3072 bytes [-Werror=frame-larger-than=]

The reason is the INTEL_GVT_MMIO_OFFSET() hack that attempts to convert any type
(including i915_reg_t) into a u32 by reading the first four bytes, in 
combination
with the stack sanitizer that adds a redzone around each instance.

Originally, i915_reg_t was introduced to add a little extra type safety by
disallowing simple type casts, and INTEL_GVT_MMIO_OFFSET() goes the opposite
way by allowing any type as input, including those that are not safe in this
context.

I'm replacing it with an implementation that specifically allows the three
types that are actually used as input: 'i915_reg_t' (from _MMIO constants),
'int' (from other constants), and 'unsigned int' (from function arguments),
and any other type should now provoke a build error. This also solves the
stack overflow as we no longer use a local variable for each instance.

Signed-off-by: Arnd Bergmann 
---
 drivers/gpu/drm/i915/gvt/mmio.h | 17 -
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/mmio.h b/drivers/gpu/drm/i915/gvt/mmio.h
index 3bc620f56f35..bf40100fc626 100644
--- a/drivers/gpu/drm/i915/gvt/mmio.h
+++ b/drivers/gpu/drm/i915/gvt/mmio.h
@@ -78,13 +78,20 @@ bool intel_gvt_match_device(struct intel_gvt *gvt, unsigned 
long device);
 int intel_gvt_setup_mmio_info(struct intel_gvt *gvt);
 void intel_gvt_clean_mmio_info(struct intel_gvt *gvt);
 
+static inline u32 intel_gvt_mmio_offset(unsigned int offset)
+{
+   return offset;
+}
+
 struct intel_gvt_mmio_info *intel_gvt_find_mmio_info(struct intel_gvt *gvt,
 unsigned int offset);
-#define INTEL_GVT_MMIO_OFFSET(reg) ({ \
-   typeof(reg) __reg = reg; \
-   u32 *offset = (u32 *)&__reg; \
-   *offset; \
-})
+#define INTEL_GVT_MMIO_OFFSET(reg) \
+__builtin_choose_expr(__builtin_types_compatible_p(typeof(reg), int), 
intel_gvt_mmio_offset, \
+__builtin_choose_expr(__builtin_types_compatible_p(typeof(reg), unsigned int), 
intel_gvt_mmio_offset, \
+__builtin_choose_expr(__builtin_types_compatible_p(typeof(reg), i915_reg_t), 
i915_mmio_reg_offset, \
+   (void)(0) \
+)))(reg)
+
 
 int intel_vgpu_init_mmio(struct intel_vgpu *vgpu);
 void intel_vgpu_reset_mmio(struct intel_vgpu *vgpu);
-- 
2.9.0



Re: [net/bpf] 3051bf36c2 BUG: unable to handle kernel paging request at 0000a7cf

2017-03-02 Thread Fengguang Wu

On Wed, Mar 01, 2017 at 08:54:26PM +0800, Fengguang Wu wrote:

Hi all,

Is it BPF triggering BUGs all over the places?


It looks so, and here is a fix.


1e74a2eb1f  Merge tag 'gcc-plugins-v4.11-rc1' of 
git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux
005c3490e9  Revert "ath10k: Search SMBIOS for OEM board file extension"
3051bf36c2  Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
+---++++
|   | 1e74a2eb1f | 
005c3490e9 | 3051bf36c2 |
+---++++
| boot_successes| 1223   | 1098 
  | 242|
| boot_failures | 1  | 126  
  | 72 |
| BUG:unable_to_handle_kernel   | 1  | 117  
  | 69 |
| Oops  | 1  | 126  
  | 72 |
| EIP:perf_callchain_user   | 1  |  
  ||
| Kernel_panic-not_syncing:Fatal_exception  | 1  | 121  
  | 67 |
| EIP:netlink_release   | 0  | 20   
  | 3  |
| EIP:bpf_prog_free | 0  | 22   
  | 3  |
| EIP:filp_close| 0  | 64   
  | 23 |
| EIP:netlink_update_listeners  | 0  | 10   
  | 9  |
| EIP:security_inode_getattr| 0  | 2
  ||
| EIP:__lock_acquire| 0  | 1
  | 11 |
| Kernel_panic-not_syncing:Fatal_exception_in_interrupt | 0  | 5
  | 4  |
| EIP:__rcu_process_callbacks   | 0  | 2
  ||
| EIP:__fget_light  | 0  | 1
  ||
| EIP:__unix_remove_socket  | 0  | 0
  | 13 |
| INFO:trying_to_register_non-static_key| 0  | 0
  | 2  |
| EIP:mnt_want_write_file   | 0  | 0
  | 1  |
| EIP:skb_dequeue   | 0  | 0
  | 1  |
| EIP:strlen| 0  | 0
  | 1  |
| EIP:__netlink_lookup  | 0  | 0
  | 2  |
| EIP:vfs_fsync_range   | 0  | 0
  | 1  |
| EIP:__unix_find_socket_byname | 0  | 0
  | 1  |
| EIP:release_sock  | 0  | 0
  | 1  |
+---++++


I confirm that the below patch provided by Daniel fixes the above
issues on mainline kernel, too. Where should this patch be sent to?
It'd be very noisy if all these Oops hit the upcoming RC1 kernel.

Daniel thinks there may be deeper problem in i386 set_memory_rw().
However that could take much longer time to debug.

Thanks,
Fengguang
---

Re: [bpf] 9d876e79df:  BUG: unable to handle kernel paging request at 653a8346


On Tue, Feb 28, 2017 at 04:39:36PM +0100, Daniel Borkmann wrote:


I have a rough feeling what it is, but I didn't have cycles to work on
it yet (due to travel, sorry about that). The issue is likely shut down
by just doing:

---
arch/x86/Kconfig |2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

--- linux.orig/arch/x86/Kconfig 2017-03-03 03:44:35.962022996 +0800
+++ linux/arch/x86/Kconfig  2017-03-03 03:44:35.962022996 +0800
@@ -54,7 +54,7 @@ config X86
select ARCH_HAS_KCOV            if X86_64
select ARCH_HAS_MMIO_FLUSH
select ARCH_HAS_PMEM_API        if X86_64
-   select ARCH_HAS_SET_MEMORY
+   select ARCH_HAS_SET_MEMORY  if X86_64
select ARCH_HAS_SG_CHAIN
select ARCH_HAS_STRICT_KERNEL_RWX
select ARCH_HAS_STRICT_MODULE_RWX


Re: [PATCH] iproute2: show network device dependency tree

2017-03-02 Thread Zaboj Campula
On Wed, 2017-03-01 at 11:22 +0100, Jiri Benc wrote:
> On Tue, 28 Feb 2017 20:07:37 +, Zaboj Campula wrote:
> > Well it is impossible to draw a simple tree showing the configuration
> > exactly with all details. May be it is too ambitious to draw a tree
> > at all.
> 
> I tried that and failed. I didn't want to have something that would
> work only "somehow" as that would create confusion instead of helping.

OK, I give up. My patch was naive and I deleted it.
Nevertheless I still think it would be useful to show network
interfaces dependencies in a pure text format.


> Consider the very simple case of an interface with two vlan interfaces
> and both of them in a bridge.
> 
>  vlan0
>   /\
> eth0 br0
>   \/
>  vlan1
> 
> You can't represent this in a tree view. And this is just a very simple
> example, in reality it tends to be much more complex.

Perhaps something like that:
eth0
vlan0
br0
vlan1
br0


Re: Fw: [Bug 194749] New: kernel bonding does not work in a network nameservice in versions above 3.10.0-229.20.1

2017-03-02 Thread Cong Wang
On Thu, Mar 2, 2017 at 10:32 AM, Stephen Hemminger
 wrote:
>
>
> Begin forwarded message:
>
> Date: Wed, 01 Mar 2017 21:08:01 +
> From: bugzilla-dae...@bugzilla.kernel.org
> To: step...@networkplumber.org
> Subject: [Bug 194749] New: kernel bonding does not work in a network 
> nameservice in versions above 3.10.0-229.20.1
>
>
> https://bugzilla.kernel.org/show_bug.cgi?id=194749
>
> Bug ID: 194749
>Summary: kernel bonding does not work in a network nameservice
> in versions above 3.10.0-229.20.1
>Product: Networking
>Version: 2.5
> Kernel Version: > 3.10.0-229.20.1
>   Hardware: x86-64
> OS: Linux
>   Tree: Mainline
> Status: NEW
>   Severity: blocking
>   Priority: P1
>  Component: Other
>   Assignee: step...@networkplumber.org
>   Reporter: d...@polter.net
> Regression: No
>
> bond interface is being used in active/standby mode with two physical NICs
> inside a network nameservice to provide switchpath redundancy.
>
> netns is instantiated post-boot with the following:
>
> ip netns add vntp
> ip link set p4p1 netns vntp
> ip link set p4p2 netns vntp
> ip link set bond0 netns vntp
> ip netns exec vntp ip link set lo up
> ip netns exec vntp ip link set p4p1 up
> ip netns exec vntp ip link set p4p2 up
> ip netns exec vntp ip link set bond0 up
> ip netns exec vntp ifenslave bond0 p4p1 p4p2

This is due to the following commit:

commit f9399814927ad9bb995a6e109c2a5f9d8a848209
Author: Weilong Chen 
Date:   Wed Jan 22 17:16:30 2014 +0800

bonding: Don't allow bond devices to change network namespaces.

Like bridge, bonding as netdevice doesn't cross netns boundaries.

Bonding ports and bonding itself live in same netns.

Signed-off-by: Weilong Chen 
Signed-off-by: David S. Miller 


NETIF_F_NETNS_LOCAL was introduced for loopback device which
is created for each netns, it is not clear why we need to add it to bond
and bridge...


[PATCH 24/26] ocfs2: reduce stack size with KASAN

2017-03-02 Thread Arnd Bergmann
The internal logging infrastructure in ocfs2 causes special warning code to be
used with KASAN, which produces rather large stack frames:

fs/ocfs2/super.c: In function 'ocfs2_fill_super':
fs/ocfs2/super.c:1219:1: error: the frame size of 3264 bytes is larger than 
3072 bytes [-Werror=frame-larger-than=]

By simply passing the mask by value instead of reference, we can avoid the
problem completely. On 64-bit architectures, this is also more efficient,
while on the less common (at least among ocfs2 users) 32-bit architectures,
I'm guessing that the resulting code is comparable to what it was before.

The current version was introduced by Joe Perches as an optimization, maybe
he can see if my change regresses compared to his.

Cc: Joe Perches 
Fixes: 7c2bd2f930ae ("ocfs2: reduce object size of mlog uses")
Signed-off-by: Arnd Bergmann 
---
 fs/ocfs2/cluster/masklog.c | 10 +-
 fs/ocfs2/cluster/masklog.h |  4 ++--
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/fs/ocfs2/cluster/masklog.c b/fs/ocfs2/cluster/masklog.c
index d331c2386b94..9720c5443e4d 100644
--- a/fs/ocfs2/cluster/masklog.c
+++ b/fs/ocfs2/cluster/masklog.c
@@ -64,7 +64,7 @@ static ssize_t mlog_mask_store(u64 mask, const char *buf, 
size_t count)
return count;
 }
 
-void __mlog_printk(const u64 *mask, const char *func, int line,
+void __mlog_printk(const u64 mask, const char *func, int line,
   const char *fmt, ...)
 {
struct va_format vaf;
@@ -72,14 +72,14 @@ void __mlog_printk(const u64 *mask, const char *func, int 
line,
const char *level;
const char *prefix = "";
 
-   if (!__mlog_test_u64(*mask, mlog_and_bits) ||
-   __mlog_test_u64(*mask, mlog_not_bits))
+   if (!__mlog_test_u64(mask, mlog_and_bits) ||
+   __mlog_test_u64(mask, mlog_not_bits))
return;
 
-   if (*mask & ML_ERROR) {
+   if (mask & ML_ERROR) {
level = KERN_ERR;
prefix = "ERROR: ";
-   } else if (*mask & ML_NOTICE) {
+   } else if (mask & ML_NOTICE) {
level = KERN_NOTICE;
} else {
level = KERN_INFO;
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h
index 308ea0eb35fd..0d0f4bf2c3d8 100644
--- a/fs/ocfs2/cluster/masklog.h
+++ b/fs/ocfs2/cluster/masklog.h
@@ -163,7 +163,7 @@ extern struct mlog_bits mlog_and_bits, mlog_not_bits;
 #endif
 
 __printf(4, 5)
-void __mlog_printk(const u64 *m, const char *func, int line,
+void __mlog_printk(const u64 m, const char *func, int line,
   const char *fmt, ...);
 
 /*
@@ -174,7 +174,7 @@ void __mlog_printk(const u64 *m, const char *func, int line,
 do {   \
u64 _m = MLOG_MASK_PREFIX | (mask); \
if (_m & ML_ALLOWED_BITS)   \
-   __mlog_printk(&_m, __func__, __LINE__, fmt, \
+   __mlog_printk(_m, __func__, __LINE__, fmt,  \
  ##__VA_ARGS__);   \
 } while (0)
 
-- 
2.9.0



Re: net/sctp: use-after-free in sctp_association_put

2017-03-02 Thread Dmitry Vyukov
On Thu, Mar 2, 2017 at 9:06 AM, Xin Long  wrote:
> On Thu, Mar 2, 2017 at 3:18 AM, Dmitry Vyukov  wrote:
>> Hello,
>>
>> I've got the following report while running syzkaller fuzzer on
>> linux-next/8813198236a044b76e251dcae937b180dd527999:
>>
>> BUG: KASAN: use-after-free in sctp_association_destroy
>> net/sctp/associola.c:416 [inline] at addr 8801c0fa415c
>> BUG: KASAN: use-after-free in sctp_association_put+0x294/0x300
>> net/sctp/associola.c:881 at addr 8801c0fa415c
>> Read of size 1 by task syz-executor1/10956
>> CPU: 1 PID: 10956 Comm: syz-executor1 Not tainted 4.10.0-rc7-next-20170213 #1
>> Hardware name: Google Google Compute Engine/Google Compute Engine,
>> BIOS Google 01/01/2011
>> Call Trace:
>>  
>>  __dump_stack lib/dump_stack.c:15 [inline]
>>  dump_stack+0x2ee/0x3ef lib/dump_stack.c:51
>>  kasan_object_err+0x1c/0x70 mm/kasan/report.c:162
>>  print_address_description mm/kasan/report.c:200 [inline]
>>  kasan_report_error mm/kasan/report.c:289 [inline]
>>  kasan_report.part.2+0x1e5/0x4b0 mm/kasan/report.c:311
>>  kasan_report mm/kasan/report.c:329 [inline]
>>  __asan_report_load1_noabort+0x29/0x30 mm/kasan/report.c:329
>>  sctp_association_destroy net/sctp/associola.c:416 [inline]
>>  sctp_association_put+0x294/0x300 net/sctp/associola.c:881
>>  sctp_generate_timeout_event+0x115/0x360 net/sctp/sm_sideeffect.c:317
>>  sctp_generate_t1_init_event+0x1a/0x20 net/sctp/sm_sideeffect.c:329
>>  call_timer_fn+0x241/0x820 kernel/time/timer.c:1308
>>  expire_timers kernel/time/timer.c:1348 [inline]
>>  __run_timers+0x9e7/0xe90 kernel/time/timer.c:1642
>>  run_timer_softirq+0x21/0x80 kernel/time/timer.c:1655
>>  __do_softirq+0x31f/0xbe7 kernel/softirq.c:284
>>  invoke_softirq kernel/softirq.c:364 [inline]
>>  irq_exit+0x1cc/0x200 kernel/softirq.c:405
>>  exiting_irq arch/x86/include/asm/apic.h:658 [inline]
>>  smp_apic_timer_interrupt+0x76/0xa0 arch/x86/kernel/apic/apic.c:962
>>  apic_timer_interrupt+0x93/0xa0 arch/x86/entry/entry_64.S:707
>> RIP: 0010:arch_local_irq_enable arch/x86/include/asm/paravirt.h:788 [inline]
>> RIP: 0010:__raw_spin_unlock_irq include/linux/spinlock_api_smp.h:168 [inline]
>> RIP: 0010:_raw_spin_unlock_irq+0x56/0x70 kernel/locking/spinlock.c:199
>> RSP: 0018:8801c280f178 EFLAGS: 0286 ORIG_RAX: ff10
>> RAX: dc00 RBX: 8801dbf24a00 RCX: 0006
>> RDX: 10a18d03 RSI: 8801d71c88e0 RDI: 850c6818
>> RBP: 8801c280f180 R08: 0002 R09: 
>> R10: 0006 R11:  R12: 8801c0f3a4c0
>> R13: 110038501e38 R14: 8801d71c80c0 R15: 8801d71c80c0
>>  
>>  finish_lock_switch kernel/sched/sched.h:1248 [inline]
>>  finish_task_switch+0x1c2/0x720 kernel/sched/core.c:2792
>>  context_switch kernel/sched/core.c:2928 [inline]
>>  __schedule+0x893/0x2290 kernel/sched/core.c:3468
>>  preempt_schedule_common+0x35/0x60 kernel/sched/core.c:3579
>>  _cond_resched+0x17/0x20 kernel/sched/core.c:4977
>>  slab_pre_alloc_hook mm/slab.h:427 [inline]
>>  slab_alloc mm/slab.c:3390 [inline]
>>  __do_kmalloc mm/slab.c:3730 [inline]
>>  __kmalloc_track_caller+0x26a/0x690 mm/slab.c:3747
>>  kstrdup+0x39/0x70 mm/util.c:54
>>  snd_timer_instance_new+0xfc/0x5d0 sound/core/timer.c:110
>>  snd_timer_open+0x878/0x1740 sound/core/timer.c:290
>>  snd_timer_user_tselect sound/core/timer.c:1621 [inline]
>>  __snd_timer_user_ioctl sound/core/timer.c:1901 [inline]
>>  snd_timer_user_ioctl+0x9b1/0x34a0 sound/core/timer.c:1931
>>  vfs_ioctl fs/ioctl.c:43 [inline]
>>  do_vfs_ioctl+0x1bf/0x1790 fs/ioctl.c:683
>>  SYSC_ioctl fs/ioctl.c:698 [inline]
>>  SyS_ioctl+0x8f/0xc0 fs/ioctl.c:689
>>  entry_SYSCALL_64_fastpath+0x1f/0xc2
>> RIP: 0033:0x44fb59
>> RSP: 002b:7f0dc184db58 EFLAGS: 0212 ORIG_RAX: 0010
>> RAX: ffda RBX: 40345410 RCX: 0044fb59
>> RDX: 20001000 RSI: 40345410 RDI: 0005
>> RBP: 0005 R08:  R09: 
>> R10:  R11: 0212 R12: 00708000
>> R13: 00a5fc57 R14: 7f0dc184e9c0 R15: 
>> Object at 8801c0fa4140, in cache kmalloc-4096 size: 4096
>> Allocated:
>> PID = 10965
>>  save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:57
>>  save_stack+0x43/0xd0 mm/kasan/kasan.c:504
>>  set_track mm/kasan/kasan.c:516 [inline]
>>  kasan_kmalloc+0xaa/0xd0 mm/kasan/kasan.c:607
>>  kmem_cache_alloc_trace+0x10b/0x670 mm/slab.c:3634
>>  kmalloc include/linux/slab.h:490 [inline]
>>  kzalloc include/linux/slab.h:663 [inline]
>>  sctp_association_new+0x114/0x2120 net/sctp/associola.c:306
>>  sctp_sendmsg+0x1585/0x38f0 net/sctp/socket.c:1835
>>  inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:761
>>  sock_sendmsg_nosec net/socket.c:633 [inline]
>>  sock_sendmsg+0xca/0x110 net/socket.c:643
>>  ___sys_sendmsg+0x8fa/0x9f0 net/socket.c:1985
>>  __sys_sendmsg+0x138/0x300 net/socket.c:2019
>>  SYSC_sendmsg 

Re: [PATCH 7/7] net: stmmac: dwc-qos: Add Tegra186 support

2017-03-02 Thread Joao Pinto
Às 5:24 PM de 2/23/2017, Thierry Reding escreveu:
> From: Thierry Reding 
> 
> The NVIDIA Tegra186 SoC contains an instance of the Synopsys DWC
> ethernet QOS IP core. The binding that it uses is slightly different
> from existing ones because of the integration (clocks, resets, ...).
> 
> Signed-off-by: Thierry Reding 
> ---
>  .../ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c| 252 
> +
>  1 file changed, 252 insertions(+)
> 
> diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c 
> b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c
> index 5071d3c15adc..54dfbdc48f6d 100644
> --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c
> +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c
> @@ -14,6 +14,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -22,10 +23,24 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  
>  #include "stmmac_platform.h"
>  
> +struct tegra_eqos {
> + struct device *dev;
> + void __iomem *regs;
> +
> + struct reset_control *rst;
> + struct clk *clk_master;
> + struct clk *clk_slave;
> + struct clk *clk_tx;
> + struct clk *clk_rx;
> +
> + struct gpio_desc *reset;
> +};
> +
>  static int dwc_eth_dwmac_config_dt(struct platform_device *pdev,
>  struct plat_stmmacenet_data *plat_dat)
>  {
> @@ -148,6 +163,237 @@ static int dwc_qos_remove(struct platform_device *pdev)
>   return 0;
>  }
>  
> +#define SDMEMCOMPPADCTRL 0x8800
> +#define  SDMEMCOMPPADCTRL_PAD_E_INPUT_OR_E_PWRD BIT(31)
> +
> +#define AUTO_CAL_CONFIG 0x8804
> +#define  AUTO_CAL_CONFIG_START BIT(31)
> +#define  AUTO_CAL_CONFIG_ENABLE BIT(29)
> +
> +#define AUTO_CAL_STATUS 0x880c
> +#define  AUTO_CAL_STATUS_ACTIVE BIT(31)
> +
> +static void tegra_eqos_fix_speed(void *priv, unsigned int speed)
> +{
> + struct tegra_eqos *eqos = priv;
> + unsigned long rate = 125000000;
> + bool needs_calibration = false;
> + unsigned int i;
> + u32 value;
> +
> + switch (speed) {
> + case SPEED_1000:
> + needs_calibration = true;
> + rate = 125000000;
> + break;
> +
> + case SPEED_100:
> + needs_calibration = true;
> + rate = 25000000;
> + break;
> +
> + case SPEED_10:
> + rate = 2500000;
> + break;
> +
> + default:
> + dev_err(eqos->dev, "invalid speed %u\n", speed);
> + break;
> + }
> +
> + if (needs_calibration) {
> + /* calibrate */
> + value = readl(eqos->regs + SDMEMCOMPPADCTRL);
> + value |= SDMEMCOMPPADCTRL_PAD_E_INPUT_OR_E_PWRD;
> + writel(value, eqos->regs + SDMEMCOMPPADCTRL);
> +
> + udelay(1);
> +
> + value = readl(eqos->regs + AUTO_CAL_CONFIG);
> + value |= AUTO_CAL_CONFIG_START | AUTO_CAL_CONFIG_ENABLE;
> + writel(value, eqos->regs + AUTO_CAL_CONFIG);
> +
> + for (i = 0; i <= 10; i++) {
> + value = readl(eqos->regs + AUTO_CAL_STATUS);
> + if (value & AUTO_CAL_STATUS_ACTIVE)
> + break;
> +
> + udelay(1);
> + }
> +
> + if ((value & AUTO_CAL_STATUS_ACTIVE) == 0) {
> + dev_err(eqos->dev, "calibration did not start\n");
> + goto failed;
> + }
> +
> + for (i = 0; i <= 10; i++) {
> + value = readl(eqos->regs + AUTO_CAL_STATUS);
> + if ((value & AUTO_CAL_STATUS_ACTIVE) == 0)
> + break;
> +
> + udelay(20);
> + }
> +
> + if (value & AUTO_CAL_STATUS_ACTIVE) {
> + dev_err(eqos->dev, "calibration didn't finish\n");
> + goto failed;
> + }
> +
> + failed:
> + value = readl(eqos->regs + SDMEMCOMPPADCTRL);
> + value &= ~SDMEMCOMPPADCTRL_PAD_E_INPUT_OR_E_PWRD;
> + writel(value, eqos->regs + SDMEMCOMPPADCTRL);
> + } else {
> + value = readl(eqos->regs + AUTO_CAL_CONFIG);
> + value &= ~AUTO_CAL_CONFIG_ENABLE;
> + writel(value, eqos->regs + AUTO_CAL_CONFIG);
> + }
> +
> + clk_set_rate(eqos->clk_tx, rate);
> +}
> +
> +static int tegra_eqos_init(struct platform_device *pdev, void *priv)
> +{
> + struct tegra_eqos *eqos = priv;
> + unsigned long rate;
> + u32 value;
> +
> + rate = clk_get_rate(eqos->clk_slave);
> +
> + value = readl(eqos->regs + 0xdc);
> + value = (rate / 1000000) - 1;
> + writel(value, eqos->regs + 0xdc);
> +
> + return 0;
> +}
> +
> +static void *tegra_eqos_probe(struct platform_device *pdev,
> +   struct plat_stmmacenet_data *data,
> +   

Re: [PATCH RFC v2 00/12] socket sendmsg MSG_ZEROCOPY

2017-03-02 Thread Andy Lutomirski
On Tue, Feb 28, 2017 at 7:28 PM, David Miller  wrote:
> From: Andy Lutomirski 
> Date: Tue, 28 Feb 2017 13:06:49 -0800
>
>> On Tue, Feb 28, 2017 at 12:43 PM, Willem de Bruijn
>>  wrote:
>>> On Tue, Feb 28, 2017 at 2:46 PM, Andy Lutomirski  
>>> wrote:
 On Mon, Feb 27, 2017 at 10:57 AM, Michael Kerrisk
  wrote:
> [CC += linux-...@vger.kernel.org]
>
> Hi Willem
>

>> On a send call with MSG_ZEROCOPY, the kernel pins the user pages and
>> creates skbuff fragments directly from these pages. On tx completion,
>> it notifies the socket owner that it is safe to modify memory by
>> queuing a completion notification onto the socket error queue.

 What happens if the user writes to the pages while it's not safe?

 How about if you're writing to an interface or a route that has crypto
 involved and a malicious user can make the data change in the middle
 of a crypto operation, thus perhaps leaking the entire key?  (I
 wouldn't be at all surprised if a lot of provably secure AEAD
 constructions are entirely compromised if an attacker can get the
 ciphertext and tag computed from a message that changed during the
 computation.
>>>
>>> Operations that read or write payload, such as this crypto example,
>>> but also ebpf in tc or iptables, for instance, demand a deep copy using
>>> skb_copy_ubufs before the operation.
>>>
>>> This blacklist approach requires caution, but these paths should be
>>> few and countable. It is not possible to predict at the socket layer
>>> whether a packet will encounter any such operation, so white-listing
>>> a subset of end-to-end paths is not practical.
>>
>> How about hardware that malfunctions if the packet changes out from
>> under it?  A whitelist seems quite a bit safer.
>
> These devices are already choking, because as I stated this can already
> be done via sendfile().
>
> Networking card wise this isn't an issue, chips bring the entire packet
> into their FIFO, compute checksums on the fly mid-stream, and then write
> the 16-bit checksum field before starting to write the packet onto the
> wire.
>
> I think this is completely a non-issue, and we thought about this right
> from the start when sendfile() support was added nearly two decades ago.
> If network cards from back then didn't crap out in this situation I
> think the ones out there now are probably ok.

Fair enough.


Re: commit a52ad514fdf3b8a57ca4322c92d2d8d5c6182485 net: deprecate eth_change_mtu, remove usage breaks bonding on my machine

2017-03-02 Thread Cong Wang
On Wed, Mar 1, 2017 at 8:11 AM, Brad Campbell  wrote:
> G'day Jarod,
>
> I have a pair of machines that are linked by a pair of quad port e1000 cards
> with all 4 ports bonded. The network is configured with an mtu of 9000.
>
> Kernel 4.10 fails to bring these interfaces up as it fails when trying to
> set the mtu on the bond interface higher than 1500. A bisect between 4.9 &
> 4.10 winds up identifying this commit as where it all goes wrong. If I
> modify the network config to not touch the mtu (ie leave it at 1500) then it
> comes up ok.
>
> I can individually configure each port with an mtu of 9000, so the e1000
> driver is ok, but there appears to be breakage in the bonding driver related
> to your mtu api changes.
>
> I've just reverted to an older kernel, so it's no biggie. And as it's still
> a problem in the latest git head I assume nobody else has encountered it. I
> thought it worth reporting in case it triggers a quick lightbulb.

I think we need the following patch:

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 6321f12..de47006 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -4179,6 +4179,7 @@ void bond_setup(struct net_device *bond_dev)

/* Initialize the device entry points */
ether_setup(bond_dev);
+   bond_dev->max_mtu = ETH_MAX_MTU;
bond_dev->netdev_ops = &bond_netdev_ops;
bond_dev->ethtool_ops = &bond_ethtool_ops;


[PATCH 23/26] mtd: cfi: reduce stack size with KASAN

2017-03-02 Thread Arnd Bergmann
When CONFIG_KASAN is used, we consume a lot of extra stack space:

drivers/mtd/chips/cfi_cmdset_0020.c: In function 'do_write_buffer':
drivers/mtd/chips/cfi_cmdset_0020.c:603:1: error: the frame size of 2080 bytes 
is larger than 1536 bytes [-Werror=frame-larger-than=]
drivers/mtd/chips/cfi_cmdset_0020.c: In function 'cfi_staa_erase_varsize':
drivers/mtd/chips/cfi_cmdset_0020.c:972:1: error: the frame size of 1936 bytes 
is larger than 1536 bytes [-Werror=frame-larger-than=]
drivers/mtd/chips/cfi_cmdset_0001.c: In function 'do_write_buffer':
drivers/mtd/chips/cfi_cmdset_0001.c:1841:1: error: the frame size of 1776 bytes 
is larger than 1536 bytes [-Werror=frame-larger-than=]

This marks some functions as noinline_for_kasan to reduce the
overall stack size.

Signed-off-by: Arnd Bergmann 
---
 drivers/mtd/chips/cfi_cmdset_0020.c | 8 
 include/linux/mtd/map.h | 8 
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/mtd/chips/cfi_cmdset_0020.c 
b/drivers/mtd/chips/cfi_cmdset_0020.c
index 94d3eb42c4d5..8a21e030829c 100644
--- a/drivers/mtd/chips/cfi_cmdset_0020.c
+++ b/drivers/mtd/chips/cfi_cmdset_0020.c
@@ -244,7 +244,7 @@ static struct mtd_info *cfi_staa_setup(struct map_info *map)
 }
 
 
-static inline int do_read_onechip(struct map_info *map, struct flchip *chip, 
loff_t adr, size_t len, u_char *buf)
+static noinline_for_kasan int do_read_onechip(struct map_info *map, struct 
flchip *chip, loff_t adr, size_t len, u_char *buf)
 {
map_word status, status_OK;
unsigned long timeo;
@@ -728,7 +728,7 @@ cfi_staa_writev(struct mtd_info *mtd, const struct kvec 
*vecs,
 }
 
 
-static inline int do_erase_oneblock(struct map_info *map, struct flchip *chip, 
unsigned long adr)
+static noinline_for_kasan int do_erase_oneblock(struct map_info *map, struct 
flchip *chip, unsigned long adr)
 {
struct cfi_private *cfi = map->fldrv_priv;
map_word status, status_OK;
@@ -1029,7 +1029,7 @@ static void cfi_staa_sync (struct mtd_info *mtd)
}
 }
 
-static inline int do_lock_oneblock(struct map_info *map, struct flchip *chip, 
unsigned long adr)
+static noinline_for_kasan int do_lock_oneblock(struct map_info *map, struct 
flchip *chip, unsigned long adr)
 {
struct cfi_private *cfi = map->fldrv_priv;
map_word status, status_OK;
@@ -1175,7 +1175,7 @@ static int cfi_staa_lock(struct mtd_info *mtd, loff_t 
ofs, uint64_t len)
}
return 0;
 }
-static inline int do_unlock_oneblock(struct map_info *map, struct flchip 
*chip, unsigned long adr)
+static noinline_for_kasan int do_unlock_oneblock(struct map_info *map, struct 
flchip *chip, unsigned long adr)
 {
struct cfi_private *cfi = map->fldrv_priv;
map_word status, status_OK;
diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h
index 3aa56e3104bb..8c2e241f45c7 100644
--- a/include/linux/mtd/map.h
+++ b/include/linux/mtd/map.h
@@ -316,7 +316,7 @@ static inline map_word map_word_or(struct map_info *map, 
map_word val1, map_word
return r;
 }
 
-static inline int map_word_andequal(struct map_info *map, map_word val1, 
map_word val2, map_word val3)
+static noinline_for_kasan int map_word_andequal(struct map_info *map, map_word 
val1, map_word val2, map_word val3)
 {
int i;
 
@@ -328,7 +328,7 @@ static inline int map_word_andequal(struct map_info *map, 
map_word val1, map_wor
return 1;
 }
 
-static inline int map_word_bitsset(struct map_info *map, map_word val1, 
map_word val2)
+static noinline_for_kasan int map_word_bitsset(struct map_info *map, map_word 
val1, map_word val2)
 {
int i;
 
@@ -362,7 +362,7 @@ static inline map_word map_word_load(struct map_info *map, 
const void *ptr)
return r;
 }
 
-static inline map_word map_word_load_partial(struct map_info *map, map_word 
orig, const unsigned char *buf, int start, int len)
+static noinline_for_kasan map_word map_word_load_partial(struct map_info *map, 
map_word orig, const unsigned char *buf, int start, int len)
 {
int i;
 
@@ -392,7 +392,7 @@ static inline map_word map_word_load_partial(struct 
map_info *map, map_word orig
 #define MAP_FF_LIMIT 8
 #endif
 
-static inline map_word map_word_ff(struct map_info *map)
+static noinline_for_kasan map_word map_word_ff(struct map_info *map)
 {
map_word r;
int i;
-- 
2.9.0



Re: Removing GENL_ID_GENERATE breaks userspace API

2017-03-02 Thread Johannes Berg
On Thu, 2017-03-02 at 17:50 +0100, Marcel Holtmann wrote:
> Hi Johannes,
> 
> you have removed GENL_ID_GENERATE in 4.10, but that is actually
> breaking userspace API.
> 
> commit a07ea4d9941af5a0c6f0be2a71b51ac9c083c5e5
> Author: Johannes Berg 
> Date:   Mon Oct 24 14:40:02 2016 +0200
> 
> genetlink: no longer support using static family IDs
> 
> diff --git a/include/uapi/linux/genetlink.h
> b/include/uapi/linux/genetlink.h
> index 5512c90af7e3..d9b2db4a29c6 100644
> --- a/include/uapi/linux/genetlink.h
> +++ b/include/uapi/linux/genetlink.h
> @@ -26,7 +26,6 @@ struct genlmsghdr {
>  /*
>   * List of reserved static generic netlink identifiers:
>   */
> -#define GENL_ID_GENERATE   0
>  #define GENL_ID_CTRL   NLMSG_MIN_TYPE
> 
> Since the GENL_ID_GENERATE is in include/uapi/ I would have expected
> that you leave this in. For us, you just broke userspace API with
> this change.

Huh. It makes no sense to be using it nor ever did, but I guess we can
add it back with a comment saying that it's actually needed - send a
patch?

johannes


Fw: [Bug 194763] New: bond0 fails to accept MTU change.

2017-03-02 Thread Stephen Hemminger


Begin forwarded message:

Date: Thu, 02 Mar 2017 11:22:24 +
From: bugzilla-dae...@bugzilla.kernel.org
To: step...@networkplumber.org
Subject: [Bug 194763] New: bond0 fails to accept MTU change.


https://bugzilla.kernel.org/show_bug.cgi?id=194763

Bug ID: 194763
   Summary: bond0 fails to accept MTU change.
   Product: Networking
   Version: 2.5
Kernel Version: 4.10.1
  Hardware: Intel
OS: Linux
  Tree: Mainline
Status: NEW
  Severity: normal
  Priority: P1
 Component: Other
  Assignee: step...@networkplumber.org
  Reporter: daz...@gmail.com
Regression: No

Bond0 interface doesn't accept MTU higher than 1500. I can set MTU perfectly
fine for the slave devices. Recreating the bond0 didn't help to up the MTU. 



05:00.0 Ethernet controller: Intel Corporation 82599ES 10-Gigabit SFI/SFP+
Network Connection (rev 01)
05:00.1 Ethernet controller: Intel Corporation 82599ES 10-Gigabit SFI/SFP+
Network Connection (rev 01)

[ 1626.840818] bond0: Invalid MTU 9000 requested, hw max 1500
[ 1668.913497] ixgbe 0000:05:00.0 enp5s0f0: changing MTU from 1500 to 9000
[ 1669.176015] ixgbe 0000:05:00.0 enp5s0f0: speed changed to 0 for port
enp5s0f0
[ 1669.237523] ixgbe 0000:05:00.0 enp5s0f0: detected SFP+: 5
[ 1669.487920] ixgbe 0000:05:00.0 enp5s0f0: NIC Link is Up 10 Gbps, Flow
Control: RX/TX
[ 1675.591234] ixgbe 0000:05:00.1 enp5s0f1: changing MTU from 1500 to 9000
[ 1675.855232] ixgbe 0000:05:00.1 enp5s0f1: speed changed to 0 for port
enp5s0f1
[ 1675.911259] ixgbe 0000:05:00.1 enp5s0f1: detected SFP+: 6
[ 1676.159087] ixgbe 0000:05:00.1 enp5s0f1: NIC Link is Up 10 Gbps, Flow
Control: RX/TX
[ 1684.745389] bond0: Invalid MTU 9000 requested, hw max 1500




Bonding Mode: IEEE 802.3ad Dynamic link aggregation
Transmit Hash Policy: layer3+4 (1)
MII Status: up
MII Polling Interval (ms): 0
Up Delay (ms): 0
Down Delay (ms): 0

802.3ad info
LACP rate: slow
Min links: 0
Aggregator selection policy (ad_select): stable
System priority: 65535
System MAC address: 90:e2:ba:15:68:20
Active Aggregator Info:
Aggregator ID: 2
Number of ports: 2
Actor Key: 13
Partner Key: 221
Partner Mac Address: 02:1c:73:b2:9a:29

Slave Interface: enp5s0f0
MII Status: up
Speed: 10000 Mbps
Duplex: full
Link Failure Count: 0
Permanent HW addr: 90:e2:ba:15:68:20
Slave queue ID: 0
Aggregator ID: 2
Actor Churn State: none
Partner Churn State: none
Actor Churned Count: 0
Partner Churned Count: 0
details actor lacp pdu:
system priority: 65535
system mac address: 90:e2:ba:15:68:20
port key: 13
port priority: 255
port number: 1
port state: 61
details partner lacp pdu:
system priority: 32768
system mac address: 02:1c:73:b2:9a:29
oper key: 221
port priority: 16000
port number: 373
port state: 61

Slave Interface: enp5s0f1
MII Status: up
Speed: 10000 Mbps
Duplex: full
Link Failure Count: 0
Permanent HW addr: 90:e2:ba:15:68:21
Slave queue ID: 0
Aggregator ID: 2
Actor Churn State: none
Partner Churn State: none
Actor Churned Count: 0
Partner Churned Count: 0
details actor lacp pdu:
system priority: 65535
system mac address: 90:e2:ba:15:68:20
port key: 13
port priority: 255
port number: 2
port state: 61
details partner lacp pdu:
system priority: 32768
system mac address: 02:1c:73:b2:9a:29
oper key: 221
port priority: 17000
port number: 33141
port state: 61

Settings for enp5s0f0:
Supported ports: [ FIBRE ]
Supported link modes:   1000baseT/Full 
10000baseT/Full
Supported pause frame use: Symmetric
Supports auto-negotiation: Yes
Advertised link modes:  1000baseT/Full 
10000baseT/Full
Advertised pause frame use: Symmetric
Advertised auto-negotiation: Yes
Speed: 10000Mb/s
Duplex: Full
Port: FIBRE
PHYAD: 0
Transceiver: external
Auto-negotiation: on
Supports Wake-on: d
Wake-on: d
Current message level: 0x0007 (7)
   drv probe link
Link detected: yes

Settings for enp5s0f1:
Supported ports: [ FIBRE ]
Supported link modes:   1000baseT/Full 
10000baseT/Full
Supported pause frame use: Symmetric
Supports auto-negotiation: Yes
Advertised link modes:  1000baseT/Full 
10000baseT/Full
Advertised pause frame use: Symmetric
Advertised auto-negotiation: Yes
Speed: 10000Mb/s
Duplex: Full
Port: FIBRE
PHYAD: 0
Transceiver: external
Auto-negotiation: on
Supports Wake-on: d
Wake-on: d
Current message level: 0x0007 (7)
   drv probe link
Link detected: yes

-- 
You 

Fw: [Bug 194749] New: kernel bonding does not work in a network nameservice in versions above 3.10.0-229.20.1

2017-03-02 Thread Stephen Hemminger


Begin forwarded message:

Date: Wed, 01 Mar 2017 21:08:01 +
From: bugzilla-dae...@bugzilla.kernel.org
To: step...@networkplumber.org
Subject: [Bug 194749] New: kernel bonding does not work in a network 
nameservice in versions above 3.10.0-229.20.1


https://bugzilla.kernel.org/show_bug.cgi?id=194749

Bug ID: 194749
   Summary: kernel bonding does not work in a network nameservice
in versions above 3.10.0-229.20.1
   Product: Networking
   Version: 2.5
Kernel Version: > 3.10.0-229.20.1
  Hardware: x86-64
OS: Linux
  Tree: Mainline
Status: NEW
  Severity: blocking
  Priority: P1
 Component: Other
  Assignee: step...@networkplumber.org
  Reporter: d...@polter.net
Regression: No

bond interface is being used in active/standby mode with two physical NICs
inside a network nameservice to provide switchpath redundancy.

netns is instantiated post-boot with the following:

ip netns add vntp
ip link set p4p1 netns vntp
ip link set p4p2 netns vntp
ip link set bond0 netns vntp
ip netns exec vntp ip link set lo up
ip netns exec vntp ip link set p4p1 up
ip netns exec vntp ip link set p4p2 up
ip netns exec vntp ip link set bond0 up
ip netns exec vntp ifenslave bond0 p4p1 p4p2

This works as one would expect in kernel versions up to 3.10.0-229.20.1 (CentOS
7 packages). At the next patchlevel and all subsequent versions released by the
packager, the following appears in system logs and the bond0 instantiation
fails:

Mar  1 19:33:42 fed1ntpi01 rc.local: Cannot find device "bond0"
Mar  1 19:33:42 fed1ntpi01 rc.local: Master 'bond0': Error: handshake with
driver failed. Aborting
Mar  1 19:33:42 fed1ntpi01 rc.local: Cannot find device "bond0"
Mar  1 19:33:42 fed1ntpi01 rc.local: Cannot find device "bond0"

-- 
You are receiving this mail because:
You are the assignee for the bug.


[PATCH net 2/2] xen-netback: don't vfree() queues under spinlock

2017-03-02 Thread Paul Durrant
This leads to a BUG of the following form:

[  174.512861] switch: port 2(vif3.0) entered disabled state
[  174.522735] BUG: sleeping function called from invalid context at
/home/build/linux-linus/mm/vmalloc.c:1441
[  174.523451] in_atomic(): 1, irqs_disabled(): 0, pid: 28, name: xenwatch
[  174.524131] CPU: 1 PID: 28 Comm: xenwatch Tainted: GW
4.10.0upstream-11073-g4977ab6-dirty #1
[  174.524819] Hardware name: MSI MS-7680/H61M-P23 (MS-7680), BIOS V17.0
03/14/2011
[  174.525517] Call Trace:
[  174.526217]  show_stack+0x23/0x60
[  174.526899]  dump_stack+0x5b/0x88
[  174.527562]  ___might_sleep+0xde/0x130
[  174.528208]  __might_sleep+0x35/0xa0
[  174.528840]  ? _raw_spin_unlock_irqrestore+0x13/0x20
[  174.529463]  ? __wake_up+0x40/0x50
[  174.530089]  remove_vm_area+0x20/0x90
[  174.530724]  __vunmap+0x1d/0xc0
[  174.531346]  ? delete_object_full+0x13/0x20
[  174.531973]  vfree+0x40/0x80
[  174.532594]  set_backend_state+0x18a/0xa90
[  174.533221]  ? dwc_scan_descriptors+0x24d/0x430
[  174.533850]  ? kfree+0x5b/0xc0
[  174.534476]  ? xenbus_read+0x3d/0x50
[  174.535101]  ? xenbus_read+0x3d/0x50
[  174.535718]  ? xenbus_gather+0x31/0x90
[  174.536332]  ? ___might_sleep+0xf6/0x130
[  174.536945]  frontend_changed+0x6b/0xd0
[  174.537565]  xenbus_otherend_changed+0x7d/0x80
[  174.538185]  frontend_changed+0x12/0x20
[  174.538803]  xenwatch_thread+0x74/0x110
[  174.539417]  ? woken_wake_function+0x20/0x20
[  174.540049]  kthread+0xe5/0x120
[  174.540663]  ? xenbus_printf+0x50/0x50
[  174.541278]  ? __kthread_init_worker+0x40/0x40
[  174.541898]  ret_from_fork+0x21/0x2c
[  174.548635] switch: port 2(vif3.0) entered disabled state

This patch defers the vfree() until after the spinlock is released.

Reported-by: Juergen Gross 
Signed-off-by: Paul Durrant 
---
Cc: Juergen Gross 
Cc: Wei Liu 
---
 drivers/net/xen-netback/xenbus.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
index d82ddc9..d2d7cd9 100644
--- a/drivers/net/xen-netback/xenbus.c
+++ b/drivers/net/xen-netback/xenbus.c
@@ -496,6 +496,7 @@ static void backend_disconnect(struct backend_info *be)
 
if (vif) {
unsigned int queue_index;
+   struct xenvif_queue *queues;
 
xen_unregister_watchers(vif);
 #ifdef CONFIG_DEBUG_FS
@@ -508,11 +509,13 @@ static void backend_disconnect(struct backend_info *be)
xenvif_deinit_queue(&vif->queues[queue_index]);
 
spin_lock(&vif->lock);
-   vfree(vif->queues);
+   queues = vif->queues;
vif->num_queues = 0;
vif->queues = NULL;
spin_unlock(&vif->lock);
 
+   vfree(queues);
+
xenvif_disconnect_ctrl(vif);
}
 }
-- 
2.1.4



[PATCH net 0/2] xen-netback: update memory leak fix to avoid BUG

2017-03-02 Thread Paul Durrant
Commit 9a6cdf52b85e "xen-netback: fix memory leaks on XenBus disconnect"
added missing code to fix a memory leak by calling vfree() in the
appropriate place.
Unfortunately subsequent commit f16f1df65f1c "xen-netback: protect
resource cleaning on XenBus disconnect" then wrapped this call to vfree()
in a spin lock, leading to a BUG due to incorrect context.

Patch #1 makes the existing code more readable
Patch #2 fixes the problem

Paul Durrant (2):
  xen-netback: keep a local pointer for vif in backend_disconnect()
  xen-netback: don't vfree() queues under spinlock

 drivers/net/xen-netback/xenbus.c | 31 +++
 1 file changed, 19 insertions(+), 12 deletions(-)

-- 
2.1.4



Fw: [Bug 194723] connect() to localhost stalls after 4.9 -> 4.10 upgrade

2017-03-02 Thread Stephen Hemminger


Begin forwarded message:

Date: Wed, 01 Mar 2017 12:04:45 +
From: bugzilla-dae...@bugzilla.kernel.org
To: step...@networkplumber.org
Subject: [Bug 194723] connect() to localhost stalls after 4.9 -> 4.10 upgrade


https://bugzilla.kernel.org/show_bug.cgi?id=194723

--- Comment #2 from Lutz Vieweg (l...@5t9.de) ---
Using tcpdump I found that when the connect() stalls, the initial SYN packet
appears at the "lo" interface, and is re-sent multiple times, but no ACK packet
is ever returned.

Error case with linux-4.10:

> 12:57:25.685640 IP 127.0.0.1.44074 > 127.0.0.1.dnp-sec: Flags [S], seq
> 1952288470, win 43690, options [mss 65495,sackOK,TS val 1942998659 ecr
> 0,nop,wscale 7], length 0
> 12:57:26.728890 IP 127.0.0.1.44074 > 127.0.0.1.dnp-sec: Flags [S], seq
> 1952288470, win 43690, options [mss 65495,sackOK,TS val 1942999703 ecr
> 0,nop,wscale 7], length 0
> 12:57:28.776935 IP 127.0.0.1.44074 > 127.0.0.1.dnp-sec: Flags [S], seq
> 1952288470, win 43690, options [mss 65495,sackOK,TS val 1943001751 ecr
> 0,nop,wscale 7], length 0  
...

Normal case:

> 13:01:43.037135 IP 127.0.0.1.44362 > 127.0.0.1.dnp-sec: Flags [S], seq
> 3181010757, win 43690, options [mss 65495,sackOK,TS val 3314900273 ecr
> 0,nop,wscale 7], length 0
> 13:01:43.037171 IP 127.0.0.1.dnp-sec > 127.0.0.1.44362: Flags [S.], seq
> 1934682061, ack 3181010758, win 43690, options [mss 65495,sackOK,TS val
> 2947413993 ecr 3314900273,nop,wscale 7], length 0
> 13:01:43.037196 IP 127.0.0.1.44362 > 127.0.0.1.dnp-sec: Flags [.], ack 1, win
> 342, options [nop,nop,TS val 3314900273 ecr 2947413993], length 0  


According to strace, the listening process does not even leave the select()
call it uses to wait for incoming connections to accept in the error case.

-- 
You are receiving this mail because:
You are the assignee for the bug.


[PATCH 20/26] [media] em28xx: split up em28xx_dvb_init to reduce stack size

2017-03-02 Thread Arnd Bergmann
With CONFIG_KASAN, the init function uses a large amount of kernel stack:

drivers/media/usb/em28xx/em28xx-dvb.c: In function 'em28xx_dvb_init':
drivers/media/usb/em28xx/em28xx-dvb.c:2069:1: error: the frame size of 4280 
bytes is larger than 3072 bytes [-Werror=frame-larger-than=]

By splitting out each part of the switch/case statement that has its own local
variables into a separate function, no single one of them uses more than 500 
bytes,
and with a noinline_for_kasan annotation we can ensure that they are not merged
back together.

Signed-off-by: Arnd Bergmann 
---
 drivers/media/usb/em28xx/em28xx-dvb.c | 947 ++
 1 file changed, 508 insertions(+), 439 deletions(-)

diff --git a/drivers/media/usb/em28xx/em28xx-dvb.c 
b/drivers/media/usb/em28xx/em28xx-dvb.c
index 82edd37f0d73..83125a05918a 100644
--- a/drivers/media/usb/em28xx/em28xx-dvb.c
+++ b/drivers/media/usb/em28xx/em28xx-dvb.c
@@ -934,7 +934,7 @@ static struct lgdt3306a_config 
hauppauge_01595_lgdt3306a_config = {
 
 /* -- */
 
-static int em28xx_attach_xc3028(u8 addr, struct em28xx *dev)
+static noinline_for_kasan int em28xx_attach_xc3028(u8 addr, struct em28xx *dev)
 {
struct dvb_frontend *fe;
struct xc2028_config cfg;
@@ -1126,6 +1126,492 @@ static void em28xx_unregister_dvb(struct em28xx_dvb 
*dvb)
dvb_unregister_adapter(&dvb->adapter);
 }
 
+static noinline_for_kasan int em28174_dvb_init_pctv_460e(struct em28xx *dev)
+{
+   struct em28xx_dvb *dvb = dev->dvb;
+   struct i2c_client *client;
+   struct i2c_board_info board_info;
+   struct tda10071_platform_data tda10071_pdata = {};
+   struct a8293_platform_data a8293_pdata = {};
+   int result;
+
+   /* attach demod + tuner combo */
+   tda10071_pdata.clk = 40444000, /* 40.444 MHz */
+   tda10071_pdata.i2c_wr_max = 64,
+   tda10071_pdata.ts_mode = TDA10071_TS_SERIAL,
+   tda10071_pdata.pll_multiplier = 20,
+   tda10071_pdata.tuner_i2c_addr = 0x14,
+   memset(&board_info, 0, sizeof(board_info));
+   strlcpy(board_info.type, "tda10071_cx24118", I2C_NAME_SIZE);
+   board_info.addr = 0x55;
+   board_info.platform_data = &tda10071_pdata;
+   request_module("tda10071");
+   client = i2c_new_device(&dev->i2c_adap[dev->def_i2c_bus], &board_info);
+   if (client == NULL || client->dev.driver == NULL) {
+   result = -ENODEV;
+   goto out_free;
+   }
+   if (!try_module_get(client->dev.driver->owner)) {
+   i2c_unregister_device(client);
+   result = -ENODEV;
+   goto out_free;
+   }
+   dvb->fe[0] = tda10071_pdata.get_dvb_frontend(client);
+   dvb->i2c_client_demod = client;
+
+   /* attach SEC */
+   a8293_pdata.dvb_frontend = dvb->fe[0];
+   memset(&board_info, 0, sizeof(board_info));
+   strlcpy(board_info.type, "a8293", I2C_NAME_SIZE);
+   board_info.addr = 0x08;
+   board_info.platform_data = &a8293_pdata;
+   request_module("a8293");
+   client = i2c_new_device(&dev->i2c_adap[dev->def_i2c_bus], &board_info);
+   if (client == NULL || client->dev.driver == NULL) {
+   module_put(dvb->i2c_client_demod->dev.driver->owner);
+   i2c_unregister_device(dvb->i2c_client_demod);
+   result = -ENODEV;
+   goto out_free;
+   }
+   if (!try_module_get(client->dev.driver->owner)) {
+   i2c_unregister_device(client);
+   module_put(dvb->i2c_client_demod->dev.driver->owner);
+   i2c_unregister_device(dvb->i2c_client_demod);
+   result = -ENODEV;
+   goto out_free;
+   }
+   dvb->i2c_client_sec = client;
+   result = 0;
+out_free:
+   return result;
+}
+
+static noinline_for_kasan int em28178_dvb_init_pctv_461e(struct em28xx *dev)
+{
+   struct em28xx_dvb *dvb = dev->dvb;
+   struct i2c_client *client;
+   struct i2c_adapter *i2c_adapter;
+   struct i2c_board_info board_info;
+   struct m88ds3103_platform_data m88ds3103_pdata = {};
+   struct ts2020_config ts2020_config = {};
+   struct a8293_platform_data a8293_pdata = {};
+   int result;
+
+   /* attach demod */
+   m88ds3103_pdata.clk = 2700;
+   m88ds3103_pdata.i2c_wr_max = 33;
+   m88ds3103_pdata.ts_mode = M88DS3103_TS_PARALLEL;
+   m88ds3103_pdata.ts_clk = 16000;
+   m88ds3103_pdata.ts_clk_pol = 1;
+   m88ds3103_pdata.agc = 0x99;
+   memset(&board_info, 0, sizeof(board_info));
+   strlcpy(board_info.type, "m88ds3103", I2C_NAME_SIZE);
+   board_info.addr = 0x68;
+   board_info.platform_data = &m88ds3103_pdata;
+   request_module("m88ds3103");
+   client = i2c_new_device(&dev->i2c_adap[dev->def_i2c_bus], &board_info);
+   if (client == NULL || client->dev.driver == NULL) {
+   result = -ENODEV;
+   goto out_free;
+   }
+   if 

[PATCH 18/26] [media] i2c: cx25840: avoid stack overflow with KASAN

2017-03-02 Thread Arnd Bergmann
With CONFIG_KASAN, this driver has shown a ridiculously large stack frame
in one configuration:

drivers/media/i2c/cx25840/cx25840-core.c:4960:1: error: the frame size of 94000 
bytes is larger than 2048 bytes [-Werror=frame-larger-than=]

In most builds, it's only about 3300 bytes, but that's still large enough to
risk a kernel stack overflow.

Marking the two register access functions as noinline_for_kasan avoids
the problem and brings the largest stack frame size down to 232 bytes.

Signed-off-by: Arnd Bergmann 
---
 drivers/media/i2c/cx25840/cx25840-core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/media/i2c/cx25840/cx25840-core.c 
b/drivers/media/i2c/cx25840/cx25840-core.c
index b8d3c070bfc1..fd72e5a11cb9 100644
--- a/drivers/media/i2c/cx25840/cx25840-core.c
+++ b/drivers/media/i2c/cx25840/cx25840-core.c
@@ -81,7 +81,7 @@ MODULE_PARM_DESC(debug, "Debugging messages [0=Off (default) 
1=On]");
 /* --- */
 static void cx23888_std_setup(struct i2c_client *client);
 
-int cx25840_write(struct i2c_client *client, u16 addr, u8 value)
+noinline_for_kasan int cx25840_write(struct i2c_client *client, u16 addr, u8 
value)
 {
u8 buffer[3];
buffer[0] = addr >> 8;
@@ -90,7 +90,7 @@ int cx25840_write(struct i2c_client *client, u16 addr, u8 
value)
return i2c_master_send(client, buffer, 3);
 }
 
-int cx25840_write4(struct i2c_client *client, u16 addr, u32 value)
+noinline_for_kasan int cx25840_write4(struct i2c_client *client, u16 addr, u32 
value)
 {
u8 buffer[6];
buffer[0] = addr >> 8;
-- 
2.9.0



Re: BUG due to "xen-netback: protect resource cleaning on XenBus disconnect"

2017-03-02 Thread Boris Ostrovsky
On 03/02/2017 06:56 AM, Juergen Gross wrote:
> With commits f16f1df65 and 9a6cdf52b we get in our Xen testing:
>
> [  174.512861] switch: port 2(vif3.0) entered disabled state
> [  174.522735] BUG: sleeping function called from invalid context at
> /home/build/linux-linus/mm/vmalloc.c:1441
> [  174.523451] in_atomic(): 1, irqs_disabled(): 0, pid: 28, name: xenwatch
> [  174.524131] CPU: 1 PID: 28 Comm: xenwatch Tainted: GW
> 4.10.0upstream-11073-g4977ab6-dirty #1
> [  174.524819] Hardware name: MSI MS-7680/H61M-P23 (MS-7680), BIOS V17.0
> 03/14/2011
> [  174.525517] Call Trace:
> [  174.526217]  show_stack+0x23/0x60
> [  174.526899]  dump_stack+0x5b/0x88
> [  174.527562]  ___might_sleep+0xde/0x130
> [  174.528208]  __might_sleep+0x35/0xa0
> [  174.528840]  ? _raw_spin_unlock_irqrestore+0x13/0x20
> [  174.529463]  ? __wake_up+0x40/0x50
> [  174.530089]  remove_vm_area+0x20/0x90
> [  174.530724]  __vunmap+0x1d/0xc0
> [  174.531346]  ? delete_object_full+0x13/0x20
> [  174.531973]  vfree+0x40/0x80
> [  174.532594]  set_backend_state+0x18a/0xa90
> [  174.533221]  ? dwc_scan_descriptors+0x24d/0x430
> [  174.533850]  ? kfree+0x5b/0xc0
> [  174.534476]  ? xenbus_read+0x3d/0x50
> [  174.535101]  ? xenbus_read+0x3d/0x50
> [  174.535718]  ? xenbus_gather+0x31/0x90
> [  174.536332]  ? ___might_sleep+0xf6/0x130
> [  174.536945]  frontend_changed+0x6b/0xd0
> [  174.537565]  xenbus_otherend_changed+0x7d/0x80
> [  174.538185]  frontend_changed+0x12/0x20
> [  174.538803]  xenwatch_thread+0x74/0x110
> [  174.539417]  ? woken_wake_function+0x20/0x20
> [  174.540049]  kthread+0xe5/0x120
> [  174.540663]  ? xenbus_printf+0x50/0x50
> [  174.541278]  ? __kthread_init_worker+0x40/0x40
> [  174.541898]  ret_from_fork+0x21/0x2c
> [  174.548635] switch: port 2(vif3.0) entered disabled state
>
> I believe calling vfree() when holding a spin_lock isn't a good idea.
>
> Boris, this is the dumpdata failure:
> FAILURE 4.10.0upstream-11073-g4977ab6-dirty(x86_64)
> 4.10.0upstream-11073-g4977ab6-dirty(i386)\: 2017-03-02 (tst007)


That's not the cause of the test failure though --- it's "just" a warning.

The problem here was that 64- and 32-bit build trees got out of sync
(which is my fault, I switched the former to staging but forgot to do
the same for the latter). We have in  the log:

libxl: error: libxl_create.c:564:libxl__domain_make: domain creation
fail: Operation not supported
libxl: error: libxl_create.c:931:initiate_domain_create: cannot make
domain: -3

I now have both trees use staging.

-boris




Re: [PATCH 02/26] rewrite READ_ONCE/WRITE_ONCE

2017-03-02 Thread Arnd Bergmann
On Thu, Mar 2, 2017 at 5:51 PM, Christian Borntraeger
 wrote:
> On 03/02/2017 05:38 PM, Arnd Bergmann wrote:
>>
>> This attempts a rewrite of the two macros, using a simpler implementation
>> for the most common case of having a naturally aligned 1, 2, 4, or (on
>> 64-bit architectures) 8  byte object that can be accessed with a single
>> instruction.  For these, we go back to a volatile pointer dereference
>> that we had with the ACCESS_ONCE macro.
>
> We had changed that back then because gcc 4.6 and 4.7 had a bug that could
> remove the volatile statement on aggregate types like the following one
>
> union ipte_control {
> unsigned long val;
> struct {
> unsigned long k  : 1;
> unsigned long kh : 31;
> unsigned long kg : 32;
> };
> };
>
> See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58145
>
> If I see that right, your __ALIGNED_WORD(x)
> macro would say that for above structure  sizeof(x) == sizeof(long)) is true,
> so it would fall back to the old volatile cast and might reintroduce the
> old compiler bug?

Ah, right, that's the missing piece. For some reason I didn't find
the reference in the source or the git log.

> Could you maybe fence your simple macro for anything older than 4.9? After
> all there was no kasan support anyway on these older gcc versions.

Yes, that should work, thanks!

 Arnd


Re: [PATCH 24/26] ocfs2: reduce stack size with KASAN

2017-03-02 Thread Joe Perches
On Thu, 2017-03-02 at 17:38 +0100, Arnd Bergmann wrote:
> The internal logging infrastructure in ocfs2 causes special warning code to be
> used with KASAN, which produces rather large stack frames:

> fs/ocfs2/super.c: In function 'ocfs2_fill_super':
> fs/ocfs2/super.c:1219:1: error: the frame size of 3264 bytes is larger than 
> 3072 bytes [-Werror=frame-larger-than=]

At least by default it doesn't seem to.

gcc 6.2 allyesconfig, CONFIG_KASAN=y
with either CONFIG_KASAN_INLINE or CONFIG_KASAN_OUTLINE

gcc doesn't emit a stack warning

> By simply passing the mask by value instead of reference, we can avoid the
> problem completely.

Any idea why that's so?
 
>  On 64-bit architectures, this is also more efficient,

Efficient true, but the same overall stack no?

> while on the less common (at least among ocfs2 users) 32-bit architectures,
> I'm guessing that the resulting code is comparable to what it was before.
> 
> The current version was introduced by Joe Perches as an optimization, maybe
> he can see if my change regresses compared to his.

I don't see it.

> Cc: Joe Perches 
> Fixes: 7c2bd2f930ae ("ocfs2: reduce object size of mlog uses")
> Signed-off-by: Arnd Bergmann 
> ---
>  fs/ocfs2/cluster/masklog.c | 10 +-
>  fs/ocfs2/cluster/masklog.h |  4 ++--
>  2 files changed, 7 insertions(+), 7 deletions(-)
> 
> diff --git a/fs/ocfs2/cluster/masklog.c b/fs/ocfs2/cluster/masklog.c
> index d331c2386b94..9720c5443e4d 100644
> --- a/fs/ocfs2/cluster/masklog.c
> +++ b/fs/ocfs2/cluster/masklog.c
> @@ -64,7 +64,7 @@ static ssize_t mlog_mask_store(u64 mask, const char *buf, 
> size_t count)
>   return count;
>  }
>  
> -void __mlog_printk(const u64 *mask, const char *func, int line,
> +void __mlog_printk(const u64 mask, const char *func, int line,
>  const char *fmt, ...)
>  {
>   struct va_format vaf;
> @@ -72,14 +72,14 @@ void __mlog_printk(const u64 *mask, const char *func, int 
> line,
>   const char *level;
>   const char *prefix = "";
>  
> - if (!__mlog_test_u64(*mask, mlog_and_bits) ||
> - __mlog_test_u64(*mask, mlog_not_bits))
> + if (!__mlog_test_u64(mask, mlog_and_bits) ||
> + __mlog_test_u64(mask, mlog_not_bits))
>   return;
>  
> - if (*mask & ML_ERROR) {
> + if (mask & ML_ERROR) {
>   level = KERN_ERR;
>   prefix = "ERROR: ";
> - } else if (*mask & ML_NOTICE) {
> + } else if (mask & ML_NOTICE) {
>   level = KERN_NOTICE;
>   } else {
>   level = KERN_INFO;
> diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h
> index 308ea0eb35fd..0d0f4bf2c3d8 100644
> --- a/fs/ocfs2/cluster/masklog.h
> +++ b/fs/ocfs2/cluster/masklog.h
> @@ -163,7 +163,7 @@ extern struct mlog_bits mlog_and_bits, mlog_not_bits;
>  #endif
>  
>  __printf(4, 5)
> -void __mlog_printk(const u64 *m, const char *func, int line,
> +void __mlog_printk(const u64 m, const char *func, int line,
>  const char *fmt, ...);
>  
>  /*
> @@ -174,7 +174,7 @@ void __mlog_printk(const u64 *m, const char *func, int 
> line,
>  do { \
>   u64 _m = MLOG_MASK_PREFIX | (mask); \
>   if (_m & ML_ALLOWED_BITS)   \
> - __mlog_printk(&_m, __func__, __LINE__, fmt, \
> + __mlog_printk(_m, __func__, __LINE__, fmt,  \
> ##__VA_ARGS__);   \
>  } while (0)
>  


[PATCH 16/26] [media] i2c: adv7604: mark register access as noinline_for_kasan

2017-03-02 Thread Arnd Bergmann
When building with KASAN, we get a stack frame size warning about a function
that could potentially cause a stack overflow:

drivers/media/i2c/adv7604.c: In function 'adv76xx_log_status':
drivers/media/i2c/adv7604.c:2615:1: error: the frame size of 3248 bytes is 
larger than 3072 bytes [-Werror=frame-larger-than=]

This is caused by adv76xx_read_check() being inlined repeatedly, and
marking this function as noinline_for_kasan solves the problem
completely.

Signed-off-by: Arnd Bergmann 
---
 drivers/media/i2c/adv7604.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/media/i2c/adv7604.c b/drivers/media/i2c/adv7604.c
index d8bf435db86d..176f46ac85fd 100644
--- a/drivers/media/i2c/adv7604.c
+++ b/drivers/media/i2c/adv7604.c
@@ -339,8 +339,8 @@ static inline unsigned vtotal(const struct v4l2_bt_timings 
*t)
 
 /* --- */
 
-static int adv76xx_read_check(struct adv76xx_state *state,
-int client_page, u8 reg)
+static noinline_for_kasan int adv76xx_read_check(struct adv76xx_state *state,
+int client_page, u8 reg)
 {
struct i2c_client *client = state->i2c_clients[client_page];
int err;
-- 
2.9.0



[PATCH 10/26] brcmsmac: reindent split functions

2017-03-02 Thread Arnd Bergmann
In the previous commit I left the indentation alone to help reviewing
the patch, this one now runs the three new functions through 'indent -kr -8'
with some manual fixups to avoid silliness.

No changes other than whitespace are intended here.

Signed-off-by: Arnd Bergmann 
---
 .../broadcom/brcm80211/brcmsmac/phy/phy_n.c| 1507 +---
 1 file changed, 697 insertions(+), 810 deletions(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_n.c 
b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_n.c
index d76c092bb6b4..9b39789c673d 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_n.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_n.c
@@ -16074,7 +16074,8 @@ static void wlc_phy_workarounds_nphy_rev7(struct 
brcms_phy *pi)
NPHY_REV3_RFSEQ_CMD_INT_PA_PU,
NPHY_REV3_RFSEQ_CMD_END
};
-   static const u8 rfseq_rx2tx_dlys_rev3_ipa[] = { 8, 6, 6, 4, 4, 16, 43, 
1, 1 };
+   static const u8 rfseq_rx2tx_dlys_rev3_ipa[] =
+   { 8, 6, 6, 4, 4, 16, 43, 1, 1 };
static const u16 rfseq_rx2tx_dacbufpu_rev7[] = { 0x10f, 0x10f };
u32 leg_data_weights;
u8 chan_freq_range = 0;
@@ -16114,526 +16115,452 @@ static void wlc_phy_workarounds_nphy_rev7(struct 
brcms_phy *pi)
int coreNum;
 
 
-   if (NREV_IS(pi->pubpi.phy_rev, 7)) {
-   mod_phy_reg(pi, 0x221, (0x1 << 4), (1 << 4));
-
-   mod_phy_reg(pi, 0x160, (0x7f << 0), (32 << 0));
-   mod_phy_reg(pi, 0x160, (0x7f << 8), (39 << 8));
-   mod_phy_reg(pi, 0x161, (0x7f << 0), (46 << 0));
-   mod_phy_reg(pi, 0x161, (0x7f << 8), (51 << 8));
-   mod_phy_reg(pi, 0x162, (0x7f << 0), (55 << 0));
-   mod_phy_reg(pi, 0x162, (0x7f << 8), (58 << 8));
-   mod_phy_reg(pi, 0x163, (0x7f << 0), (60 << 0));
-   mod_phy_reg(pi, 0x163, (0x7f << 8), (62 << 8));
-   mod_phy_reg(pi, 0x164, (0x7f << 0), (62 << 0));
-   mod_phy_reg(pi, 0x164, (0x7f << 8), (63 << 8));
-   mod_phy_reg(pi, 0x165, (0x7f << 0), (63 << 0));
-   mod_phy_reg(pi, 0x165, (0x7f << 8), (64 << 8));
-   mod_phy_reg(pi, 0x166, (0x7f << 0), (64 << 0));
-   mod_phy_reg(pi, 0x166, (0x7f << 8), (64 << 8));
-   mod_phy_reg(pi, 0x167, (0x7f << 0), (64 << 0));
-   mod_phy_reg(pi, 0x167, (0x7f << 8), (64 << 8));
-   }
-
-   if (NREV_LE(pi->pubpi.phy_rev, 8)) {
-   write_phy_reg(pi, 0x23f, 0x1b0);
-   write_phy_reg(pi, 0x240, 0x1b0);
-   }
+   if (NREV_IS(pi->pubpi.phy_rev, 7)) {
+   mod_phy_reg(pi, 0x221, (0x1 << 4), (1 << 4));
+
+   mod_phy_reg(pi, 0x160, (0x7f << 0), (32 << 0));
+   mod_phy_reg(pi, 0x160, (0x7f << 8), (39 << 8));
+   mod_phy_reg(pi, 0x161, (0x7f << 0), (46 << 0));
+   mod_phy_reg(pi, 0x161, (0x7f << 8), (51 << 8));
+   mod_phy_reg(pi, 0x162, (0x7f << 0), (55 << 0));
+   mod_phy_reg(pi, 0x162, (0x7f << 8), (58 << 8));
+   mod_phy_reg(pi, 0x163, (0x7f << 0), (60 << 0));
+   mod_phy_reg(pi, 0x163, (0x7f << 8), (62 << 8));
+   mod_phy_reg(pi, 0x164, (0x7f << 0), (62 << 0));
+   mod_phy_reg(pi, 0x164, (0x7f << 8), (63 << 8));
+   mod_phy_reg(pi, 0x165, (0x7f << 0), (63 << 0));
+   mod_phy_reg(pi, 0x165, (0x7f << 8), (64 << 8));
+   mod_phy_reg(pi, 0x166, (0x7f << 0), (64 << 0));
+   mod_phy_reg(pi, 0x166, (0x7f << 8), (64 << 8));
+   mod_phy_reg(pi, 0x167, (0x7f << 0), (64 << 0));
+   mod_phy_reg(pi, 0x167, (0x7f << 8), (64 << 8));
+   }
 
-   if (NREV_GE(pi->pubpi.phy_rev, 8))
-   mod_phy_reg(pi, 0xbd, (0xff << 0), (114 << 0));
+   if (NREV_LE(pi->pubpi.phy_rev, 8)) {
+   write_phy_reg(pi, 0x23f, 0x1b0);
+   write_phy_reg(pi, 0x240, 0x1b0);
+   }
 
-   wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_AFECTRL, 1, 0x00, 16,
-_control);
-   wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_AFECTRL, 1, 0x10, 16,
-_control);
+   if (NREV_GE(pi->pubpi.phy_rev, 8))
+   mod_phy_reg(pi, 0xbd, (0xff << 0), (114 << 0));
 
-   wlc_phy_table_read_nphy(pi, NPHY_TBL_ID_CMPMETRICDATAWEIGHTTBL,
-   1, 0, 32, _data_weights);
-   leg_data_weights = leg_data_weights & 0xff;
-   wlc_phy_table_write_nphy(pi, NPHY_TBL_ID_CMPMETRICDATAWEIGHTTBL,
-1, 0, 32, 

[PATCH 11/26] rtlwifi: reduce stack usage for KASAN

2017-03-02 Thread Arnd Bergmann
When CONFIG_KASAN is set, we use a large amount of stack in the btcoexist code,
presumably due to lots of inlining of functions that each add to the kernel
stack.

net/wireless/realtek/rtlwifi/btcoexist/halbtc8192e2ant.c:3762:1: error: the 
frame size of 4032 bytes is larger than 3072 bytes
net/wireless/realtek/rtlwifi/btcoexist/halbtc8723b2ant.c:3076:1: error: the 
frame size of 4104 bytes is larger than 3072 bytes
net/wireless/realtek/rtlwifi/btcoexist/halbtc8821a2ant.c:3740:1: error: the 
frame size of 3408 bytes is larger than 3072 bytes

I went through these recursively and marked functions as noinline_for_kasan
until no function used more than a kilobyte. While I saw the warning only for
three of the five files, I'm changing all five the same way for consistency.
This should help in case gcc later makes different inlining decisions.

Signed-off-by: Arnd Bergmann 
---
 .../realtek/rtlwifi/btcoexist/halbtc8192e2ant.c| 41 +++---
 .../realtek/rtlwifi/btcoexist/halbtc8723b1ant.c| 26 +++---
 .../realtek/rtlwifi/btcoexist/halbtc8723b2ant.c| 34 +-
 .../realtek/rtlwifi/btcoexist/halbtc8821a1ant.c| 36 +--
 .../realtek/rtlwifi/btcoexist/halbtc8821a2ant.c| 38 ++--
 5 files changed, 88 insertions(+), 87 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8192e2ant.c 
b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8192e2ant.c
index ffa1f438424d..8433c406a3c0 100644
--- a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8192e2ant.c
+++ b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8192e2ant.c
@@ -455,7 +455,7 @@ static void halbtc8192e2ant_querybt_info(struct btc_coexist 
*btcoexist)
btcoexist->btc_fill_h2c(btcoexist, 0x61, 1, h2c_parameter);
 }
 
-static void halbtc8192e2ant_update_btlink_info(struct btc_coexist *btcoexist)
+static noinline_for_kasan void halbtc8192e2ant_update_btlink_info(struct 
btc_coexist *btcoexist)
 {
struct btc_bt_link_info *bt_link_info = &btcoexist->bt_link_info;
bool bt_hson = false;
@@ -751,7 +751,7 @@ static void halbtc8192e2ant_set_fwdec_btpwr(struct 
btc_coexist *btcoexist,
btcoexist->btc_fill_h2c(btcoexist, 0x62, 1, h2c_parameter);
 }
 
-static void halbtc8192e2ant_dec_btpwr(struct btc_coexist *btcoexist,
+static noinline_for_kasan void halbtc8192e2ant_dec_btpwr(struct btc_coexist 
*btcoexist,
  bool force_exec, u8 dec_btpwr_lvl)
 {
struct rtl_priv *rtlpriv = btcoexist->adapter;
@@ -817,7 +817,7 @@ static void halbtc8192e2ant_bt_autoreport(struct 
btc_coexist *btcoexist,
coex_dm->pre_bt_auto_report = coex_dm->cur_bt_auto_report;
 }
 
-static void halbtc8192e2ant_fw_dac_swinglvl(struct btc_coexist *btcoexist,
+static noinline_for_kasan void halbtc8192e2ant_fw_dac_swinglvl(struct 
btc_coexist *btcoexist,
bool force_exec, u8 fw_dac_swinglvl)
 {
struct rtl_priv *rtlpriv = btcoexist->adapter;
@@ -1145,8 +1145,9 @@ static void halbtc8192e2ant_IgnoreWlanAct(struct 
btc_coexist *btcoexist,
coex_dm->pre_ignore_wlan_act = coex_dm->cur_ignore_wlan_act;
 }
 
-static void halbtc8192e2ant_SetFwPstdma(struct btc_coexist *btcoexist, u8 
byte1,
-   u8 byte2, u8 byte3, u8 byte4, u8 byte5)
+static noinline_for_kasan void
+halbtc8192e2ant_SetFwPstdma(struct btc_coexist *btcoexist, u8 byte1,
+   u8 byte2, u8 byte3, u8 byte4, u8 byte5)
 {
struct rtl_priv *rtlpriv = btcoexist->adapter;
 
@@ -1328,7 +1329,7 @@ static void halbtc8192e2ant_ps_tdma(struct btc_coexist 
*btcoexist,
coex_dm->pre_ps_tdma = coex_dm->cur_ps_tdma;
 }
 
-static void halbtc8192e2ant_set_switch_sstype(struct btc_coexist *btcoexist,
+static noinline_for_kasan void halbtc8192e2ant_set_switch_sstype(struct 
btc_coexist *btcoexist,
  u8 sstype)
 {
struct rtl_priv *rtlpriv = btcoexist->adapter;
@@ -1365,7 +1366,7 @@ static void halbtc8192e2ant_set_switch_sstype(struct 
btc_coexist *btcoexist,
btcoexist->btc_set(btcoexist, BTC_SET_ACT_SEND_MIMO_PS, );
 }
 
-static void halbtc8192e2ant_switch_sstype(struct btc_coexist *btcoexist,
+static noinline_for_kasan void halbtc8192e2ant_switch_sstype(struct 
btc_coexist *btcoexist,
  bool force_exec, u8 new_sstype)
 {
struct rtl_priv *rtlpriv = btcoexist->adapter;
@@ -1432,7 +1433,7 @@ static void halbtc8192e2ant_action_bt_inquiry(struct 
btc_coexist *btcoexist)
btc8192e2ant_sw_mec2(btcoexist, false, false, false, 0x18);
 }
 
-static bool halbtc8192e2ant_is_common_action(struct btc_coexist *btcoexist)
+static noinline_for_kasan bool halbtc8192e2ant_is_common_action(struct 
btc_coexist *btcoexist)
 {
struct rtl_priv *rtlpriv = btcoexist->adapter;
struct btc_bt_link_info *bt_link_info = &btcoexist->bt_link_info;
@@ -2358,7 

Re: [PATCH 6/7] net: stmmac: dwc-qos: Split out ->probe() and ->remove()

2017-03-02 Thread Joao Pinto
Às 5:24 PM de 2/23/2017, Thierry Reding escreveu:
> From: Thierry Reding 
> 
> Split out the binding specific parts of ->probe() and ->remove() to
> enable the driver to support variants of the binding. This is useful in
> order to keep backwards-compatibility while making it easy for a sub-
> driver to deal only with the updated bindings rather than having to add
> compatibility quirks all over the place.
> 
> Signed-off-by: Thierry Reding 
> ---
>  .../ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c| 114 
> -
>  1 file changed, 88 insertions(+), 26 deletions(-)
> 
> diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c 
> b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c
> index 1a3fa3d9f855..5071d3c15adc 100644
> --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c
> +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c
> @@ -18,6 +18,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -106,13 +107,70 @@ static int dwc_eth_dwmac_config_dt(struct 
> platform_device *pdev,
>   return 0;
>  }
>  
> +static void *dwc_qos_probe(struct platform_device *pdev,
> +struct plat_stmmacenet_data *plat_dat,
> +struct stmmac_resources *stmmac_res)
> +{
> + int err;
> +
> + plat_dat->stmmac_clk = devm_clk_get(&pdev->dev, "apb_pclk");
> + if (IS_ERR(plat_dat->stmmac_clk)) {
> + dev_err(&pdev->dev, "apb_pclk clock not found.\n");
> + return ERR_CAST(plat_dat->stmmac_clk);
> + }
> +
> + clk_prepare_enable(plat_dat->stmmac_clk);
> +
> + plat_dat->pclk = devm_clk_get(&pdev->dev, "phy_ref_clk");
> + if (IS_ERR(plat_dat->pclk)) {
> + dev_err(&pdev->dev, "phy_ref_clk clock not found.\n");
> + err = PTR_ERR(plat_dat->pclk);
> + goto disable;
> + }
> +
> + clk_prepare_enable(plat_dat->pclk);
> +
> + return NULL;
> +
> +disable:
> + clk_disable_unprepare(plat_dat->stmmac_clk);
> + return ERR_PTR(err);
> +}
> +
> +static int dwc_qos_remove(struct platform_device *pdev)
> +{
> + struct net_device *ndev = platform_get_drvdata(pdev);
> + struct stmmac_priv *priv = netdev_priv(ndev);
> +
> + clk_disable_unprepare(priv->plat->pclk);
> + clk_disable_unprepare(priv->plat->stmmac_clk);
> +
> + return 0;
> +}
> +
> +struct dwc_eth_dwmac_data {
> + void *(*probe)(struct platform_device *pdev,
> +struct plat_stmmacenet_data *data,
> +struct stmmac_resources *res);
> + int (*remove)(struct platform_device *pdev);
> +};
> +
> +static const struct dwc_eth_dwmac_data dwc_qos_data = {
> + .probe = dwc_qos_probe,
> + .remove = dwc_qos_remove,
> +};
> +
>  static int dwc_eth_dwmac_probe(struct platform_device *pdev)
>  {
> + const struct dwc_eth_dwmac_data *data;
>   struct plat_stmmacenet_data *plat_dat;
>   struct stmmac_resources stmmac_res;
>   struct resource *res;
> + void *priv;
>   int ret;
>  
> + data = of_device_get_match_data(&pdev->dev);
> +
>   memset(&stmmac_res, 0, sizeof(struct stmmac_resources));
>  
>   /**
> @@ -138,39 +196,26 @@ static int dwc_eth_dwmac_probe(struct platform_device 
> *pdev)
>   if (IS_ERR(plat_dat))
>   return PTR_ERR(plat_dat);
>  
> - plat_dat->stmmac_clk = devm_clk_get(&pdev->dev, "apb_pclk");
> - if (IS_ERR(plat_dat->stmmac_clk)) {
> - dev_err(&pdev->dev, "apb_pclk clock not found.\n");
> - ret = PTR_ERR(plat_dat->stmmac_clk);
> - plat_dat->stmmac_clk = NULL;
> - goto err_remove_config_dt;
> - }
> - clk_prepare_enable(plat_dat->stmmac_clk);
> -
> - plat_dat->pclk = devm_clk_get(&pdev->dev, "phy_ref_clk");
> - if (IS_ERR(plat_dat->pclk)) {
> - dev_err(&pdev->dev, "phy_ref_clk clock not found.\n");
> - ret = PTR_ERR(plat_dat->pclk);
> - plat_dat->pclk = NULL;
> - goto err_out_clk_dis_phy;
> + priv = data->probe(pdev, plat_dat, &stmmac_res);
> + if (IS_ERR(priv)) {
> + ret = PTR_ERR(priv);
> + dev_err(&pdev->dev, "failed to probe subdriver: %d\n", ret);
> + goto remove_config;
>   }
> - clk_prepare_enable(plat_dat->pclk);
>  
>   ret = dwc_eth_dwmac_config_dt(pdev, plat_dat);
>   if (ret)
> - goto err_out_clk_dis_aper;
> + goto remove;
>  
>   ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
>   if (ret)
> - goto err_out_clk_dis_aper;
> + goto remove;
>  
> - return 0;
> + return ret;
>  
> -err_out_clk_dis_aper:
> - clk_disable_unprepare(plat_dat->pclk);
> -err_out_clk_dis_phy:
> - clk_disable_unprepare(plat_dat->stmmac_clk);
> -err_remove_config_dt:
> +remove:
> + data->remove(pdev);
> +remove_config:
>   stmmac_remove_config_dt(pdev, plat_dat);
>  
>   return ret;
> @@ -178,11 +223,28 @@ 

[PATCH 04/26] tty: kbd: reduce stack size with KASAN

2017-03-02 Thread Arnd Bergmann
As reported by kernelci, some functions in the VT code use significant
amounts of kernel stack when local variables get inlined into the caller
multiple times:

drivers/tty/vt/keyboard.c: In function 'kbd_keycode':
drivers/tty/vt/keyboard.c:1452:1: error: the frame size of 2240 bytes is larger 
than 2048 bytes [-Werror=frame-larger-than=]

Annotating those functions as noinline_for_kasan prevents the inlining
and reduces the overall stack usage in this driver.

Signed-off-by: Arnd Bergmann 
---
 drivers/tty/vt/keyboard.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/tty/vt/keyboard.c b/drivers/tty/vt/keyboard.c
index 397e1509fe51..f8a183c1639f 100644
--- a/drivers/tty/vt/keyboard.c
+++ b/drivers/tty/vt/keyboard.c
@@ -300,13 +300,13 @@ int kbd_rate(struct kbd_repeat *rpt)
 /*
  * Helper Functions.
  */
-static void put_queue(struct vc_data *vc, int ch)
+static noinline_for_kasan void put_queue(struct vc_data *vc, int ch)
 {
tty_insert_flip_char(&vc->port, ch, 0);
tty_schedule_flip(&vc->port);
 }
 
-static void puts_queue(struct vc_data *vc, char *cp)
+static noinline_for_kasan void puts_queue(struct vc_data *vc, char *cp)
 {
while (*cp) {
tty_insert_flip_char(&vc->port, *cp, 0);
@@ -554,7 +554,7 @@ static void fn_inc_console(struct vc_data *vc)
set_console(i);
 }
 
-static void fn_send_intr(struct vc_data *vc)
+static noinline_for_kasan void fn_send_intr(struct vc_data *vc)
 {
tty_insert_flip_char(&vc->port, 0, TTY_BREAK);
tty_schedule_flip(&vc->port);
-- 
2.9.0



[PATCH 08/26] brcmsmac: make some local variables 'static const' to reduce stack size

2017-03-02 Thread Arnd Bergmann
With KASAN and a couple of other patches applied, this driver is one
of the few remaining ones that actually use more than 2048 bytes of
kernel stack:

broadcom/brcm80211/brcmsmac/phy/phy_n.c: In function 
'wlc_phy_workarounds_nphy_gainctrl':
broadcom/brcm80211/brcmsmac/phy/phy_n.c:16065:1: warning: the frame size of 
3264 bytes is larger than 2048 bytes [-Wframe-larger-than=]
broadcom/brcm80211/brcmsmac/phy/phy_n.c: In function 'wlc_phy_workarounds_nphy':
broadcom/brcm80211/brcmsmac/phy/phy_n.c:17138:1: warning: the frame size of 
2864 bytes is larger than 2048 bytes [-Wframe-larger-than=]

Here, I'm reducing the stack size by marking as many local variables as
'static const' as I can without changing the actual code.

Signed-off-by: Arnd Bergmann 
---
 .../broadcom/brcm80211/brcmsmac/phy/phy_n.c| 197 ++---
 1 file changed, 97 insertions(+), 100 deletions(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_n.c 
b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_n.c
index 42dc8e1f483d..48a4df488d75 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_n.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_n.c
@@ -14764,8 +14764,8 @@ static void 
wlc_phy_ipa_restore_tx_digi_filts_nphy(struct brcms_phy *pi)
 }
 
 static void
-wlc_phy_set_rfseq_nphy(struct brcms_phy *pi, u8 cmd, u8 *events, u8 *dlys,
-  u8 len)
+wlc_phy_set_rfseq_nphy(struct brcms_phy *pi, u8 cmd, const u8 *events,
+  const u8 *dlys, u8 len)
 {
u32 t1_offset, t2_offset;
u8 ctr;
@@ -15240,16 +15240,16 @@ static void 
wlc_phy_workarounds_nphy_gainctrl_2057_rev5(struct brcms_phy *pi)
 static void wlc_phy_workarounds_nphy_gainctrl_2057_rev6(struct brcms_phy *pi)
 {
u16 currband;
-   s8 lna1G_gain_db_rev7[] = { 9, 14, 19, 24 };
-   s8 *lna1_gain_db = NULL;
-   s8 *lna1_gain_db_2 = NULL;
-   s8 *lna2_gain_db = NULL;
-   s8 tiaA_gain_db_rev7[] = { -9, -6, -3, 0, 3, 3, 3, 3, 3, 3 };
-   s8 *tia_gain_db;
-   s8 tiaA_gainbits_rev7[] = { 0, 1, 2, 3, 4, 4, 4, 4, 4, 4 };
-   s8 *tia_gainbits;
-   u16 rfseqA_init_gain_rev7[] = { 0x624f, 0x624f };
-   u16 *rfseq_init_gain;
+   static const s8 lna1G_gain_db_rev7[] = { 9, 14, 19, 24 };
+   const s8 *lna1_gain_db = NULL;
+   const s8 *lna1_gain_db_2 = NULL;
+   const s8 *lna2_gain_db = NULL;
+   static const s8 tiaA_gain_db_rev7[] = { -9, -6, -3, 0, 3, 3, 3, 3, 3, 3 
};
+   const s8 *tia_gain_db;
+   static const s8 tiaA_gainbits_rev7[] = { 0, 1, 2, 3, 4, 4, 4, 4, 4, 4 };
+   const s8 *tia_gainbits;
+   static const u16 rfseqA_init_gain_rev7[] = { 0x624f, 0x624f };
+   const u16 *rfseq_init_gain;
u16 init_gaincode;
u16 clip1hi_gaincode;
u16 clip1md_gaincode = 0;
@@ -15310,10 +15310,9 @@ static void 
wlc_phy_workarounds_nphy_gainctrl_2057_rev6(struct brcms_phy *pi)
 
if ((freq <= 5080) || (freq == 5825)) {
 
-   s8 lna1A_gain_db_rev7[] = { 11, 16, 20, 24 };
-   s8 lna1A_gain_db_2_rev7[] = {
-   11, 17, 22, 25};
-   s8 lna2A_gain_db_rev7[] = { -1, 6, 10, 14 };
+   static const s8 lna1A_gain_db_rev7[] = { 11, 
16, 20, 24 };
+   static const s8 lna1A_gain_db_2_rev7[] = { 11, 
17, 22, 25};
+   static const s8 lna2A_gain_db_rev7[] = { -1, 6, 
10, 14 };
 
crsminu_th = 0x3e;
lna1_gain_db = lna1A_gain_db_rev7;
@@ -15321,10 +15320,9 @@ static void 
wlc_phy_workarounds_nphy_gainctrl_2057_rev6(struct brcms_phy *pi)
lna2_gain_db = lna2A_gain_db_rev7;
} else if ((freq >= 5500) && (freq <= 5700)) {
 
-   s8 lna1A_gain_db_rev7[] = { 11, 17, 21, 25 };
-   s8 lna1A_gain_db_2_rev7[] = {
-   12, 18, 22, 26};
-   s8 lna2A_gain_db_rev7[] = { 1, 8, 12, 16 };
+   static const s8 lna1A_gain_db_rev7[] = { 11, 
17, 21, 25 };
+   static const s8 lna1A_gain_db_2_rev7[] = { 12, 
18, 22, 26};
+   static const s8 lna2A_gain_db_rev7[] = { 1, 8, 
12, 16 };
 
crsminu_th = 0x45;
clip1md_gaincode_B = 0x14;
@@ -15335,10 +15333,9 @@ static void 
wlc_phy_workarounds_nphy_gainctrl_2057_rev6(struct brcms_phy *pi)
lna2_gain_db = lna2A_gain_db_rev7;
} else {
 
-   s8 lna1A_gain_db_rev7[] = { 12, 18, 22, 26 };
-   s8 lna1A_gain_db_2_rev7[] = {
-

[PATCH 00/26] bring back stack frame warning with KASAN

2017-03-02 Thread Arnd Bergmann
It took a long while to get this done, but I'm finally ready
to send the first half of the KASAN stack size patches that
I did in response to the kernelci.org warnings.

As before, it's worth mentioning that things are generally worse
with gcc-7.0.1 because of the addition of -fsanitize-address-use-after-scope
that is not present on kernelci, so my randconfig testing found
a lot more than kernelci did.

The main areas are:

- READ_ONCE/WRITE_ONCE cause problems in lots of code
- typecheck() causes huge problems in a few places
- I'm introducing "noinline_for_kasan" and am using it in a lot
  of places that suffer from inline functions with local variables
  - netlink, as used in various parts of the kernel
  - a number of drivers/media drivers
  - a handful of wireless network drivers
- kmemcheck conflicts with -fsanitize-address-use-after-scope

This series lets us add back a stack frame warning for 3072 bytes
with -fsanitize-address-use-after-scope, or 2048 bytes without it.

I have a follow-up series that further reduces the stack frame
warning limit to 1280 bytes for all 64-bit architectures, and
1536 bytes with basic KASAN support (no -fsanitize-address-use-after-scope).
For now, I'm only posting the first half, in order to keep
it (barely) reviewable.

Both series are tested with many hundred randconfig builds on both
x86 and arm64, which are the only architectures supporting KASAN.

Arnd 

 [PATCH 01/26] compiler: introduce noinline_for_kasan annotation
 [PATCH 02/26] rewrite READ_ONCE/WRITE_ONCE
 [PATCH 03/26] typecheck.h: avoid local variables in typecheck() macro
 [PATCH 04/26] tty: kbd: reduce stack size with KASAN
 [PATCH 05/26] netlink: mark nla_put_{u8,u16,u32} noinline_for_kasan
 [PATCH 06/26] rocker: mark rocker_tlv_put_* functions as
 [PATCH 07/26] brcmsmac: reduce stack size with KASAN
 [PATCH 08/26] brcmsmac: make some local variables 'static const' to
 [PATCH 09/26] brcmsmac: split up wlc_phy_workarounds_nphy
 [PATCH 10/26] brcmsmac: reindent split functions
 [PATCH 11/26] rtlwifi: reduce stack usage for KASAN
 [PATCH 12/26] wl3501_cs: reduce stack size for KASAN
 [PATCH 13/26] rtl8180: reduce stack size for KASAN
 [PATCH 14/26] [media] dvb-frontends: reduce stack size in i2c access
 [PATCH 15/26] [media] tuners: i2c: reduce stack usage for
 [PATCH 16/26] [media] i2c: adv7604: mark register access as
 [PATCH 17/26] [media] i2c: ks0127: reduce stack frame size for KASAN
 [PATCH 18/26] [media] i2c: cx25840: avoid stack overflow with KASAN
 [PATCH 19/26] [media] r820t: mark register functions as
 [PATCH 20/26] [media] em28xx: split up em28xx_dvb_init to reduce
 [PATCH 21/26] drm/bridge: ps8622: reduce stack size for KASAN
 [PATCH 22/26] drm/i915/gvt: don't overflow the kernel stack with
 [PATCH 23/26] mtd: cfi: reduce stack size with KASAN
 [PATCH 24/26] ocfs2: reduce stack size with KASAN
 [PATCH 25/26] isdn: eicon: mark divascapi incompatible with kasan
 [PATCH 26/26] kasan: rework Kconfig settings

 arch/x86/include/asm/switch_to.h |2 +-
 drivers/gpu/drm/bridge/parade-ps8622.c   |2 +-
 drivers/gpu/drm/i915/gvt/mmio.h  |   17 +-
 drivers/isdn/hardware/eicon/Kconfig  |1 +
 drivers/media/dvb-frontends/ascot2e.c|3 +-
 drivers/media/dvb-frontends/cxd2841er.c  |4 +-
 drivers/media/dvb-frontends/drx39xyj/drxj.c  |   14 +-
 drivers/media/dvb-frontends/helene.c |4 +-
 drivers/media/dvb-frontends/horus3a.c|2 +-
 drivers/media/dvb-frontends/itd1000.c|2 +-
 drivers/media/dvb-frontends/mt312.c  |2 +-
 drivers/media/dvb-frontends/si2165.c |   14 +-
 drivers/media/dvb-frontends/stb0899_drv.c|2 +-
 drivers/media/dvb-frontends/stb6100.c|2 +-
 drivers/media/dvb-frontends/stv0367.c|2 +-
 drivers/media/dvb-frontends/stv090x.c|2 +-
 drivers/media/dvb-frontends/stv6110.c|2 +-
 drivers/media/dvb-frontends/stv6110x.c   |2 +-
 drivers/media/dvb-frontends/tda8083.c|2 +-
 drivers/media/dvb-frontends/zl10039.c|2 +-
 drivers/media/i2c/adv7604.c  |4 +-
 drivers/media/i2c/cx25840/cx25840-core.c |4 +-
 drivers/media/i2c/ks0127.c   |2 +-
 drivers/media/tuners/r820t.c |4 +-
 drivers/media/tuners/tuner-i2c.h |   15 +-
 drivers/media/usb/em28xx/em28xx-dvb.c|  947 
+--
 

Re: [PATCH] netvsc: fix use-after-free in netvsc_change_mtu()

2017-03-02 Thread Stephen Hemminger
On Thu, 2 Mar 2017 13:00:53 +
Dexuan Cui  wrote:

> 'nvdev' is freed in rndis_filter_device_remove -> netvsc_device_remove ->
> free_netvsc_device, so we mustn't access it, before it's re-created in
> rndis_filter_device_add -> netvsc_device_add.
> 
> Signed-off-by: Dexuan Cui 
> Cc: "K. Y. Srinivasan" 
> Cc: Haiyang Zhang 
> Cc: Stephen Hemminger 

Reviewed-by: Stephen Hemminger 


[PATCH 07/26] brcmsmac: reduce stack size with KASAN

2017-03-02 Thread Arnd Bergmann
The wlc_phy_table_write_nphy/wlc_phy_table_read_nphy functions always put an 
object
on the stack, which will each require a redzone with KASAN and lead to possible
stack overflow:

drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_n.c: In function 
'wlc_phy_workarounds_nphy':
drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_n.c:17135:1: warning: 
the frame size of 6312 bytes is larger than 1000 bytes [-Wframe-larger-than=]

This marks the two functions as noinline_for_kasan, avoiding the problem 
entirely.

Signed-off-by: Arnd Bergmann 
---
 drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_n.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_n.c 
b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_n.c
index b3aab2fe96eb..42dc8e1f483d 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_n.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_n.c
@@ -14157,7 +14157,7 @@ static void wlc_phy_bphy_init_nphy(struct brcms_phy *pi)
write_phy_reg(pi, NPHY_TO_BPHY_OFF + BPHY_STEP, 0x668);
 }
 
-void
+noinline_for_kasan void
 wlc_phy_table_write_nphy(struct brcms_phy *pi, u32 id, u32 len, u32 offset,
 u32 width, const void *data)
 {
@@ -14171,7 +14171,7 @@ wlc_phy_table_write_nphy(struct brcms_phy *pi, u32 id, 
u32 len, u32 offset,
wlc_phy_write_table_nphy(pi, );
 }
 
-void
+noinline_for_kasan void
 wlc_phy_table_read_nphy(struct brcms_phy *pi, u32 id, u32 len, u32 offset,
u32 width, void *data)
 {
-- 
2.9.0



[PATCH 12/26] wl3501_cs: reduce stack size for KASAN

2017-03-02 Thread Arnd Bergmann
Inlining functions with local variables can lead to excessive stack usage
with KASAN:

drivers/net/wireless/wl3501_cs.c: In function 'wl3501_rx_interrupt':
drivers/net/wireless/wl3501_cs.c:1103:1: error: the frame size of 2232 bytes is 
larger than 1536 bytes [-Werror=frame-larger-than=]

Marking a few functions as noinline_for_kasan avoids the problem.

Signed-off-by: Arnd Bergmann 
---
 drivers/net/wireless/wl3501_cs.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/wl3501_cs.c b/drivers/net/wireless/wl3501_cs.c
index acec0d9ec422..15dd8e31d373 100644
--- a/drivers/net/wireless/wl3501_cs.c
+++ b/drivers/net/wireless/wl3501_cs.c
@@ -242,8 +242,8 @@ static int wl3501_get_flash_mac_addr(struct wl3501_card 
*this)
  *
  * Move 'size' bytes from PC to card. (Shouldn't be interrupted)
  */
-static void wl3501_set_to_wla(struct wl3501_card *this, u16 dest, void *src,
- int size)
+static noinline_for_kasan void wl3501_set_to_wla(struct wl3501_card *this,
+u16 dest, void *src, int size)
 {
/* switch to SRAM Page 0 */
wl3501_switch_page(this, (dest & 0x8000) ? WL3501_BSS_SPAGE1 :
@@ -264,8 +264,8 @@ static void wl3501_set_to_wla(struct wl3501_card *this, u16 
dest, void *src,
  *
  * Move 'size' bytes from card to PC. (Shouldn't be interrupted)
  */
-static void wl3501_get_from_wla(struct wl3501_card *this, u16 src, void *dest,
-   int size)
+static noinline_for_kasan void wl3501_get_from_wla(struct wl3501_card *this,
+   u16 src, void *dest, int size)
 {
/* switch to SRAM Page 0 */
wl3501_switch_page(this, (src & 0x8000) ? WL3501_BSS_SPAGE1 :
@@ -1037,7 +1037,7 @@ static inline void wl3501_auth_confirm_interrupt(struct 
wl3501_card *this,
wl3501_mgmt_resync(this);
 }
 
-static inline void wl3501_rx_interrupt(struct net_device *dev)
+static noinline_for_kasan void wl3501_rx_interrupt(struct net_device *dev)
 {
int morepkts;
u16 addr;
-- 
2.9.0



[PATCH 03/26] typecheck.h: avoid local variables in typecheck() macro

2017-03-02 Thread Arnd Bergmann
With KASAN enabled, the typecheck macro leads to some serious stack memory usage,
as seen in the rt2xxx drivers:

drivers/net/wireless/ralink/rt2x00/rt2800lib.c: In function 
'rt2800_init_registers':
drivers/net/wireless/ralink/rt2x00/rt2800lib.c:5068:1: error: the frame size of 
23768 bytes is larger than 1024 bytes [-Werror=frame-larger-than=]
drivers/net/wireless/ralink/rt2x00/rt2800lib.c: In function 
'rt2800_config_txpower_rt3593.isra.1':
drivers/net/wireless/ralink/rt2x00/rt2800lib.c:4126:1: error: the frame size of 
14184 bytes is larger than 1024 bytes [-Werror=frame-larger-than=]
drivers/net/wireless/ralink/rt2x00/rt2800lib.c: In function 
'rt2800_config_channel_rf3053.isra.5':
drivers/net/wireless/ralink/rt2x00/rt2800lib.c:2585:1: error: the frame size of 
7632 bytes is larger than 1024 bytes [-Werror=frame-larger-than=]

If we express the macro in a way that avoids the local variables, this goes
away and the stacks are comparable to building without KASAN.

Signed-off-by: Arnd Bergmann 
---
 include/linux/typecheck.h | 7 +--
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/include/linux/typecheck.h b/include/linux/typecheck.h
index eb5b74a575be..adb1579fa5f0 100644
--- a/include/linux/typecheck.h
+++ b/include/linux/typecheck.h
@@ -5,12 +5,7 @@
  * Check at compile time that something is of a particular type.
  * Always evaluates to 1 so you may use it easily in comparisons.
  */
-#define typecheck(type,x) \
-({ type __dummy; \
-   typeof(x) __dummy2; \
-   (void)(&__dummy == &__dummy2); \
-   1; \
-})
+#define typecheck(type,x) ({(void)((typeof(type) *)NULL == (typeof(x) *)NULL); 
1;})
 
 /*
  * Check at compile time that 'function' is a certain type, or is a pointer
-- 
2.9.0



[PATCH 06/26] rocker: mark rocker_tlv_put_* functions as noinline_for_kasan

2017-03-02 Thread Arnd Bergmann
Inlining these functions creates lots of stack variables when KASAN is
enabled, leading to this warning about potential stack overflow:

drivers/net/ethernet/rocker/rocker_ofdpa.c: In function 
'ofdpa_cmd_flow_tbl_add':
drivers/net/ethernet/rocker/rocker_ofdpa.c:621:1: error: the frame size of 2752 
bytes is larger than 1536 bytes [-Werror=frame-larger-than=]

This marks all of them noinline_for_kasan, which solves the problem by
keeping the redzone inside of the separate stack frames.

Signed-off-by: Arnd Bergmann 
---
 drivers/net/ethernet/rocker/rocker_tlv.h | 24 
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/rocker/rocker_tlv.h 
b/drivers/net/ethernet/rocker/rocker_tlv.h
index a63ef82e7c72..3a9573fe0191 100644
--- a/drivers/net/ethernet/rocker/rocker_tlv.h
+++ b/drivers/net/ethernet/rocker/rocker_tlv.h
@@ -139,38 +139,38 @@ rocker_tlv_start(struct rocker_desc_info *desc_info)
 int rocker_tlv_put(struct rocker_desc_info *desc_info,
   int attrtype, int attrlen, const void *data);
 
-static inline int rocker_tlv_put_u8(struct rocker_desc_info *desc_info,
-   int attrtype, u8 value)
+static noinline_for_kasan int
+rocker_tlv_put_u8(struct rocker_desc_info *desc_info, int attrtype, u8 value)
 {
return rocker_tlv_put(desc_info, attrtype, sizeof(u8), &value);
 }
 
-static inline int rocker_tlv_put_u16(struct rocker_desc_info *desc_info,
-int attrtype, u16 value)
+static noinline_for_kasan int
+rocker_tlv_put_u16(struct rocker_desc_info *desc_info, int attrtype, u16 value)
 {
return rocker_tlv_put(desc_info, attrtype, sizeof(u16), &value);
 }
 
-static inline int rocker_tlv_put_be16(struct rocker_desc_info *desc_info,
- int attrtype, __be16 value)
+static noinline_for_kasan int
+rocker_tlv_put_be16(struct rocker_desc_info *desc_info, int attrtype, __be16 
value)
 {
return rocker_tlv_put(desc_info, attrtype, sizeof(__be16), &value);
 }
 
-static inline int rocker_tlv_put_u32(struct rocker_desc_info *desc_info,
-int attrtype, u32 value)
+static noinline_for_kasan int
+rocker_tlv_put_u32(struct rocker_desc_info *desc_info, int attrtype, u32 value)
 {
return rocker_tlv_put(desc_info, attrtype, sizeof(u32), &value);
 }
 
-static inline int rocker_tlv_put_be32(struct rocker_desc_info *desc_info,
- int attrtype, __be32 value)
+static noinline_for_kasan int
+rocker_tlv_put_be32(struct rocker_desc_info *desc_info, int attrtype, __be32 
value)
 {
return rocker_tlv_put(desc_info, attrtype, sizeof(__be32), &value);
 }
 
-static inline int rocker_tlv_put_u64(struct rocker_desc_info *desc_info,
-int attrtype, u64 value)
+static noinline_for_kasan int
+rocker_tlv_put_u64(struct rocker_desc_info *desc_info, int attrtype, u64 value)
 {
return rocker_tlv_put(desc_info, attrtype, sizeof(u64), &value);
 }
-- 
2.9.0



[PATCH 05/26] netlink: mark nla_put_{u8,u16,u32} noinline_for_kasan

2017-03-02 Thread Arnd Bergmann
When CONFIG_KASAN is enabled, the "--param asan-stack=1" causes rather large
stack frames in some functions. This goes unnoticed normally because
CONFIG_FRAME_WARN is disabled with CONFIG_KASAN by default as of commit
3f181b4d8652 ("lib/Kconfig.debug: disable -Wframe-larger-than warnings with
KASAN=y").

The kernelci.org build bot however has the warning enabled and that led
me to investigate it a little further, as every build produces these warnings:

net/wireless/nl80211.c:4389:1: warning: the frame size of 2240 bytes is larger 
than 2048 bytes [-Wframe-larger-than=]
net/wireless/nl80211.c:1895:1: warning: the frame size of 3776 bytes is larger 
than 2048 bytes [-Wframe-larger-than=]
net/wireless/nl80211.c:1410:1: warning: the frame size of 2208 bytes is larger 
than 2048 bytes [-Wframe-larger-than=]
net/bridge/br_netlink.c:1282:1: warning: the frame size of 2544 bytes is larger 
than 2048 bytes [-Wframe-larger-than=]

With the new noinline_for_kasan annotation, we can avoid the problem
when KASAN is enabled but not change anything otherwise.

Cc: Andrey Ryabinin 
Cc: Alexander Potapenko 
Cc: Dmitry Vyukov 
Cc: kasan-...@googlegroups.com
Signed-off-by: Arnd Bergmann 
---
 include/net/netlink.h | 36 ++--
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/include/net/netlink.h b/include/net/netlink.h
index b239fcd33d80..d84878d8347f 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -755,7 +755,7 @@ static inline int nla_parse_nested(struct nlattr *tb[], int 
maxtype,
  * @attrtype: attribute type
  * @value: numeric value
  */
-static inline int nla_put_u8(struct sk_buff *skb, int attrtype, u8 value)
+static noinline_for_kasan int nla_put_u8(struct sk_buff *skb, int attrtype, u8 
value)
 {
return nla_put(skb, attrtype, sizeof(u8), &value);
 }
@@ -766,7 +766,7 @@ static inline int nla_put_u8(struct sk_buff *skb, int 
attrtype, u8 value)
  * @attrtype: attribute type
  * @value: numeric value
  */
-static inline int nla_put_u16(struct sk_buff *skb, int attrtype, u16 value)
+static noinline_for_kasan int nla_put_u16(struct sk_buff *skb, int attrtype, 
u16 value)
 {
return nla_put(skb, attrtype, sizeof(u16), &value);
 }
@@ -777,7 +777,7 @@ static inline int nla_put_u16(struct sk_buff *skb, int 
attrtype, u16 value)
  * @attrtype: attribute type
  * @value: numeric value
  */
-static inline int nla_put_be16(struct sk_buff *skb, int attrtype, __be16 value)
+static noinline_for_kasan int nla_put_be16(struct sk_buff *skb, int attrtype, 
__be16 value)
 {
return nla_put(skb, attrtype, sizeof(__be16), &value);
 }
@@ -788,7 +788,7 @@ static inline int nla_put_be16(struct sk_buff *skb, int 
attrtype, __be16 value)
  * @attrtype: attribute type
  * @value: numeric value
  */
-static inline int nla_put_net16(struct sk_buff *skb, int attrtype, __be16 
value)
+static noinline_for_kasan int nla_put_net16(struct sk_buff *skb, int attrtype, 
__be16 value)
 {
return nla_put_be16(skb, attrtype | NLA_F_NET_BYTEORDER, value);
 }
@@ -799,7 +799,7 @@ static inline int nla_put_net16(struct sk_buff *skb, int 
attrtype, __be16 value)
  * @attrtype: attribute type
  * @value: numeric value
  */
-static inline int nla_put_le16(struct sk_buff *skb, int attrtype, __le16 value)
+static noinline_for_kasan int nla_put_le16(struct sk_buff *skb, int attrtype, 
__le16 value)
 {
return nla_put(skb, attrtype, sizeof(__le16), &value);
 }
@@ -810,7 +810,7 @@ static inline int nla_put_le16(struct sk_buff *skb, int 
attrtype, __le16 value)
  * @attrtype: attribute type
  * @value: numeric value
  */
-static inline int nla_put_u32(struct sk_buff *skb, int attrtype, u32 value)
+static noinline_for_kasan int nla_put_u32(struct sk_buff *skb, int attrtype, 
u32 value)
 {
return nla_put(skb, attrtype, sizeof(u32), &value);
 }
@@ -821,7 +821,7 @@ static inline int nla_put_u32(struct sk_buff *skb, int 
attrtype, u32 value)
  * @attrtype: attribute type
  * @value: numeric value
  */
-static inline int nla_put_be32(struct sk_buff *skb, int attrtype, __be32 value)
+static noinline_for_kasan int nla_put_be32(struct sk_buff *skb, int attrtype, 
__be32 value)
 {
return nla_put(skb, attrtype, sizeof(__be32), &value);
 }
@@ -832,7 +832,7 @@ static inline int nla_put_be32(struct sk_buff *skb, int 
attrtype, __be32 value)
  * @attrtype: attribute type
  * @value: numeric value
  */
-static inline int nla_put_net32(struct sk_buff *skb, int attrtype, __be32 
value)
+static noinline_for_kasan int nla_put_net32(struct sk_buff *skb, int attrtype, 
__be32 value)
 {
return nla_put_be32(skb, attrtype | NLA_F_NET_BYTEORDER, value);
 }
@@ -843,7 +843,7 @@ static inline int nla_put_net32(struct sk_buff *skb, int 
attrtype, __be32 value)
  * @attrtype: attribute type
  * @value: numeric value
  */
-static inline int nla_put_le32(struct sk_buff *skb, int attrtype, __le32 value)
+static noinline_for_kasan int 

[PATCH 01/26] compiler: introduce noinline_for_kasan annotation

2017-03-02 Thread Arnd Bergmann
When CONFIG_KASAN is set, we can run into some code that uses incredible
amounts of kernel stack:

drivers/staging/dgnc/dgnc_neo.c:1056:1: error: the frame size of 2 bytes is 
larger than 2048 bytes [-Werror=frame-larger-than=]
drivers/media/i2c/cx25840/cx25840-core.c:4960:1: error: the frame size of 94000 
bytes is larger than 2048 bytes [-Werror=frame-larger-than=]
drivers/media/dvb-frontends/stv090x.c:3430:1: error: the frame size of 5312 
bytes is larger than 3072 bytes [-Werror=frame-larger-than=]

This happens when a sanitizer uses stack memory each time an inline function
gets called. This introduces a new annotation for those functions to make
them either 'inline' or 'noinline' depending on the CONFIG_KASAN symbol.

Signed-off-by: Arnd Bergmann 
---
 include/linux/compiler.h | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index f8110051188f..56b90897a459 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -416,6 +416,17 @@ static __always_inline void __write_once_size(volatile 
void *p, void *res, int s
  */
 #define noinline_for_stack noinline
 
+/*
+ * CONFIG_KASAN can lead to extreme stack usage with certain patterns when
+ * one function gets inlined many times and each instance requires a stack
+ * check.
+ */
+#ifdef CONFIG_KASAN
+#define noinline_for_kasan noinline __maybe_unused
+#else
+#define noinline_for_kasan inline
+#endif
+
 #ifndef __always_inline
 #define __always_inline inline
 #endif
-- 
2.9.0



[PATCH 09/26] brcmsmac: split up wlc_phy_workarounds_nphy

2017-03-02 Thread Arnd Bergmann
The stack consumption in this driver is still relatively high, with one
remaining warning if the warning level is lowered to 1536 bytes:

drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_n.c:17135:1: error: 
the frame size of 1880 bytes is larger than 1536 bytes 
[-Werror=frame-larger-than=]

The affected function is actually a collection of three separate 
implementations,
and each of them is fairly large by itself. Splitting them up is done easily
and improves readability at the same time.

I'm leaving the original indentation to make the review easier.

Signed-off-by: Arnd Bergmann 
---
 .../broadcom/brcm80211/brcmsmac/phy/phy_n.c| 178 -
 1 file changed, 104 insertions(+), 74 deletions(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_n.c 
b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_n.c
index 48a4df488d75..d76c092bb6b4 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_n.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_n.c
@@ -16061,52 +16061,8 @@ static void wlc_phy_workarounds_nphy_gainctrl(struct 
brcms_phy *pi)
}
 }
 
-static void wlc_phy_workarounds_nphy(struct brcms_phy *pi)
+static void wlc_phy_workarounds_nphy_rev7(struct brcms_phy *pi)
 {
-   static const u8 rfseq_rx2tx_events[] = {
-   NPHY_RFSEQ_CMD_NOP,
-   NPHY_RFSEQ_CMD_RXG_FBW,
-   NPHY_RFSEQ_CMD_TR_SWITCH,
-   NPHY_RFSEQ_CMD_CLR_HIQ_DIS,
-   NPHY_RFSEQ_CMD_RXPD_TXPD,
-   NPHY_RFSEQ_CMD_TX_GAIN,
-   NPHY_RFSEQ_CMD_EXT_PA
-   };
-   u8 rfseq_rx2tx_dlys[] = { 8, 6, 6, 2, 4, 60, 1 };
-   static const u8 rfseq_tx2rx_events[] = {
-   NPHY_RFSEQ_CMD_NOP,
-   NPHY_RFSEQ_CMD_EXT_PA,
-   NPHY_RFSEQ_CMD_TX_GAIN,
-   NPHY_RFSEQ_CMD_RXPD_TXPD,
-   NPHY_RFSEQ_CMD_TR_SWITCH,
-   NPHY_RFSEQ_CMD_RXG_FBW,
-   NPHY_RFSEQ_CMD_CLR_HIQ_DIS
-   };
-   static const u8 rfseq_tx2rx_dlys[] = { 8, 6, 2, 4, 4, 6, 1 };
-   static const u8 rfseq_tx2rx_events_rev3[] = {
-   NPHY_REV3_RFSEQ_CMD_EXT_PA,
-   NPHY_REV3_RFSEQ_CMD_INT_PA_PU,
-   NPHY_REV3_RFSEQ_CMD_TX_GAIN,
-   NPHY_REV3_RFSEQ_CMD_RXPD_TXPD,
-   NPHY_REV3_RFSEQ_CMD_TR_SWITCH,
-   NPHY_REV3_RFSEQ_CMD_RXG_FBW,
-   NPHY_REV3_RFSEQ_CMD_CLR_HIQ_DIS,
-   NPHY_REV3_RFSEQ_CMD_END
-   };
-   static const u8 rfseq_tx2rx_dlys_rev3[] = { 8, 4, 2, 2, 4, 4, 6, 1 };
-   u8 rfseq_rx2tx_events_rev3[] = {
-   NPHY_REV3_RFSEQ_CMD_NOP,
-   NPHY_REV3_RFSEQ_CMD_RXG_FBW,
-   NPHY_REV3_RFSEQ_CMD_TR_SWITCH,
-   NPHY_REV3_RFSEQ_CMD_CLR_HIQ_DIS,
-   NPHY_REV3_RFSEQ_CMD_RXPD_TXPD,
-   NPHY_REV3_RFSEQ_CMD_TX_GAIN,
-   NPHY_REV3_RFSEQ_CMD_INT_PA_PU,
-   NPHY_REV3_RFSEQ_CMD_EXT_PA,
-   NPHY_REV3_RFSEQ_CMD_END
-   };
-   u8 rfseq_rx2tx_dlys_rev3[] = { 8, 6, 6, 4, 4, 18, 42, 1, 1 };
-
static const u8 rfseq_rx2tx_events_rev3_ipa[] = {
NPHY_REV3_RFSEQ_CMD_NOP,
NPHY_REV3_RFSEQ_CMD_RXG_FBW,
@@ -16120,29 +16076,15 @@ static void wlc_phy_workarounds_nphy(struct brcms_phy 
*pi)
};
static const u8 rfseq_rx2tx_dlys_rev3_ipa[] = { 8, 6, 6, 4, 4, 16, 43, 
1, 1 };
static const u16 rfseq_rx2tx_dacbufpu_rev7[] = { 0x10f, 0x10f };
-
-   s16 alpha0, alpha1, alpha2;
-   s16 beta0, beta1, beta2;
-   u32 leg_data_weights, ht_data_weights, nss1_data_weights,
-   stbc_data_weights;
+   u32 leg_data_weights;
u8 chan_freq_range = 0;
static const u16 dac_control = 0x0002;
u16 aux_adc_vmid_rev7_core0[] = { 0x8e, 0x96, 0x96, 0x96 };
u16 aux_adc_vmid_rev7_core1[] = { 0x8f, 0x9f, 0x9f, 0x96 };
-   u16 aux_adc_vmid_rev4[] = { 0xa2, 0xb4, 0xb4, 0x89 };
-   u16 aux_adc_vmid_rev3[] = { 0xa2, 0xb4, 0xb4, 0x89 };
-   u16 *aux_adc_vmid;
u16 aux_adc_gain_rev7[] = { 0x02, 0x02, 0x02, 0x02 };
-   u16 aux_adc_gain_rev4[] = { 0x02, 0x02, 0x02, 0x00 };
-   u16 aux_adc_gain_rev3[] = { 0x02, 0x02, 0x02, 0x00 };
-   u16 *aux_adc_gain;
-   static const u16 sk_adc_vmid[] = { 0xb4, 0xb4, 0xb4, 0x24 };
-   static const u16 sk_adc_gain[] = { 0x02, 0x02, 0x02, 0x02 };
s32 min_nvar_val = 0x18d;
s32 min_nvar_offset_6mbps = 20;
u8 pdetrange;
-   u8 triso;
-   u16 regval;
u16 afectrl_adc_ctrl1_rev7 = 0x20;
u16 afectrl_adc_ctrl2_rev7 = 0x0;
u16 rfseq_rx2tx_lpf_h_hpc_rev7 = 0x77;
@@ -16171,17 +16113,6 @@ static void wlc_phy_workarounds_nphy(struct brcms_phy 
*pi)
u16 freq;
int coreNum;
 
-   if (CHSPEC_IS5G(pi->radio_chanspec))
-   wlc_phy_classifier_nphy(pi, NPHY_ClassifierCtrl_cck_en, 0);
-   else
- 

[PATCH 15/26] [media] tuners: i2c: reduce stack usage for tuner_i2c_xfer_*

2017-03-02 Thread Arnd Bergmann
When CONFIG_KASAN is enabled, we see very large stack usage in some
functions, e.g.:

drivers/media/tuners/tda8290.c: In function 'tda8290_set_params':
drivers/media/tuners/tda8290.c:310:1: warning: the frame size of 3184 bytes is 
larger than 1024 bytes [-Wframe-larger-than=]
drivers/media/tuners/tda8290.c: In function 'tda829x_probe':
drivers/media/tuners/tda8290.c:878:1: warning: the frame size of 1088 bytes is 
larger than 1024 bytes [-Wframe-larger-than=]

By annotating the helpers as noinline_for_kasan, we can easily avoid this.

Signed-off-by: Arnd Bergmann 
---
 drivers/media/tuners/tuner-i2c.h | 15 ---
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/media/tuners/tuner-i2c.h b/drivers/media/tuners/tuner-i2c.h
index bda67a5a76f2..c8970299799c 100644
--- a/drivers/media/tuners/tuner-i2c.h
+++ b/drivers/media/tuners/tuner-i2c.h
@@ -33,8 +33,8 @@ struct tuner_i2c_props {
char *name;
 };
 
-static inline int tuner_i2c_xfer_send(struct tuner_i2c_props *props,
- unsigned char *buf, int len)
+static noinline_for_kasan int
+tuner_i2c_xfer_send(struct tuner_i2c_props *props, unsigned char *buf, int len)
 {
struct i2c_msg msg = { .addr = props->addr, .flags = 0,
   .buf = buf, .len = len };
@@ -43,8 +43,8 @@ static inline int tuner_i2c_xfer_send(struct tuner_i2c_props 
*props,
return (ret == 1) ? len : ret;
 }
 
-static inline int tuner_i2c_xfer_recv(struct tuner_i2c_props *props,
- unsigned char *buf, int len)
+static noinline_for_kasan int
+tuner_i2c_xfer_recv(struct tuner_i2c_props *props, unsigned char *buf, int len)
 {
struct i2c_msg msg = { .addr = props->addr, .flags = I2C_M_RD,
   .buf = buf, .len = len };
@@ -53,9 +53,10 @@ static inline int tuner_i2c_xfer_recv(struct tuner_i2c_props 
*props,
return (ret == 1) ? len : ret;
 }
 
-static inline int tuner_i2c_xfer_send_recv(struct tuner_i2c_props *props,
-  unsigned char *obuf, int olen,
-  unsigned char *ibuf, int ilen)
+static noinline_for_kasan int
+tuner_i2c_xfer_send_recv(struct tuner_i2c_props *props,
+unsigned char *obuf, int olen,
+unsigned char *ibuf, int ilen)
 {
struct i2c_msg msg[2] = { { .addr = props->addr, .flags = 0,
.buf = obuf, .len = olen },
-- 
2.9.0



[PATCH 02/26] rewrite READ_ONCE/WRITE_ONCE

2017-03-02 Thread Arnd Bergmann
When CONFIG_KASAN is enabled, the READ_ONCE/WRITE_ONCE macros cause
rather large kernel stacks, e.g.:

mm/vmscan.c: In function 'shrink_page_list':
mm/vmscan.c:1333:1: error: the frame size of 3456 bytes is larger than 3072 
bytes [-Werror=frame-larger-than=]
block/cfq-iosched.c: In function 'cfqg_stats_add_aux':
block/cfq-iosched.c:750:1: error: the frame size of 4048 bytes is larger than 
3072 bytes [-Werror=frame-larger-than=]
fs/btrfs/disk-io.c: In function 'open_ctree':
fs/btrfs/disk-io.c:3314:1: error: the frame size of 3136 bytes is larger than 
3072 bytes [-Werror=frame-larger-than=]
fs/btrfs/relocation.c: In function 'build_backref_tree':
fs/btrfs/relocation.c:1193:1: error: the frame size of 4336 bytes is larger 
than 3072 bytes [-Werror=frame-larger-than=]
fs/fscache/stats.c: In function 'fscache_stats_show':
fs/fscache/stats.c:287:1: error: the frame size of 6512 bytes is larger than 
3072 bytes [-Werror=frame-larger-than=]
fs/jbd2/commit.c: In function 'jbd2_journal_commit_transaction':
fs/jbd2/commit.c:1139:1: error: the frame size of 3760 bytes is larger than 
3072 bytes [-Werror=frame-larger-than=]

This attempts a rewrite of the two macros, using a simpler implementation
for the most common case of having a naturally aligned 1, 2, 4, or (on
64-bit architectures) 8  byte object that can be accessed with a single
instruction.  For these, we go back to a volatile pointer dereference
that we had with the ACCESS_ONCE macro.

READ_ONCE/WRITE_ONCE also try to handle unaligned objects and objects
of other sizes by forcing either a word-size access (which may trap
on some architectures) or doing a non-atomic memcpy. I could not figure
out what these are actually used for, but they appear to be done
intentionally, so I'm leaving that code untouched.

I had to fix up a couple of files that either use WRITE_ONCE() as an
implicit typecast, or ignore the result of READ_ONCE(). In all cases,
the modified code seems no worse to me than the original.

Cc: Christian Borntraeger 
Cc: Paul McKenney 
Signed-off-by: Arnd Bergmann 
---
 arch/x86/include/asm/switch_to.h |  2 +-
 fs/overlayfs/util.c  |  6 ++---
 include/linux/compiler.h | 47 
 3 files changed, 42 insertions(+), 13 deletions(-)

diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index fcc5cd387fd1..0c243dd569fe 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -30,7 +30,7 @@ static inline void prepare_switch_to(struct task_struct *prev,
 *
 * To minimize cache pollution, just follow the stack pointer.
 */
-   READ_ONCE(*(unsigned char *)next->thread.sp);
+   (void)READ_ONCE(*(unsigned char *)next->thread.sp);
 #endif
 }
 
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 952286f4826c..1c10632a48bb 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -222,8 +222,8 @@ void ovl_dentry_update(struct dentry *dentry, struct dentry 
*upperdentry)
 
 void ovl_inode_init(struct inode *inode, struct inode *realinode, bool 
is_upper)
 {
-   WRITE_ONCE(inode->i_private, (unsigned long) realinode |
-  (is_upper ? OVL_ISUPPER_MASK : 0));
+   WRITE_ONCE(inode->i_private, (void *)((unsigned long) realinode |
+  (is_upper ? OVL_ISUPPER_MASK : 0)));
 }
 
 void ovl_inode_update(struct inode *inode, struct inode *upperinode)
@@ -231,7 +231,7 @@ void ovl_inode_update(struct inode *inode, struct inode 
*upperinode)
WARN_ON(!upperinode);
WARN_ON(!inode_unhashed(inode));
WRITE_ONCE(inode->i_private,
-  (unsigned long) upperinode | OVL_ISUPPER_MASK);
+  (void *)((unsigned long) upperinode | OVL_ISUPPER_MASK));
if (!S_ISDIR(upperinode->i_mode))
__insert_inode_hash(inode, (unsigned long) upperinode);
 }
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 56b90897a459..b619f5853af8 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -288,6 +288,10 @@ static __always_inline void __write_once_size(volatile 
void *p, void *res, int s
}
 }
 
+#define __ALIGNED_WORD(x)  \
+   ((sizeof(x) == 1 || sizeof(x) == 2 || sizeof(x) == 4 || \
+ sizeof(x) == sizeof(long)) && (sizeof(x) == __alignof__(x)))  \
+
 /*
  * Prevent the compiler from merging or refetching reads or writes. The
  * compiler is also forbidden from reordering successive instances of
@@ -309,8 +313,13 @@ static __always_inline void __write_once_size(volatile 
void *p, void *res, int s
  * mutilate accesses that either do not require ordering or that interact
  * with an explicit memory barrier or atomic instruction that provides the
  * required ordering.
+ *
+ * Unaligned data is particularly tricky here: if the type that gets
+ * passed in is not 

[PATCH 13/26] rtl8180: reduce stack size for KASAN

2017-03-02 Thread Arnd Bergmann
When CONFIG_KASAN is set, we see overly large stack frames from inlining
functions with local variables:

drivers/net/wireless/realtek/rtl818x/rtl8180/rtl8225se.c: In function 
'rtl8225se_rf_init':
drivers/net/wireless/realtek/rtl818x/rtl8180/rtl8225se.c:431:1: warning: the 
frame size of 4384 bytes is larger than 3072 bytes [-Wframe-larger-than=]

This marks them noinline_for_kasan.

Signed-off-by: Arnd Bergmann 
---
 drivers/net/wireless/realtek/rtl818x/rtl8180/rtl8225se.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtl818x/rtl8180/rtl8225se.c 
b/drivers/net/wireless/realtek/rtl818x/rtl8180/rtl8225se.c
index fde89866fa8d..1efa098a2e32 100644
--- a/drivers/net/wireless/realtek/rtl818x/rtl8180/rtl8225se.c
+++ b/drivers/net/wireless/realtek/rtl818x/rtl8180/rtl8225se.c
@@ -174,14 +174,14 @@ static void rtl8187se_three_wire_io(struct ieee80211_hw 
*dev, u8 *data,
} while (0);
 }
 
-static u32 rtl8187se_rf_readreg(struct ieee80211_hw *dev, u8 addr)
+static noinline_for_kasan u32 rtl8187se_rf_readreg(struct ieee80211_hw *dev, 
u8 addr)
 {
u32 dataread = addr & 0x0F;
rtl8187se_three_wire_io(dev, (u8 *)&dataread, 16, 0);
return dataread;
 }
 
-static void rtl8187se_rf_writereg(struct ieee80211_hw *dev, u8 addr, u32 data)
+static noinline_for_kasan void rtl8187se_rf_writereg(struct ieee80211_hw *dev, 
u8 addr, u32 data)
 {
u32 outdata = (data << 4) | (u32)(addr & 0x0F);
rtl8187se_three_wire_io(dev, (u8 *)&outdata, 16, 1);
-- 
2.9.0



  1   2   >