date:20180526

Re: net-next boot error: KASAN: use-after-free Write in call_usermodehelper_exec_work

2018-05-26 Thread Dmitry Vyukov

On Sun, May 27, 2018 at 7:34 AM, syzbot
 wrote:
> Hello,
>
> syzbot found the following crash on:
>
> HEAD commit:5b79c2af667c Merge git://git.kernel.org/pub/scm/linux/kern..
> git tree:   net-next
> console output: https://syzkaller.appspot.com/x/log.txt?x=16087fa780
> kernel config:  https://syzkaller.appspot.com/x/.config?x=e4078980b886800c
> dashboard link: https://syzkaller.appspot.com/bug?extid=9269ae80345087b898d0
> compiler:   gcc (GCC) 8.0.1 20180413 (experimental)
>
> Unfortunately, I don't have any reproducer for this crash yet.
>
> IMPORTANT: if you fix the bug, please add the following tag to the commit:
> Reported-by: syzbot+9269ae80345087b89...@syzkaller.appspotmail.com


This first happened just now on net-next, so +net maintainers.
This happened during boot, so no separate reproducer.


> FS-Cache: Loaded
> CacheFiles: Loaded
> pnp: PnP ACPI init
> pnp: PnP ACPI: found 7 devices
> ==
> BUG: KASAN: use-after-free in call_usermodehelper_exec_work+0x2d3/0x310
> kernel/umh.c:195
> Write of size 4 at addr 8801d63bd370 by task kworker/u4:0/6
>
> CPU: 0 PID: 6 Comm: kworker/u4:0 Not tainted 4.17.0-rc6+ #65
> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
> Google 01/01/2011
> Workqueue: events_unbound call_usermodehelper_exec_work
> Call Trace:
>  __dump_stack lib/dump_stack.c:77 [inline]
>  dump_stack+0x1b9/0x294 lib/dump_stack.c:113
>  print_address_description+0x6c/0x20b mm/kasan/report.c:256
>  kasan_report_error mm/kasan/report.c:354 [inline]
>  kasan_report.cold.7+0x242/0x2fe mm/kasan/report.c:412
>  __asan_report_store4_noabort+0x17/0x20 mm/kasan/report.c:437
>  call_usermodehelper_exec_work+0x2d3/0x310 kernel/umh.c:195
>  process_one_work+0xc1e/0x1b50 kernel/workqueue.c:2145
>  worker_thread+0x1cc/0x1440 kernel/workqueue.c:2279
>  kthread+0x345/0x410 kernel/kthread.c:240
>  ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:412
>
> Allocated by task 1:
>  save_stack+0x43/0xd0 mm/kasan/kasan.c:448
>  set_track mm/kasan/kasan.c:460 [inline]
>  kasan_kmalloc+0xc4/0xe0 mm/kasan/kasan.c:553
>  kmem_cache_alloc_trace+0x152/0x780 mm/slab.c:3620
>  kmalloc include/linux/slab.h:512 [inline]
>  kzalloc include/linux/slab.h:701 [inline]
>  call_usermodehelper_setup+0xe8/0x400 kernel/umh.c:382
> clocksource: acpi_pm: mask: 0xff max_cycles: 0xff, max_idle_ns:
> 2085701024 ns
>  kobject_uevent_env+0xb21/0x1110 lib/kobject_uevent.c:608
>  kobject_uevent+0x1f/0x30 lib/kobject_uevent.c:636
>  device_add+0xb01/0x16d0 drivers/base/core.c:1843
>  device_create_groups_vargs+0x1ff/0x270 drivers/base/core.c:2439
>  device_create_vargs drivers/base/core.c:2479 [inline]
>  device_create+0xd3/0x100 drivers/base/core.c:2515
>  chr_dev_init+0x120/0x158 drivers/char/mem.c:938
>  do_one_initcall+0x127/0x913 init/main.c:884
>  do_initcall_level init/main.c:952 [inline]
>  do_initcalls init/main.c:960 [inline]
>  do_basic_setup init/main.c:978 [inline]
>  kernel_init_freeable+0x49b/0x58e init/main.c:1135
>  kernel_init+0x11/0x1b3 init/main.c:1061
>  ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:412
>
> Freed by task 1296:
>  save_stack+0x43/0xd0 mm/kasan/kasan.c:448
> NET: Registered protocol family 2
>  set_track mm/kasan/kasan.c:460 [inline]
>  __kasan_slab_free+0x11a/0x170 mm/kasan/kasan.c:521
>  kasan_slab_free+0xe/0x10 mm/kasan/kasan.c:528
>  __cache_free mm/slab.c:3498 [inline]
>  kfree+0xd9/0x260 mm/slab.c:3813
>  call_usermodehelper_freeinfo kernel/umh.c:45 [inline]
>  umh_complete+0x7b/0x90 kernel/umh.c:59
>  call_usermodehelper_exec_async+0x6e8/0x9e0 kernel/umh.c:116
> tcp_listen_portaddr_hash hash table entries: 4096 (order: 6, 294912 bytes)
>  ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:412
>
> The buggy address belongs to the object at 8801d63bd300
>  which belongs to the cache kmalloc-192 of size 192
> The buggy address is located 112 bytes inside of
>  192-byte region [8801d63bd300, 8801d63bd3c0)
> The buggy address belongs to the page:
> TCP established hash table entries: 65536 (order: 7, 524288 bytes)
> page:ea000758ef40 count:1 mapcount:0 mapping:8801d63bd000 index:0x0
> flags: 0x2fffc000100(slab)
> raw: 02fffc000100 8801d63bd000  00010010
> TCP bind hash table entries: 65536 (order: 10, 4194304 bytes)
> raw: ea000759c2e0 ea0007521be0 8801da800040 
> page dumped because: kasan: bad access detected
>
> Memory state around the buggy address:
>  8801d63bd200: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
>  8801d63bd280: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc
>>
>> 8801d63bd300: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
>
> TCP: Hash tables configured (established 65536 bind 65536)
>  ^
>  8801d63bd380: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc
>

Re: [PATCH 3/4] cpsw_switchdev: add switchdev support files

2018-05-26 Thread kbuild test robot

Hi Ilias,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on net/master]
[also build test ERROR on v4.17-rc6]
[cannot apply to net-next/master]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Ilias-Apalodimas/RFC-CPSW-switchdev-mode/20180527-102334
config: arm-omap2plus_defconfig (attached as .config)
compiler: arm-linux-gnueabi-gcc (Debian 7.2.0-11) 7.2.0
reproduce:
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
make.cross ARCH=arm 

All errors (new ones prefixed by >>):

   drivers/net/ethernet/ti/cpsw_switchdev.c: In function 
'cpsw_port_switchdev_init':
>> drivers/net/ethernet/ti/cpsw_switchdev.c:298:8: error: 'struct net_device' 
>> has no member named 'switchdev_ops'; did you mean 'netdev_ops'?
 ndev->switchdev_ops = &cpsw_port_switchdev_ops;
   ^
   netdev_ops

vim +298 drivers/net/ethernet/ti/cpsw_switchdev.c

   295  
   296  void cpsw_port_switchdev_init(struct net_device *ndev)
   297  {
 > 298  ndev->switchdev_ops = &cpsw_port_switchdev_ops;

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: application/gzip

Re: [bpf-next PATCH] bpf: sockhash fix race with bpf_tcp_close and map delete

2018-05-26 Thread John Fastabend

On 05/26/2018 01:30 AM, Daniel Borkmann wrote:
> Hi John,
> 
> On 05/25/2018 07:37 PM, John Fastabend wrote:
>> syzbot reported two related splats, a use after free and null
>> pointer dereference, when a TCP socket is closed while the map is
>> also being removed.
>>
>> The psock keeps a reference to all map slots that have a reference
>> to the sock so that when the sock is closed we can clean up any
>> outstanding sock{map|hash} entries. This avoids pinning a sock
>> forever if the map owner fails to do proper cleanup. However, the
>> result is we have two paths that can free an entry in the map. Even
>> the comment in the sock{map|hash} tear down function, sock_hash_free()
>> notes this:
>>
>>  At this point no update, lookup or delete operations can happen.
>>  However, be aware we can still get a socket state event updates,
>>  and data ready callbacks that reference the psock from sk_user_data.
>>
>> Both removal paths omitted taking the hash bucket lock resulting
>> in the case where we have two references that are in the process
>> of being free'd.
>>
>> Reported-by: syzbot+a761b81c211794fa1...@syzkaller.appspotmail.com
>> Signed-off-by: John Fastabend 
> 

Fixes: 81110384441a ("bpf: sockmap, add hash map support")

Re: [PATCH] net: netsec: reduce DMA mask to 40 bits

2018-05-26 Thread Jassi Brar

On 26 May 2018 at 11:46, Ard Biesheuvel  wrote:
> On 26 May 2018 at 05:44, Jassi Brar  wrote:
>> On 26 May 2018 at 08:56, Jassi Brar  wrote:
>>> On 26 May 2018 at 01:07, Robin Murphy  wrote:
 On Sat, 26 May 2018 00:33:05 +0530
 Jassi Brar  wrote:

> On 25 May 2018 at 18:20, Ard Biesheuvel 
> wrote:
> > The netsec network controller IP can drive 64 address bits for DMA,
> > and the DMA mask is set accordingly in the driver. However, the
> > SynQuacer SoC, which is the only silicon incorporating this IP at
> > the moment, integrates this IP in a manner that leaves address bits
> > [63:40] unconnected.
> >
> > Up until now, this has not resulted in any problems, given that the
> > DDR controller doesn't decode those bits to begin with. However,
> > recent firmware updates for platforms incorporating this SoC allow
> > the IOMMU to be enabled, which does decode address bits [47:40],
> > and allocates top down from the IOVA space, producing DMA addresses
> > that have bits set that have been left unconnected.
> >
> > Both the DT and ACPI (IORT) descriptions of the platform take this
> > into account, and only describe a DMA address space of 40 bits
> > (using either dma-ranges DT properties, or DMA address limits in
> > IORT named component nodes). However, even though our IOMMU and bus
> > layers may take such limitations into account by setting a narrower
> > DMA mask when creating the platform device, the netsec probe()
> > entrypoint follows the common practice of setting the DMA mask
> > unconditionally, according to the capabilities of the IP block itself
> > rather than to its integration into the chip.
> >
> > It is currently unclear what the correct fix is here. We could hack
> > around it by only setting the DMA mask if it deviates from its
> > default value of DMA_BIT_MASK(32). However, this makes it
> > impossible for the bus layer to use DMA_BIT_MASK(32) as the bus
> > limit, and so it appears that a more comprehensive approach is
> > required to take DMA limits imposed by the SoC as a whole into
> > account.
> >
> > In the mean time, let's limit the DMA mask to 40 bits. Given that
> > there is currently only one SoC that incorporates this IP, this is
> > a reasonable approach that can be backported to -stable and buys us
> > some time to come up with a proper fix going forward.
> >
> I am sure you already thought about it, but why not let the platform
> specify the bit mask for the driver (via some "bus-width" property),
> to override the default 64 bit mask?

 Because lack of a property to describe the integration is not the
 problem. There are already at least two ways: the general DT/IORT
 properties for describing DMA addressing - which it would be a bit
 ungainly for a driver to parse for this reason, but not impossible -
>>> 
>>>
>>>
 and inferring it from a SoC-specific compatible - which is more
 appropriate, and what we happen to be able to do here.

>>> Sorry, I am not sure I follow. This patch changes from 64-bits default
>>> to 40-bits capability without checking for the parent SoC. If the next
>>> generation implements the full 64-bit or just 32-bit bus, we'll be
>>> back in the pit again. No?
>>>
>> Probably you meant we'll change the ethernet compatible string for
>> differently capable SoC. OK, but here it is more of integration issue
>> than controller version.
>>
>> Which makes me realise the extant compatible property for netsec is
>> not so correct (it embeds the platform name). So I am ok either way.
>>
>
> The platform in question has a dma-ranges DT property at the root
> level that only describes 40 bits' worth of DMA. Also, the ACPI
> description in the IORT table of the IOMMU integration of the netsec
> controller limits DMA to 40 bits. In the latter case, we actually
> enter netsec_probe() with the correct value already assigned to the
> DMA mask fields. (In the former case, the DMA limit is ignored
> entirely)
>
> In other words, we can already describe these SoC limitations and
> distinguish them from device limitations. The problem is that drivers
> ignore the existing values of DMA mask.
>
> Robin has volunteered to look into fixing this, but this cannot be
> done in a way that is suitable for -stable. In the mean time, we have
> a single platform using this network IP in the field that cannot
> upgrade its firmware to a version that describes the IOMMU, because
> the existing DMA layer code will start driving address bits that are
> correctly described as unconnected by the DT/ACPI tables.
>
> So as a workaround, until Robin fixes things properly, let's reduce
> the DMA mask to 40 bits.
>
Yeah no

Re: [PATCH, net-next] qcom-emag: hide ACPI specific functions

2018-05-26 Thread Timur Tabi


On 5/25/18 7:22 PM, Timur Tabi wrote:


-phy->open = emac_sgmii_open;
-phy->close = emac_sgmii_close;
-phy->link_up = emac_sgmii_link_up;
-phy->link_down = emac_sgmii_link_down;

I'll take a look at it next week when I'm back in the office.


I posted a patch that fixes this problem and also retains device-tree
support.

--
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm
Technologies, Inc.  Qualcomm Technologies, Inc. is a member of the
Code Aurora Forum, a Linux Foundation Collaborative Project.

[PATCH] net: qcom/emac: fix device tree initialization

2018-05-26 Thread Timur Tabi

Commit "net: qcom/emac: Encapsulate sgmii ops under one structure"
introduced the sgmii_ops structure, but did not correctly initialize
it on device tree platforms.  This resulted in compiler warnings when
ACPI is not enabled.

Reported-by: Arnd Bergmann 
Signed-off-by: Timur Tabi 
---
 drivers/net/ethernet/qualcomm/emac/emac-sgmii.c | 16 +---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c 
b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c
index 562420b834df..e78e5db39458 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c
@@ -273,6 +273,14 @@ static int emac_sgmii_common_link_change(struct 
emac_adapter *adpt, bool linkup)
return 0;
 }
 
+static struct sgmii_ops fsm9900_ops = {
+   .init = emac_sgmii_init_fsm9900,
+   .open = emac_sgmii_common_open,
+   .close = emac_sgmii_common_close,
+   .link_change = emac_sgmii_common_link_change,
+   .reset = emac_sgmii_common_reset,
+};
+
 static struct sgmii_ops qdf2432_ops = {
.init = emac_sgmii_init_qdf2432,
.open = emac_sgmii_common_open,
@@ -281,6 +289,7 @@ static int emac_sgmii_common_link_change(struct 
emac_adapter *adpt, bool linkup)
.reset = emac_sgmii_common_reset,
 };
 
+#ifdef CONFIG_ACPI
 static struct sgmii_ops qdf2400_ops = {
.init = emac_sgmii_init_qdf2400,
.open = emac_sgmii_common_open,
@@ -288,6 +297,7 @@ static int emac_sgmii_common_link_change(struct 
emac_adapter *adpt, bool linkup)
.link_change = emac_sgmii_common_link_change,
.reset = emac_sgmii_common_reset,
 };
+#endif
 
 static int emac_sgmii_acpi_match(struct device *dev, void *data)
 {
@@ -335,11 +345,11 @@ static int emac_sgmii_acpi_match(struct device *dev, void 
*data)
 static const struct of_device_id emac_sgmii_dt_match[] = {
{
.compatible = "qcom,fsm9900-emac-sgmii",
-   .data = emac_sgmii_init_fsm9900,
+   .data = &fsm9900_ops,
},
{
.compatible = "qcom,qdf2432-emac-sgmii",
-   .data = emac_sgmii_init_qdf2432,
+   .data = &qdf2432_ops,
},
{}
 };
@@ -386,7 +396,7 @@ int emac_sgmii_config(struct platform_device *pdev, struct 
emac_adapter *adpt)
goto error_put_device;
}
 
-   phy->sgmii_ops->init = match->data;
+   phy->sgmii_ops = (struct sgmii_ops *)match->data;
}
 
/* Base address is the first address */
-- 
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm
Technologies, Inc.  Qualcomm Technologies, Inc. is a member of the
Code Aurora Forum, a Linux Foundation Collaborative Project.

[PATCH net] VSOCK: check sk state before receive

2018-05-26 Thread Hangbin Liu

Since vmci_transport_recv_dgram_cb is a callback function and we access the
socket struct without holding the lock here, there is a possibility that
sk has been released and we use it again. This may cause a NULL pointer
dereference later, while receiving. Here is the call trace:

[  389.486319] BUG: unable to handle kernel NULL pointer dereference at 
0010
[  389.494148] PGD 0 P4D 0
[  389.496687] Oops:  [#1] SMP PTI
[  389.500170] Modules linked in: vhost_net vmw_vsock_vmci_transport tun vsock 
vhost vmw_vmci tap iptable_security iptable_raw iptable_mangle iptable_nat 
nf_conntrack_ipv4 nf_defrag_ipv4 nf_s
[  389.510984] Failed to add new resource (handle=0x2:0x2711), error: -22
[  389.543309] Failed to add new resource (handle=0x2:0x2711), error: -22
[  389.570936]  ttm drm crc32c_intel mptsas scsi_transport_sas serio_raw 
ata_piix mptscsih libata i2c_core mptbase bnx2 dm_mirror dm_region_hash dm_log 
dm_mod
[  389.597899] CPU: 3 PID: 113 Comm: kworker/3:2 Tainted: G  I   
4.17.0-rc6.latest+ #25
[  389.606673] Hardware name: Dell Inc. PowerEdge R710/0XDX06, BIOS 6.1.0 
10/18/2011
[  389.614158] Workqueue: events dg_delayed_dispatch [vmw_vmci]
[  389.619820] RIP: 0010:selinux_socket_sock_rcv_skb+0x46/0x270
[  389.625475] RSP: 0018:bcb5416b7ce0 EFLAGS: 00010293
[  389.630698] RAX:  RBX: 0028 RCX: 0007
[  389.637825] RDX:  RSI: 94a29feec500 RDI: bcb5416b7d18
[  389.644953] RBP: 94a29bd9a640 R08: 0001 R09: 94a187c03080
[  389.652080] R10: bcb5416b7d80 R11:  R12: bcb5416b7d18
[  389.659206] R13: 94a29feec500 R14: 94a2afda5e00 R15: 094a2afda5e0
[  389.666336] FS:  () GS:94a2afd8() 
knlGS:
[  389.674419] CS:  0010 DS:  ES:  CR0: 80050033
[  389.680160] CR2: 0010 CR3: 4320a003 CR4: 000206e0
[  389.687283] Call Trace:
[  389.689738]  ? __alloc_skb+0xa0/0x230
[  389.693407]  security_sock_rcv_skb+0x32/0x60
[  389.697679]  ? __alloc_skb+0xa0/0x230
[  389.701343]  sk_filter_trim_cap+0x4e/0x1f0
[  389.705442]  __sk_receive_skb+0x32/0x290
[  389.709372]  vmci_transport_recv_dgram_cb+0xa7/0xd0 
[vmw_vsock_vmci_transport]
[  389.716593]  dg_delayed_dispatch+0x22/0x50 [vmw_vmci]
[  389.721648]  process_one_work+0x1f2/0x4a0
[  389.725662]  worker_thread+0x38/0x4c0
[  389.729329]  ? process_one_work+0x4a0/0x4a0
[  389.733512]  kthread+0x12f/0x150
[  389.736743]  ? kthread_create_worker_on_cpu+0x90/0x90
[  389.741796]  ret_from_fork+0x35/0x40
[  389.745370] Code: 8b 04 25 28 00 00 00 48 89 44 24 70 31 c0 e8 42 15 db ff 
0f b7 5d 10 48 8b 85 70 02 00 00 4c 8d 64 24 38 b9 07 00 00 00 4c 89 e7 <44> 8b 
70 10 31 c0 41 89 df 41 83 e7 f7
[  389.764342] RIP: selinux_socket_sock_rcv_skb+0x46/0x270 RSP: bcb5416b7ce0
[  389.771467] CR2: 0010
[  389.774784] ---[ end trace e83d65291a15ae6a ]---

Fix it by checking sk state before using it.

Fixes: d021c344051a ("VSOCK: Introduce VM Sockets")
Signed-off-by: Hangbin Liu 
---
 net/vmw_vsock/vmci_transport.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index a7a73ff..0d26040 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -612,6 +612,13 @@ static int vmci_transport_recv_dgram_cb(void *data, struct 
vmci_datagram *dg)
if (!vmci_transport_allow_dgram(vsk, dg->src.context))
return VMCI_ERROR_NO_ACCESS;
 
+   bh_lock_sock(sk);
+   if (sk->sk_state == TCP_CLOSE) {
+   bh_unlock_sock(sk);
+   return VMCI_ERROR_DATAGRAM_FAILED;
+   }
+   bh_unlock_sock(sk);
+
size = VMCI_DG_SIZE(dg);
 
/* Attach the packet to the socket's receive queue as an sk_buff. */
-- 
1.8.3.1

Re: [PATCH net] sctp: not allow to set rto_min with a value below 200 msecs

2018-05-26 Thread Neil Horman

On Sat, May 26, 2018 at 05:50:39PM +0200, Dmitry Vyukov wrote:
> On Sat, May 26, 2018 at 5:42 PM, Michael Tuexen
>  wrote:
> >> On 25. May 2018, at 21:13, Neil Horman  wrote:
> >>
> >> On Sat, May 26, 2018 at 01:41:02AM +0800, Xin Long wrote:
> >>> syzbot reported a rcu_sched self-detected stall on CPU which is caused
> >>> by too small value set on rto_min with SCTP_RTOINFO sockopt. With this
> >>> value, hb_timer will get stuck there, as in its timer handler it starts
> >>> this timer again with this value, then goes to the timer handler again.
> >>>
> >>> This problem is there since very beginning, and thanks to Eric for the
> >>> reproducer shared from a syzbot mail.
> >>>
> >>> This patch fixes it by not allowing to set rto_min with a value below
> >>> 200 msecs, which is based on TCP's, by either setsockopt or sysctl.
> >>>
> >>> Reported-by: syzbot+3dcd59a1f907245f8...@syzkaller.appspotmail.com
> >>> Suggested-by: Marcelo Ricardo Leitner 
> >>> Signed-off-by: Xin Long 
> >>> ---
> >>> include/net/sctp/constants.h |  1 +
> >>> net/sctp/socket.c| 10 +++---
> >>> net/sctp/sysctl.c|  3 ++-
> >>> 3 files changed, 10 insertions(+), 4 deletions(-)
> >>>
> >>> diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h
> >>> index 20ff237..2ee7a7b 100644
> >>> --- a/include/net/sctp/constants.h
> >>> +++ b/include/net/sctp/constants.h
> >>> @@ -277,6 +277,7 @@ enum { SCTP_MAX_GABS = 16 };
> >>> #define SCTP_RTO_INITIAL (3 * 1000)
> >>> #define SCTP_RTO_MIN (1 * 1000)
> >>> #define SCTP_RTO_MAX (60 * 1000)
> >>> +#define SCTP_RTO_HARD_MIN   200
> >>>
> >>> #define SCTP_RTO_ALPHA  3   /* 1/8 when converted to right 
> >>> shifts. */
> >>> #define SCTP_RTO_BETA   2   /* 1/4 when converted to right 
> >>> shifts. */
> >>> diff --git a/net/sctp/socket.c b/net/sctp/socket.c
> >>> index ae7e7c6..6ef12c7 100644
> >>> --- a/net/sctp/socket.c
> >>> +++ b/net/sctp/socket.c
> >>> @@ -3029,7 +3029,8 @@ static int sctp_setsockopt_nodelay(struct sock *sk, 
> >>> char __user *optval,
> >>>  * be changed.
> >>>  *
> >>>  */
> >>> -static int sctp_setsockopt_rtoinfo(struct sock *sk, char __user *optval, 
> >>> unsigned int optlen)
> >>> +static int sctp_setsockopt_rtoinfo(struct sock *sk, char __user *optval,
> >>> +   unsigned int optlen)
> >>> {
> >>>  struct sctp_rtoinfo rtoinfo;
> >>>  struct sctp_association *asoc;
> >>> @@ -3056,10 +3057,13 @@ static int sctp_setsockopt_rtoinfo(struct sock 
> >>> *sk, char __user *optval, unsigne
> >>>  else
> >>>  rto_max = asoc ? asoc->rto_max : sp->rtoinfo.srto_max;
> >>>
> >>> -if (rto_min)
> >>> +if (rto_min) {
> >>> +if (rto_min < SCTP_RTO_HARD_MIN)
> >>> +return -EINVAL;
> >>>  rto_min = asoc ? msecs_to_jiffies(rto_min) : rto_min;
> >>> -else
> >>> +} else {
> >>>  rto_min = asoc ? asoc->rto_min : sp->rtoinfo.srto_min;
> >>> +}
> >>>
> >>>  if (rto_min > rto_max)
> >>>  return -EINVAL;
> >>> diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
> >>> index 33ca5b7..7ec854a 100644
> >>> --- a/net/sctp/sysctl.c
> >>> +++ b/net/sctp/sysctl.c
> >>> @@ -52,6 +52,7 @@ static int rto_alpha_min = 0;
> >>> static int rto_beta_min = 0;
> >>> static int rto_alpha_max = 1000;
> >>> static int rto_beta_max = 1000;
> >>> +static int rto_hard_min = SCTP_RTO_HARD_MIN;
> >>>
> >>> static unsigned long max_autoclose_min = 0;
> >>> static unsigned long max_autoclose_max =
> >>> @@ -116,7 +117,7 @@ static struct ctl_table sctp_net_table[] = {
> >>>  .maxlen = sizeof(unsigned int),
> >>>  .mode   = 0644,
> >>>  .proc_handler   = proc_sctp_do_rto_min,
> >>> -.extra1 = &one,
> >>> +.extra1 = &rto_hard_min,
> >>>  .extra2 = &init_net.sctp.rto_max
> >>>  },
> >>>  {
> >>> --
> >>> 2.1.0
> >>>
> >>> --
> >>> To unsubscribe from this list: send the line "unsubscribe linux-sctp" in
> >>> the body of a message to majord...@vger.kernel.org
> >>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> >>>
> >> Patch looks fine, you probably want to note this hard minimum in man(7) 
> >> sctp as
> >> well
> >>
> > I'm aware of some signalling networks which use RTO.min of smaller values 
> > than 200ms.
> > So could this be reduced?
> 
> Hi Michael,
> 
> What value do they use?
> 
> Xin, Neil, is there more principled way of ensuring that a timer won't
> cause a hard CPU stall? There are slow machines and there are slow
> kernels (in particular syzbot kernel has tons of debug configs
> enabled). 200ms _should_ not cause problems because we did not see
> them with tcp. But it's hard to say what's the low limit as we are
> trying to put a hard upper bound on execution time of

Re: [PATCH 3/6] ravb: remove custom .set_link_ksettings from ethtool ops

2018-05-26 Thread Sergei Shtylyov

On 05/24/2018 02:11 PM, Vladimir Zapolskiy wrote:

> The change replaces a custom implementation of .set_link_ksettings
> callback with a shared phy_ethtool_set_link_ksettings(), this fixes
> sleep in atomic context bug, which is encountered every time when link
> settings are changed by ethtool.

   Seeing it now...

> Now duplex mode setting is enforced in ravb_adjust_link() only, also
> now TX/RX is disabled when link is put down or modifications to E-MAC
> registers ECMR and GECMR are expected for both cases of checked and
> ignored link status pin state from E-MAC interrupt handler.
> 
> Signed-off-by: Vladimir Zapolskiy 
> ---
>  drivers/net/ethernet/renesas/ravb_main.c | 58 
> +---
>  1 file changed, 15 insertions(+), 43 deletions(-)
> 
> diff --git a/drivers/net/ethernet/renesas/ravb_main.c 
> b/drivers/net/ethernet/renesas/ravb_main.c
> index 3d91caa44176..0d811c02ff34 100644
> --- a/drivers/net/ethernet/renesas/ravb_main.c
> +++ b/drivers/net/ethernet/renesas/ravb_main.c
> @@ -980,6 +980,13 @@ static void ravb_adjust_link(struct net_device *ndev)
>   struct ravb_private *priv = netdev_priv(ndev);
>   struct phy_device *phydev = ndev->phydev;
>   bool new_state = false;
> + unsigned long flags;
> +
> + spin_lock_irqsave(&priv->lock, flags);
> +
> + /* Disable TX and RX right over here, if E-MAC change is ignored */
> + if (priv->no_avb_link)
> + ravb_rcv_snd_disable(ndev);
>  
>   if (phydev->link) {
>   if (phydev->duplex != priv->duplex) {
> @@ -997,18 +1004,21 @@ static void ravb_adjust_link(struct net_device *ndev)
>   ravb_modify(ndev, ECMR, ECMR_TXF, 0);
>   new_state = true;
>   priv->link = phydev->link;
> - if (priv->no_avb_link)
> - ravb_rcv_snd_enable(ndev);
>   }
>   } else if (priv->link) {
>   new_state = true;
>   priv->link = 0;
>   priv->speed = 0;
>   priv->duplex = -1;
> - if (priv->no_avb_link)
> - ravb_rcv_snd_disable(ndev);
>   }
>  
> + /* Enable TX and RX right over here, if E-MAC change is ignored */
> + if (priv->no_avb_link && phydev->link)
> + ravb_rcv_snd_enable(ndev);
> +
> + mmiowb();
> + spin_unlock_irqrestore(&priv->lock, flags);
> +

   I like this part. :-)

>   if (new_state && netif_msg_link(priv))
>   phy_print_status(phydev);
>  }
> @@ -1096,44 +1106,6 @@ static int ravb_phy_start(struct net_device *ndev)
>   return 0;
>  }
>  
> -static int ravb_set_link_ksettings(struct net_device *ndev,
> -const struct ethtool_link_ksettings *cmd)
> -{
> - struct ravb_private *priv = netdev_priv(ndev);
> - unsigned long flags;
> - int error;
> -
> - if (!ndev->phydev)
> - return -ENODEV;
> -
> - spin_lock_irqsave(&priv->lock, flags);
> -
> - /* Disable TX and RX */
> - ravb_rcv_snd_disable(ndev);
> -
> - error = phy_ethtool_ksettings_set(ndev->phydev, cmd);
> - if (error)
> - goto error_exit;
> -
> - if (cmd->base.duplex == DUPLEX_FULL)
> - priv->duplex = 1;
> - else
> - priv->duplex = 0;
> -
> - ravb_set_duplex(ndev);
> -
> -error_exit:
> - mdelay(1);
> -
> - /* Enable TX and RX */
> - ravb_rcv_snd_enable(ndev);
> -
> - mmiowb();
> - spin_unlock_irqrestore(&priv->lock, flags);
> -
> - return error;
> -}
> -

   But this part is clearly lumping it all together... 

[...]
> @@ -1357,7 +1329,7 @@ static const struct ethtool_ops ravb_ethtool_ops = {
>   .set_ringparam  = ravb_set_ringparam,
>   .get_ts_info= ravb_get_ts_info,
>   .get_link_ksettings = phy_ethtool_get_link_ksettings,
> - .set_link_ksettings = ravb_set_link_ksettings,
> + .set_link_ksettings = phy_ethtool_set_link_ksettings,

   Should have been a part of the final patch in the fix/enhancement chain...

>   .get_wol= ravb_get_wol,
>   .set_wol= ravb_set_wol,
>  };

MBR, Sergei

Re: [PATCH net] net: sched: check netif_xmit_frozen_or_stopped() in sch_direct_xmit()

2018-05-26 Thread John Fastabend

On 05/25/2018 12:46 PM, Song Liu wrote:
> On Fri, May 25, 2018 at 11:11 AM, Song Liu  wrote:
>> Summary:
>>
>> At the end of sch_direct_xmit(), we are in the else path of
>> !dev_xmit_complete(ret), which means ret == NETDEV_TX_OK. The following
>> condition will always fail and netif_xmit_frozen_or_stopped() is not
>> checked at all.
>>
>> if (ret && netif_xmit_frozen_or_stopped(txq))
>>  return false;
>>
>> In this patch, this condition is fixed as:
>>
>> if (netif_xmit_frozen_or_stopped(txq))
>>  return false;
>>
>> and further simplifies the code as:
>>
>> return !netif_xmit_frozen_or_stopped(txq);
>>
>> Fixes: 29b86cdac00a ("net: sched: remove remaining uses for qdisc_qlen in 
>> xmit path")
>> Cc: John Fastabend 
>> Cc: David S. Miller 
>> Signed-off-by: Song Liu 
>> ---
>>  net/sched/sch_generic.c | 5 +
>>  1 file changed, 1 insertion(+), 4 deletions(-)
>>
>> diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
>> index 39c144b..8261d48 100644
>> --- a/net/sched/sch_generic.c
>> +++ b/net/sched/sch_generic.c
>> @@ -346,10 +346,7 @@ bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc 
>> *q,
>> return false;
>> }
>>
>> -   if (ret && netif_xmit_frozen_or_stopped(txq))
>> -   return false;
>> -
>> -   return true;
>> +   return !netif_xmit_frozen_or_stopped(txq);
>>  }
>>
>>  /*
>> --
>> 2.9.5
>>
> 
> Alexei and I discussed about this offline. We would like to share our
> discussion here to
> clarify the motivation.
> 
> Before 29b86cdac00a, ret in condition "if (ret &&
> netif_xmit_frozen_or_stopped()" is not
> the value from dev_hard_start_xmit(), because ret is overwritten by
> either qdisc_qlen()
> or dev_requeue_skb(). Therefore, 29b86cdac00a changed the behavior of
> this condition.
> 
> For ret from dev_hard_start_xmit(), I dig into the function and found
> it is from return value
> of ndo_start_xmit(). Per netdevice.h, ndo_start_xmit() should only
> return NETDEV_TX_OK
> or NETDEV_TX_BUSY. I survey many drivers, and they all follow the rule. The 
> only
> exception is vlan.
> 
> Given ret could only be NETDEV_TX_OK or NETDEV_TX_BUSY (ignore vlan for now),
> if it fails condition "if (!dev_xmit_complete(ret))", ret must be
> NETDEV_TX_OK == 0. So
> netif_xmit_frozen_or_stopped() will always be bypassed.
> 
> It is probably OK to ignore netif_xmit_frozen_or_stopped(), and return true 
> from
> sch_direct_xmit(), as I didn't see that break any functionality. But
> it is more like "correct
> by accident" to me. This is the motivation of my original patch.
> 
> Alexei pointed out that, the following condition is more like original logic:
> 
>   if (qdisc_qlen(q) && netif_xmit_frozen_or_stopped(txq))
> return false;
> 
> However, I think John would like to remove qdisc_qlen() from the tx
> path. I didn't see

Yep qdisc_qlen() is not very friendly for lockless users. At
some point we will get around to writing a distributed rate
limiter qdisc and it will be nice to not have to work-around
qdisc_qlen().

> any issue without the extra qdisc_qlen() check, so the patch is
> probably good AS-IS.
> 
> Please share your comments and feedback on this.
> 

Thanks for the detailed analysis. The above patch looks OK
to me. Actually I'm debating if we should just drop the check.
But, there looks to be a case where drivers return NETDEV_TX_OK
and then stop the queue because it is nearly overrun. By putting
the check there we stop early instead of doing some extra work
before realizing the driver ring is full.

Still this overrun case should be rare so removing the check
should be OK. Plus as you note it's not been running anyways. My
current recommendation is just remove the check altogether.

Thanks,
John 

> Thanks,
> Song
>

[PATCH v4 net] stmmac: 802.1ad tag stripping support fix

2018-05-26 Thread Elad Nachman

stmmac reception handler calls stmmac_rx_vlan() to strip the vlan before 
calling napi_gro_receive().

The function assumes VLAN tagged frames are always tagged with 802.1Q protocol,
and assigns ETH_P_8021Q to the skb by hard-coding the parameter on call to 
__vlan_hwaccel_put_tag() .

This causes packets not to be passed to the VLAN slave if it was created with 
802.1AD protocol
(ip link add link eth0 eth0.100 type vlan proto 802.1ad id 100).

This fix passes the protocol from the VLAN header into __vlan_hwaccel_put_tag()
instead of using the hard-coded value of ETH_P_8021Q.
NETIF_F_HW_VLAN_CTAG_RX check was removed to be in line with the driver actual 
abilities.

Signed-off-by: Elad Nachman 

---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c 
b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index b65e2d1..284e6a7 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -3293,17 +3293,17 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, 
struct net_device *dev)
 
 static void stmmac_rx_vlan(struct net_device *dev, struct sk_buff *skb)
 {
-   struct ethhdr *ehdr;
+   struct vlan_ethhdr *veth;
u16 vlanid;
+   __be16 vlan_proto;
 
-   if ((dev->features & NETIF_F_HW_VLAN_CTAG_RX) ==
-   NETIF_F_HW_VLAN_CTAG_RX &&
-   !__vlan_get_tag(skb, &vlanid)) {
+   if (!__vlan_get_tag(skb, &vlanid)) {
/* pop the vlan tag */
-   ehdr = (struct ethhdr *)skb->data;
-   memmove(skb->data + VLAN_HLEN, ehdr, ETH_ALEN * 2);
+   veth = (struct vlan_ethhdr *)skb->data;
+   vlan_proto = veth->h_vlan_proto;
+   memmove(skb->data + VLAN_HLEN, veth, ETH_ALEN * 2);
skb_pull(skb, VLAN_HLEN);
-   __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlanid);
+   __vlan_hwaccel_put_tag(skb, vlan_proto, vlanid);
}
 }
 
-- 
2.7.4

Re: [PATCH 4/7] x86: remove a stray reference to pci-nommu.c

2018-05-26 Thread Thomas Gleixner

On Fri, 25 May 2018, Christoph Hellwig wrote:

Subject should be: Documentation/x86: Remove .

please

> This is just the minimal workaround.  The file file is mostly either stale

file file?

> and/or duplicative of Documentation/admin-guide/kernel-parameters.txt,
> but that is much more work than I'm willing to do right now.

Yeah, this thing is on the todo list ...

> Signed-off-by: Christoph Hellwig 

Other than the above nits:

Reviewed-by: Thomas Gleixner 

> ---
>  Documentation/x86/x86_64/boot-options.txt | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/Documentation/x86/x86_64/boot-options.txt 
> b/Documentation/x86/x86_64/boot-options.txt
> index b297c48389b9..153b3a57fba2 100644
> --- a/Documentation/x86/x86_64/boot-options.txt
> +++ b/Documentation/x86/x86_64/boot-options.txt
> @@ -187,9 +187,9 @@ PCI
>  
>  IOMMU (input/output memory management unit)
>  
> - Currently four x86-64 PCI-DMA mapping implementations exist:
> + Multiple x86-64 PCI-DMA mapping implementations exist, for example:
>  
> -   1. : use no hardware/software IOMMU at all
> +   1. : use no hardware/software IOMMU at all
>(e.g. because you have < 3 GB memory).
>Kernel boot message: "PCI-DMA: Disabling IOMMU"
>  
> -- 
> 2.17.0
> 
>

Re: [PATCH net-next v12 2/5] netvsc: refactor notifier/event handling code to use the failover framework

2018-05-26 Thread Samudrala, Sridhar


On 5/26/2018 12:51 AM, Jiri Pirko wrote:

Sat, May 26, 2018 at 09:22:18AM CEST, sridhar.samudr...@intel.com wrote:

On 5/25/2018 4:28 PM, Stephen Hemminger wrote:

On Fri, 25 May 2018 16:11:47 -0700
"Samudrala, Sridhar"  wrote:


On 5/25/2018 3:34 PM, Stephen Hemminger wrote:

On Thu, 24 May 2018 09:55:14 -0700
Sridhar Samudrala  wrote:

--- a/drivers/net/hyperv/Kconfig
+++ b/drivers/net/hyperv/Kconfig
@@ -2,5 +2,6 @@ config HYPERV_NET
tristate "Microsoft Hyper-V virtual network driver"
depends on HYPERV
select UCS2_STRING
+   select FAILOVER

When I take a working kernel config, add the patches then do
make oldconfig

It is not autoselecting FAILOVER, it prompts me for it. This means
if user says no then a non-working netvsc device is made.

I see
  Generic failover module (FAILOVER) [M/y/?] (NEW)

So the user is given an option to either build as a Module or part of the
kernel. 'n' is not an option.

With most libraries there is no prompt at all.

Not sure what you meant by this.
Without any patches applied, i had a .config file with HYPERV_NET configured
as a module.
Then after applying the first 2 patches in this series, i did a
  make oldconfig
and i see the above prompt.

Are you saying that on some distros, 'make oldconfig' creates a .config
file without any prompt and FAILOVER is not getting selected even when 
HYPERV_NET
is enabled?



Well the thing is that for a user, it makes no sense to select
"FAILOVER" by hand. It is a lib, so it should only be selected by a
user of the lib. It makes no sense to have it turned on by hand - no lib user.
You can achieve that by simply removing "help" for the Kconfig
item. Same thing for "NET_FAILOVER".


I played around with the CONFIG options and i see that FAILOVER options do
get selected correctly when virtio-net/netvsc are enabled.  Even if the FAILOVER
is turned off by the user before the hyperv-net/virtio-net patches are applied,
it gets selected automatically when hyperv-net/virtio-net patches are applied 
and
enabled in config.

If we don't want to allow the user to see these options, then i think we need to
remove them from Kconfig files.  Just removing "help" doesn't seem to make a
difference.

Can we address any config issues (i don't see any at this point) as a bug-fix 
on top
of this series?

Re: [PATCH 4/6] sh_eth: remove custom .nway_reset from ethtool ops

2018-05-26 Thread Sergei Shtylyov

On 05/26/2018 09:46 PM, Sergei Shtylyov wrote:

>> The change fixes a sleep in atomic context issue, which can be
>> always triggered by running 'ethtool -r' command, because
>> phy_start_aneg() protects phydev fields by a mutex.
> 
>Again, I'm unable to reproduce this BUG()...

   Now I can! I started to suspect this check needs to be specifically enabled
under the Kernel Hacking menu, and it turned out to be so...

>> Another note is that the change implicitly replaces phy_start_aneg()
>> with a newer phy_restart_aneg().
>>
>> Signed-off-by: Vladimir Zapolskiy 
> [...]

MBR, Sergei

Re: [PATCH 4/6] sh_eth: remove custom .nway_reset from ethtool ops

2018-05-26 Thread Sergei Shtylyov

On 05/24/2018 02:11 PM, Vladimir Zapolskiy wrote:

> The change fixes a sleep in atomic context issue, which can be
> always triggered by running 'ethtool -r' command, because
> phy_start_aneg() protects phydev fields by a mutex.

   Again, I'm unable to reproduce this BUG()...

> Another note is that the change implicitly replaces phy_start_aneg()
> with a newer phy_restart_aneg().
> 
> Signed-off-by: Vladimir Zapolskiy 
[...]

MBR, Sergei

Re: [PATCH 1/6] ravb: remove custom .nway_reset from ethtool ops

2018-05-26 Thread Sergei Shtylyov

On 05/24/2018 02:11 PM, Vladimir Zapolskiy wrote:

> The change fixes a sleep in atomic context issue, which can be
> always triggered by running 'ethtool -r' command, because
> phy_start_aneg() protects phydev fields by a mutex.

   BTW, I was unable to trigger the BUG() with 'ethtool -r eth0' where 'eth0'
is EtherAVB. What am I doing wrong? :-)

MBR, Sergei

Greetings

2018-05-26 Thread Zeliha Omer Faruk




Hello

Greetings to you please i have a business proposal for you contact me
for more detailes asap thanks.

Best Regards,
Miss.Zeliha ömer faruk
Esentepe Mahallesi Büyükdere
Caddesi Kristal Kule Binasi
No:215
Sisli - Istanbul, Turkey

Re: [PATCH 0/6] ravb/sh_eth: fix sleep in atomic by reusing shared ethtool handlers

2018-05-26 Thread Sergei Shtylyov

On 05/25/2018 09:25 AM, Vladimir Zapolskiy wrote:

 For ages trivial changes to RAVB and SuperH ethernet links by means of
 standard 'ethtool' trigger a 'sleeping function called from invalid
 context' bug, to visualize it on r8a7795 ULCB:

   % ethtool -r eth0
   BUG: sleeping function called from invalid context at 
 kernel/locking/mutex.c:747
   in_atomic(): 1, irqs_disabled(): 128, pid: 554, name: ethtool
   INFO: lockdep is turned off.
   irq event stamp: 0
   hardirqs last  enabled at (0): [<>]   (null)
   hardirqs last disabled at (0): [] 
 copy_process.isra.7.part.8+0x2cc/0x1918
   softirqs last  enabled at (0): [] 
 copy_process.isra.7.part.8+0x2cc/0x1918
   softirqs last disabled at (0): [<>]   (null)
   CPU: 5 PID: 554 Comm: ethtool Not tainted 4.17.0-rc4-arm64-renesas+ #33
   Hardware name: Renesas H3ULCB board based on r8a7795 ES2.0+ (DT)
   Call trace:
dump_backtrace+0x0/0x198
show_stack+0x24/0x30
dump_stack+0xb8/0xf4
___might_sleep+0x1c8/0x1f8
__might_sleep+0x58/0x90
__mutex_lock+0x50/0x890
mutex_lock_nested+0x3c/0x50
phy_start_aneg_priv+0x38/0x180
phy_start_aneg+0x24/0x30
ravb_nway_reset+0x3c/0x68
dev_ethtool+0x3dc/0x2338
dev_ioctl+0x19c/0x490
sock_do_ioctl+0xe0/0x238
sock_ioctl+0x254/0x460
do_vfs_ioctl+0xb0/0x918
ksys_ioctl+0x50/0x80
sys_ioctl+0x34/0x48
__sys_trace_return+0x0/0x4

 The root cause is that an attempt to modify ECMR and GECMR registers
 only when RX/TX function is disabled was too overcomplicated in its
 original implementation, also processing of an optional Link Change
 interrupt added even more complexity, as a result the implementation
 was error prone.

 The new locking scheme is confirmed to be correct by dumping driver
 specific and generic PHY framework function calls with aid of ftrace
 while running more or less advanced tests.

 Please note that sh_eth patches from the series were built-tested only.

 On purpose I do not add Fixes tags, the reused PHY handlers were added
 way later than the fixed problems were firstly found in the drivers.
>>>
>>>I think you went one step too far with these fixes. On the first glance,
>>> the real fixes are to remove grabbing/releasing the spinlock for the 
>>> duration
>>> of the phylib calls. Am I right? If so, making use of the new phylib APIs
>>> would be a further enhancement, it's not needed for fixing the splats per 
>>> se...
>>
>>Note that I hadn't looked at the patches #3/#6 at the time of writing 
>> this;
>> those seem to be more complicated than the rest.
> 
> Right, the simplistic approach of just removing the held spinlock does
> not fit well into the overall lame locking model found in the driver.

   Yet you only try fixing it in the patches #3 and #6. I was talking about
the patches #1 and #4 mostly (#2 and #5 turned out to be non-fixes).

> The thing is that I would prefer to exhibit 'remove custom callbacks'
> side of the changes as it is done now, and fixing severe 'invalid context'
> bugs is left as a valuable side effect. I may attempt to find enough
> free time to follow your instructions, but frankly speaking I don't
> see it beneficial to split a single good all-sufficient change into
> three or more: removal of spinlocks, replacement of phy_start_aneg(),
> then a non-functional clean-up.
   Yes, I would prefer these step-by-step changes.

> Bikeshedding isn't my preference,

   This is not about bikeshedding. What you are trying to do clearly
violates the 2 basic principles of the kernel development: "don't mix
fixes and enhancements" and "do one thing per patch". 

> but a report about technical flaws related to the published changes
> is appreciated, otherwise let me ask you to accept the changes as is,
> secondary optimizations can be done on top of them.

   No, I'll certainly have to NAK patches #1/#3 in their current form.
I'm yet to review patches #3/#6... anyway, if you lack the time to do things
properly, I'll have to take this burden on my shoulders (giving you credits).
Yet I'm basically in the same situation as you -- I have to spend my copious
free time on the large patch sets (like yours) and I'm still having some 
cleanups
to sh_eth cooking here (which I'll most probably have to defer)...

> --
> With best wishes,
> Vladimir

MBR, Sergei

Re: [PATCH 2/6] ravb: remove custom .get_link_ksettings from ethtool ops

2018-05-26 Thread Sergei Shtylyov

On 05/24/2018 02:11 PM, Vladimir Zapolskiy wrote:

> The change replaces a custom implementation of .get_link_ksettings
> callback with a shared phy_ethtool_get_link_ksettings(), note that
> &priv->lock wrapping is not needed, because the lock does not
> serialize access to phydev fields.

   No BUG() here, AFAICT. But then this is not a fix but an enhancement.
And I would have done that in 2 steps: 1st removing the spinlock code
and the 2nd removing the custom method implementation. 

> Signed-off-by: Vladimir Zapolskiy 
[...]

MBR, Sergei

Re: [PATCH 1/6] ravb: remove custom .nway_reset from ethtool ops

2018-05-26 Thread Sergei Shtylyov

Hello.

   A formal patch review this time...

On 05/24/2018 02:11 PM, Vladimir Zapolskiy wrote:

> The change fixes a sleep in atomic context issue, which can be
> always triggered by running 'ethtool -r' command, because
> phy_start_aneg() protects phydev fields by a mutex.

   OK so far...

> Another note is that the change implicitly replaces phy_start_aneg()
> with a newer phy_restart_aneg().

   Why? Is this necessary to fix the BUG()?

> Signed-off-by: Vladimir Zapolskiy 
> ---
>  drivers/net/ethernet/renesas/ravb_main.c | 17 +
>  1 file changed, 1 insertion(+), 16 deletions(-)
> 
> diff --git a/drivers/net/ethernet/renesas/ravb_main.c 
> b/drivers/net/ethernet/renesas/ravb_main.c
> index 68f122140966..4a043eb0e2aa 100644
> --- a/drivers/net/ethernet/renesas/ravb_main.c
> +++ b/drivers/net/ethernet/renesas/ravb_main.c
> @@ -1150,21 +1150,6 @@ static int ravb_set_link_ksettings(struct net_device 
> *ndev,
>   return error;
>  }
>  
> -static int ravb_nway_reset(struct net_device *ndev)
> -{
> - struct ravb_private *priv = netdev_priv(ndev);
> - int error = -ENODEV;
> - unsigned long flags;
> -
> - if (ndev->phydev) {
> - spin_lock_irqsave(&priv->lock, flags);

   OK, removing spin_lock_irqsave() fixes the BUG()...
   Not sure what we protect against here anyway, MAC interrupts?

> - error = phy_start_aneg(ndev->phydev);
> - spin_unlock_irqrestore(&priv->lock, flags);
> - }
> -
> - return error;
> -}
> -
>  static u32 ravb_get_msglevel(struct net_device *ndev)
>  {
>   struct ravb_private *priv = netdev_priv(ndev);
> @@ -1377,7 +1362,7 @@ static int ravb_set_wol(struct net_device *ndev, struct 
> ethtool_wolinfo *wol)
>  }
>  
>  static const struct ethtool_ops ravb_ethtool_ops = {
> - .nway_reset = ravb_nway_reset,
> + .nway_reset = phy_ethtool_nway_reset,

   What does this fix?

>   .get_msglevel   = ravb_get_msglevel,
>   .set_msglevel   = ravb_set_msglevel,
>   .get_link   = ethtool_op_get_link,

MBR, Sergei

Re: [PATCH 1/6] ravb: remove custom .nway_reset from ethtool ops

2018-05-26 Thread Sergei Shtylyov

Hello.

   A formal patch review this time...

On 05/24/2018 02:11 PM, Vladimir Zapolskiy wrote:

> The change fixes a sleep in atomic context issue, which can be
> always triggered by running 'ethtool -r' command, because
> phy_start_aneg() protects phydev fields by a mutex.

   OK so far...

> Another note is that the change implicitly replaces phy_start_aneg()
> with a newer phy_restart_aneg().

   Why? Is this necessary to fix the BUG()?

> Signed-off-by: Vladimir Zapolskiy 
> ---
>  drivers/net/ethernet/renesas/ravb_main.c | 17 +
>  1 file changed, 1 insertion(+), 16 deletions(-)
> 
> diff --git a/drivers/net/ethernet/renesas/ravb_main.c 
> b/drivers/net/ethernet/renesas/ravb_main.c
> index 68f122140966..4a043eb0e2aa 100644
> --- a/drivers/net/ethernet/renesas/ravb_main.c
> +++ b/drivers/net/ethernet/renesas/ravb_main.c
> @@ -1150,21 +1150,6 @@ static int ravb_set_link_ksettings(struct net_device 
> *ndev,
>   return error;
>  }
>  
> -static int ravb_nway_reset(struct net_device *ndev)
> -{
> - struct ravb_private *priv = netdev_priv(ndev);
> - int error = -ENODEV;
> - unsigned long flags;
> -
> - if (ndev->phydev) {
> - spin_lock_irqsave(&priv->lock, flags);

   OK, removing spin_lock_irqsave() fixes the BUG()...
   Not sure what we protect against here anyway, MAC interrupts?

> - error = phy_start_aneg(ndev->phydev);
> - spin_unlock_irqrestore(&priv->lock, flags);
> - }
> -
> - return error;
> -}
> -
>  static u32 ravb_get_msglevel(struct net_device *ndev)
>  {
>   struct ravb_private *priv = netdev_priv(ndev);
> @@ -1377,7 +1362,7 @@ static int ravb_set_wol(struct net_device *ndev, struct 
> ethtool_wolinfo *wol)
>  }
>  
>  static const struct ethtool_ops ravb_ethtool_ops = {
> - .nway_reset = ravb_nway_reset,
> + .nway_reset = phy_ethtool_nway_reset,

   What does this fix?

>   .get_msglevel   = ravb_get_msglevel,
>   .set_msglevel   = ravb_set_msglevel,
>   .get_link   = ethtool_op_get_link,

MBR, Sergei

Re: [PATCH] crypto: chtls: generic handling of data and hdr

2018-05-26 Thread Herbert Xu

On Mon, May 14, 2018 at 04:41:38PM +0530, Atul Gupta wrote:
> removed redundant check and made TLS PDU and header recv
> handling common as received from HW.
> Ensure that only tls header is read in cpl_rx_tls_cmp
> read-ahead and skb is freed when entire data is processed.
> 
> Signed-off-by: Atul Gupta 
> Signed-off-by: Harsh Jain 

Patch applied.  Thanks.
-- 
Email: Herbert Xu 
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

Re: [PATCH net] sctp: not allow to set rto_min with a value below 200 msecs

2018-05-26 Thread Dmitry Vyukov

On Sat, May 26, 2018 at 5:42 PM, Michael Tuexen
 wrote:
>> On 25. May 2018, at 21:13, Neil Horman  wrote:
>>
>> On Sat, May 26, 2018 at 01:41:02AM +0800, Xin Long wrote:
>>> syzbot reported a rcu_sched self-detected stall on CPU which is caused
>>> by too small value set on rto_min with SCTP_RTOINFO sockopt. With this
>>> value, hb_timer will get stuck there, as in its timer handler it starts
>>> this timer again with this value, then goes to the timer handler again.
>>>
>>> This problem is there since very beginning, and thanks to Eric for the
>>> reproducer shared from a syzbot mail.
>>>
>>> This patch fixes it by not allowing to set rto_min with a value below
>>> 200 msecs, which is based on TCP's, by either setsockopt or sysctl.
>>>
>>> Reported-by: syzbot+3dcd59a1f907245f8...@syzkaller.appspotmail.com
>>> Suggested-by: Marcelo Ricardo Leitner 
>>> Signed-off-by: Xin Long 
>>> ---
>>> include/net/sctp/constants.h |  1 +
>>> net/sctp/socket.c| 10 +++---
>>> net/sctp/sysctl.c|  3 ++-
>>> 3 files changed, 10 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h
>>> index 20ff237..2ee7a7b 100644
>>> --- a/include/net/sctp/constants.h
>>> +++ b/include/net/sctp/constants.h
>>> @@ -277,6 +277,7 @@ enum { SCTP_MAX_GABS = 16 };
>>> #define SCTP_RTO_INITIAL (3 * 1000)
>>> #define SCTP_RTO_MIN (1 * 1000)
>>> #define SCTP_RTO_MAX (60 * 1000)
>>> +#define SCTP_RTO_HARD_MIN   200
>>>
>>> #define SCTP_RTO_ALPHA  3   /* 1/8 when converted to right shifts. 
>>> */
>>> #define SCTP_RTO_BETA   2   /* 1/4 when converted to right shifts. 
>>> */
>>> diff --git a/net/sctp/socket.c b/net/sctp/socket.c
>>> index ae7e7c6..6ef12c7 100644
>>> --- a/net/sctp/socket.c
>>> +++ b/net/sctp/socket.c
>>> @@ -3029,7 +3029,8 @@ static int sctp_setsockopt_nodelay(struct sock *sk, 
>>> char __user *optval,
>>>  * be changed.
>>>  *
>>>  */
>>> -static int sctp_setsockopt_rtoinfo(struct sock *sk, char __user *optval, 
>>> unsigned int optlen)
>>> +static int sctp_setsockopt_rtoinfo(struct sock *sk, char __user *optval,
>>> +   unsigned int optlen)
>>> {
>>>  struct sctp_rtoinfo rtoinfo;
>>>  struct sctp_association *asoc;
>>> @@ -3056,10 +3057,13 @@ static int sctp_setsockopt_rtoinfo(struct sock *sk, 
>>> char __user *optval, unsigne
>>>  else
>>>  rto_max = asoc ? asoc->rto_max : sp->rtoinfo.srto_max;
>>>
>>> -if (rto_min)
>>> +if (rto_min) {
>>> +if (rto_min < SCTP_RTO_HARD_MIN)
>>> +return -EINVAL;
>>>  rto_min = asoc ? msecs_to_jiffies(rto_min) : rto_min;
>>> -else
>>> +} else {
>>>  rto_min = asoc ? asoc->rto_min : sp->rtoinfo.srto_min;
>>> +}
>>>
>>>  if (rto_min > rto_max)
>>>  return -EINVAL;
>>> diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
>>> index 33ca5b7..7ec854a 100644
>>> --- a/net/sctp/sysctl.c
>>> +++ b/net/sctp/sysctl.c
>>> @@ -52,6 +52,7 @@ static int rto_alpha_min = 0;
>>> static int rto_beta_min = 0;
>>> static int rto_alpha_max = 1000;
>>> static int rto_beta_max = 1000;
>>> +static int rto_hard_min = SCTP_RTO_HARD_MIN;
>>>
>>> static unsigned long max_autoclose_min = 0;
>>> static unsigned long max_autoclose_max =
>>> @@ -116,7 +117,7 @@ static struct ctl_table sctp_net_table[] = {
>>>  .maxlen = sizeof(unsigned int),
>>>  .mode   = 0644,
>>>  .proc_handler   = proc_sctp_do_rto_min,
>>> -.extra1 = ,
>>> +.extra1 = &rto_hard_min,
>>>  .extra2 = _net.sctp.rto_max
>>>  },
>>>  {
>>> --
>>> 2.1.0
>>>
>>> --
>>> To unsubscribe from this list: send the line "unsubscribe linux-sctp" in
>>> the body of a message to majord...@vger.kernel.org
>>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>>
>> Patch looks fine, you probably want to note this hard minimum in man(7) sctp 
>> as
>> well
>>
> I'm aware of some signalling networks which use RTO.min of smaller values 
> than 200ms.
> So could this be reduced?

Hi Michael,

What value do they use?

Xin, Neil, is there more principled way of ensuring that a timer won't
cause a hard CPU stall? There are slow machines and there are slow
kernels (in particular syzbot kernel has tons of debug configs
enabled). 200ms _should_ not cause problems because we did not see
them with tcp. But it's hard to say what's the low limit as we are
trying to put a hard upper bound on execution time of a complex
section of code. Is there something like cond_resched for timers?

Re: [PATCH net] sctp: not allow to set rto_min with a value below 200 msecs

2018-05-26 Thread Michael Tuexen

> On 25. May 2018, at 21:13, Neil Horman  wrote:
> 
> On Sat, May 26, 2018 at 01:41:02AM +0800, Xin Long wrote:
>> syzbot reported a rcu_sched self-detected stall on CPU which is caused
>> by too small value set on rto_min with SCTP_RTOINFO sockopt. With this
>> value, hb_timer will get stuck there, as in its timer handler it starts
>> this timer again with this value, then goes to the timer handler again.
>> 
>> This problem is there since very beginning, and thanks to Eric for the
>> reproducer shared from a syzbot mail.
>> 
>> This patch fixes it by not allowing to set rto_min with a value below
>> 200 msecs, which is based on TCP's, by either setsockopt or sysctl.
>> 
>> Reported-by: syzbot+3dcd59a1f907245f8...@syzkaller.appspotmail.com
>> Suggested-by: Marcelo Ricardo Leitner 
>> Signed-off-by: Xin Long 
>> ---
>> include/net/sctp/constants.h |  1 +
>> net/sctp/socket.c| 10 +++---
>> net/sctp/sysctl.c|  3 ++-
>> 3 files changed, 10 insertions(+), 4 deletions(-)
>> 
>> diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h
>> index 20ff237..2ee7a7b 100644
>> --- a/include/net/sctp/constants.h
>> +++ b/include/net/sctp/constants.h
>> @@ -277,6 +277,7 @@ enum { SCTP_MAX_GABS = 16 };
>> #define SCTP_RTO_INITIAL (3 * 1000)
>> #define SCTP_RTO_MIN (1 * 1000)
>> #define SCTP_RTO_MAX (60 * 1000)
>> +#define SCTP_RTO_HARD_MIN   200
>> 
>> #define SCTP_RTO_ALPHA  3   /* 1/8 when converted to right shifts. */
>> #define SCTP_RTO_BETA   2   /* 1/4 when converted to right shifts. */
>> diff --git a/net/sctp/socket.c b/net/sctp/socket.c
>> index ae7e7c6..6ef12c7 100644
>> --- a/net/sctp/socket.c
>> +++ b/net/sctp/socket.c
>> @@ -3029,7 +3029,8 @@ static int sctp_setsockopt_nodelay(struct sock *sk, 
>> char __user *optval,
>>  * be changed.
>>  *
>>  */
>> -static int sctp_setsockopt_rtoinfo(struct sock *sk, char __user *optval, 
>> unsigned int optlen)
>> +static int sctp_setsockopt_rtoinfo(struct sock *sk, char __user *optval,
>> +   unsigned int optlen)
>> {
>>  struct sctp_rtoinfo rtoinfo;
>>  struct sctp_association *asoc;
>> @@ -3056,10 +3057,13 @@ static int sctp_setsockopt_rtoinfo(struct sock *sk, 
>> char __user *optval, unsigne
>>  else
>>  rto_max = asoc ? asoc->rto_max : sp->rtoinfo.srto_max;
>> 
>> -if (rto_min)
>> +if (rto_min) {
>> +if (rto_min < SCTP_RTO_HARD_MIN)
>> +return -EINVAL;
>>  rto_min = asoc ? msecs_to_jiffies(rto_min) : rto_min;
>> -else
>> +} else {
>>  rto_min = asoc ? asoc->rto_min : sp->rtoinfo.srto_min;
>> +}
>> 
>>  if (rto_min > rto_max)
>>  return -EINVAL;
>> diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
>> index 33ca5b7..7ec854a 100644
>> --- a/net/sctp/sysctl.c
>> +++ b/net/sctp/sysctl.c
>> @@ -52,6 +52,7 @@ static int rto_alpha_min = 0;
>> static int rto_beta_min = 0;
>> static int rto_alpha_max = 1000;
>> static int rto_beta_max = 1000;
>> +static int rto_hard_min = SCTP_RTO_HARD_MIN;
>> 
>> static unsigned long max_autoclose_min = 0;
>> static unsigned long max_autoclose_max =
>> @@ -116,7 +117,7 @@ static struct ctl_table sctp_net_table[] = {
>>  .maxlen = sizeof(unsigned int),
>>  .mode   = 0644,
>>  .proc_handler   = proc_sctp_do_rto_min,
>> -.extra1 = ,
>> +.extra1 = &rto_hard_min,
>>  .extra2 = _net.sctp.rto_max
>>  },
>>  {
>> -- 
>> 2.1.0
>> 
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-sctp" in
>> the body of a message to majord...@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>> 
> Patch looks fine, you probably want to note this hard minimum in man(7) sctp 
> as
> well
> 
I'm aware of some signalling networks which use RTO.min of smaller values than 
200ms.
So could this be reduced?

Best regards
Michael
> Acked-by: Neil Horman 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-sctp" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: INFO: rcu detected stall in skb_free_head

2018-05-26 Thread Dmitry Vyukov

On Sun, Apr 29, 2018 at 6:33 PM, syzbot
 wrote:
> Hello,
>
> syzbot hit the following crash on upstream commit
> a27fc14219f2e3c4a46ba9177b04d9b52c875532 (Mon Apr 16 21:07:39 2018 +)
> Merge branch 'parisc-4.17-3' of
> git://git.kernel.org/pub/scm/linux/kernel/git/deller/parisc-linux
> syzbot dashboard link:
> https://syzkaller.appspot.com/bug?extid=cac7c17ec0aca89d3c45
>
> Unfortunately, I don't have any reproducer for this crash yet.
> Raw console output:
> https://syzkaller.appspot.com/x/log.txt?id=6517400396627968
> Kernel config:
> https://syzkaller.appspot.com/x/.config?id=-5914490758943236750
> compiler: gcc (GCC) 8.0.1 20180413 (experimental)
>
> IMPORTANT: if you fix the bug, please add the following tag to the commit:
> Reported-by: syzbot+cac7c17ec0aca89d3...@syzkaller.appspotmail.com
> It will help syzbot understand when the bug is fixed. See footer for
> details.
> If you forward the report, please keep this part and the footer.
>
> INFO: rcu_sched self-detected stall on CPU
> 1-...!: (117917 ticks this GP) idle=036/1/4611686018427387906
> softirq=114416/114416 fqs=32
>  (t=125000 jiffies g=60712 c=60711 q=345938)
> rcu_sched kthread starved for 124847 jiffies! g60712 c60711 f0x2
> RCU_GP_WAIT_FQS(3) ->state=0x0 ->cpu=0
> RCU grace-period kthread stack dump:
> rcu_sched   R  running task23592 9  2 0x8000
> Call Trace:
>  context_switch kernel/sched/core.c:2848 [inline]
>  __schedule+0x801/0x1e30 kernel/sched/core.c:3490
>  schedule+0xef/0x430 kernel/sched/core.c:3549
>  schedule_timeout+0x138/0x240 kernel/time/timer.c:1801
>  rcu_gp_kthread+0x6b5/0x1940 kernel/rcu/tree.c:2231
>  kthread+0x345/0x410 kernel/kthread.c:238
>  ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:412
> NMI backtrace for cpu 1
> CPU: 1 PID: 24 Comm: kworker/1:1 Not tainted 4.17.0-rc1+ #6
> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
> Google 01/01/2011
> Workqueue: events rht_deferred_worker
> Call Trace:
>  
>  __dump_stack lib/dump_stack.c:77 [inline]
>  dump_stack+0x1b9/0x294 lib/dump_stack.c:113
>  nmi_cpu_backtrace.cold.4+0x19/0xce lib/nmi_backtrace.c:103
>  nmi_trigger_cpumask_backtrace+0x151/0x192 lib/nmi_backtrace.c:62
>  arch_trigger_cpumask_backtrace+0x14/0x20 arch/x86/kernel/apic/hw_nmi.c:38
>  trigger_single_cpu_backtrace include/linux/nmi.h:156 [inline]
>  rcu_dump_cpu_stacks+0x175/0x1c2 kernel/rcu/tree.c:1376
>  print_cpu_stall kernel/rcu/tree.c:1525 [inline]
>  check_cpu_stall.isra.61.cold.80+0x36c/0x59a kernel/rcu/tree.c:1593
>  __rcu_pending kernel/rcu/tree.c:3356 [inline]
>  rcu_pending kernel/rcu/tree.c:3401 [inline]
>  rcu_check_callbacks+0x21b/0xad0 kernel/rcu/tree.c:2763
>  update_process_times+0x2d/0x70 kernel/time/timer.c:1636
>  tick_sched_handle+0x9f/0x180 kernel/time/tick-sched.c:173
>  tick_sched_timer+0x45/0x130 kernel/time/tick-sched.c:1283
>  __run_hrtimer kernel/time/hrtimer.c:1386 [inline]
>  __hrtimer_run_queues+0x3e3/0x10a0 kernel/time/hrtimer.c:1448
>  hrtimer_interrupt+0x286/0x650 kernel/time/hrtimer.c:1506
>  local_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1025 [inline]
>  smp_apic_timer_interrupt+0x15d/0x710 arch/x86/kernel/apic/apic.c:1050
>  apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:863
> RIP: 0010:arch_local_irq_restore arch/x86/include/asm/paravirt.h:783
> [inline]
> RIP: 0010:kfree+0x124/0x260 mm/slab.c:3814
> RSP: 0018:8801db105450 EFLAGS: 0286 ORIG_RAX: ff13
> RAX: 0007 RBX: 88006c118040 RCX: 11003b3059e7
> RDX:  RSI: 8801d982cf90 RDI: 0286
> RBP: 8801db105470 R08: 8801d982ce78 R09: 0002
> R10: 8801d982c640 R11:  R12: 0286
> R13: 8801dac00ac0 R14: 85bd7b69 R15: 88006c0f8180
>  skb_free_head+0x99/0xc0 net/core/skbuff.c:550
>  skb_release_data+0x690/0x860 net/core/skbuff.c:570
>  skb_release_all+0x4a/0x60 net/core/skbuff.c:627
>  __kfree_skb net/core/skbuff.c:641 [inline]
>  kfree_skb+0x195/0x560 net/core/skbuff.c:659
>  enqueue_to_backlog+0x2fc/0xc90 net/core/dev.c:3968
>  netif_rx_internal+0x14d/0xae0 net/core/dev.c:4181
>  netif_rx+0xba/0x400 net/core/dev.c:4206
>  loopback_xmit+0x283/0x741 drivers/net/loopback.c:91
>  __netdev_start_xmit include/linux/netdevice.h:4087 [inline]
>  netdev_start_xmit include/linux/netdevice.h:4096 [inline]
>  xmit_one net/core/dev.c:3053 [inline]
>  dev_hard_start_xmit+0x264/0xc10 net/core/dev.c:3069
>  __dev_queue_xmit+0x2724/0x34c0 net/core/dev.c:3584
>  dev_queue_xmit+0x17/0x20 net/core/dev.c:3617
>  neigh_hh_output include/net/neighbour.h:472 [inline]
>  neigh_output include/net/neighbour.h:480 [inline]
>  ip_finish_output2+0x1046/0x1840 net/ipv4/ip_output.c:229
>  ip_finish_output+0x828/0xf80 net/ipv4/ip_output.c:317
>  NF_HOOK_COND include/linux/netfilter.h:277 [inline]
>  ip_output+0x21b/0x850 net/ipv4/ip_output.c:405
>  dst_output include/net/dst.h:444

Re: INFO: rcu detected stall in kmem_cache_alloc_node_trace

2018-05-26 Thread Dmitry Vyukov

On Mon, Apr 30, 2018 at 8:05 PM, syzbot
 wrote:
> Hello,
>
> syzbot found the following crash on:
>
> HEAD commit:17dec0a94915 Merge branch 'userns-linus' of
> git://git.kerne...
> git tree:   net-next
> console output: https://syzkaller.appspot.com/x/log.txt?id=6093051722203136
> kernel config:
> https://syzkaller.appspot.com/x/.config?id=-2735707888269579554
> dashboard link: https://syzkaller.appspot.com/bug?extid=deec965c578bb9b81613
> compiler:   gcc (GCC) 8.0.1 20180301 (experimental)
>
> Unfortunately, I don't have any reproducer for this crash yet.
>
> IMPORTANT: if you fix the bug, please add the following tag to the commit:
> Reported-by: syzbot+deec965c578bb9b81...@syzkaller.appspotmail.com
>
> sctp: [Deprecated]: syz-executor3 (pid 10218) Use of int in max_burst socket
> option.
> Use struct sctp_assoc_value instead
> sctp: [Deprecated]: syz-executor3 (pid 10218) Use of int in max_burst socket
> option.
> Use struct sctp_assoc_value instead
> random: crng init done
> INFO: rcu_sched self-detected stall on CPU
> 0-: (120712 ticks this GP) idle=ac6/1/4611686018427387908
> softirq=31693/31693 fqs=31173
>  (t=125001 jiffies g=17039 c=17038 q=303419)
> NMI backtrace for cpu 0
> CPU: 0 PID: 10218 Comm: syz-executor3 Not tainted 4.16.0+ #1
> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
> Google 01/01/2011
> Call Trace:
>  
>  __dump_stack lib/dump_stack.c:17 [inline]
>  dump_stack+0x1b9/0x29f lib/dump_stack.c:53
>  nmi_cpu_backtrace.cold.4+0x19/0xce lib/nmi_backtrace.c:103
>  nmi_trigger_cpumask_backtrace+0x151/0x192 lib/nmi_backtrace.c:62
>  arch_trigger_cpumask_backtrace+0x14/0x20 arch/x86/kernel/apic/hw_nmi.c:38
>  trigger_single_cpu_backtrace include/linux/nmi.h:156 [inline]
>  rcu_dump_cpu_stacks+0x175/0x1c2 kernel/rcu/tree.c:1376
>  print_cpu_stall kernel/rcu/tree.c:1525 [inline]
>  check_cpu_stall.isra.61.cold.80+0x36c/0x59a kernel/rcu/tree.c:1593
>  __rcu_pending kernel/rcu/tree.c:3356 [inline]
>  rcu_pending kernel/rcu/tree.c:3401 [inline]
>  rcu_check_callbacks+0x21b/0xad0 kernel/rcu/tree.c:2763
>  update_process_times+0x2d/0x70 kernel/time/timer.c:1636
>  tick_sched_handle+0xa0/0x180 kernel/time/tick-sched.c:162
>  tick_sched_timer+0x42/0x130 kernel/time/tick-sched.c:1170
>  __run_hrtimer kernel/time/hrtimer.c:1349 [inline]
>  __hrtimer_run_queues+0x3e3/0x10a0 kernel/time/hrtimer.c:1411
>  hrtimer_interrupt+0x2f3/0x750 kernel/time/hrtimer.c:1469
>  local_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1025 [inline]
>  smp_apic_timer_interrupt+0x15d/0x710 arch/x86/kernel/apic/apic.c:1050
>  apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:862
> RIP: 0010:arch_local_irq_restore arch/x86/include/asm/paravirt.h:783
> [inline]
> RIP: 0010:lock_is_held_type+0x18b/0x210 kernel/locking/lockdep.c:3960
> RSP: 0018:8801db006400 EFLAGS: 0282 ORIG_RAX: ff12
> RAX: dc00 RBX: 0282 RCX: 
> RDX: 11162e55 RSI: 88b90c60 RDI: 0282
> RBP: 8801db006420 R08: ed003b6046c3 R09: ed003b6046c2
> R10: ed003b6046c2 R11: 8801db023613 R12: 8801b2f623c0
> R13:  R14: 88009932bb00 R15: 
>  lock_is_held include/linux/lockdep.h:344 [inline]
>  rcu_read_lock_sched_held+0x108/0x120 kernel/rcu/update.c:117
>  trace_kmalloc_node include/trace/events/kmem.h:100 [inline]
>  kmem_cache_alloc_node_trace+0x34e/0x770 mm/slab.c:3652
>  __do_kmalloc_node mm/slab.c:3669 [inline]
>  __kmalloc_node_track_caller+0x33/0x70 mm/slab.c:3684
>  __kmalloc_reserve.isra.38+0x3a/0xe0 net/core/skbuff.c:137
>  __alloc_skb+0x14d/0x780 net/core/skbuff.c:205
>  alloc_skb include/linux/skbuff.h:987 [inline]
>  sctp_packet_transmit+0x45e/0x3ba0 net/sctp/output.c:585
>  sctp_outq_flush+0x1373/0x4370 net/sctp/outqueue.c:1197
>  sctp_outq_uncork+0x6a/0x80 net/sctp/outqueue.c:776
>  sctp_cmd_interpreter net/sctp/sm_sideeffect.c:1820 [inline]
>  sctp_side_effects net/sctp/sm_sideeffect.c:1220 [inline]
>  sctp_do_sm+0x596/0x7160 net/sctp/sm_sideeffect.c:1191
>  sctp_generate_heartbeat_event+0x218/0x450 net/sctp/sm_sideeffect.c:406

#syz fix: sctp: not allow to set rto_min with a value below 200 msecs


>  call_timer_fn+0x230/0x940 kernel/time/timer.c:1326
>  expire_timers kernel/time/timer.c:1363 [inline]
>  __run_timers+0x79e/0xc50 kernel/time/timer.c:1666
>  run_timer_softirq+0x4c/0x70 kernel/time/timer.c:1692
>  __do_softirq+0x2e0/0xaf5 kernel/softirq.c:285
>  invoke_softirq kernel/softirq.c:365 [inline]
>  irq_exit+0x1d1/0x200 kernel/softirq.c:405
>  exiting_irq arch/x86/include/asm/apic.h:525 [inline]
>  smp_apic_timer_interrupt+0x17e/0x710 arch/x86/kernel/apic/apic.c:1052
>  apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:862
>  
> RIP: 0010:arch_local_irq_restore arch/x86/include/asm/paravirt.h:783
> [inline]
> RIP: 0010:console_unlock+0xcdf/0x1100 kernel/printk/printk.c:2403
> RSP:

Re: INFO: rcu detected stall in sctp_generate_heartbeat_event

2018-05-26 Thread Dmitry Vyukov

On Tue, May 8, 2018 at 2:06 PM, Marcelo Ricardo Leitner
 wrote:
> On Tue, May 08, 2018 at 12:35:02AM -0700, syzbot wrote:
>> Hello,
>>
>> syzbot found the following crash on:
>>
>> HEAD commit:90278871d4b0 Merge git://git.kernel.org/pub/scm/linux/kern..
>> git tree:   net-next
>> console output: https://syzkaller.appspot.com/x/log.txt?x=119a723780
>> kernel config:  https://syzkaller.appspot.com/x/.config?x=aea320d3af5ef99d
>> dashboard link: https://syzkaller.appspot.com/bug?extid=e4a5bbd54260c93014f9
>> compiler:   gcc (GCC) 8.0.1 20180413 (experimental)
>>
>> Unfortunately, I don't have any reproducer for this crash yet.
>
> A reproducer will be welcomed. With just these traces, I don't think
> we have enough information.


#syz fix: sctp: not allow to set rto_min with a value below 200 msecs

Re: INFO: rcu detected stall in kfree_skbmem

2018-05-26 Thread Dmitry Vyukov

On Mon, May 14, 2018 at 8:04 PM, Xin Long  wrote:
> On Mon, May 14, 2018 at 9:34 PM, Neil Horman  wrote:
>> On Fri, May 11, 2018 at 12:00:38PM +0200, Dmitry Vyukov wrote:
>>> On Mon, Apr 30, 2018 at 8:09 PM, syzbot
>>>  wrote:
>>> > Hello,
>>> >
>>> > syzbot found the following crash on:
>>> >
>>> > HEAD commit:5d1365940a68 Merge
>>> > git://git.kernel.org/pub/scm/linux/kerne...
>>> > git tree:   net-next
>>> > console output: 
>>> > https://syzkaller.appspot.com/x/log.txt?id=5667997129637888
>>> > kernel config:
>>> > https://syzkaller.appspot.com/x/.config?id=-5947642240294114534
>>> > dashboard link: 
>>> > https://syzkaller.appspot.com/bug?extid=fc78715ba3b3257caf6a
>>> > compiler:   gcc (GCC) 8.0.1 20180413 (experimental)
>>> >
>>> > Unfortunately, I don't have any reproducer for this crash yet.
>>>
>>> This looks sctp-related, +sctp maintainers.
>>>
>> Looking at the entire trace, it appears that we are getting caught in the
>> kfree_skb that is getting triggered in enqueue_to_backlog which occurs when 
>> our
>> rx backlog list grows over netdev_max_backlog packets.  That suggests to me 
>> that
> It might be a long skb->frag_list that made kfree_skb slow when packing
> lots of small chunks to go through lo device?
>
>> whatever test(s) is/are causing this trace are queuing up a large number of
>> frames to be sent over the loopback interface, and are never/rarely getting
>> received.  Looking up higher in the stack, in the 
>> sctp_generate_heartbeat_event
>> function, we (in addition to the rcu_read_lock in sctp_v6_xmit) we also hold 
>> the
>> socket lock during the entirety of the xmit operation.  Is it possible that we
>> are just enqueuing so many frames for xmit that we are blocking progress of
>> other threads using the same socket that we cross the RCU self detected stall
>> boundary?  While it's not a fix per se, it might be a worthwhile test to limit
>> the number of frames we flush in a single pass.
>>
>> Neil
>>
>>> > IMPORTANT: if you fix the bug, please add the following tag to the commit:
>>> > Reported-by: syzbot+fc78715ba3b3257ca...@syzkaller.appspotmail.com
>>> >
>>> > INFO: rcu_sched self-detected stall on CPU
>>> > 1-...!: (1 GPs behind) idle=a3e/1/4611686018427387908
>>> > softirq=71980/71983 fqs=33
>>> >  (t=125000 jiffies g=39438 c=39437 q=958)
>>> > rcu_sched kthread starved for 124829 jiffies! g39438 c39437 f0x0
>>> > RCU_GP_WAIT_FQS(3) ->state=0x0 ->cpu=0
>>> > RCU grace-period kthread stack dump:
>>> > rcu_sched   R  running task23768 9  2 0x8000
>>> > Call Trace:
>>> >  context_switch kernel/sched/core.c:2848 [inline]
>>> >  __schedule+0x801/0x1e30 kernel/sched/core.c:3490
>>> >  schedule+0xef/0x430 kernel/sched/core.c:3549
>>> >  schedule_timeout+0x138/0x240 kernel/time/timer.c:1801
>>> >  rcu_gp_kthread+0x6b5/0x1940 kernel/rcu/tree.c:2231
>>> >  kthread+0x345/0x410 kernel/kthread.c:238
>>> >  ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:411
>>> > NMI backtrace for cpu 1
>>> > CPU: 1 PID: 20560 Comm: syz-executor4 Not tainted 4.16.0+ #1
>>> > Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
>>> > Google 01/01/2011
>>> > Call Trace:
>>> >  
>>> >  __dump_stack lib/dump_stack.c:77 [inline]
>>> >  dump_stack+0x1b9/0x294 lib/dump_stack.c:113
>>> >  nmi_cpu_backtrace.cold.4+0x19/0xce lib/nmi_backtrace.c:103
>>> >  nmi_trigger_cpumask_backtrace+0x151/0x192 lib/nmi_backtrace.c:62
>>> >  arch_trigger_cpumask_backtrace+0x14/0x20 arch/x86/kernel/apic/hw_nmi.c:38
>>> >  trigger_single_cpu_backtrace include/linux/nmi.h:156 [inline]
>>> >  rcu_dump_cpu_stacks+0x175/0x1c2 kernel/rcu/tree.c:1376
>>> >  print_cpu_stall kernel/rcu/tree.c:1525 [inline]
>>> >  check_cpu_stall.isra.61.cold.80+0x36c/0x59a kernel/rcu/tree.c:1593
>>> >  __rcu_pending kernel/rcu/tree.c:3356 [inline]
>>> >  rcu_pending kernel/rcu/tree.c:3401 [inline]
>>> >  rcu_check_callbacks+0x21b/0xad0 kernel/rcu/tree.c:2763
>>> >  update_process_times+0x2d/0x70 kernel/time/timer.c:1636
>>> >  tick_sched_handle+0x9f/0x180 kernel/time/tick-sched.c:173
>>> >  tick_sched_timer+0x45/0x130 kernel/time/tick-sched.c:1283
>>> >  __run_hrtimer kernel/time/hrtimer.c:1386 [inline]
>>> >  __hrtimer_run_queues+0x3e3/0x10a0 kernel/time/hrtimer.c:1448
>>> >  hrtimer_interrupt+0x286/0x650 kernel/time/hrtimer.c:1506
>>> >  local_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1025 [inline]
>>> >  smp_apic_timer_interrupt+0x15d/0x710 arch/x86/kernel/apic/apic.c:1050
>>> >  apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:862
>>> > RIP: 0010:arch_local_irq_restore arch/x86/include/asm/paravirt.h:783
>>> > [inline]
>>> > RIP: 0010:kmem_cache_free+0xb3/0x2d0 mm/slab.c:3757
>>> > RSP: 0018:8801db105228 EFLAGS: 0282 ORIG_RAX: ff13
>>> > RAX: 0007 RBX: 8800b055c940 RCX: 11003b2345a5
>>> > RDX:  RSI:

Re: INFO: rcu detected stall in sctp_packet_transmit

2018-05-26 Thread Dmitry Vyukov

On Wed, May 16, 2018 at 2:12 PM, Dmitry Vyukov  wrote:
> On Wed, May 16, 2018 at 1:02 PM, Xin Long  wrote:
  wrote:
> Hello,
>
> syzbot found the following crash on:
>
> HEAD commit:961423f9fcbc Merge branch 'sctp-Introduce-sctp_flush_ctx'
> git tree:   net-next
> console output: https://syzkaller.appspot.com/x/log.txt?x=1366aea780
> kernel config:  https://syzkaller.appspot.com/x/.config?x=51fb0a6913f757db
> dashboard link: 
> https://syzkaller.appspot.com/bug?extid=ff0b569fb5111dcd1a36
> compiler:   gcc (GCC) 8.0.1 20180413 (experimental)
>
> Unfortunately, I don't have any reproducer for this crash yet.
>
> IMPORTANT: if you fix the bug, please add the following tag to the commit:
> Reported-by: syzbot+ff0b569fb5111dcd1...@syzkaller.appspotmail.com
>
> INFO: rcu_sched self-detected stall on CPU
> 0-: (1 GPs behind) idle=dae/1/4611686018427387908
> softirq=93090/93091 fqs=30902
>  (t=125000 jiffies g=51107 c=51106 q=972)
> NMI backtrace for cpu 0
> CPU: 0 PID: 24668 Comm: syz-executor6 Not tainted 4.17.0-rc4+ #44
> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
> Google 01/01/2011
> Call Trace:
>  
>  __dump_stack lib/dump_stack.c:77 [inline]
>  dump_stack+0x1b9/0x294 lib/dump_stack.c:113
>  nmi_cpu_backtrace.cold.4+0x19/0xce lib/nmi_backtrace.c:103
>  nmi_trigger_cpumask_backtrace+0x151/0x192 lib/nmi_backtrace.c:62
>  arch_trigger_cpumask_backtrace+0x14/0x20 arch/x86/kernel/apic/hw_nmi.c:38
>  trigger_single_cpu_backtrace include/linux/nmi.h:156 [inline]
>  rcu_dump_cpu_stacks+0x175/0x1c2 kernel/rcu/tree.c:1376
>  print_cpu_stall kernel/rcu/tree.c:1525 [inline]
>  check_cpu_stall.isra.61.cold.80+0x36c/0x59a kernel/rcu/tree.c:1593
>  __rcu_pending kernel/rcu/tree.c:3356 [inline]
>  rcu_pending kernel/rcu/tree.c:3401 [inline]
>  rcu_check_callbacks+0x21b/0xad0 kernel/rcu/tree.c:2763
>  update_process_times+0x2d/0x70 kernel/time/timer.c:1636
>  tick_sched_handle+0x9f/0x180 kernel/time/tick-sched.c:164
>  tick_sched_timer+0x45/0x130 kernel/time/tick-sched.c:1274
>  __run_hrtimer kernel/time/hrtimer.c:1398 [inline]
>  __hrtimer_run_queues+0x3e3/0x10a0 kernel/time/hrtimer.c:1460
>  hrtimer_interrupt+0x2f3/0x750 kernel/time/hrtimer.c:1518
>  local_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1025 [inline]
>  smp_apic_timer_interrupt+0x15d/0x710 arch/x86/kernel/apic/apic.c:1050
>  apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:863
> RIP: 0010:sctp_v6_xmit+0x259/0x6b0 net/sctp/ipv6.c:219
> RSP: 0018:8801dae068e8 EFLAGS: 0246 ORIG_RAX: ff13
> RAX: 0007 RBX: 8801bb7ec800 RCX: 86f1b345
> RDX:  RSI: 86f1b381 RDI: 8801b73d97c4
> RBP: 8801dae06988 R08: 88019505c300 R09: ed003b5c46c2
> R10: ed003b5c46c2 R11: 8801dae23613 R12: 88011fd57300
> R13: 8801bb7ecec8 R14: 0029 R15: 0002
>  sctp_packet_transmit+0x26f6/0x3ba0 net/sctp/output.c:642
>  sctp_outq_flush_transports net/sctp/outqueue.c:1164 [inline]
>  sctp_outq_flush+0x5f5/0x3430 net/sctp/outqueue.c:1212
>  sctp_outq_uncork+0x6a/0x80 net/sctp/outqueue.c:776
>  sctp_cmd_interpreter net/sctp/sm_sideeffect.c:1820 [inline]
>  sctp_side_effects net/sctp/sm_sideeffect.c:1220 [inline]
>  sctp_do_sm+0x596/0x7160 net/sctp/sm_sideeffect.c:1191
>  sctp_generate_heartbeat_event+0x218/0x450 net/sctp/sm_sideeffect.c:406
 Shocks, this timer event again. Can we try to minimize the repo.syz and
 get a short script, not necessary to reproduce the issue 100%. we need
 to know what it was doing when this happened.

 Thanks.
>>>
>>> It's possible to replay the whole log from console output following
>>> these instructions:
>>> https://github.com/google/syzkaller/blob/master/docs/executing_syzkaller_programs.md
>> Thanks, it's running now.
>> Usually how long will it take to finish running this 5000-line log?
>
> If you run with -repeat=0 then it will run infinitely repeating the
> log again and again. If you see:
>
> parsed 1000 programs
> ...
> executed 5000 programs
>
> then it looped 5 times already. You can run with -repeat=10.
>
> syzbot has tried replaying the log, but for some reason it wasn't able
> to reproduce the crash (maybe accumulated state, or maybe it crashed
> in a different way). You can also try logs from other sctp hangs.


#syz fix: sctp: not allow to set rto_min with a value below 200 msecs

Re: INFO: rcu detected stall in ip_route_output_key_hash

2018-05-26 Thread Dmitry Vyukov

On Wed, May 16, 2018 at 5:29 PM, syzbot
 wrote:
> Hello,
>
> syzbot found the following crash on:
>
> HEAD commit:0b7d9978406f Merge branch 'Microsemi-Ocelot-Ethernet-switc..
> git tree:   net-next
> console output: https://syzkaller.appspot.com/x/log.txt?x=1138c47780
> kernel config:  https://syzkaller.appspot.com/x/.config?x=b632d8e2c2ab2c1
> dashboard link: https://syzkaller.appspot.com/bug?extid=769a7ccbbb4b5074f125
> compiler:   gcc (GCC) 8.0.1 20180413 (experimental)
>
> Unfortunately, I don't have any reproducer for this crash yet.
>
> IMPORTANT: if you fix the bug, please add the following tag to the commit:
> Reported-by: syzbot+769a7ccbbb4b5074f...@syzkaller.appspotmail.com
>
> netlink: 4 bytes leftover after parsing attributes in process
> `syz-executor2'.
> random: crng init done
> INFO: rcu_sched self-detected stall on CPU
> 1-...!: (121515 ticks this GP) idle=e7e/1/4611686018427387908
> softirq=31362/31362 fqs=7
>  (t=125000 jiffies g=16439 c=16438 q=668508)
> rcu_sched kthread starved for 124958 jiffies! g16439 c16438 f0x2
> RCU_GP_WAIT_FQS(3) ->state=0x0 ->cpu=0
> RCU grace-period kthread stack dump:
> rcu_sched   R  running task23768 9  2 0x8000
> Call Trace:
>  context_switch kernel/sched/core.c:2848 [inline]
>  __schedule+0x801/0x1e30 kernel/sched/core.c:3490
>  schedule+0xef/0x430 kernel/sched/core.c:3549
>  schedule_timeout+0x138/0x240 kernel/time/timer.c:1801
>  rcu_gp_kthread+0x6b5/0x1940 kernel/rcu/tree.c:2231
>  kthread+0x345/0x410 kernel/kthread.c:238
>  ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:412
> NMI backtrace for cpu 1
> CPU: 1 PID: 4488 Comm: syz-fuzzer Not tainted 4.17.0-rc4+ #45
> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
> Google 01/01/2011
> Call Trace:
>  
>  __dump_stack lib/dump_stack.c:77 [inline]
>  dump_stack+0x1b9/0x294 lib/dump_stack.c:113
>  nmi_cpu_backtrace.cold.4+0x19/0xce lib/nmi_backtrace.c:103
>  nmi_trigger_cpumask_backtrace+0x151/0x192 lib/nmi_backtrace.c:62
>  arch_trigger_cpumask_backtrace+0x14/0x20 arch/x86/kernel/apic/hw_nmi.c:38
>  trigger_single_cpu_backtrace include/linux/nmi.h:156 [inline]
>  rcu_dump_cpu_stacks+0x175/0x1c2 kernel/rcu/tree.c:1376
>  print_cpu_stall kernel/rcu/tree.c:1525 [inline]
>  check_cpu_stall.isra.61.cold.80+0x36c/0x59a kernel/rcu/tree.c:1593
>  __rcu_pending kernel/rcu/tree.c:3356 [inline]
>  rcu_pending kernel/rcu/tree.c:3401 [inline]
>  rcu_check_callbacks+0x21b/0xad0 kernel/rcu/tree.c:2763
>  update_process_times+0x2d/0x70 kernel/time/timer.c:1636
>  tick_sched_handle+0x9f/0x180 kernel/time/tick-sched.c:164
>  tick_sched_timer+0x45/0x130 kernel/time/tick-sched.c:1274
>  __run_hrtimer kernel/time/hrtimer.c:1398 [inline]
>  __hrtimer_run_queues+0x3e3/0x10a0 kernel/time/hrtimer.c:1460
>  hrtimer_interrupt+0x2f3/0x750 kernel/time/hrtimer.c:1518
>  local_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1025 [inline]
>  smp_apic_timer_interrupt+0x15d/0x710 arch/x86/kernel/apic/apic.c:1050
>  apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:863
> RIP: 0010:rcu_is_watching+0x6/0x140 kernel/rcu/tree.c:1071
> RSP: :8801daf06620 EFLAGS: 0206 ORIG_RAX: ff13
> RAX: 8801ad526240 RBX:  RCX: 8656
> RDX: 0100 RSI: 86b8 RDI: 0001
> RBP: 8801daf06628 R08: 8801ad526240 R09: 0002
> R10: 8801ad526240 R11:  R12: 11003b5e0cca
> R13: 88008ff1a100 R14:  R15: 8801daf066d0
>  rcu_read_unlock include/linux/rcupdate.h:684 [inline]
>  ip_route_output_key_hash+0x2cd/0x390 net/ipv4/route.c:2303
>  __ip_route_output_key include/net/route.h:124 [inline]
>  ip_route_output_flow+0x28/0xc0 net/ipv4/route.c:2557
>  ip_route_output_key include/net/route.h:134 [inline]
>  sctp_v4_get_dst+0x50e/0x17a0 net/sctp/protocol.c:447
>  sctp_transport_route+0x132/0x360 net/sctp/transport.c:303
>  sctp_packet_config+0x926/0xdd0 net/sctp/output.c:118
>  sctp_outq_select_transport+0x2bb/0x9c0 net/sctp/outqueue.c:877
>  sctp_outq_flush_ctrl.constprop.12+0x2ad/0xe60 net/sctp/outqueue.c:911
>  sctp_outq_flush+0x2ef/0x3430 net/sctp/outqueue.c:1203
>  sctp_outq_uncork+0x6a/0x80 net/sctp/outqueue.c:776
>  sctp_cmd_interpreter net/sctp/sm_sideeffect.c:1820 [inline]
>  sctp_side_effects net/sctp/sm_sideeffect.c:1220 [inline]
>  sctp_do_sm+0x596/0x7160 net/sctp/sm_sideeffect.c:1191
>  sctp_generate_heartbeat_event+0x218/0x450 net/sctp/sm_sideeffect.c:406

#syz fix: sctp: not allow to set rto_min with a value below 200 msecs

>  call_timer_fn+0x230/0x940 kernel/time/timer.c:1326
>  expire_timers kernel/time/timer.c:1363 [inline]
>  __run_timers+0x79e/0xc50 kernel/time/timer.c:1666
>  run_timer_softirq+0x4c/0x70 kernel/time/timer.c:1692
>  __do_softirq+0x2e0/0xaf5 kernel/softirq.c:285
>  invoke_softirq kernel/softirq.c:365 [inline]
>  irq_exit+0x1d1/0x200 kernel/softirq.c:405

Re: INFO: rcu detected stall in corrupted

2018-05-26 Thread Dmitry Vyukov

On Thu, May 24, 2018 at 11:02 AM, Xin Long  wrote:
> On Thu, May 24, 2018 at 7:13 AM, Marcelo Ricardo Leitner
>  wrote:
>> On Mon, May 21, 2018 at 11:13:46AM -0700, Eric Dumazet wrote:
>>>
>>>
>>> On 05/21/2018 11:09 AM, David Miller wrote:
>>> > From: syzbot 
>>> > Date: Mon, 21 May 2018 11:05:02 -0700
>>> >
>>> >>  find_match+0x244/0x13a0 net/ipv6/route.c:691
>>> >>  find_rr_leaf net/ipv6/route.c:729 [inline]
>>> >>  rt6_select net/ipv6/route.c:779 [inline]
>>> >
>>> > Hmmm, endless loop in find_rr_leaf or similar?
>>> >
>>>
>>>
>>> I do not think so, this really looks like SCTP specific
>>> , we now have dozens of traces all sharing :
>>>
>>>  sctp_transport_route+0xad/0x450 net/sctp/transport.c:293
>>>  sctp_packet_config+0xb89/0xfd0 net/sctp/output.c:123
>>>  sctp_outq_flush+0x79c/0x4370 net/sctp/outqueue.c:894
>>>  sctp_outq_uncork+0x6a/0x80 net/sctp/outqueue.c:776
>>>  sctp_cmd_interpreter net/sctp/sm_sideeffect.c:1820 [inline]
>>>  sctp_side_effects net/sctp/sm_sideeffect.c:1220 [inline]
>>>  sctp_do_sm+0x596/0x7160 net/sctp/sm_sideeffect.c:1191
>>>  sctp_generate_heartbeat_event+0x218/0x450 net/sctp/sm_sideeffect.c:406
>>>  call_timer_fn+0x230/0x940 kernel/time/timer.c:1326
>>>
>>>
>>> Some kind of infinite loop.
>>>
>>> When the hrtimer fires, it can point to any code that sits below but does 
>>> not necessarily have a bug.
>>
>> Agreed. Xin Long identified the root cause. syzkaller is setting too
>> aggressive parameters to SCTP RTO, leading to issues with the
>> heartbeat timer.
> Right, I will prepare a fix soon with your suggestion rto_min value "HZ/5"
> Thanks.

#syz fix: sctp: not allow to set rto_min with a value below 200 msecs

Proposal

2018-05-26 Thread Miss Zeliha Omer Faruk




Hello

Greetings to you please i have a business proposal for you contact me
for more detailes asap thanks.

Best Regards,
Miss.Zeliha ömer faruk
Esentepe Mahallesi Büyükdere
Caddesi Kristal Kule Binasi
No:215
Sisli - Istanbul, Turkey

[PATCH] rsi: fix spelling mistake "Uknown" -> "Unknown"

2018-05-26 Thread Colin King

From: Colin Ian King 

Trivial fix to spelling mistake in rsi_dbg message text

Signed-off-by: Colin Ian King 
---
 drivers/net/wireless/rsi/rsi_91x_mac80211.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/rsi/rsi_91x_mac80211.c 
b/drivers/net/wireless/rsi/rsi_91x_mac80211.c
index bfa7569c85bb..2ca7464b7fa3 100644
--- a/drivers/net/wireless/rsi/rsi_91x_mac80211.c
+++ b/drivers/net/wireless/rsi/rsi_91x_mac80211.c
@@ -1103,7 +1103,7 @@ static int rsi_mac80211_ampdu_action(struct ieee80211_hw 
*hw,
break;
 
default:
-   rsi_dbg(ERR_ZONE, "%s: Uknown AMPDU action\n", __func__);
+   rsi_dbg(ERR_ZONE, "%s: Unknown AMPDU action\n", __func__);
break;
}
 
-- 
2.17.0

Re: [PATCH v2] netfilter: properly initialize xt_table_info structure

2018-05-26 Thread Greg Kroah-Hartman

On Fri, May 18, 2018 at 11:27:56AM +0200, Florian Westphal wrote:
> Greg Kroah-Hartman  wrote:
> > On Thu, May 17, 2018 at 12:42:00PM +0200, Jan Engelhardt wrote:
> > > 
> > > On Thursday 2018-05-17 12:09, Greg Kroah-Hartman wrote:
> > > >> > --- a/net/netfilter/x_tables.c
> > > >> > +++ b/net/netfilter/x_tables.c
> > > >> > @@ -1183,11 +1183,10 @@ struct xt_table_info 
> > > >> > *xt_alloc_table_info(unsigned int size)
> > > >> >   * than shoot all processes down before realizing there is 
> > > >> > nothing
> > > >> >   * more to reclaim.
> > > >> >   */
> > > >> > -info = kvmalloc(sz, GFP_KERNEL | __GFP_NORETRY);
> > > >> > +info = kvzalloc(sz, GFP_KERNEL | __GFP_NORETRY);
> > > >> >  if (!info)
> > > >> >  return NULL;
> > > >>
> > > >> I am curious, what particular path does not later overwrite the whole 
> > > >> zone ?
> > > >
> > > >In do_ipt_get_ctl, the IPT_SO_GET_ENTRIES: option uses a len value that
> > > >can be larger than the size of the structure itself.
> > > >
> > > >Then the data is copied to userspace in copy_entries_to_user() for ipv4
> > > >and v6, and that's where the "bad data"
> > > 
> > > If the kernel incorrectly copies more bytes than it should, isn't that
> > > > a sign that it may be going past the end of the info buffer?
> > > (And thus, zeroing won't truly fix the issue)
> > 
> > No, the buffer size is correct, we just aren't filling up the whole
> > buffer as the data requested is smaller than the buffer size.
> 
> I have no objections to the patch but I'd like to understand what
> > problem it's fixing.
> 
> Normal pattern is:
> newinfo = xt_alloc_table_info(tmp.size);
> copy_from_user(newinfo->entries, user + sizeof(tmp), tmp.size);
> 
> > So initial value of the rule blob area should not matter.
> 
> Furthermore, when copying the rule blob back to userspace,
> the kernel is not supposed to copy any padding back to userspace either,
> since commit f32815d21d4d8287336fb9cef4d2d9e0866214c2 only the
> user-relevant parts should be copied (some matches and targets allocate
> kernel-private data such as pointers, and we did use to leak such pointer
> values back to userspace).

Adding Peter to this thread, as he originally reported this issue to
Google back in February.

Peter, I know you reported this against the 4.4 kernel tree, but since
then, commit f32815d21d4d ("xtables: add xt_match, xt_target and data
copy_to_user functions") has been added to the kernel in release 4.11.
In digging through this crazy code path, I think the issue is still
there, but can not verify it for sure.

Is there any way you can run your tests on the 4.14 or newer kernel tree
to see if this issue really is fixed or not?

thanks,

greg k-h

Re: [PATCH net-next 6/7] net: bridge: Notify about bridge VLANs

2018-05-26 Thread Vivien Didelot

Hi Petr,

Petr Machata  writes:

> Vivien Didelot  writes:
>
>>> +   } else {
>>> +   err = br_switchdev_port_obj_add(dev, v->vid, flags);
>>> +   if (err && err != -EOPNOTSUPP)
>>> +   goto out;
>>> }
>>
>> Except that br_switchdev_port_obj_add taking vid and flags arguments
>> seems confusing to me, the change looks good:
>
> I'm not sure what you're aiming at. Both VID and flags are sent with the
> notification, so they need to be passed on to the function somehow. Do
> you have a counterproposal for the API?

I'm only questioning the code organization here, not the functional
aspect which I do agree with. What I'm saying is that you name a new
switchdev helper br_switchdev_port_OBJ_add, which takes VLAN arguments
(vid and flags.) How would you call another eventual helper taking MDB
arguments, br_switchdev_port_OBJ_add again? So something like
br_switchdev_port_VLAN_add would be more intuitive.

At the same time there's an effort to centralize all switchdev helpers
of the bridge layer (i.e. the software -> hardware bridge calls) into
net/bridge/br_switchdev.c, so that file would be more adequate.

You may discard my comments but I think it'd be beneficial to us all to
finally keep a bit of consistency in that bridge layer code.

Thanks,

Vivien

RE: [PATCH, net-next] net/mlx5e: fix TLS dependency

2018-05-26 Thread Boris Pismenny

Acked-by: Boris Pismenny 

Thank you.

> -Original Message-
> From: Saeed Mahameed
> Sent: Saturday, May 26, 2018 2:19 AM
> To: da...@davemloft.net; a...@arndb.de; l...@kernel.org
> Cc: linux-ker...@vger.kernel.org; linux-r...@vger.kernel.org; Boris
> Pismenny ; Or Gerlitz ;
> Feras Daoud ; Ilan Tayari ;
> netdev@vger.kernel.org; Ilya Lesokhin 
> Subject: Re: [PATCH, net-next] net/mlx5e: fix TLS dependency
> 
> On Fri, 2018-05-25 at 23:36 +0200, Arnd Bergmann wrote:
> > With CONFIG_TLS=m and MLX5_CORE_EN=y, we get a link failure:
> >
> > drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.o: In
> > function `mlx5e_tls_handle_ooo':
> > tls_rxtx.c:(.text+0x24c): undefined reference to `tls_get_record'
> > drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.o: In
> > function `mlx5e_tls_handle_tx_skb':
> > tls_rxtx.c:(.text+0x9a8): undefined reference to
> > `tls_device_sk_destruct'
> >
> > This narrows down the dependency to only allow the configurations that
> > will actually work. The existing dependency on TLS_DEVICE is not
> > sufficient here since MLX5_EN_TLS is a 'bool' symbol.
> >
> > Fixes: c83294b9efa5 ("net/mlx5e: TLS, Add Innova TLS TX support")
> > Signed-off-by: Arnd Bergmann 
> > ---
> 
> LGTM
> 
> Acked-by: Saeed Mahameed 
> 
> Thank you Arnd!
> 
> 
> >  drivers/net/ethernet/mellanox/mlx5/core/Kconfig | 1 +
> >  1 file changed, 1 insertion(+)
> >
> > diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
> > b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
> > index ee6684779d11..2545296a0c08 100644
> > --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
> > +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
> > @@ -91,6 +91,7 @@ config MLX5_EN_TLS
> > bool "TLS cryptography-offload accelaration"
> > depends on MLX5_CORE_EN
> > depends on TLS_DEVICE
> > +   depends on TLS=y || MLX5_CORE=m
> > depends on MLX5_ACCEL
> > default n
> > ---help---

[PATCH bpf-next v2] selftests/bpf: missing headers test_lwt_seg6local

2018-05-26 Thread Mathieu Xhonneux

Previous patch "selftests/bpf: test for seg6local End.BPF action" lacks
some UAPI headers in tools/.

clang -I. -I./include/uapi -I../../../include/uapi -idirafter
/usr/local/include -idirafter
/data/users/yhs/work/llvm/build/install/lib/clang/7.0.0/include
-idirafter /usr/include -Wno-compare-distinct-pointer-types \
 -O2 -target bpf -emit-llvm -c test_lwt_seg6local.c -o - |  \
llc -march=bpf -mcpu=generic  -filetype=obj -o
[...]/net-next/tools/testing/selftests/bpf/test_lwt_seg6local.o
test_lwt_seg6local.c:4:10: fatal error: 'linux/seg6_local.h' file not found
 ^~~~
1 error generated.
make: Leaving directory
`/data/users/yhs/work/net-next/tools/testing/selftests/bpf'

v2: moving the headers to tools/include/uapi/.

Reported-by: Y Song 
Signed-off-by: Mathieu Xhonneux 
---
 tools/include/uapi/linux/seg6.h   | 55 
 tools/include/uapi/linux/seg6_local.h | 80 +++
 2 files changed, 135 insertions(+)
 create mode 100644 tools/include/uapi/linux/seg6.h
 create mode 100644 tools/include/uapi/linux/seg6_local.h

diff --git a/tools/include/uapi/linux/seg6.h b/tools/include/uapi/linux/seg6.h
new file mode 100644
index ..286e8d6a8e98
--- /dev/null
+++ b/tools/include/uapi/linux/seg6.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/*
+ *  SR-IPv6 implementation
+ *
+ *  Author:
+ *  David Lebrun 
+ *
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _UAPI_LINUX_SEG6_H
+#define _UAPI_LINUX_SEG6_H
+
+#include 
+#include  /* For struct in6_addr. */
+
+/*
+ * SRH
+ */
+struct ipv6_sr_hdr {
+   __u8nexthdr;
+   __u8hdrlen;
+   __u8type;
+   __u8segments_left;
+   __u8first_segment; /* Represents the last_entry field of SRH */
+   __u8flags;
+   __u16   tag;
+
+   struct in6_addr segments[0];
+};
+
+#define SR6_FLAG1_PROTECTED(1 << 6)
+#define SR6_FLAG1_OAM  (1 << 5)
+#define SR6_FLAG1_ALERT(1 << 4)
+#define SR6_FLAG1_HMAC (1 << 3)
+
+#define SR6_TLV_INGRESS1
+#define SR6_TLV_EGRESS 2
+#define SR6_TLV_OPAQUE 3
+#define SR6_TLV_PADDING4
+#define SR6_TLV_HMAC   5
+
+#define sr_has_hmac(srh) ((srh)->flags & SR6_FLAG1_HMAC)
+
+struct sr6_tlv {
+   __u8 type;
+   __u8 len;
+   __u8 data[0];
+};
+
+#endif
diff --git a/tools/include/uapi/linux/seg6_local.h 
b/tools/include/uapi/linux/seg6_local.h
new file mode 100644
index ..edc138bdc56d
--- /dev/null
+++ b/tools/include/uapi/linux/seg6_local.h
@@ -0,0 +1,80 @@
+/*
+ *  SR-IPv6 implementation
+ *
+ *  Author:
+ *  David Lebrun 
+ *
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _UAPI_LINUX_SEG6_LOCAL_H
+#define _UAPI_LINUX_SEG6_LOCAL_H
+
+#include 
+
+enum {
+   SEG6_LOCAL_UNSPEC,
+   SEG6_LOCAL_ACTION,
+   SEG6_LOCAL_SRH,
+   SEG6_LOCAL_TABLE,
+   SEG6_LOCAL_NH4,
+   SEG6_LOCAL_NH6,
+   SEG6_LOCAL_IIF,
+   SEG6_LOCAL_OIF,
+   SEG6_LOCAL_BPF,
+   __SEG6_LOCAL_MAX,
+};
+#define SEG6_LOCAL_MAX (__SEG6_LOCAL_MAX - 1)
+
+enum {
+   SEG6_LOCAL_ACTION_UNSPEC= 0,
+   /* node segment */
+   SEG6_LOCAL_ACTION_END   = 1,
+   /* adjacency segment (IPv6 cross-connect) */
+   SEG6_LOCAL_ACTION_END_X = 2,
+   /* lookup of next seg NH in table */
+   SEG6_LOCAL_ACTION_END_T = 3,
+   /* decap and L2 cross-connect */
+   SEG6_LOCAL_ACTION_END_DX2   = 4,
+   /* decap and IPv6 cross-connect */
+   SEG6_LOCAL_ACTION_END_DX6   = 5,
+   /* decap and IPv4 cross-connect */
+   SEG6_LOCAL_ACTION_END_DX4   = 6,
+   /* decap and lookup of DA in v6 table */
+   SEG6_LOCAL_ACTION_END_DT6   = 7,
+   /* decap and lookup of DA in v4 table */
+   SEG6_LOCAL_ACTION_END_DT4   = 8,
+   /* binding segment with insertion */
+   SEG6_LOCAL_ACTION_END_B6= 9,
+   /* binding segment with encapsulation */
+   SEG6_LOCAL_ACTION_END_B6_ENCAP  = 10,
+   /* binding segment with MPLS encap */
+   SEG6_LOCAL_ACTION_END_BM= 11,
+   /* lookup last seg in table */
+   SEG6_LOCAL_ACTION_END_S = 12,
+   /* forward to SR-unaware VNF with static proxy */
+   SEG6_LOCAL_ACTION_END_AS= 13,
+   /* forward to

Re: [PATCH bpf-next] selftests/bpf: missing headers test_lwt_seg6local

2018-05-26 Thread Mathieu Xhonneux

2018-05-25 18:39 GMT+02:00 Daniel Borkmann :
> Yes, should definitely go there to tools include infrastructure.

What is the point of tools/testing/selftests/bpf/include/uapi/ then ?
Incompatibility issues preventing linux/types.h to be included in
non-bpf testing executables ? My initial conception was that all
headers only related to bpf should go into this directory. Sending a
v2.

Re: [PATCH v4 2/3] media: rc: introduce BPF_PROG_LIRC_MODE2

2018-05-26 Thread Sean Young

On Fri, May 25, 2018 at 01:45:11PM -0700, Alexei Starovoitov wrote:
> On Fri, May 18, 2018 at 03:07:29PM +0100, Sean Young wrote:
> > Add support for BPF_PROG_LIRC_MODE2. This type of BPF program can call
> > rc_keydown() to report decoded IR scancodes, or rc_repeat() to report
> > that the last key should be repeated.
> > 
> > The bpf program can be attached to using the bpf(BPF_PROG_ATTACH) syscall;
> > the target_fd must be the /dev/lircN device.
> > 
> > Signed-off-by: Sean Young 
> ...
> >  enum bpf_attach_type {
> > @@ -158,6 +159,7 @@ enum bpf_attach_type {
> > BPF_CGROUP_INET6_CONNECT,
> > BPF_CGROUP_INET4_POST_BIND,
> > BPF_CGROUP_INET6_POST_BIND,
> > +   BPF_LIRC_MODE2,
> > __MAX_BPF_ATTACH_TYPE
> >  };
> >  
> > @@ -1902,6 +1904,53 @@ union bpf_attr {
> >   * egress otherwise). This is the only flag supported for now.
> >   * Return
> >   * **SK_PASS** on success, or **SK_DROP** on error.
> > + *
> > + * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle)
> > + * Description
> > + * This helper is used in programs implementing IR decoding, to
> > + * report a successfully decoded key press with *scancode*,
> > + * *toggle* value in the given *protocol*. The scancode will be
> > + * translated to a keycode using the rc keymap, and reported as
> > + * an input key down event. After a period a key up event is
> > + * generated. This period can be extended by calling either
> > + * **bpf_rc_keydown** () with the same values, or calling
> > + * **bpf_rc_repeat** ().
> > + *
> > + * Some protocols include a toggle bit, in case the button
> > + * was released and pressed again between consecutive scancodes
> > + *
> > + * The *ctx* should point to the lirc sample as passed into
> > + * the program.
> > + *
> > + * The *protocol* is the decoded protocol number (see
> > + * **enum rc_proto** for some predefined values).
> > + *
> > + * This helper is only available is the kernel was compiled with
> > + * the **CONFIG_BPF_LIRC_MODE2** configuration option set to
> > + * "**y**".
> > + *
> > + * Return
> > + * 0
> > + *
> > + * int bpf_rc_repeat(void *ctx)
> > + * Description
> > + * This helper is used in programs implementing IR decoding, to
> > + * report a successfully decoded repeat key message. This delays
> > + * the generation of a key up event for previously generated
> > + * key down event.
> > + *
> > + * Some IR protocols like NEC have a special IR message for
> > + * repeating last button, for when a button is held down.
> > + *
> > + * The *ctx* should point to the lirc sample as passed into
> > + * the program.
> > + *
> > + * This helper is only available is the kernel was compiled with
> > + * the **CONFIG_BPF_LIRC_MODE2** configuration option set to
> > + * "**y**".
> 
> Hi Sean,
> 
> thank you for working on this. The patch set looks good to me.
> I'd only ask to change above two helper names to something more specific.
> Since BPF_PROG_TYPE_LIRC_MODE2 is the name of new prog type and kconfig.
> May be bpf_lirc2_keydown() and bpf_lirc2_repeat() ?

A little history might help here.

lirc and rc-core have non-obvious meanings. So, lirc was the original project
that dealt with IR. That project was rejected from mainline because it did
not send translated keycodes to input devices (it exposed its own interface
for keypresses).

Then rc-core was written which maps IR scancodes to keycodes (using rc
keymaps) and sends them to the input layer. The original lirc userspace ABI
for receiving and sending raw IR pulses and spaces was retained (mode2 as
it was called in lirc).

Reusing parts of the lirc ABI for BPF decoding raw IR makes sense, however
dispatching decoded scancodes was never part of lirc, only rc-core. In fact,
rc-core is reused in hdmi-cec for cec commands, which does not use lirc
at all. So for example, if we want to process cec messages in bpf, it would
want to call rc_keydown().

I don't think this lirc/rc-core duality is particularly great, but I'm
not sure what the right answer to that is.

> > @@ -1576,6 +1577,8 @@ static int bpf_prog_attach(const union bpf_attr *attr)
> > case BPF_SK_SKB_STREAM_PARSER:
> > case BPF_SK_SKB_STREAM_VERDICT:
> > return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, true);
> > +   case BPF_LIRC_MODE2:
> > +   return rc_dev_prog_attach(attr);
> ...
> > +   case BPF_LIRC_MODE2:
> > +   return rc_dev_prog_detach(attr);
> 
> and similar rename for internal function names that go into bpf core.

I agree with this.

> Please add accumulated acks when you respin.

Good point, will do.

Thanks,

Sean

[PATCH net-next] net: remove unnecessary genlmsg_cancel() calls

2018-05-26 Thread YueHaibing

The message will be freed immediately, so there is no need to trim it
back to the previous size.

Inspired by commit 7a9b3ec1e19f ("nl80211: remove unnecessary genlmsg_cancel() 
calls")

Signed-off-by: YueHaibing 
---
 drivers/net/team/team.c   |  2 --
 drivers/net/wireless/mac80211_hwsim.c |  1 -
 net/core/devlink.c|  4 
 net/ipv6/seg6.c   |  1 -
 net/ncsi/ncsi-netlink.c   |  1 -
 net/nfc/netlink.c | 17 -
 6 files changed, 26 deletions(-)

diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index e6730a0..267dcc9 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -2426,7 +2426,6 @@ static int team_nl_send_options_get(struct team *team, 
u32 portid, u32 seq,
 nla_put_failure:
err = -EMSGSIZE;
 errout:
-   genlmsg_cancel(skb, hdr);
nlmsg_free(skb);
return err;
 }
@@ -2720,7 +2719,6 @@ static int team_nl_send_port_list_get(struct team *team, 
u32 portid, u32 seq,
 nla_put_failure:
err = -EMSGSIZE;
 errout:
-   genlmsg_cancel(skb, hdr);
nlmsg_free(skb);
return err;
 }
diff --git a/drivers/net/wireless/mac80211_hwsim.c 
b/drivers/net/wireless/mac80211_hwsim.c
index c26469b..38e1135 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -2514,7 +2514,6 @@ static void hwsim_mcast_new_radio(int id, struct 
genl_info *info,
return;
 
 out_err:
-   genlmsg_cancel(mcast_skb, data);
nlmsg_free(mcast_skb);
 }
 
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 475246b..f75ee02 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -1826,7 +1826,6 @@ static int devlink_dpipe_tables_fill(struct genl_info 
*info,
 nla_put_failure:
err = -EMSGSIZE;
 err_table_put:
-   genlmsg_cancel(skb, hdr);
nlmsg_free(skb);
return err;
 }
@@ -2032,7 +2031,6 @@ int devlink_dpipe_entry_ctx_prepare(struct 
devlink_dpipe_dump_ctx *dump_ctx)
return 0;
 
 nla_put_failure:
-   genlmsg_cancel(dump_ctx->skb, dump_ctx->hdr);
nlmsg_free(dump_ctx->skb);
return -EMSGSIZE;
 }
@@ -2249,7 +2247,6 @@ static int devlink_dpipe_headers_fill(struct genl_info 
*info,
 nla_put_failure:
err = -EMSGSIZE;
 err_table_put:
-   genlmsg_cancel(skb, hdr);
nlmsg_free(skb);
return err;
 }
@@ -2551,7 +2548,6 @@ static int devlink_resource_fill(struct genl_info *info,
err = -EMSGSIZE;
 err_resource_put:
 err_skb_send_alloc:
-   genlmsg_cancel(skb, hdr);
nlmsg_free(skb);
return err;
 }
diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c
index 7f5621d..0fdf2a5 100644
--- a/net/ipv6/seg6.c
+++ b/net/ipv6/seg6.c
@@ -226,7 +226,6 @@ static int seg6_genl_get_tunsrc(struct sk_buff *skb, struct 
genl_info *info)
 
 nla_put_failure:
rcu_read_unlock();
-   genlmsg_cancel(msg, hdr);
 free_msg:
nlmsg_free(msg);
return -ENOMEM;
diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c
index b09ef77..99f4c22 100644
--- a/net/ncsi/ncsi-netlink.c
+++ b/net/ncsi/ncsi-netlink.c
@@ -201,7 +201,6 @@ static int ncsi_pkg_info_nl(struct sk_buff *msg, struct 
genl_info *info)
return genlmsg_reply(skb, info);
 
 err:
-   genlmsg_cancel(skb, hdr);
kfree_skb(skb);
return rc;
 }
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index f018eaf..376181c 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -206,7 +206,6 @@ int nfc_genl_targets_found(struct nfc_dev *dev)
return genlmsg_multicast(_genl_family, msg, 0, 0, GFP_ATOMIC);
 
 nla_put_failure:
-   genlmsg_cancel(msg, hdr);
 free_msg:
nlmsg_free(msg);
return -EMSGSIZE;
@@ -237,7 +236,6 @@ int nfc_genl_target_lost(struct nfc_dev *dev, u32 
target_idx)
return 0;
 
 nla_put_failure:
-   genlmsg_cancel(msg, hdr);
 free_msg:
nlmsg_free(msg);
return -EMSGSIZE;
@@ -269,7 +267,6 @@ int nfc_genl_tm_activated(struct nfc_dev *dev, u32 protocol)
return 0;
 
 nla_put_failure:
-   genlmsg_cancel(msg, hdr);
 free_msg:
nlmsg_free(msg);
return -EMSGSIZE;
@@ -299,7 +296,6 @@ int nfc_genl_tm_deactivated(struct nfc_dev *dev)
return 0;
 
 nla_put_failure:
-   genlmsg_cancel(msg, hdr);
 free_msg:
nlmsg_free(msg);
return -EMSGSIZE;
@@ -340,7 +336,6 @@ int nfc_genl_device_added(struct nfc_dev *dev)
return 0;
 
 nla_put_failure:
-   genlmsg_cancel(msg, hdr);
 free_msg:
nlmsg_free(msg);
return -EMSGSIZE;
@@ -370,7 +365,6 @@ int nfc_genl_device_removed(struct nfc_dev *dev)
return 0;
 
 nla_put_failure:
-   genlmsg_cancel(msg, hdr);
 free_msg:
nlmsg_free(msg);
return -EMSGSIZE;
@@ -434,8 +428,6 @@ int nfc_genl_llc_send_sdres(struct nfc_dev *dev, struct 
hlist_head *sdres_list)
return genlmsg_multicast(_genl_family, msg, 0, 0, GFP_ATOMIC);
 
 nla_put_failure:
-

Re: System hung for reg_check_changs_work()-> rtnl_lock()->mutex_lock()

2018-05-26 Thread Dmitry Vyukov

On Mon, May 21, 2018 at 5:47 AM, Shawn Lin  wrote:
> Hi,
>
> I found this hung for several times these days, and seems syzbot already
> reported a similar problem. Is there any patch(es) for that?
>
> Successfully initialized wpa_supplicant
> [  240.091941] INFO: task kworker/u8:1:39 blocked for more than 120 seconds.
> [  240.092004]   Not tainted 4.4.126 #1
> [  240.092026] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables
> this message.
> [  240.092047] kworker/u8:1D ff8008084dfc 039  2
> 0x
> [  240.092116] Workqueue: events_power_efficient reg_check_chans_work
> [  240.092153] Call trace:
> [  240.092191] [] __switch_to+0x84/0xa0
> [  240.092228] [] __schedule+0x428/0x45c
> [  240.092260] [] schedule+0x74/0x94
> [  240.092295] [] schedule_preempt_disabled+0x20/0x38
> [  240.092332] [] __mutex_lock_slowpath+0xc0/0x138
> [  240.092364] [] mutex_lock+0x2c/0x40
> [  240.092399] [] rtnl_lock+0x14/0x1c
> [  240.092428] [] reg_check_chans_work+0x2c/0x1f0
> [  240.092463] [] process_one_work+0x1b0/0x294
> [  240.092494] [] worker_thread+0x2d8/0x398
> [  240.092524] [] kthread+0xc8/0xd8
> [  240.092567] [] ret_from_fork+0x10/0x50
> [  240.092594] Kernel panic - not syncing: hung_task: blocked tasks
> [  240.101163] CPU: 0 PID: 30 Comm: khungtaskd Not tainted 4.4.126 #1
> [  240.101729] Hardware name: Rockchip RK3308 evb analog mic board (DT)
> [  240.102302] Call trace:
> [  240.102546] [] dump_backtrace+0x0/0x1c4
> [  240.103044] [] show_stack+0x14/0x1c
> [  240.103521] [] dump_stack+0x94/0xbc
> [  240.104000] [] panic+0xd8/0x228
> [  240.104446] [] proc_dohung_task_timeout_secs+0x0/0x40
> [  240.105050] [] kthread+0xc8/0xd8
> [  240.105500] [] ret_from_fork+0x10/0x50
> [  240.106065] CPU1: stopping
> [  240.106348] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.4.126 #1


Syzbot has reported a whole bunch of hangs on rtnl lock, but there is no
resolution:
https://syzkaller.appspot.com/bug?id=2503c576cabb08d41812e732b390141f01a59545

I suspect this can be related to hangs in unregister_netdevice:
https://syzkaller.appspot.com/bug?id=1a97a5bd119fd97995f752819fd87840ab9479a9
They happen all the time too, there is no resolution for this either.

Also see this thread:
https://groups.google.com/d/msg/syzkaller/-06_laheMF0/xqezy58kAwAJ

Is it possible to get device information via CMSG?

2018-05-26 Thread Eric S. Raymond

I'm trying to untangle some nasty code in the Mills implementation of
NTP.  I could simplify it a lot if there were a way to query a packet
to find out the name of the network interface it arrived on.  (At the
moment the code has to iterate over all interfaces checking for
traffic on each one just so it doesn't lose that information.)

This seems like the kind of thing the CMSG macros are intended to
support, but I can't find anywhere a specification of what cmsg_level
and cmsg_type values are valid and what their semantics are.

So I have two questions:

1. Is there a cmsg_level/cmsg_type combination that will return the
name of the device the packet arrived through?

2. Is the set of possible cmsg_level and cmsg_type values documented
anywhere?  If not, how would one go about assembling such information?
(I would be willing to write a man page about this.)

-- 
Eric S. Raymond <http://www.catb.org/~esr/>

You [should] not examine legislation in the light of the benefits it will
convey if properly administered, but in the light of the wrongs it
would do and the harm it would cause if improperly administered
-- Lyndon Johnson, former President of the U.S.

[PATCH net-next] netfilter: nat: make symbol nat_hook static

2018-05-26 Thread Wei Yongjun

Fixes the following sparse warning:

net/netfilter/nf_nat_core.c:1039:20: warning:
 symbol 'nat_hook' was not declared. Should it be static?

Signed-off-by: Wei Yongjun 
---
 net/netfilter/nf_nat_core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 821f8d8..b7df32a 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -1036,7 +1036,7 @@ void nf_nat_unregister_fn(struct net *net, const struct 
nf_hook_ops *ops,
.size = sizeof(struct nat_net),
 };
 
-struct nf_nat_hook nat_hook = {
+static struct nf_nat_hook nat_hook = {
.parse_nat_setup= nfnetlink_parse_nat_setup,
 #ifdef CONFIG_XFRM
.decode_session = __nf_nat_decode_session,

[PATCH v2 2/2] batman-adv: Drop "experimental" from BATMAN_V Kconfig

2018-05-26 Thread Sven Eckelmann

The Kconfig option BATMAN_ADV_BATMAN_V is now enabled by default when the
BATMAN_ADV is enabled. A feature which is enabled by default for a module
should not be considered experimental.

Reported-by: Joe Perches 
Signed-off-by: Sven Eckelmann 
---
v2: no change

 net/batman-adv/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig
index 41bb67d70c83..da0b7aa98be9 100644
--- a/net/batman-adv/Kconfig
+++ b/net/batman-adv/Kconfig
@@ -32,7 +32,7 @@ config BATMAN_ADV
   tools.
 
 config BATMAN_ADV_BATMAN_V
-   bool "B.A.T.M.A.N. V protocol (experimental)"
+   bool "B.A.T.M.A.N. V protocol"
depends on BATMAN_ADV && !(CFG80211=m && BATMAN_ADV=y)
default y
help
-- 
2.17.0

[PATCH v2 1/2] batman-adv: Remove "default n" in Kconfig

2018-05-26 Thread Sven Eckelmann

The "default n" is the default value for any bool or tristate Kconfig
setting. It is therefore not necessary to add it to a config entry.

Reported-by: Sergei Shtylyov 
Signed-off-by: Sven Eckelmann 
---
v2: changed "the an config entry" to "a config entry" in commit message

 net/batman-adv/Kconfig | 5 -
 1 file changed, 5 deletions(-)

diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig
index de8034d80623..41bb67d70c83 100644
--- a/net/batman-adv/Kconfig
+++ b/net/batman-adv/Kconfig
@@ -24,7 +24,6 @@ config BATMAN_ADV
depends on NET
select CRC16
select LIBCRC32C
-default n
help
   B.A.T.M.A.N. (better approach to mobile ad-hoc networking) is
   a routing protocol for multi-hop ad-hoc mesh networks. The
@@ -60,7 +59,6 @@ config BATMAN_ADV_BLA
 config BATMAN_ADV_DAT
bool "Distributed ARP Table"
depends on BATMAN_ADV && INET
-   default n
help
  This option enables DAT (Distributed ARP Table), a DHT based
  mechanism that increases ARP reliability on sparse wireless
@@ -70,7 +68,6 @@ config BATMAN_ADV_DAT
 config BATMAN_ADV_NC
bool "Network Coding"
depends on BATMAN_ADV
-   default n
help
  This option enables network coding, a mechanism that aims to
  increase the overall network throughput by fusing multiple
@@ -84,7 +81,6 @@ config BATMAN_ADV_NC
 config BATMAN_ADV_MCAST
bool "Multicast optimisation"
depends on BATMAN_ADV && INET && !(BRIDGE=m && BATMAN_ADV=y)
-   default n
help
  This option enables the multicast optimisation which aims to
  reduce the air overhead while improving the reliability of
@@ -94,7 +90,6 @@ config BATMAN_ADV_DEBUGFS
bool "batman-adv debugfs entries"
depends on BATMAN_ADV
depends on DEBUG_FS
-   default n
help
  Enable this to export routing related debug tables via debugfs.
  The information for each soft-interface and used hard-interface can be
-- 
2.17.0

[PATCH net-next] net: bpfilter: make function bpfilter_mbox_request() static

2018-05-26 Thread Wei Yongjun

Fixes the following sparse warnings:

net/ipv4/bpfilter/sockopt.c:13:5: warning:
 symbol 'bpfilter_mbox_request' was not declared. Should it be static?

Signed-off-by: Wei Yongjun 
---
 net/ipv4/bpfilter/sockopt.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/bpfilter/sockopt.c b/net/ipv4/bpfilter/sockopt.c
index 42a96d2..5e04ed2 100644
--- a/net/ipv4/bpfilter/sockopt.c
+++ b/net/ipv4/bpfilter/sockopt.c
@@ -10,8 +10,9 @@
unsigned int optlen, bool is_set);
 EXPORT_SYMBOL_GPL(bpfilter_process_sockopt);
 
-int bpfilter_mbox_request(struct sock *sk, int optname, char __user *optval,
- unsigned int optlen, bool is_set)
+static int bpfilter_mbox_request(struct sock *sk, int optname,
+char __user *optval,
+unsigned int optlen, bool is_set)
 {
if (!bpfilter_process_sockopt) {
int err = request_module("bpfilter");

Re: [PATCH] rtnetlink: Add more well known protocol values

2018-05-26 Thread Sergei Shtylyov


Hello!

On 5/25/2018 9:20 PM, Donald Sharp wrote:


FRRouting installs routes into the kernel associated with
the originating protocol.  Add these values to the well
known values in rtnetlink.h.

Signed-off-by: Donald Sharp 
---
  include/uapi/linux/rtnetlink.h | 5 +
  1 file changed, 5 insertions(+)

diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index cabb210c93af..81b33826f818 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -254,6 +254,11 @@ enum {
  #define RTPROT_DHCP   16  /* DHCP client */
  #define RTPROT_MROUTED17  /* Multicast daemon */
  #define RTPROT_BABEL  42  /* Babel daemon */
+#define RTPROT_BGP  186 /* BGP Routes */
+#define RTPROT_ISIS 187 /* ISIS Routes */
+#define RTPROT_OSPF 188 /* OSPF Routes */
+#define RTPROT_RIP  189 /* RIP Routes */
+#define RTPROT_EIGRP192 /* EIGRP Routes */


   The preceding entries use tab to indent the value, yours use spaces. Not 
good...


[...]

MBR, Sergei

Re: WARNING in bpf_int_jit_compile

2018-05-26 Thread syzbot


syzbot has found a reproducer for the following crash on:

HEAD commit:62d18ecfa641 Merge tag 'arm64-fixes' of git://git.kernel.o..
git tree:   upstream
console output: https://syzkaller.appspot.com/x/log.txt?x=14c6bf5780
kernel config:  https://syzkaller.appspot.com/x/.config?x=982e2df1b9e60b02
dashboard link: https://syzkaller.appspot.com/bug?extid=9e762b52dd17e616a7a5
compiler:   gcc (GCC) 8.0.1 20180413 (experimental)
syzkaller repro:https://syzkaller.appspot.com/x/repro.syz?x=130e42b780

IMPORTANT: if you fix the bug, please add the following tag to the commit:
Reported-by: syzbot+9e762b52dd17e616a...@syzkaller.appspotmail.com

RAX: ffda RBX: 02542914 RCX: 00455a09
RDX: 0048 RSI: 2240 RDI: 0005
RBP: 0072bea0 R08:  R09: 
R10:  R11: 0246 R12: 0003
R13: 0046 R14: 006f4730 R15: 0023
WARNING: CPU: 0 PID: 4752 at include/linux/filter.h:667  
bpf_jit_binary_lock_ro include/linux/filter.h:667 [inline]
WARNING: CPU: 0 PID: 4752 at include/linux/filter.h:667  
bpf_int_jit_compile+0xbf7/0xef7 arch/x86/net/bpf_jit_comp.c:1271

Kernel panic - not syncing: panic_on_warn set ...

CPU: 0 PID: 4752 Comm: syz-executor0 Not tainted 4.17.0-rc6+ #67
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS  
Google 01/01/2011

Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x1b9/0x294 lib/dump_stack.c:113
 panic+0x22f/0x4de kernel/panic.c:184
 __warn.cold.8+0x163/0x1b3 kernel/panic.c:536
 report_bug+0x252/0x2d0 lib/bug.c:186
 fixup_bug arch/x86/kernel/traps.c:178 [inline]
 do_error_trap+0x1de/0x490 arch/x86/kernel/traps.c:296
 do_invalid_op+0x1b/0x20 arch/x86/kernel/traps.c:315
 invalid_op+0x14/0x20 arch/x86/entry/entry_64.S:992
RIP: 0010:bpf_jit_binary_lock_ro include/linux/filter.h:667 [inline]
RIP: 0010:bpf_int_jit_compile+0xbf7/0xef7 arch/x86/net/bpf_jit_comp.c:1271
RSP: 0018:8801d85ff920 EFLAGS: 00010293
RAX: 8801d78c40c0 RBX: 0046 RCX: 81445d89
RDX:  RSI: 81445d97 RDI: 0005
RBP: 8801d85ffa40 R08: 8801d78c40c0 R09: 
R10:  R11:  R12: c9000194e002
R13: 8801d85ffa18 R14: fff4 R15: 0003
 bpf_prog_select_runtime+0x131/0x640 kernel/bpf/core.c:1541
 bpf_prog_load+0x16c2/0x2070 kernel/bpf/syscall.c:1333
 __do_sys_bpf kernel/bpf/syscall.c:2073 [inline]
 __se_sys_bpf kernel/bpf/syscall.c:2035 [inline]
 __x64_sys_bpf+0x389/0x4c0 kernel/bpf/syscall.c:2035
 do_syscall_64+0x1b1/0x800 arch/x86/entry/common.c:287
 entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x455a09
RSP: 002b:7ffec3da2868 EFLAGS: 0246 ORIG_RAX: 0141
RAX: ffda RBX: 02542914 RCX: 00455a09
RDX: 0048 RSI: 2240 RDI: 0005
RBP: 0072bea0 R08:  R09: 
R10:  R11: 0246 R12: 0003
R13: 0046 R14: 006f4730 R15: 0023
Dumping ftrace buffer:
   (ftrace buffer empty)
Kernel Offset: disabled
Rebooting in 86400 seconds..

WARNING in bpf_int_jit_compile

2018-05-26 Thread syzbot


Hello,

syzbot found the following crash on:

HEAD commit:203ec2fed17a Merge tag 'armsoc-fixes' of git://git.kernel...
git tree:   upstream
console output: https://syzkaller.appspot.com/x/log.txt?x=14f0d5a780
kernel config:  https://syzkaller.appspot.com/x/.config?x=f3b4e30da84ec1ed
dashboard link: https://syzkaller.appspot.com/bug?extid=9e762b52dd17e616a7a5
compiler:   gcc (GCC) 8.0.1 20180413 (experimental)

Unfortunately, I don't have any reproducer for this crash yet.

IMPORTANT: if you fix the bug, please add the following tag to the commit:
Reported-by: syzbot+9e762b52dd17e616a...@syzkaller.appspotmail.com

RAX: ffda RBX: 7f9da107d6d4 RCX: 00455a09
RDX: 0048 RSI: 2000e000 RDI: 0005
RBP: 0072bea0 R08:  R09: 
R10:  R11: 0246 R12: 0014
R13: 0046 R14: 006f4730 R15: 0021
WARNING: CPU: 0 PID: 20757 at include/linux/filter.h:667  
bpf_jit_binary_lock_ro include/linux/filter.h:667 [inline]
WARNING: CPU: 0 PID: 20757 at include/linux/filter.h:667  
bpf_int_jit_compile+0xbf7/0xef7 arch/x86/net/bpf_jit_comp.c:1271

Kernel panic - not syncing: panic_on_warn set ...

CPU: 0 PID: 20757 Comm: syz-executor6 Not tainted 4.17.0-rc5+ #60
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS  
Google 01/01/2011

Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x1b9/0x294 lib/dump_stack.c:113
 panic+0x22f/0x4de kernel/panic.c:184
 __warn.cold.8+0x163/0x1b3 kernel/panic.c:536
 report_bug+0x252/0x2d0 lib/bug.c:186
 fixup_bug arch/x86/kernel/traps.c:178 [inline]
 do_error_trap+0x1de/0x490 arch/x86/kernel/traps.c:296
 do_invalid_op+0x1b/0x20 arch/x86/kernel/traps.c:315
 invalid_op+0x14/0x20 arch/x86/entry/entry_64.S:992
RIP: 0010:bpf_jit_binary_lock_ro include/linux/filter.h:667 [inline]
RIP: 0010:bpf_int_jit_compile+0xbf7/0xef7 arch/x86/net/bpf_jit_comp.c:1271
RSP: 0018:8801b3fbf920 EFLAGS: 00010246
RAX: 0004 RBX: 0047 RCX: c900050da000
RDX: 0004 RSI: 81444d37 RDI: 0005
RBP: 8801b3fbfa40 R08: 8801b4c18040 R09: 
R10:  R11:  R12: c90001932002
R13: 8801b3fbfa18 R14: fff4 R15: 0003
 bpf_prog_select_runtime+0x131/0x640 kernel/bpf/core.c:1491
 bpf_prog_load+0x16c2/0x2070 kernel/bpf/syscall.c:1333
 __do_sys_bpf kernel/bpf/syscall.c:2073 [inline]
 __se_sys_bpf kernel/bpf/syscall.c:2035 [inline]
 __x64_sys_bpf+0x389/0x4c0 kernel/bpf/syscall.c:2035
 do_syscall_64+0x1b1/0x800 arch/x86/entry/common.c:287
 entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x455a09
RSP: 002b:7f9da107cc68 EFLAGS: 0246 ORIG_RAX: 0141
RAX: ffda RBX: 7f9da107d6d4 RCX: 00455a09
RDX: 0048 RSI: 2000e000 RDI: 0005
RBP: 0072bea0 R08:  R09: 
R10:  R11: 0246 R12: 0014
R13: 0046 R14: 006f4730 R15: 0021
Dumping ftrace buffer:
   (ftrace buffer empty)
Kernel Offset: disabled
Rebooting in 86400 seconds..


---
This bug is generated by a bot. It may contain errors.
See https://goo.gl/tpsmEJ for more information about syzbot.
syzbot engineers can be reached at syzkal...@googlegroups.com.

syzbot will keep track of this bug report. See:
https://goo.gl/tpsmEJ#bug-status-tracking for how to communicate with  
syzbot.

general protection fault in sock_do_ioctl

2018-05-26 Thread syzbot


Hello,

syzbot found the following crash on:

HEAD commit:62c8a069b510 net: mvpp2: Add missing VLAN tag detection
git tree:   net-next
console output: https://syzkaller.appspot.com/x/log.txt?x=10ad582780
kernel config:  https://syzkaller.appspot.com/x/.config?x=b632d8e2c2ab2c1
dashboard link: https://syzkaller.appspot.com/bug?extid=09b980aff7b322aac68d
compiler:   gcc (GCC) 8.0.1 20180413 (experimental)

Unfortunately, I don't have any reproducer for this crash yet.

IMPORTANT: if you fix the bug, please add the following tag to the commit:
Reported-by: syzbot+09b980aff7b322aac...@syzkaller.appspotmail.com

 __sys_sendmsg+0x115/0x270 net/socket.c:2155
kasan: CONFIG_KASAN_INLINE enabled
 __do_sys_sendmsg net/socket.c:2164 [inline]
 __se_sys_sendmsg net/socket.c:2162 [inline]
 __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2162
kasan: GPF could be caused by NULL-ptr deref or user memory access
 do_syscall_64+0x1b1/0x800 arch/x86/entry/common.c:287
general protection fault:  [#1] SMP KASAN
Dumping ftrace buffer:
   (ftrace buffer empty)
 entry_SYSCALL_64_after_hwframe+0x49/0xbe
Modules linked in:
RIP: 0033:0x455a09
RSP: 002b:7f7f8526bc68 EFLAGS: 0246 ORIG_RAX: 002e
CPU: 0 PID: 8176 Comm: syz-executor2 Not tainted 4.17.0-rc4+ #53
RAX: ffda RBX: 7f7f8526c6d4 RCX: 00455a09
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS  
Google 01/01/2011

RDX:  RSI: 200019c0 RDI: 0013
RIP: 0010:smc_tx_prepared_sends net/smc/smc_tx.h:27 [inline]
RIP: 0010:smc_ioctl+0x6db/0x9f0 net/smc/af_smc.c:1506
RBP: 0072bea0 R08:  R09: 
RSP: 0018:8801afe4f770 EFLAGS: 00010202
R10:  R11: 0246 R12: 0014
R13: 059b R14: 006fc728 R15: 0005
RAX: dc00 RBX:  RCX: dc00
RDX: 0004 RSI: 110035fc9f0d RDI: 0020
RBP: 8801afe4f9d0 R08: ed0035fc9f0e R09: ed0035fc9f0d
R10: ed0035fc9f0d R11: 8801afe4f86f R12: 110035fc9ef1
R13: 23c0 R14: 8801afe4f868 R15: 8801afe4f828
FS:  7f6710832700() GS:8801dae0() knlGS:
CS:  0010 DS:  ES:  CR0: 80050033
CR2: 007270dc CR3: 0001c83ae000 CR4: 001406f0
DR0:  DR1:  DR2: 
DR3:  DR6: fffe0ff0 DR7: 0400
Call Trace:
 sock_do_ioctl+0xe4/0x3e0 net/socket.c:957
 sock_ioctl+0x30d/0x680 net/socket.c:1081
 vfs_ioctl fs/ioctl.c:46 [inline]
 file_ioctl fs/ioctl.c:500 [inline]
 do_vfs_ioctl+0x1cf/0x16a0 fs/ioctl.c:684
 ksys_ioctl+0xa9/0xd0 fs/ioctl.c:701
 __do_sys_ioctl fs/ioctl.c:708 [inline]
 __se_sys_ioctl fs/ioctl.c:706 [inline]
 __x64_sys_ioctl+0x73/0xb0 fs/ioctl.c:706
 do_syscall_64+0x1b1/0x800 arch/x86/entry/common.c:287
FAULT_INJECTION: forcing a failure.
name failslab, interval 1, probability 0, space 0, times 0
CPU: 1 PID: 8189 Comm: syz-executor5 Not tainted 4.17.0-rc4+ #53
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS  
Google 01/01/2011

 entry_SYSCALL_64_after_hwframe+0x49/0xbe
Call Trace:
RIP: 0033:0x455a09
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x1b9/0x294 lib/dump_stack.c:113
RSP: 002b:7f6710831c68 EFLAGS: 0246
 ORIG_RAX: 0010
RAX: ffda RBX: 7f67108326d4 RCX: 00455a09
 fail_dump lib/fault-inject.c:51 [inline]
 should_fail.cold.4+0xa/0x1a lib/fault-inject.c:149
RDX: 23c0 RSI: 894b RDI: 0013
RBP: 0072bea0 R08:  R09: 
R10:  R11: 0246 R12: 
R13: 044c R14: 006fa7c0 R15: 
Code:
f8
48
c1
e8
03
80
3c
10
00
0f
85
ed
01
00
00
48
8b
9b
 __should_failslab+0x124/0x180 mm/failslab.c:32
90
 should_failslab+0x9/0x14 mm/slab_common.c:1522
04
 slab_pre_alloc_hook mm/slab.h:423 [inline]
 slab_alloc mm/slab.c:3378 [inline]
 kmem_cache_alloc+0x47/0x760 mm/slab.c:3552
00
00
48
 kmem_cache_zalloc include/linux/slab.h:691 [inline]
 fill_pool lib/debugobjects.c:134 [inline]
 __debug_object_init+0xbc0/0x12c0 lib/debugobjects.c:377
b8
00
00
00
00
00 fc
ff
df
48
8d
7b
20
48
89
fa
48
c1
ea
03
<0f>
b6
04
02
84
c0
74
08
3c
03
 debug_object_init+0x16/0x20 lib/debugobjects.c:429
0f
 debug_timer_init kernel/time/timer.c:704 [inline]
 debug_init kernel/time/timer.c:757 [inline]
 init_timer_key+0xa1/0x470 kernel/time/timer.c:806
8e
b7
01
00
00
 sctp_association_init net/sctp/associola.c:152 [inline]
 sctp_association_new+0xa90/0x2170 net/sctp/associola.c:312
8b
43
20
49
8d
RIP: smc_tx_prepared_sends net/smc/smc_tx.h:27 [inline] RSP:  
8801afe4f770

RIP: smc_ioctl+0x6db/0x9f0 net/smc/af_smc.c:1506 RSP: 8801afe4f770
---[ end trace ed404e46621ff58c ]---


---
This bug is generated by a bot. It may contain errors.
See

general protection fault in bpf_tcp_close

2018-05-26 Thread syzbot


Hello,

syzbot found the following crash on:

HEAD commit:fd0bfa8d6e04 Merge branch 'bpf-af-xdp-cleanups'
git tree:   bpf-next
console output: https://syzkaller.appspot.com/x/log.txt?x=11da942780
kernel config:  https://syzkaller.appspot.com/x/.config?x=b632d8e2c2ab2c1
dashboard link: https://syzkaller.appspot.com/bug?extid=0ce137753c78f7b6acc1
compiler:   gcc (GCC) 8.0.1 20180413 (experimental)

Unfortunately, I don't have any reproducer for this crash yet.

IMPORTANT: if you fix the bug, please add the following tag to the commit:
Reported-by: syzbot+0ce137753c78f7b6a...@syzkaller.appspotmail.com

kasan: CONFIG_KASAN_INLINE enabled
kasan: GPF could be caused by NULL-ptr deref or user memory access
general protection fault:  [#1] SMP KASAN
Dumping ftrace buffer:
   (ftrace buffer empty)
Modules linked in:
CPU: 0 PID: 12139 Comm: syz-executor2 Not tainted 4.17.0-rc4+ #17
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS  
Google 01/01/2011

RIP: 0010:__hlist_del include/linux/list.h:649 [inline]
RIP: 0010:hlist_del_rcu include/linux/rculist.h:427 [inline]
RIP: 0010:bpf_tcp_close+0x7d2/0xf80 kernel/bpf/sockmap.c:271
RSP: 0018:8801a8f8ef70 EFLAGS: 00010a02
RAX: ed00351f1dfd RBX: dc00 RCX: dead0200
RDX:  RSI: 1bd5a040 RDI: 8801cb710910
RBP: 8801a8f8f110 R08: ed003350ac9d R09: ed003350ac9c
R10: ed003350ac9c R11: 88019a8564e3 R12: 8801cb710380
R13: 8801b17ea6e0 R14: 8801cb710398 R15: 8801cb710900
FS:  7f9890c43700() GS:8801dae0() knlGS:
CS:  0010 DS:  ES:  CR0: 80050033
CR2: 7fde1a668000 CR3: 00019dca2000 CR4: 001406f0
DR0: 21c0 DR1: 21c0 DR2: 
DR3:  DR6: fffe0ff0 DR7: 0600
Call Trace:
 inet_release+0x104/0x1f0 net/ipv4/af_inet.c:427
 inet6_release+0x50/0x70 net/ipv6/af_inet6.c:459
 sock_release+0x96/0x1b0 net/socket.c:594
 sock_close+0x16/0x20 net/socket.c:1149
 __fput+0x34d/0x890 fs/file_table.c:209
 fput+0x15/0x20 fs/file_table.c:243
 task_work_run+0x1e4/0x290 kernel/task_work.c:113
 exit_task_work include/linux/task_work.h:22 [inline]
 do_exit+0x1aee/0x2730 kernel/exit.c:865
 do_group_exit+0x16f/0x430 kernel/exit.c:968
 get_signal+0x886/0x1960 kernel/signal.c:2469
 do_signal+0x98/0x2040 arch/x86/kernel/signal.c:810
 exit_to_usermode_loop+0x28a/0x310 arch/x86/entry/common.c:162
 prepare_exit_to_usermode arch/x86/entry/common.c:196 [inline]
 syscall_return_slowpath arch/x86/entry/common.c:265 [inline]
 do_syscall_64+0x6ac/0x800 arch/x86/entry/common.c:290
 entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x455a09
RSP: 002b:7f9890c42ce8 EFLAGS: 0246 ORIG_RAX: 00ca
RAX: fe00 RBX: 0072bec8 RCX: 00455a09
RDX:  RSI:  RDI: 0072bec8
RBP: 0072bec8 R08:  R09: 0072bea0
R10:  R11: 0246 R12: 
R13: 7ffcb48ac3ff R14: 7f9890c439c0 R15: 
Code: ff 48 c1 e9 03 80 3c 19 00 0f 85 a9 05 00 00 49 8b 4f 18 48 8b 85 98  
fe ff ff 48 89 ce c6 00 00 48 c1 ee 03 48 89 95 d8 fe ff ff <80> 3c 1e 00  
0f 85 c6 05 00 00 48 8b 85 98 fe ff ff 48 85 d2 48

RIP: __hlist_del include/linux/list.h:649 [inline] RSP: 8801a8f8ef70
RIP: hlist_del_rcu include/linux/rculist.h:427 [inline] RSP:  
8801a8f8ef70
RIP: bpf_tcp_close+0x7d2/0xf80 kernel/bpf/sockmap.c:271 RSP:  
8801a8f8ef70

---[ end trace e81227e93c7e7b75 ]---


---
This bug is generated by a bot. It may contain errors.
See https://goo.gl/tpsmEJ for more information about syzbot.
syzbot engineers can be reached at syzkal...@googlegroups.com.

syzbot will keep track of this bug report. See:
https://goo.gl/tpsmEJ#bug-status-tracking for how to communicate with  
syzbot.

Re: [PATCH 1/2] batman-adv: Remove "default n" in Kconfig

2018-05-26 Thread Sergei Shtylyov


On 5/25/2018 10:48 PM, Sven Eckelmann wrote:


The "default n" is the default value for any bool or tristate Kconfig
setting. It is therefore not necessary to add it to the an config entry.

  ^^
   One article would be enough. And it's "a", not "an" in this case. :-)


Reported-by: Sergei Shtylyov 
Signed-off-by: Sven Eckelmann 

[...]

MBR, Sergei

KASAN: use-after-free Read in bpf_tcp_close

2018-05-26 Thread syzbot


Hello,

syzbot found the following crash on:

HEAD commit:3fb48d881dbe Merge branch 'bpf-fib-mtu-check'
git tree:   bpf-next
console output: https://syzkaller.appspot.com/x/log.txt?x=15fc197780
kernel config:  https://syzkaller.appspot.com/x/.config?x=b632d8e2c2ab2c1
dashboard link: https://syzkaller.appspot.com/bug?extid=fce8f2462c403d02af98
compiler:   gcc (GCC) 8.0.1 20180413 (experimental)
syzkaller repro:https://syzkaller.appspot.com/x/repro.syz?x=1310c85780
C reproducer:   https://syzkaller.appspot.com/x/repro.c?x=17de717780

IMPORTANT: if you fix the bug, please add the following tag to the commit:
Reported-by: syzbot+fce8f2462c403d02a...@syzkaller.appspotmail.com

==
BUG: KASAN: use-after-free in hlist_del_rcu include/linux/rculist.h:427  
[inline]
BUG: KASAN: use-after-free in bpf_tcp_close+0xd7f/0xf80  
kernel/bpf/sockmap.c:271

Read of size 8 at addr 8801c884cf90 by task syz-executor330/11778

CPU: 1 PID: 11778 Comm: syz-executor330 Not tainted 4.17.0-rc4+ #18
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS  
Google 01/01/2011

Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x1b9/0x294 lib/dump_stack.c:113
 print_address_description+0x6c/0x20b mm/kasan/report.c:256
 kasan_report_error mm/kasan/report.c:354 [inline]
 kasan_report.cold.7+0x242/0x2fe mm/kasan/report.c:412
 __asan_report_load8_noabort+0x14/0x20 mm/kasan/report.c:433
 hlist_del_rcu include/linux/rculist.h:427 [inline]
 bpf_tcp_close+0xd7f/0xf80 kernel/bpf/sockmap.c:271
 inet_release+0x104/0x1f0 net/ipv4/af_inet.c:427
 inet6_release+0x50/0x70 net/ipv6/af_inet6.c:459
 sock_release+0x96/0x1b0 net/socket.c:594
 sock_close+0x16/0x20 net/socket.c:1149
 __fput+0x34d/0x890 fs/file_table.c:209
 fput+0x15/0x20 fs/file_table.c:243
 task_work_run+0x1e4/0x290 kernel/task_work.c:113
 exit_task_work include/linux/task_work.h:22 [inline]
 do_exit+0x1aee/0x2730 kernel/exit.c:865
 do_group_exit+0x16f/0x430 kernel/exit.c:968
 get_signal+0x886/0x1960 kernel/signal.c:2469
 do_signal+0x98/0x2040 arch/x86/kernel/signal.c:810
 exit_to_usermode_loop+0x28a/0x310 arch/x86/entry/common.c:162
 prepare_exit_to_usermode arch/x86/entry/common.c:196 [inline]
 syscall_return_slowpath arch/x86/entry/common.c:265 [inline]
 do_syscall_64+0x6ac/0x800 arch/x86/entry/common.c:290
 entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x445ed9
RSP: 002b:7f0078c0adb8 EFLAGS: 0246 ORIG_RAX: 00ca
RAX: fe00 RBX: 006dbc24 RCX: 00445ed9
RDX:  RSI:  RDI: 006dbc24
RBP: 006dbc20 R08:  R09: 
R10:  R11: 0246 R12: 
R13: 7ffcd147dbef R14: 7f0078c0b9c0 R15: 0007

Allocated by task 11787:
 save_stack+0x43/0xd0 mm/kasan/kasan.c:448
 set_track mm/kasan/kasan.c:460 [inline]
 kasan_kmalloc+0xc4/0xe0 mm/kasan/kasan.c:553
 __do_kmalloc_node mm/slab.c:3682 [inline]
 __kmalloc_node+0x47/0x70 mm/slab.c:3689
 kmalloc_node include/linux/slab.h:554 [inline]
 alloc_sock_hash_elem kernel/bpf/sockmap.c:2114 [inline]
 sock_hash_ctx_update_elem.isra.23+0xa57/0x1560 kernel/bpf/sockmap.c:2245
 sock_hash_update_elem+0x14f/0x2d0 kernel/bpf/sockmap.c:2303
 map_update_elem+0x5c4/0xc90 kernel/bpf/syscall.c:760
 __do_sys_bpf kernel/bpf/syscall.c:2134 [inline]
 __se_sys_bpf kernel/bpf/syscall.c:2105 [inline]
 __x64_sys_bpf+0x32a/0x4f0 kernel/bpf/syscall.c:2105
 do_syscall_64+0x1b1/0x800 arch/x86/entry/common.c:287
 entry_SYSCALL_64_after_hwframe+0x49/0xbe

Freed by task 8998:
 save_stack+0x43/0xd0 mm/kasan/kasan.c:448
 set_track mm/kasan/kasan.c:460 [inline]
 __kasan_slab_free+0x11a/0x170 mm/kasan/kasan.c:521
 kasan_slab_free+0xe/0x10 mm/kasan/kasan.c:528
 __cache_free mm/slab.c:3498 [inline]
 kfree+0xd9/0x260 mm/slab.c:3813
 sock_hash_free+0x24e/0x6e0 kernel/bpf/sockmap.c:2093
 bpf_map_free_deferred+0xba/0xf0 kernel/bpf/syscall.c:259
 process_one_work+0xc1e/0x1b50 kernel/workqueue.c:2145
 worker_thread+0x1cc/0x1440 kernel/workqueue.c:2279
 kthread+0x345/0x410 kernel/kthread.c:238
 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:412

The buggy address belongs to the object at 8801c884cf80
 which belongs to the cache kmalloc-64 of size 64
The buggy address is located 16 bytes inside of
 64-byte region [8801c884cf80, 8801c884cfc0)
The buggy address belongs to the page:
page:ea0007221300 count:1 mapcount:0 mapping:8801c884c000 index:0x0
flags: 0x2fffc000100(slab)
raw: 02fffc000100 8801c884c000  00010020
raw: ea00072e08e0 ea0006e99660 8801da800340 
page dumped because: kasan: bad access detected

Memory state around the buggy address:
 8801c884ce80: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc
 8801c884cf00: 00 00 00 00 00 fc fc fc fc fc fc fc fc fc fc fc

8801c884cf80: fb fb fb fb fb fb fb fb

Re: [bpf-next PATCH] bpf: sockhash fix race with bpf_tcp_close and map delete

2018-05-26 Thread Daniel Borkmann

Hi John,

On 05/25/2018 07:37 PM, John Fastabend wrote:
> syzbot reported two related splats, a use after free and null
> pointer dereference, when a TCP socket is closed while the map is
> also being removed.
> 
> The psock keeps a reference to all map slots that have a reference
> to the sock so that when the sock is closed we can clean up any
> outstanding sock{map|hash} entries. This avoids pinning a sock
> forever if the map owner fails to do proper cleanup. However, the
> result is we have two paths that can free an entry in the map. Even
> the comment in the sock{map|hash} tear down function, sock_hash_free()
> notes this:
> 
>  At this point no update, lookup or delete operations can happen.
>  However, be aware we can still get a socket state event updates,
>  and data ready callbacks that reference the psock from sk_user_data.
> 
> Both removal paths omitted taking the hash bucket lock resulting
> in the case where we have two references that are in the process
> of being free'd.
> 
> Reported-by: syzbot+a761b81c211794fa1...@syzkaller.appspotmail.com
> Signed-off-by: John Fastabend 

Could you also reply briefly with a Fixes: tag so we can track all
fixes for the original commit?

Thanks,
Daniel

P.s.: still waiting on net-next to get fast-forwarded, then I'll
fast-forward bpf-next and process the queue.

Re: WARNING: ODEBUG bug in __sk_destruct

2018-05-26 Thread syzbot


syzbot has found a reproducer for the following crash on:

HEAD commit:e52cde717093 net: dsa: dsa_loop: Make dynamic debugging he..
git tree:   net-next
console output: https://syzkaller.appspot.com/x/log.txt?x=1424a4b780
kernel config:  https://syzkaller.appspot.com/x/.config?x=e4078980b886800c
dashboard link: https://syzkaller.appspot.com/bug?extid=92209502e7aab127c75f
compiler:   gcc (GCC) 8.0.1 20180413 (experimental)
syzkaller repro:https://syzkaller.appspot.com/x/repro.syz?x=1071bc2f80
C reproducer:   https://syzkaller.appspot.com/x/repro.c?x=16b51cb780

IMPORTANT: if you fix the bug, please add the following tag to the commit:
Reported-by: syzbot+92209502e7aab127c...@syzkaller.appspotmail.com

[ cut here ]
ODEBUG: free active (active state 0) object type: work_struct hint:  
smc_tx_work+0x0/0x350 include/linux/compiler.h:188
WARNING: CPU: 0 PID: 5254 at lib/debugobjects.c:329  
debug_print_object+0x16a/0x210 lib/debugobjects.c:326

Kernel panic - not syncing: panic_on_warn set ...

CPU: 0 PID: 5254 Comm: syz-executor351 Not tainted 4.17.0-rc6+ #64
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS  
Google 01/01/2011

Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x1b9/0x294 lib/dump_stack.c:113
 panic+0x22f/0x4de kernel/panic.c:184
 __warn.cold.8+0x163/0x1b3 kernel/panic.c:536
 report_bug+0x252/0x2d0 lib/bug.c:186
 fixup_bug arch/x86/kernel/traps.c:178 [inline]
 do_error_trap+0x1de/0x490 arch/x86/kernel/traps.c:296
 do_invalid_op+0x1b/0x20 arch/x86/kernel/traps.c:315
 invalid_op+0x14/0x20 arch/x86/entry/entry_64.S:992
RIP: 0010:debug_print_object+0x16a/0x210 lib/debugobjects.c:326
RSP: 0018:8801c6f67158 EFLAGS: 00010082
RAX: 0059 RBX: 0003 RCX: 818435f8
RDX:  RSI: 8160f2c1 RDI: 0001
RBP: 8801c6f67198 R08: 8801cb640580 R09: ed003b5c3eb2
R10: ed003b5c3eb2 R11: 8801dae1f597 R12: 0001
R13: 88d5f040 R14: 87fa2a00 R15: 814ccb10
 __debug_check_no_obj_freed lib/debugobjects.c:783 [inline]
 debug_check_no_obj_freed+0x3a6/0x584 lib/debugobjects.c:815
 kmem_cache_free+0x216/0x2d0 mm/slab.c:3755
 sk_prot_free net/core/sock.c:1516 [inline]
 __sk_destruct+0x6fe/0xa40 net/core/sock.c:1600
 sk_destruct+0x78/0x90 net/core/sock.c:1608
 __sk_free+0xcf/0x300 net/core/sock.c:1619
 sk_free+0x42/0x50 net/core/sock.c:1630
 sock_put include/net/sock.h:1669 [inline]
 smc_release+0x459/0x610 net/smc/af_smc.c:156
 sock_release+0x96/0x1b0 net/socket.c:594
 sock_close+0x16/0x20 net/socket.c:1149
 __fput+0x34d/0x890 fs/file_table.c:209
 fput+0x15/0x20 fs/file_table.c:243
 task_work_run+0x1e4/0x290 kernel/task_work.c:113
 exit_task_work include/linux/task_work.h:22 [inline]
 do_exit+0x1aee/0x2730 kernel/exit.c:865
 do_group_exit+0x16f/0x430 kernel/exit.c:968
 __do_sys_exit_group kernel/exit.c:979 [inline]
 __se_sys_exit_group kernel/exit.c:977 [inline]
 __x64_sys_exit_group+0x3e/0x50 kernel/exit.c:977
 do_syscall_64+0x1b1/0x800 arch/x86/entry/common.c:287
 entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x4424f9
RSP: 002b:7ffcbea55c78 EFLAGS: 0202 ORIG_RAX: 00e7
RAX: ffda RBX: 02c0 RCX: 004424f9
RDX: 004424f9 RSI: 0001 RDI: 
RBP: 7ffcbea55db0 R08: 0003 R09: 7ffcbea55cc0
R10: 0004 R11: 0202 R12: 
R13:  R14: 1380 R15: 7ffcbea55dd8

==
WARNING: possible circular locking dependency detected
4.17.0-rc6+ #64 Not tainted
--
syz-executor351/5254 is trying to acquire lock:
(ptrval) ((console_sem).lock){-...}, at: down_trylock+0x13/0x70  
kernel/locking/semaphore.c:136


but task is already holding lock:
(ptrval) (&obj_hash[i].lock){-.-.}, at: __debug_check_no_obj_freed  
lib/debugobjects.c:774 [inline]
(ptrval) (&obj_hash[i].lock){-.-.}, at:  
debug_check_no_obj_freed+0x159/0x584 lib/debugobjects.c:815


which lock already depends on the new lock.


the existing dependency chain (in reverse order) is:

-> #3 (&obj_hash[i].lock){-.-.}:
   __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline]
   _raw_spin_lock_irqsave+0x96/0xc0 kernel/locking/spinlock.c:152
   __debug_object_init+0x11f/0x12c0 lib/debugobjects.c:381
   debug_object_init+0x16/0x20 lib/debugobjects.c:429
   debug_hrtimer_init kernel/time/hrtimer.c:410 [inline]
   debug_init kernel/time/hrtimer.c:458 [inline]
   hrtimer_init+0x8f/0x460 kernel/time/hrtimer.c:1308
   init_dl_task_timer+0x1b/0x50 kernel/sched/deadline.c:1056
   __sched_fork+0x2ae/0xc20 kernel/sched/core.c:2166
   init_idle+0x75/0x7a0 kernel/sched/core.c:5402
   sched_init+0xbeb/0xd10 kernel/sched/core.c:6100
   start_kernel+0x475/0x92d

Re: [PATCH net-next v12 2/5] netvsc: refactor notifier/event handling code to use the failover framework

2018-05-26 Thread Jiri Pirko

Sat, May 26, 2018 at 09:22:18AM CEST, sridhar.samudr...@intel.com wrote:
>On 5/25/2018 4:28 PM, Stephen Hemminger wrote:
>> On Fri, 25 May 2018 16:11:47 -0700
>> "Samudrala, Sridhar"  wrote:
>> 
>> > On 5/25/2018 3:34 PM, Stephen Hemminger wrote:
>> > > On Thu, 24 May 2018 09:55:14 -0700
>> > > Sridhar Samudrala  wrote:
>> > > > --- a/drivers/net/hyperv/Kconfig
>> > > > +++ b/drivers/net/hyperv/Kconfig
>> > > > @@ -2,5 +2,6 @@ config HYPERV_NET
>> > > >tristate "Microsoft Hyper-V virtual network driver"
>> > > >depends on HYPERV
>> > > >select UCS2_STRING
>> > > > +  select FAILOVER
>> > > When I take a working kernel config, add the patches then do
>> > > make oldconfig
>> > > 
>> > > It is not autoselecting FAILOVER, it prompts me for it. This means
>> > > if user says no then a non-working netvsc device is made.
>> > I see
>> >  Generic failover module (FAILOVER) [M/y/?] (NEW)
>> > 
>> > So the user is given an option to either build as a Module or part of the
>> > kernel. 'n' is not an option.
>> With most libraries there is no prompt at all.
>
>Not sure what you meant by this.
>Without any patches applied, i had a .config file with HYPERV_NET configured
>as a module.
>Then after applying the first 2 patches in this series, i did a
>  make oldconfig
>and i see the above prompt.
>
>Are you saying that on some distros, 'make oldconfig' creates a .config
>file without any prompt and FAILOVER is not getting selected even when 
>HYPERV_NET
>is enabled?
>
>

Well, the thing is that for a user, it makes no sense to select
"FAILOVER" by hand. It is a lib, so it should only be selected by the
code that uses it. It makes no sense to have it turned on by hand when
there is no lib user. You can achieve that by simply removing "help"
for the Kconfig item. Same thing for "NET_FAILOVER".

Re: [PATCH net-next v12 1/5] net: Introduce generic failover module

2018-05-26 Thread Jiri Pirko

Sat, May 26, 2018 at 12:37:44AM CEST, step...@networkplumber.org wrote:
>On Thu, 24 May 2018 09:55:13 -0700
>Sridhar Samudrala  wrote:
>
>
>> +spin_lock(&failover_lock);
>
>Since register is not in fast path, this should be a mutex?

I don't get it. Why would you prefer mutex over spinlock here?

Re: [PATCH net-next] bpfilter: fix a build err

2018-05-26 Thread Yafang Shao

On Sat, May 26, 2018 at 10:25 AM, YueHaibing  wrote:
> On 2018/5/26 0:19, Alexei Starovoitov wrote:
>> On Fri, May 25, 2018 at 06:17:57PM +0800, YueHaibing wrote:
>>> gcc-7.3.0 report following err:
>>>
>>>   HOSTCC  net/bpfilter/main.o
>>> In file included from net/bpfilter/main.c:9:0:
>>> ./include/uapi/linux/bpf.h:12:10: fatal error: linux/bpf_common.h: No such 
>>> file or directory
>>>  #include <linux/bpf_common.h>
>>>
>>> remove it by adding a include path.
>>> Fixes: d2ba09c17a06 ("net: add skeleton of bpfilter kernel module")
>>>
>>> Signed-off-by: YueHaibing 
>>> ---
>>>  net/bpfilter/Makefile | 2 +-
>>>  1 file changed, 1 insertion(+), 1 deletion(-)
>>>
>>> diff --git a/net/bpfilter/Makefile b/net/bpfilter/Makefile
>>> index 2af752c..3f3cb87 100644
>>> --- a/net/bpfilter/Makefile
>>> +++ b/net/bpfilter/Makefile
>>> @@ -5,7 +5,7 @@
>>>
>>>  hostprogs-y := bpfilter_umh
>>>  bpfilter_umh-objs := main.o
>>> -HOSTCFLAGS += -I. -Itools/include/
>>> +HOSTCFLAGS += -I. -Itools/include/ -Itools/include/uapi
>>
>> Strangely I don't see this error with gcc 7.3
>> I've tried this patch and it doesn't hurt,
>> but before it gets applied could you please try
>> the top two patches from this tree:
>> https://git.kernel.org/pub/scm/linux/kernel/git/ast/bpf.git/?h=ipt_bpf
>> in your environment?
>> These two patches add the actual meat of bpfilter and I'd like
>> to make sure the build setup is good for everyone before
>> we proceed too far.
>
> after applied these two patches on net-next, the err still here:
>  bpfilter: rough bpfilter codegen example hack
>  bpfilter: add iptable get/set parsing
>
>   HOSTCC  net/bpfilter/main.o
> In file included from net/bpfilter/main.c:13:0:
> ./include/uapi/linux/bpf.h:12:10: fatal error: linux/bpf_common.h: No such 
> file or directory
>  #include <linux/bpf_common.h>
>   ^~~~
> compilation terminated.
> make[2]: *** [net/bpfilter/main.o] Error 1
> make[1]: *** [net/bpfilter] Error 2
> make: *** [net] Error 2
>
> Also I compile your tree, error is same
>
> my gcc version info as follow:
> [root@localhost net-next]# gcc -v
> Using built-in specs.
> COLLECT_GCC=gcc
> COLLECT_LTO_WRAPPER=/home/yuehb/gcc-7.3.0-tools/libexec/gcc/x86_64-pc-linux-gnu/7.3.0/lto-wrapper
> Target: x86_64-pc-linux-gnu
> Configured with: ../gcc-7.3.0/configure --enable-checking=release 
> --enable-languages=c,c++
> --disable-multilib --prefix=/home/yuehb/gcc-7.3.0-tools
> Thread model: posix
> gcc version 7.3.0 (GCC)
>
>>


This error also occurs on gcc-4.8.5.
After applying Haibing's patch, this build error disappears.

Thanks
Yafang

Re: [PATCH net-next v12 2/5] netvsc: refactor notifier/event handling code to use the failover framework

2018-05-26 Thread Samudrala, Sridhar


On 5/25/2018 4:28 PM, Stephen Hemminger wrote:

On Fri, 25 May 2018 16:11:47 -0700
"Samudrala, Sridhar"  wrote:


On 5/25/2018 3:34 PM, Stephen Hemminger wrote:

On Thu, 24 May 2018 09:55:14 -0700
Sridhar Samudrala  wrote:
  

--- a/drivers/net/hyperv/Kconfig
+++ b/drivers/net/hyperv/Kconfig
@@ -2,5 +2,6 @@ config HYPERV_NET
tristate "Microsoft Hyper-V virtual network driver"
depends on HYPERV
select UCS2_STRING
+   select FAILOVER

When I take a working kernel config, add the patches then do
make oldconfig

It is not autoselecting FAILOVER, it prompts me for it. This means
if user says no then a non-working netvsc device is made.

I see
 Generic failover module (FAILOVER) [M/y/?] (NEW)

So the user is given an option to either build as a Module or part of the
kernel. 'n' is not an option.

With most libraries there is no prompt at all.


Not sure what you meant by this.
Without any patches applied, i had a .config file with HYPERV_NET configured
as a module.
Then after applying the first 2 patches in this series, i did a
  make oldconfig
and i see the above prompt.

Are you saying that on some distros, 'make oldconfig' creates a .config
file without any prompt and FAILOVER is not getting selected even when 
HYPERV_NET
is enabled?

Re: aio poll and a new in-kernel poll API V13

2018-05-26 Thread Christoph Hellwig

I'm still waking up..

Re: aio poll and a new in-kernel poll API V13

2018-05-26 Thread Al Viro

On Sat, May 26, 2018 at 01:11:11AM +0100, Al Viro wrote:
> On Wed, May 23, 2018 at 09:19:49PM +0200, Christoph Hellwig wrote:
> > Hi all,
> > 
> > this series adds support for the IOCB_CMD_POLL operation to poll for the
> > readiness of file descriptors using the aio subsystem.  The API is based
> > on patches that existed in RHAS2.1 and RHEL3, which means it already is
> > supported by libaio.  To implement the poll support efficiently new
> > methods to poll are introduced in struct file_operations:  get_poll_head
> > and poll_mask.  The first one returns a wait_queue_head to wait on
> > (lifetime is bound by the file), and the second does a non-blocking
> > check for the POLL* events.  This allows aio poll to work without
> > any additional context switches, unlike epoll.
> > 
> > This series sits on top of the aio-fsync series that also includes
> > support for io_pgetevents.
> 
> OK, I can live with that, except for one problem - the first patch shouldn't
> be sitting on top of arseloads of next window fodder.
> 
> Please, rebase the rest of the series on top of merge of vfs.git#fixes
> (4faa99965e02) with your aio-fsync.4 and tell me what to pull.

UGH

You've based it on vfs.git#hch.aio (== your aio-fsync.4) + baf10564fbb6
(== vfs.git#fixes^), *and* started with cherry-pick of vfs.git#fixes
on top of that, followed by your series.

That makes no sense whatsoever.  Please, take your aio-fsync.4, merge
vfs.git#fixes (== 4faa99965e02, "fix io_destroy()/aio_complete() race",
same change as your 4e79230e5254) into it and rebase the rest of your
branch on top of that (from "uapi: turn __poll_t sparse checkin
on by default" to "random: convert to ->poll_mask").  BTW, you probably
want s/checkin/checks/ in the first one of those...

Re: [PATCH] net: netsec: reduce DMA mask to 40 bits

2018-05-26 Thread Ard Biesheuvel

On 26 May 2018 at 05:44, Jassi Brar  wrote:
> On 26 May 2018 at 08:56, Jassi Brar  wrote:
>> On 26 May 2018 at 01:07, Robin Murphy  wrote:
>>> On Sat, 26 May 2018 00:33:05 +0530
>>> Jassi Brar  wrote:
>>>
 On 25 May 2018 at 18:20, Ard Biesheuvel 
 wrote:
 > The netsec network controller IP can drive 64 address bits for DMA,
 > and the DMA mask is set accordingly in the driver. However, the
 > SynQuacer SoC, which is the only silicon incorporating this IP at
 > the moment, integrates this IP in a manner that leaves address bits
 > [63:40] unconnected.
 >
 > Up until now, this has not resulted in any problems, given that the
 > DDR controller doesn't decode those bits to begin with. However,
 > recent firmware updates for platforms incorporating this SoC allow
 > the IOMMU to be enabled, which does decode address bits [47:40],
 > and allocates top down from the IOVA space, producing DMA addresses
 > that have bits set that have been left unconnected.
 >
 > Both the DT and ACPI (IORT) descriptions of the platform take this
 > into account, and only describe a DMA address space of 40 bits
 > (using either dma-ranges DT properties, or DMA address limits in
 > IORT named component nodes). However, even though our IOMMU and bus
 > layers may take such limitations into account by setting a narrower
 > DMA mask when creating the platform device, the netsec probe()
 > entrypoint follows the common practice of setting the DMA mask
 > unconditionally, according to the capabilities of the IP block itself
 > rather than to its integration into the chip.
 >
 > It is currently unclear what the correct fix is here. We could hack
 > around it by only setting the DMA mask if it deviates from its
 > default value of DMA_BIT_MASK(32). However, this makes it
 > impossible for the bus layer to use DMA_BIT_MASK(32) as the bus
 > limit, and so it appears that a more comprehensive approach is
 > required to take DMA limits imposed by the SoC as a whole into
 > account.
 >
 > In the mean time, let's limit the DMA mask to 40 bits. Given that
 > there is currently only one SoC that incorporates this IP, this is
 > a reasonable approach that can be backported to -stable and buys us
 > some time to come up with a proper fix going forward.
 >
 I am sure you already thought about it, but why not let the platform
 specify the bit mask for the driver (via some "bus-width" property),
 to override the default 64 bit mask?
>>>
>>> Because lack of a property to describe the integration is not the
>>> problem. There are already at least two ways: the general DT/IORT
>>> properties for describing DMA addressing - which it would be a bit
>>> ungainly for a driver to parse for this reason, but not impossible -
>> 
>>
>>
>>> and inferring it from a SoC-specific compatible - which is more
>>> appropriate, and what we happen to be able to do here.
>>>
>> Sorry, I am not sure I follow. This patch changes from 64-bits default
>> to 40-bits capability without checking for the parent SoC. If the next
>> generation implements the full 64-bit or just 32-bit bus, we'll be
>> back in the pit again. No?
>>
> Probably you meant we'll change the ethernet compatible string for
> differently capable SoC. OK, but here it is more of integration issue
> than controller version.
>
> Which makes me realise the extant compatible property for netsec is
> not so correct (it embeds the platform name). So I am ok either way.
>

The platform in question has a dma-ranges DT property at the root
level that only describes 40 bits' worth of DMA. Also, the ACPI
description in the IORT table of the IOMMU integration of the netsec
controller limits DMA to 40 bits. In the latter case, we actually
enter netsec_probe() with the correct value already assigned to the
DMA mask fields. (In the former case, the DMA limit is ignored
entirely)

In other words, we can already describe these SoC limitations and
distinguish them from device limitations. The problem is that drivers
ignore the existing values of DMA mask.

Robin has volunteered to look into fixing this, but this cannot be
done in a way that is suitable for -stable. In the mean time, we have
a single platform using this network IP in the field that cannot
upgrade its firmware to a version that describes the IOMMU, because
the existing DMA layer code will start driving address bits that are
correctly described as unconnected by the DT/ACPI tables.

So as a workaround, until Robin fixes things properly, let's reduce
the DMA mask to 40 bits.

Re: [bpf-next PATCH] bpf: sockhash fix race with bpf_tcp_close and map delete

2018-05-26 Thread Song Liu

On Fri, May 25, 2018 at 10:37 AM, John Fastabend
 wrote:
> syzbot reported two related splats, a use after free and null
> pointer dereference, when a TCP socket is closed while the map is
> also being removed.
>
> The psock keeps a reference to all map slots that have a reference
> to the sock so that when the sock is closed we can clean up any
> outstanding sock{map|hash} entries. This avoids pinning a sock
> forever if the map owner fails to do proper cleanup. However, the
> result is we have two paths that can free an entry in the map. Even
> the comment in the sock{map|hash} tear down function, sock_hash_free()
> notes this:
>
>  At this point no update, lookup or delete operations can happen.
>  However, be aware we can still get a socket state event updates,
>  and data ready callbacks that reference the psock from sk_user_data.
>
> Both removal paths omitted taking the hash bucket lock resulting
> in the case where we have two references that are in the process
> of being free'd.
>
> Reported-by: syzbot+a761b81c211794fa1...@syzkaller.appspotmail.com
> Signed-off-by: John Fastabend 

Acked-by: Song Liu 

> ---
>  kernel/bpf/sockmap.c |   33 +
>  1 file changed, 21 insertions(+), 12 deletions(-)
>
> diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
> index 52a91d8..b508141f 100644
> --- a/kernel/bpf/sockmap.c
> +++ b/kernel/bpf/sockmap.c
> @@ -225,6 +225,16 @@ static void free_htab_elem(struct bpf_htab *htab, struct 
> htab_elem *l)
> kfree_rcu(l, rcu);
>  }
>
> +static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash)
> +{
> +   return &htab->buckets[hash & (htab->n_buckets - 1)];
> +}
> +
> +static inline struct hlist_head *select_bucket(struct bpf_htab *htab, u32 
> hash)
> +{
> +   return &__select_bucket(htab, hash)->head;
> +}
> +
>  static void bpf_tcp_close(struct sock *sk, long timeout)
>  {
> void (*close_fun)(struct sock *sk, long timeout);
> @@ -268,9 +278,15 @@ static void bpf_tcp_close(struct sock *sk, long timeout)
> smap_release_sock(psock, sk);
> }
> } else {
> +   u32 hash = e->hash_link->hash;
> +   struct bucket *b;
> +
> +   b = __select_bucket(e->htab, hash);
> +   raw_spin_lock_bh(&b->lock);
> hlist_del_rcu(&e->hash_link->hash_node);
> smap_release_sock(psock, e->hash_link->sk);
> free_htab_elem(e->htab, e->hash_link);
> +   raw_spin_unlock_bh(&b->lock);
> }
> }
> write_unlock_bh(&sk->sk_callback_lock);
> @@ -2043,16 +2059,6 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr 
> *attr)
> return ERR_PTR(err);
>  }
>
> -static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash)
> -{
> -   return &htab->buckets[hash & (htab->n_buckets - 1)];
> -}
> -
> -static inline struct hlist_head *select_bucket(struct bpf_htab *htab, u32 
> hash)
> -{
> -   return &__select_bucket(htab, hash)->head;
> -}
> -
>  static void sock_hash_free(struct bpf_map *map)
>  {
> struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
> @@ -2069,10 +2075,12 @@ static void sock_hash_free(struct bpf_map *map)
>  */
> rcu_read_lock();
> for (i = 0; i < htab->n_buckets; i++) {
> -   struct hlist_head *head = select_bucket(htab, i);
> +   struct bucket *b = __select_bucket(htab, i);
> +   struct hlist_head *head = &b->head;
> struct hlist_node *n;
> struct htab_elem *l;
>
> +   raw_spin_lock_bh(&b->lock);
> hlist_for_each_entry_safe(l, n, head, hash_node) {
> struct sock *sock = l->sk;
> struct smap_psock *psock;
> @@ -2090,8 +2098,9 @@ static void sock_hash_free(struct bpf_map *map)
> smap_release_sock(psock, sock);
> }
> write_unlock_bh(&sock->sk_callback_lock);
> -   kfree(l);
> +   free_htab_elem(htab, l);
> }
> +   raw_spin_unlock_bh(&b->lock);
> }
> rcu_read_unlock();
> bpf_map_area_free(htab->buckets);
>

63 matches

Mail list logo