[PATCH 4.19 042/100] vfio/type1: Limit DMA mappings per container

2019-04-30 Thread Greg Kroah-Hartman
From: Alex Williamson 

commit 492855939bdb59c6f947b0b5b44af9ad82b7e38c upstream.

Memory backed DMA mappings are accounted against a user's locked
memory limit, including multiple mappings of the same memory.  This
accounting bounds the number of such mappings that a user can create.
However, DMA mappings that are not backed by memory, such as DMA
mappings of device MMIO via mmaps, do not make use of page pinning
and therefore do not count against the user's locked memory limit.
These mappings still consume memory, but the memory is not well
associated to the process for the purpose of oom killing a task.

To add bounding on this use case, we introduce a limit to the total
number of concurrent DMA mappings that a user is allowed to create.
This limit is exposed as a tunable module option where the default
value of 64K is expected to be well in excess of any reasonable use
case (a large virtual machine configuration would typically only make
use of tens of concurrent mappings).

This fixes CVE-2019-3882.

Reviewed-by: Eric Auger 
Tested-by: Eric Auger 
Reviewed-by: Peter Xu 
Reviewed-by: Cornelia Huck 
Signed-off-by: Alex Williamson 
Signed-off-by: Greg Kroah-Hartman 

---
 drivers/vfio/vfio_iommu_type1.c |   14 ++
 1 file changed, 14 insertions(+)

--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -58,12 +58,18 @@ module_param_named(disable_hugepages,
 MODULE_PARM_DESC(disable_hugepages,
 "Disable VFIO IOMMU support for IOMMU hugepages.");
 
+static unsigned int dma_entry_limit __read_mostly = U16_MAX;
+module_param_named(dma_entry_limit, dma_entry_limit, uint, 0644);
+MODULE_PARM_DESC(dma_entry_limit,
+"Maximum number of user DMA mappings per container (65535).");
+
 struct vfio_iommu {
struct list_headdomain_list;
struct vfio_domain  *external_domain; /* domain for external user */
struct mutexlock;
struct rb_root  dma_list;
struct blocking_notifier_head notifier;
+   unsigned intdma_avail;
boolv2;
boolnesting;
 };
@@ -836,6 +842,7 @@ static void vfio_remove_dma(struct vfio_
vfio_unlink_dma(iommu, dma);
put_task_struct(dma->task);
kfree(dma);
+   iommu->dma_avail++;
 }
 
 static unsigned long vfio_pgsize_bitmap(struct vfio_iommu *iommu)
@@ -1110,12 +1117,18 @@ static int vfio_dma_do_map(struct vfio_i
goto out_unlock;
}
 
+   if (!iommu->dma_avail) {
+   ret = -ENOSPC;
+   goto out_unlock;
+   }
+
dma = kzalloc(sizeof(*dma), GFP_KERNEL);
if (!dma) {
ret = -ENOMEM;
goto out_unlock;
}
 
+   iommu->dma_avail--;
dma->iova = iova;
dma->vaddr = vaddr;
dma->prot = prot;
@@ -1612,6 +1625,7 @@ static void *vfio_iommu_type1_open(unsig
 
	INIT_LIST_HEAD(&iommu->domain_list);
	iommu->dma_list = RB_ROOT;
+	iommu->dma_avail = dma_entry_limit;
	mutex_init(&iommu->lock);
	BLOCKING_INIT_NOTIFIER_HEAD(&iommu->notifier);
 




[PATCH 4.19 011/100] netfilter: nf_tables: bogus EBUSY in helper removal from transaction

2019-04-30 Thread Greg Kroah-Hartman
[ Upstream commit 8ffcd32f64633926163cdd07a7d295c500a947d1 ]

Proper use counter updates when activating and deactivating the object,
otherwise, this hits bogus EBUSY error.

Fixes: cd5125d8f518 ("netfilter: nf_tables: split set destruction in deactivate 
and destroy phase")
Reported-by: Laura Garcia 
Signed-off-by: Pablo Neira Ayuso 
Signed-off-by: Sasha Levin 
---
 net/netfilter/nft_objref.c | 19 ---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c
index d8737c115257..bf92a40dd1b2 100644
--- a/net/netfilter/nft_objref.c
+++ b/net/netfilter/nft_objref.c
@@ -64,21 +64,34 @@ nla_put_failure:
return -1;
 }
 
-static void nft_objref_destroy(const struct nft_ctx *ctx,
-  const struct nft_expr *expr)
+static void nft_objref_deactivate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ enum nft_trans_phase phase)
 {
struct nft_object *obj = nft_objref_priv(expr);
 
+   if (phase == NFT_TRANS_COMMIT)
+   return;
+
obj->use--;
 }
 
+static void nft_objref_activate(const struct nft_ctx *ctx,
+   const struct nft_expr *expr)
+{
+   struct nft_object *obj = nft_objref_priv(expr);
+
+   obj->use++;
+}
+
 static struct nft_expr_type nft_objref_type;
 static const struct nft_expr_ops nft_objref_ops = {
	.type		= &nft_objref_type,
.size   = NFT_EXPR_SIZE(sizeof(struct nft_object *)),
.eval   = nft_objref_eval,
.init   = nft_objref_init,
-   .destroy= nft_objref_destroy,
+   .activate   = nft_objref_activate,
+   .deactivate = nft_objref_deactivate,
.dump   = nft_objref_dump,
 };
 
-- 
2.19.1





[PATCH 4.19 015/100] tipc: handle the err returned from cmd header function

2019-04-30 Thread Greg Kroah-Hartman
[ Upstream commit 2ac695d1d602ce00b12170242f58c3d3a8e36d04 ]

Syzbot found a crash:

  BUG: KMSAN: uninit-value in tipc_nl_compat_name_table_dump+0x54f/0xcd0 
net/tipc/netlink_compat.c:872
  Call Trace:
tipc_nl_compat_name_table_dump+0x54f/0xcd0 net/tipc/netlink_compat.c:872
__tipc_nl_compat_dumpit+0x59e/0xda0 net/tipc/netlink_compat.c:215
tipc_nl_compat_dumpit+0x63a/0x820 net/tipc/netlink_compat.c:280
tipc_nl_compat_handle net/tipc/netlink_compat.c:1226 [inline]
tipc_nl_compat_recv+0x1b5f/0x2750 net/tipc/netlink_compat.c:1265
genl_family_rcv_msg net/netlink/genetlink.c:601 [inline]
genl_rcv_msg+0x185f/0x1a60 net/netlink/genetlink.c:626
netlink_rcv_skb+0x431/0x620 net/netlink/af_netlink.c:2477
genl_rcv+0x63/0x80 net/netlink/genetlink.c:637
netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline]
netlink_unicast+0xf3e/0x1020 net/netlink/af_netlink.c:1336
netlink_sendmsg+0x127f/0x1300 net/netlink/af_netlink.c:1917
sock_sendmsg_nosec net/socket.c:622 [inline]
sock_sendmsg net/socket.c:632 [inline]

  Uninit was created at:
__alloc_skb+0x309/0xa20 net/core/skbuff.c:208
alloc_skb include/linux/skbuff.h:1012 [inline]
netlink_alloc_large_skb net/netlink/af_netlink.c:1182 [inline]
netlink_sendmsg+0xb82/0x1300 net/netlink/af_netlink.c:1892
sock_sendmsg_nosec net/socket.c:622 [inline]
sock_sendmsg net/socket.c:632 [inline]

It was supposed to be fixed on commit 974cb0e3e7c9 ("tipc: fix uninit-value
in tipc_nl_compat_name_table_dump") by checking TLV_GET_DATA_LEN(msg->req)
in cmd->header()/tipc_nl_compat_name_table_dump_header(), which is called
ahead of tipc_nl_compat_name_table_dump().

However, tipc_nl_compat_dumpit() doesn't handle the error returned from cmd
header function. It means even when the check added in that fix fails, it
won't stop calling tipc_nl_compat_name_table_dump(), and the issue will be
triggered again.

So this patch is to add the process for the err returned from cmd header
function in tipc_nl_compat_dumpit().

Reported-by: syzbot+3ce8520484b0d4e26...@syzkaller.appspotmail.com
Signed-off-by: Xin Long 
Signed-off-by: David S. Miller 
Signed-off-by: Sasha Levin 
---
 net/tipc/netlink_compat.c | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index 0b21187d74df..e3de41eb 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -267,8 +267,14 @@ static int tipc_nl_compat_dumpit(struct 
tipc_nl_compat_cmd_dump *cmd,
if (msg->rep_type)
tipc_tlv_init(msg->rep, msg->rep_type);
 
-   if (cmd->header)
-   (*cmd->header)(msg);
+   if (cmd->header) {
+   err = (*cmd->header)(msg);
+   if (err) {
+   kfree_skb(msg->rep);
+   msg->rep = NULL;
+   return err;
+   }
+   }
 
arg = nlmsg_new(0, GFP_KERNEL);
if (!arg) {
-- 
2.19.1





[PATCH 4.19 013/100] net: mvpp2: fix validate for PPv2.1

2019-04-30 Thread Greg Kroah-Hartman
[ Upstream commit 8b318f30ab4ef9bbc1241e6f8c1db366dbd347f2 ]

The Phylink validate function in the Marvell PPv2 driver makes a check
on the GoP id. This is valid and has to be done when using PPv2.2 engines
but makes no sense when using PPv2.1. The check done when using an RGMII
interface makes sure the GoP id is not 0, but this breaks PPv2.1. Fix
it.

Fixes: 0fb628f0f250 ("net: mvpp2: fix phylink handling of invalid PHY modes")
Signed-off-by: Antoine Tenart 
Signed-off-by: David S. Miller 
Signed-off-by: Sasha Levin 
---
 drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c 
b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index 9988c89ed9fd..9b10abb604cb 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -4272,7 +4272,7 @@ static void mvpp2_phylink_validate(struct net_device *dev,
case PHY_INTERFACE_MODE_RGMII_ID:
case PHY_INTERFACE_MODE_RGMII_RXID:
case PHY_INTERFACE_MODE_RGMII_TXID:
-   if (port->gop_id == 0)
+   if (port->priv->hw_version == MVPP22 && port->gop_id == 0)
goto empty_set;
break;
default:
-- 
2.19.1





[PATCH] 9p/virtio: Add cleanup path in p9_virtio_init

2019-04-30 Thread YueHaibing
KASAN report this:

BUG: unable to handle kernel paging request at a0097000
PGD 3870067 P4D 3870067 PUD 3871063 PMD 2326e2067 PTE 0
Oops:  [#1
CPU: 0 PID: 5340 Comm: modprobe Not tainted 5.1.0-rc7+ #25
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
rel-1.9.3-0-ge2fc41e-prebuilt.qemu-project.org 04/01/2014
RIP: 0010:__list_add_valid+0x10/0x70
Code: c3 48 8b 06 55 48 89 e5 5d 48 39 07 0f 94 c0 0f b6 c0 c3 90 90 90 90 90 
90 90 55 48 89 d0 48 8b 52 08 48 89 e5 48 39 f2 75 19 <48> 8b 32 48 39 f0 75 3a

RSP: 0018:c9e23c68 EFLAGS: 00010246
RAX: a00ad000 RBX: a009d000 RCX: 
RDX: a0097000 RSI: a0097000 RDI: a009d000
RBP: c9e23c68 R08: 0001 R09: 
R10:  R11:  R12: a0097000
R13: 888231797180 R14:  R15: c9e23e78
FS:  7fb215285540() GS:888237a0() knlGS:
CS:  0010 DS:  ES:  CR0: 80050033
CR2: a0097000 CR3: 00022f144000 CR4: 06f0
Call Trace:
 v9fs_register_trans+0x2f/0x60 [9pnet
 ? 0xa0087000
 p9_virtio_init+0x25/0x1000 [9pnet_virtio
 do_one_initcall+0x6c/0x3cc
 ? kmem_cache_alloc_trace+0x248/0x3b0
 do_init_module+0x5b/0x1f1
 load_module+0x1db1/0x2690
 ? m_show+0x1d0/0x1d0
 __do_sys_finit_module+0xc5/0xd0
 __x64_sys_finit_module+0x15/0x20
 do_syscall_64+0x6b/0x1d0
 entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x7fb214d8e839
Code: 00 f3 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 
89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01

RSP: 002b:7ffc96554278 EFLAGS: 0246 ORIG_RAX: 0139
RAX: ffda RBX: 55e67eed2aa0 RCX: 7fb214d8e839
RDX:  RSI: 55e67ce95c2e RDI: 0003
RBP: 55e67ce95c2e R08:  R09: 55e67eed2aa0
R10: 0003 R11: 0246 R12: 
R13: 55e67eeda500 R14: 0004 R15: 55e67eed2aa0
Modules linked in: 9pnet_virtio(+) 9pnet gre rfkill 
vmw_vsock_virtio_transport_common vsock [last unloaded: 9pnet_virtio
CR2: a0097000
---[ end trace 4a52bb13ff07b761

If register_virtio_driver() fails in p9_virtio_init,
we should call v9fs_unregister_trans() to do cleanup.

Reported-by: Hulk Robot 
Fixes: b530cc794024 ("9p: add virtio transport")
Signed-off-by: YueHaibing 
---
 net/9p/trans_virtio.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index b1d39ca..6753ee9 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -782,10 +782,16 @@ static struct p9_trans_module p9_virtio_trans = {
 /* The standard init function */
 static int __init p9_virtio_init(void)
 {
+   int rc;
+
	INIT_LIST_HEAD(&virtio_chan_list);

	v9fs_register_trans(&p9_virtio_trans);
-	return register_virtio_driver(&p9_virtio_drv);
+	rc = register_virtio_driver(&p9_virtio_drv);
+	if (rc)
+		v9fs_unregister_trans(&p9_virtio_trans);
+
+   return rc;
 }
 
 static void __exit p9_virtio_cleanup(void)
-- 
2.7.0




[PATCH 4.19 012/100] net/ibmvnic: Fix RTNL deadlock during device reset

2019-04-30 Thread Greg Kroah-Hartman
[ Upstream commit 986103e7920cabc0b910749e77ae5589d3934d52 ]

Commit a5681e20b541 ("net/ibmnvic: Fix deadlock problem
in reset") made the change to hold the RTNL lock during
driver reset but still calls netdev_notify_peers, which
results in a deadlock. Instead, use call_netdevice_notifiers,
which is functionally the same except that it does not
take the RTNL lock again.

Fixes: a5681e20b541 ("net/ibmnvic: Fix deadlock problem in reset")
Signed-off-by: Thomas Falcon 
Signed-off-by: David S. Miller 
Signed-off-by: Sasha Levin 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index a475f36ddf8c..426789e2c23d 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1859,7 +1859,7 @@ static int do_reset(struct ibmvnic_adapter *adapter,
 
if (adapter->reset_reason != VNIC_RESET_FAILOVER &&
adapter->reset_reason != VNIC_RESET_CHANGE_PARAM)
-   netdev_notify_peers(netdev);
+   call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, netdev);
 
netif_carrier_on(netdev);
 
-- 
2.19.1





[PATCH 4.19 055/100] workqueue: Try to catch flush_work() without INIT_WORK().

2019-04-30 Thread Greg Kroah-Hartman
From: Tetsuo Handa 

commit 4d43d395fed124631ca02356c711facb90185175 upstream.

syzbot found a flush_work() caller who forgot to call INIT_WORK()
because that work_struct was allocated by kzalloc() [1]. But the message

  INFO: trying to register non-static key.
  the code is fine but needs lockdep annotation.
  turning off the locking correctness validator.

by lock_map_acquire() is failing to tell that INIT_WORK() is missing.

Since flush_work() without INIT_WORK() is a bug, and INIT_WORK() should
set ->func field to non-zero, let's warn if ->func field is zero.

[1] 
https://syzkaller.appspot.com/bug?id=a5954455fcfa51c29ca2ab55b203076337e1c770

Signed-off-by: Tetsuo Handa 
Signed-off-by: Tejun Heo 
Signed-off-by: Greg Kroah-Hartman 

---
 kernel/workqueue.c |3 +++
 1 file changed, 3 insertions(+)

--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -2908,6 +2908,9 @@ static bool __flush_work(struct work_str
if (WARN_ON(!wq_online))
return false;
 
+   if (WARN_ON(!work->func))
+   return false;
+
if (!from_cancel) {
		lock_map_acquire(&work->lockdep_map);
		lock_map_release(&work->lockdep_map);




[PATCH 4.19 046/100] powerpc/mm/radix: Make Radix require HUGETLB_PAGE

2019-04-30 Thread Greg Kroah-Hartman
From: Michael Ellerman 

commit 8adddf349fda0d3de2f6bb41ddf838cbf36a8ad2 upstream.

Joel reported weird crashes using skiroot_defconfig, in his case we
jumped into an NX page:

  kernel tried to execute exec-protected page (c2bff4f0) - exploit 
attempt? (uid: 0)
  BUG: Unable to handle kernel instruction fetch
  Faulting instruction address: 0xc2bff4f0

Looking at the disassembly, we had simply branched to that address:

  c0c001bc  49fff335bl c2bff4f0

But that didn't match the original kernel image:

  c0c001bc  4bfff335bl c0bff4f0 

When STRICT_KERNEL_RWX is enabled, and we're using the radix MMU, we
call radix__change_memory_range() late in boot to change page
protections. We do that both to mark rodata read only and also to mark
init text no-execute. That involves walking the kernel page tables,
and clearing _PAGE_WRITE or _PAGE_EXEC respectively.

With radix we may use hugepages for the linear mapping, so the code in
radix__change_memory_range() uses eg. pmd_huge() to test if it has
found a huge mapping, and if so it stops the page table walk and
changes the PMD permissions.

However if the kernel is built without HUGETLBFS support, pmd_huge()
is just a #define that always returns 0. That causes the code in
radix__change_memory_range() to incorrectly interpret the PMD value as
a pointer to a PTE page rather than as a PTE at the PMD level.

We can see this using `dv` in xmon which also uses pmd_huge():

  0:mon> dv c000
  pgd  @ 0xc174
  pgdp @ 0xc174 = 0x8000b009
  pudp @ 0xc000b000 = 0x8000a009
  pmdp @ 0xc000a000 = 0xc18f   <- this is a PTE
  ptep @ 0xc100 = 0xa64bb17da64ab07d   <- kernel text

The end result is we treat the value at 0xc100 as a PTE
and clear _PAGE_WRITE or _PAGE_EXEC, potentially corrupting the code
at that address.

In Joel's specific case we cleared the sign bit in the offset of the
branch, causing a backward branch to turn into a forward branch which
caused us to branch into a non-executable page. However the exact
nature of the crash depends on kernel version, compiler version, and
other factors.

We need to fix radix__change_memory_range() to not use accessors that
depend on HUGETLBFS, but we also have radix memory hotplug code that
uses pmd_huge() etc that will also need fixing. So for now just
disallow the broken combination of Radix with HUGETLBFS disabled.

The only defconfig we have that is affected is skiroot_defconfig, so
turn on HUGETLBFS there so that it still gets Radix.

Fixes: 566ca99af026 ("powerpc/mm/radix: Add dummy radix_enabled()")
Cc: sta...@vger.kernel.org # v4.7+
Reported-by: Joel Stanley 
Signed-off-by: Michael Ellerman 
Signed-off-by: Greg Kroah-Hartman 

---
 arch/powerpc/configs/skiroot_defconfig |1 +
 arch/powerpc/platforms/Kconfig.cputype |2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

--- a/arch/powerpc/configs/skiroot_defconfig
+++ b/arch/powerpc/configs/skiroot_defconfig
@@ -195,6 +195,7 @@ CONFIG_UDF_FS=m
 CONFIG_MSDOS_FS=m
 CONFIG_VFAT_FS=m
 CONFIG_PROC_KCORE=y
+CONFIG_HUGETLBFS=y
 CONFIG_TMPFS=y
 CONFIG_TMPFS_POSIX_ACL=y
 # CONFIG_MISC_FILESYSTEMS is not set
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -330,7 +330,7 @@ config ARCH_ENABLE_SPLIT_PMD_PTLOCK
 
 config PPC_RADIX_MMU
bool "Radix MMU Support"
-   depends on PPC_BOOK3S_64
+   depends on PPC_BOOK3S_64 && HUGETLB_PAGE
select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA
default y
help




[PATCH 4.19 059/100] netfilter: ebtables: CONFIG_COMPAT: drop a bogus WARN_ON

2019-04-30 Thread Greg Kroah-Hartman
From: Florian Westphal 

commit 7caa56f006e9d712b44f27b32520c66420d5cbc6 upstream.

It means userspace gave us a ruleset where there is some other
data after the ebtables target but before the beginning of the next rule.

Fixes: 81e675c227ec ("netfilter: ebtables: add CONFIG_COMPAT support")
Reported-by: syzbot+659574e7bcc7f7eb4...@syzkaller.appspotmail.com
Signed-off-by: Florian Westphal 
Signed-off-by: Pablo Neira Ayuso 
Signed-off-by: Greg Kroah-Hartman 

---
 net/bridge/netfilter/ebtables.c |3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -2032,7 +2032,8 @@ static int ebt_size_mwt(struct compat_eb
if (match_kern)
match_kern->match_size = ret;
 
-   if (WARN_ON(type == EBT_COMPAT_TARGET && size_left))
+   /* rule should have no remaining data after target */
+   if (type == EBT_COMPAT_TARGET && size_left)
return -EINVAL;
 
match32 = (struct compat_ebt_entry_mwt *) buf;




[PATCH 4.19 060/100] fm10k: Fix a potential NULL pointer dereference

2019-04-30 Thread Greg Kroah-Hartman
From: Yue Haibing 

commit 01ca667133d019edc9f0a1f70a272447c84ec41f upstream.

Syzkaller report this:

kasan: GPF could be caused by NULL-ptr deref or user memory access
general protection fault:  [#1] SMP KASAN PTI
CPU: 0 PID: 4378 Comm: syz-executor.0 Tainted: G C5.0.0+ #5
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 
04/01/2014
RIP: 0010:__lock_acquire+0x95b/0x3200 kernel/locking/lockdep.c:3573
Code: 00 0f 85 28 1e 00 00 48 81 c4 08 01 00 00 5b 5d 41 5c 41 5d 41 5e 41 5f 
c3 4c 89 ea 48 b8 00 00 00 00 00 fc ff df 48 c1 ea 03 <80> 3c 02 00 0f 85 cc 24 
00 00 49 81 7d 00 e0 de 03 a6 41 bc 00 00
RSP: 0018:8881e3c07a40 EFLAGS: 00010002
RAX: dc00 RBX:  RCX: 
RDX: 0010 RSI:  RDI: 0080
RBP:  R08: 0001 R09: 
R10: 8881e3c07d98 R11: 8881c7f21f80 R12: 0001
R13: 0080 R14:  R15: 0001
FS:  7fce2252e700() GS:8881f240() knlGS:
CS:  0010 DS:  ES:  CR0: 80050033
CR2: 7fffc7eb0228 CR3: 0001e5bea002 CR4: 007606f0
DR0:  DR1:  DR2: 
DR3:  DR6: fffe0ff0 DR7: 0400
PKRU: 5554
Call Trace:
 lock_acquire+0xff/0x2c0 kernel/locking/lockdep.c:4211
 __mutex_lock_common kernel/locking/mutex.c:925 [inline]
 __mutex_lock+0xdf/0x1050 kernel/locking/mutex.c:1072
 drain_workqueue+0x24/0x3f0 kernel/workqueue.c:2934
 destroy_workqueue+0x23/0x630 kernel/workqueue.c:4319
 __do_sys_delete_module kernel/module.c:1018 [inline]
 __se_sys_delete_module kernel/module.c:961 [inline]
 __x64_sys_delete_module+0x30c/0x480 kernel/module.c:961
 do_syscall_64+0x9f/0x450 arch/x86/entry/common.c:290
 entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x462e99
Code: f7 d8 64 89 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 48 89 f8 48 89 f7 48 
89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 
c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48
RSP: 002b:7fce2252dc58 EFLAGS: 0246 ORIG_RAX: 00b0
RAX: ffda RBX: 0073bf00 RCX: 00462e99
RDX:  RSI:  RDI: 2140
RBP: 0002 R08:  R09: 
R10:  R11: 0246 R12: 7fce2252e6bc
R13: 004bcca9 R14: 006f6b48 R15: 

If alloc_workqueue fails, it should return -ENOMEM, otherwise may
trigger this NULL pointer dereference while unloading drivers.

Reported-by: Hulk Robot 
Fixes: 0a38c17a21a0 ("fm10k: Remove create_workqueue")
Signed-off-by: Yue Haibing 
Tested-by: Andrew Bowers 
Signed-off-by: Jeff Kirsher 
Signed-off-by: Greg Kroah-Hartman 

---
 drivers/net/ethernet/intel/fm10k/fm10k_main.c |2 ++
 1 file changed, 2 insertions(+)

--- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
@@ -41,6 +41,8 @@ static int __init fm10k_init_module(void
/* create driver workqueue */
fm10k_workqueue = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0,
  fm10k_driver_name);
+   if (!fm10k_workqueue)
+   return -ENOMEM;
 
fm10k_dbg_init();
 




[PATCH 4.19 061/100] tipc: check bearer name with right length in tipc_nl_compat_bearer_enable

2019-04-30 Thread Greg Kroah-Hartman
From: Xin Long 

commit 6f07e5f06c8712acc423485f657799fc8e11e56c upstream.

Syzbot reported the following crash:

BUG: KMSAN: uninit-value in memchr+0xce/0x110 lib/string.c:961
  memchr+0xce/0x110 lib/string.c:961
  string_is_valid net/tipc/netlink_compat.c:176 [inline]
  tipc_nl_compat_bearer_enable+0x2c4/0x910 net/tipc/netlink_compat.c:401
  __tipc_nl_compat_doit net/tipc/netlink_compat.c:321 [inline]
  tipc_nl_compat_doit+0x3aa/0xaf0 net/tipc/netlink_compat.c:354
  tipc_nl_compat_handle net/tipc/netlink_compat.c:1162 [inline]
  tipc_nl_compat_recv+0x1ae7/0x2750 net/tipc/netlink_compat.c:1265
  genl_family_rcv_msg net/netlink/genetlink.c:601 [inline]
  genl_rcv_msg+0x185f/0x1a60 net/netlink/genetlink.c:626
  netlink_rcv_skb+0x431/0x620 net/netlink/af_netlink.c:2477
  genl_rcv+0x63/0x80 net/netlink/genetlink.c:637
  netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline]
  netlink_unicast+0xf3e/0x1020 net/netlink/af_netlink.c:1336
  netlink_sendmsg+0x127f/0x1300 net/netlink/af_netlink.c:1917
  sock_sendmsg_nosec net/socket.c:622 [inline]
  sock_sendmsg net/socket.c:632 [inline]

Uninit was created at:
  __alloc_skb+0x309/0xa20 net/core/skbuff.c:208
  alloc_skb include/linux/skbuff.h:1012 [inline]
  netlink_alloc_large_skb net/netlink/af_netlink.c:1182 [inline]
  netlink_sendmsg+0xb82/0x1300 net/netlink/af_netlink.c:1892
  sock_sendmsg_nosec net/socket.c:622 [inline]
  sock_sendmsg net/socket.c:632 [inline]

It was triggered when the bearer name size < TIPC_MAX_BEARER_NAME,
it would check with a wrong len/TLV_GET_DATA_LEN(msg->req), which
also includes priority and disc_domain length.

This patch is to fix it by checking it with a right length:
'TLV_GET_DATA_LEN(msg->req) - offsetof(struct tipc_bearer_config, name)'.

Reported-by: syzbot+8b707430713eb46e1...@syzkaller.appspotmail.com
Signed-off-by: Xin Long 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 

---
 net/tipc/netlink_compat.c |7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -403,7 +403,12 @@ static int tipc_nl_compat_bearer_enable(
if (!bearer)
return -EMSGSIZE;
 
-   len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_BEARER_NAME);
+   len = TLV_GET_DATA_LEN(msg->req);
+   len -= offsetof(struct tipc_bearer_config, name);
+   if (len <= 0)
+   return -EINVAL;
+
+   len = min_t(int, len, TIPC_MAX_BEARER_NAME);
if (!string_is_valid(b->name, len))
return -EINVAL;
 




[PATCH 4.19 062/100] tipc: check link name with right length in tipc_nl_compat_link_set

2019-04-30 Thread Greg Kroah-Hartman
From: Xin Long 

commit 8c63bf9ab4be8b83bd8c34aacfd2f1d2c8901c8a upstream.

A similar issue as fixed by Patch "tipc: check bearer name with right
length in tipc_nl_compat_bearer_enable" was also found by syzbot in
tipc_nl_compat_link_set().

The length to check with should be 'TLV_GET_DATA_LEN(msg->req) -
offsetof(struct tipc_link_config, name)'.

Reported-by: syzbot+de00a87b8644a582a...@syzkaller.appspotmail.com
Signed-off-by: Xin Long 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 

---
 net/tipc/netlink_compat.c |7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -777,7 +777,12 @@ static int tipc_nl_compat_link_set(struc
 
lc = (struct tipc_link_config *)TLV_DATA(msg->req);
 
-   len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_LINK_NAME);
+   len = TLV_GET_DATA_LEN(msg->req);
+   len -= offsetof(struct tipc_link_config, name);
+   if (len <= 0)
+   return -EINVAL;
+
+   len = min_t(int, len, TIPC_MAX_LINK_NAME);
if (!string_is_valid(lc->name, len))
return -EINVAL;
 




Re: [PATCH 2/5] irqchip/renesas-irqc: Remove devm_kzalloc()/ioremap_nocache() error printing

2019-04-30 Thread Geert Uytterhoeven
Hi Sergei,

On Tue, Apr 30, 2019 at 10:12 AM Sergei Shtylyov
 wrote:
> On 29.04.2019 18:20, Geert Uytterhoeven wrote:
> > There is no need to print a message if devm_kzalloc() or
>
> Just kzalloc() in this case.

Thanks, silly copy-and-paste error.

> > --- a/drivers/irqchip/irq-renesas-irqc.c
> > +++ b/drivers/irqchip/irq-renesas-irqc.c
> > @@ -173,7 +172,6 @@ static int irqc_probe(struct platform_device *pdev)
> >   /* ioremap IOMEM and setup read/write callbacks */
> >   p->iomem = ioremap_nocache(io->start, resource_size(io));
> >   if (!p->iomem) {
> > - dev_err(>dev, "failed to remap IOMEM\n");
> >   ret = -ENXIO;
>
> -ENOMEM?

I'd say yes, except that this error code is removed in patch [5/5], so I don't
think it's worth changing it in this patch.

Gr{oetje,eeting}s,

Geert


--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds


[PATCH 4.19 070/100] aio: use iocb_put() instead of open coding it

2019-04-30 Thread Greg Kroah-Hartman
From: Jens Axboe 

commit 71ebc6fef0f53459f37fb39e1466792232fa52ee upstream.

Replace the percpu_ref_put() + kmem_cache_free() with a call to
iocb_put() instead.

Reviewed-by: Christoph Hellwig 
Signed-off-by: Jens Axboe 
Cc: Guenter Roeck 
Signed-off-by: Greg Kroah-Hartman 

---
 fs/aio.c |3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1886,10 +1886,9 @@ static int io_submit_one(struct kioctx *
goto out_put_req;
return 0;
 out_put_req:
-	percpu_ref_put(&ctx->reqs);
if (req->ki_eventfd)
eventfd_ctx_put(req->ki_eventfd);
-   kmem_cache_free(kiocb_cachep, req);
+   iocb_put(req);
 out_put_reqs_available:
put_reqs_available(ctx, 1);
return ret;




[PATCH 4.19 065/100] rxrpc: fix race condition in rxrpc_input_packet()

2019-04-30 Thread Greg Kroah-Hartman
From: Eric Dumazet 

commit 032be5f19a94de51093851757089133dcc1e92aa upstream.

After commit 5271953cad31 ("rxrpc: Use the UDP encap_rcv hook"),
rxrpc_input_packet() is directly called from lockless UDP receive
path, under rcu_read_lock() protection.

It must therefore use RCU rules :

- udp_sk->sk_user_data can be cleared at any point in this function.
  rcu_dereference_sk_user_data() is what we need here.

- Also, since sk_user_data might have been set in rxrpc_open_socket()
  we must observe a proper RCU grace period before kfree(local) in
  rxrpc_lookup_local()

v4: @local can be NULL in xrpc_lookup_local() as reported by kbuild test robot 

and Julia Lawall , thanks !

v3,v2 : addressed David Howells feedback, thanks !

syzbot reported :

kasan: CONFIG_KASAN_INLINE enabled
kasan: GPF could be caused by NULL-ptr deref or user memory access
general protection fault:  [#1] PREEMPT SMP KASAN
CPU: 0 PID: 19236 Comm: syz-executor703 Not tainted 5.1.0-rc6 #79
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 
01/01/2011
RIP: 0010:__lock_acquire+0xbef/0x3fb0 kernel/locking/lockdep.c:3573
Code: 00 0f 85 a5 1f 00 00 48 81 c4 10 01 00 00 5b 41 5c 41 5d 41 5e 41 5f 5d 
c3 48 b8 00 00 00 00 00 fc ff df 4c 89 ea 48 c1 ea 03 <80> 3c 02 00 0f 85 4a 21 
00 00 49 81 7d 00 20 54 9c 89 0f 84 cf f4
RSP: 0018:88809d7aef58 EFLAGS: 00010002
RAX: dc00 RBX:  RCX: 
RDX: 0026 RSI:  RDI: 0001
RBP: 88809d7af090 R08: 0001 R09: 0001
R10: ed1015d05bc7 R11: 888089428600 R12: 
R13: 0130 R14: 0001 R15: 0001
FS:  7f059044d700() GS:8880ae80() knlGS:
CS:  0010 DS:  ES:  CR0: 80050033
CR2: 004b6040 CR3: 955ca000 CR4: 001406f0
Call Trace:
 lock_acquire+0x16f/0x3f0 kernel/locking/lockdep.c:4211
 __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline]
 _raw_spin_lock_irqsave+0x95/0xcd kernel/locking/spinlock.c:152
 skb_queue_tail+0x26/0x150 net/core/skbuff.c:2972
 rxrpc_reject_packet net/rxrpc/input.c:1126 [inline]
 rxrpc_input_packet+0x4a0/0x5536 net/rxrpc/input.c:1414
 udp_queue_rcv_one_skb+0xaf2/0x1780 net/ipv4/udp.c:2011
 udp_queue_rcv_skb+0x128/0x730 net/ipv4/udp.c:2085
 udp_unicast_rcv_skb.isra.0+0xb9/0x360 net/ipv4/udp.c:2245
 __udp4_lib_rcv+0x701/0x2ca0 net/ipv4/udp.c:2301
 udp_rcv+0x22/0x30 net/ipv4/udp.c:2482
 ip_protocol_deliver_rcu+0x60/0x8f0 net/ipv4/ip_input.c:208
 ip_local_deliver_finish+0x23b/0x390 net/ipv4/ip_input.c:234
 NF_HOOK include/linux/netfilter.h:289 [inline]
 NF_HOOK include/linux/netfilter.h:283 [inline]
 ip_local_deliver+0x1e9/0x520 net/ipv4/ip_input.c:255
 dst_input include/net/dst.h:450 [inline]
 ip_rcv_finish+0x1e1/0x300 net/ipv4/ip_input.c:413
 NF_HOOK include/linux/netfilter.h:289 [inline]
 NF_HOOK include/linux/netfilter.h:283 [inline]
 ip_rcv+0xe8/0x3f0 net/ipv4/ip_input.c:523
 __netif_receive_skb_one_core+0x115/0x1a0 net/core/dev.c:4987
 __netif_receive_skb+0x2c/0x1c0 net/core/dev.c:5099
 netif_receive_skb_internal+0x117/0x660 net/core/dev.c:5202
 napi_frags_finish net/core/dev.c:5769 [inline]
 napi_gro_frags+0xade/0xd10 net/core/dev.c:5843
 tun_get_user+0x2f24/0x3fb0 drivers/net/tun.c:1981
 tun_chr_write_iter+0xbd/0x156 drivers/net/tun.c:2027
 call_write_iter include/linux/fs.h:1866 [inline]
 do_iter_readv_writev+0x5e1/0x8e0 fs/read_write.c:681
 do_iter_write fs/read_write.c:957 [inline]
 do_iter_write+0x184/0x610 fs/read_write.c:938
 vfs_writev+0x1b3/0x2f0 fs/read_write.c:1002
 do_writev+0x15e/0x370 fs/read_write.c:1037
 __do_sys_writev fs/read_write.c:1110 [inline]
 __se_sys_writev fs/read_write.c:1107 [inline]
 __x64_sys_writev+0x75/0xb0 fs/read_write.c:1107
 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290
 entry_SYSCALL_64_after_hwframe+0x49/0xbe

Fixes: 5271953cad31 ("rxrpc: Use the UDP encap_rcv hook")
Signed-off-by: Eric Dumazet 
Reported-by: syzbot 
Acked-by: David Howells 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 

---
 net/rxrpc/input.c|   12 
 net/rxrpc/local_object.c |3 ++-
 2 files changed, 10 insertions(+), 5 deletions(-)

--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -1155,19 +1155,19 @@ int rxrpc_extract_header(struct rxrpc_sk
  * handle data received on the local endpoint
  * - may be called in interrupt context
  *
- * The socket is locked by the caller and this prevents the socket from being
- * shut down and the local endpoint from going away, thus sk_user_data will not
- * be cleared until this function returns.
+ * [!] Note that as this is called from the encap_rcv hook, the socket is not
+ * held locked by the caller and nothing prevents sk_user_data on the UDP from
+ * being cleared in the middle of processing this function.
  *
  * Called with the RCU read lock held from the IP layer via UDP.
  */
 int 

[PATCH 4.19 067/100] aio: use assigned completion handler

2019-04-30 Thread Greg Kroah-Hartman
From: Jens Axboe 

commit bc9bff61624ac33b7c95861abea1af24ee7a94fc upstream.

We know this is a read/write request, but in preparation for
having different kinds of those, ensure that we call the assigned
handler instead of assuming it's aio_complete_rq().

Reviewed-by: Christoph Hellwig 
Signed-off-by: Jens Axboe 
Cc: Guenter Roeck 
Signed-off-by: Greg Kroah-Hartman 

---
 fs/aio.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1492,7 +1492,7 @@ static inline void aio_rw_done(struct ki
ret = -EINTR;
/*FALLTHRU*/
default:
-   aio_complete_rw(req, ret, 0);
+   req->ki_complete(req, ret, 0);
}
 }
 




[PATCH 4.19 072/100] aio: abstract out io_event filler helper

2019-04-30 Thread Greg Kroah-Hartman
From: Jens Axboe 

commit 875736bb3f3ded168469f6a14df7a938416a99d5 upstream.

Reviewed-by: Christoph Hellwig 
Signed-off-by: Jens Axboe 
Cc: Guenter Roeck 
Signed-off-by: Greg Kroah-Hartman 

---
 fs/aio.c |   14 ++
 1 file changed, 10 insertions(+), 4 deletions(-)

--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1059,6 +1059,15 @@ static inline void iocb_put(struct aio_k
}
 }
 
+static void aio_fill_event(struct io_event *ev, struct aio_kiocb *iocb,
+  long res, long res2)
+{
+   ev->obj = (u64)(unsigned long)iocb->ki_user_iocb;
+   ev->data = iocb->ki_user_data;
+   ev->res = res;
+   ev->res2 = res2;
+}
+
 /* aio_complete
  * Called when the io request on the given iocb is complete.
  */
@@ -1086,10 +1095,7 @@ static void aio_complete(struct aio_kioc
ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
event = ev_page + pos % AIO_EVENTS_PER_PAGE;
 
-   event->obj = (u64)(unsigned long)iocb->ki_user_iocb;
-   event->data = iocb->ki_user_data;
-   event->res = res;
-   event->res2 = res2;
+   aio_fill_event(event, iocb, res, res2);
 
kunmap_atomic(ev_page);
flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);




[PATCH 4.19 074/100] aio: simplify - and fix - fget/fput for io_submit()

2019-04-30 Thread Greg Kroah-Hartman
From: Linus Torvalds 

commit 84c4e1f89fefe70554da0ab33be72c9be7994379 upstream.

Al Viro root-caused a race where the IOCB_CMD_POLL handling of
fget/fput() could cause us to access the file pointer after it had
already been freed:

 "In more details - normally IOCB_CMD_POLL handling looks so:

   1) io_submit(2) allocates aio_kiocb instance and passes it to
  aio_poll()

   2) aio_poll() resolves the descriptor to struct file by req->file =
  fget(iocb->aio_fildes)

   3) aio_poll() sets ->woken to false and raises ->ki_refcnt of that
  aio_kiocb to 2 (bumps by 1, that is).

   4) aio_poll() calls vfs_poll(). After sanity checks (basically,
  "poll_wait() had been called and only once") it locks the queue.
  That's what the extra reference to iocb had been for - we know we
  can safely access it.

   5) With queue locked, we check if ->woken has already been set to
  true (by aio_poll_wake()) and, if it had been, we unlock the
  queue, drop a reference to aio_kiocb and bugger off - at that
  point it's a responsibility to aio_poll_wake() and the stuff
  called/scheduled by it. That code will drop the reference to file
  in req->file, along with the other reference to our aio_kiocb.

   6) otherwise, we see whether we need to wait. If we do, we unlock the
  queue, drop one reference to aio_kiocb and go away - eventual
  wakeup (or cancel) will deal with the reference to file and with
  the other reference to aio_kiocb

   7) otherwise we remove ourselves from waitqueue (still under the
  queue lock), so that wakeup won't get us. No async activity will
  be happening, so we can safely drop req->file and iocb ourselves.

  If wakeup happens while we are in vfs_poll(), we are fine - aio_kiocb
  won't get freed under us, so we can do all the checks and locking
  safely. And we don't touch ->file if we detect that case.

  However, vfs_poll() most certainly *does* touch the file it had been
  given. So wakeup coming while we are still in ->poll() might end up
  doing fput() on that file. That case is not too rare, and usually we
  are saved by the still present reference from descriptor table - that
  fput() is not the final one.

  But if another thread closes that descriptor right after our fget()
  and wakeup does happen before ->poll() returns, we are in trouble -
  final fput() done while we are in the middle of a method:

Al also wrote a patch to take an extra reference to the file descriptor
to fix this, but I instead suggested we just streamline the whole file
pointer handling by submit_io() so that the generic aio submission code
simply keeps the file pointer around until the aio has completed.

Fixes: bfe4037e722e ("aio: implement IOCB_CMD_POLL")
Acked-by: Al Viro 
Reported-by: syzbot+503d4cc169fcec1cb...@syzkaller.appspotmail.com
Signed-off-by: Linus Torvalds 
Cc: Guenter Roeck 
Signed-off-by: Greg Kroah-Hartman 

---
 fs/aio.c   |   72 +
 include/linux/fs.h |8 +
 2 files changed, 36 insertions(+), 44 deletions(-)

--- a/fs/aio.c
+++ b/fs/aio.c
@@ -161,9 +161,13 @@ struct kioctx {
unsignedid;
 };
 
+/*
+ * First field must be the file pointer in all the
+ * iocb unions! See also 'struct kiocb' in 
+ */
 struct fsync_iocb {
-   struct work_struct  work;
struct file *file;
+   struct work_struct  work;
booldatasync;
 };
 
@@ -177,8 +181,15 @@ struct poll_iocb {
struct work_struct  work;
 };
 
+/*
+ * NOTE! Each of the iocb union members has the file pointer
+ * as the first entry in their struct definition. So you can
+ * access the file pointer through any of the sub-structs,
+ * or directly as just 'ki_filp' in this struct.
+ */
 struct aio_kiocb {
union {
+   struct file *ki_filp;
struct kiocbrw;
struct fsync_iocb   fsync;
struct poll_iocbpoll;
@@ -1054,6 +1065,8 @@ static inline void iocb_put(struct aio_k
 {
if (refcount_read(>ki_refcnt) == 0 ||
refcount_dec_and_test(>ki_refcnt)) {
+   if (iocb->ki_filp)
+   fput(iocb->ki_filp);
percpu_ref_put(>ki_ctx->reqs);
kmem_cache_free(kiocb_cachep, iocb);
}
@@ -1418,7 +1431,6 @@ static void aio_complete_rw(struct kiocb
file_end_write(kiocb->ki_filp);
}
 
-   fput(kiocb->ki_filp);
aio_complete(iocb, res, res2);
 }
 
@@ -1426,9 +1438,6 @@ static int aio_prep_rw(struct kiocb *req
 {
int ret;
 
-   req->ki_filp = fget(iocb->aio_fildes);
-   if (unlikely(!req->ki_filp))
-   return -EBADF;
req->ki_complete = aio_complete_rw;
req->private = NULL;
req->ki_pos = iocb->aio_offset;
@@ -1445,7 +1454,7 @@ static int aio_prep_rw(struct kiocb *req

[PATCH 4.19 079/100] Fix aio_poll() races

2019-04-30 Thread Greg Kroah-Hartman
From: Al Viro 

commit af5c72b1fc7a00aa484e90b0c4e0eeb582545634 upstream.

aio_poll() has to cope with several unpleasant problems:
* requests that might stay around indefinitely need to
be made visible for io_cancel(2); that must not be done to
a request already completed, though.
* in cases when ->poll() has placed us on a waitqueue,
wakeup might have happened (and request completed) before ->poll()
returns.
* worse, in some early wakeup cases request might end
up re-added into the queue later - we can't treat "woken up and
currently not in the queue" as "it's not going to stick around
indefinitely"
* ... moreover, ->poll() might have decided not to
put it on any queues to start with, and that needs to be distinguished
from the previous case
* ->poll() might have tried to put us on more than one queue.
Only the first will succeed for aio poll, so we might end up missing
wakeups.  OTOH, we might very well notice that only after the
wakeup hits and request gets completed (all before ->poll() gets
around to the second poll_wait()).  In that case it's too late to
decide that we have an error.

req->woken was an attempt to deal with that.  Unfortunately, it was
broken.  What we need to keep track of is not that wakeup has happened -
the thing might come back after that.  It's that async reference is
already gone and won't come back, so we can't (and needn't) put the
request on the list of cancellables.

The easiest case is "request hadn't been put on any waitqueues"; we
can tell by seeing NULL apt.head, and in that case there won't be
anything async.  We should either complete the request ourselves
(if vfs_poll() reports anything of interest) or return an error.

In all other cases we get exclusion with wakeups by grabbing the
queue lock.

If request is currently on queue and we have something interesting
from vfs_poll(), we can steal it and complete the request ourselves.

If it's on queue and vfs_poll() has not reported anything interesting,
we either put it on the cancellable list, or, if we know that it
hadn't been put on all queues ->poll() wanted it on, we steal it and
return an error.

If it's _not_ on queue, it's either been already dealt with (in which
case we do nothing), or there's aio_poll_complete_work() about to be
executed.  In that case we either put it on the cancellable list,
or, if we know it hadn't been put on all queues ->poll() wanted it on,
simulate what cancel would've done.

It's a lot more convoluted than I'd like it to be.  Single-consumer APIs
suck, and unfortunately aio is not an exception...

Signed-off-by: Al Viro 
Cc: Guenter Roeck 
Signed-off-by: Greg Kroah-Hartman 

---
 fs/aio.c |   90 ---
 1 file changed, 40 insertions(+), 50 deletions(-)

--- a/fs/aio.c
+++ b/fs/aio.c
@@ -175,7 +175,7 @@ struct poll_iocb {
struct file *file;
struct wait_queue_head  *head;
__poll_tevents;
-   boolwoken;
+   booldone;
boolcancelled;
struct wait_queue_entry wait;
struct work_struct  work;
@@ -1600,12 +1600,6 @@ static int aio_fsync(struct fsync_iocb *
return 0;
 }
 
-static inline void aio_poll_complete(struct aio_kiocb *iocb, __poll_t mask)
-{
-   iocb->ki_res.res = mangle_poll(mask);
-   iocb_put(iocb);
-}
-
 static void aio_poll_complete_work(struct work_struct *work)
 {
struct poll_iocb *req = container_of(work, struct poll_iocb, work);
@@ -1631,9 +1625,11 @@ static void aio_poll_complete_work(struc
return;
}
list_del_init(>ki_list);
+   iocb->ki_res.res = mangle_poll(mask);
+   req->done = true;
spin_unlock_irq(>ctx_lock);
 
-   aio_poll_complete(iocb, mask);
+   iocb_put(iocb);
 }
 
 /* assumes we are called with irqs disabled */
@@ -1661,31 +1657,27 @@ static int aio_poll_wake(struct wait_que
__poll_t mask = key_to_poll(key);
unsigned long flags;
 
-   req->woken = true;
-
/* for instances that support it check for an event match first: */
-   if (mask) {
-   if (!(mask & req->events))
-   return 0;
+   if (mask && !(mask & req->events))
+   return 0;
 
+   list_del_init(>wait.entry);
+
+   if (mask && spin_trylock_irqsave(>ki_ctx->ctx_lock, flags)) {
/*
 * Try to complete the iocb inline if we can. Use
 * irqsave/irqrestore because not all filesystems (e.g. fuse)
 * call this function with IRQs disabled and because IRQs
 * have to be disabled before ctx_lock is obtained.
 */
-   if (spin_trylock_irqsave(>ki_ctx->ctx_lock, flags)) {
-   list_del(>ki_list);
-   spin_unlock_irqrestore(>ki_ctx->ctx_lock, flags);
-
-

[PATCH 4.19 085/100] ipv4: set the tcp_min_rtt_wlen range from 0 to one day

2019-04-30 Thread Greg Kroah-Hartman
From: ZhangXiaoxu 

[ Upstream commit 19fad20d15a6494f47f85d869f00b11343ee5c78 ]

There is a UBSAN report as below:
UBSAN: Undefined behaviour in net/ipv4/tcp_input.c:2877:56
signed integer overflow:
2147483647 * 1000 cannot be represented in type 'int'
CPU: 3 PID: 0 Comm: swapper/3 Not tainted 5.1.0-rc4-00058-g582549e #1
Call Trace:
 
 dump_stack+0x8c/0xba
 ubsan_epilogue+0x11/0x60
 handle_overflow+0x12d/0x170
 ? ttwu_do_wakeup+0x21/0x320
 __ubsan_handle_mul_overflow+0x12/0x20
 tcp_ack_update_rtt+0x76c/0x780
 tcp_clean_rtx_queue+0x499/0x14d0
 tcp_ack+0x69e/0x1240
 ? __wake_up_sync_key+0x2c/0x50
 ? update_group_capacity+0x50/0x680
 tcp_rcv_established+0x4e2/0xe10
 tcp_v4_do_rcv+0x22b/0x420
 tcp_v4_rcv+0xfe8/0x1190
 ip_protocol_deliver_rcu+0x36/0x180
 ip_local_deliver+0x15b/0x1a0
 ip_rcv+0xac/0xd0
 __netif_receive_skb_one_core+0x7f/0xb0
 __netif_receive_skb+0x33/0xc0
 netif_receive_skb_internal+0x84/0x1c0
 napi_gro_receive+0x2a0/0x300
 receive_buf+0x3d4/0x2350
 ? detach_buf_split+0x159/0x390
 virtnet_poll+0x198/0x840
 ? reweight_entity+0x243/0x4b0
 net_rx_action+0x25c/0x770
 __do_softirq+0x19b/0x66d
 irq_exit+0x1eb/0x230
 do_IRQ+0x7a/0x150
 common_interrupt+0xf/0xf
 

It can be reproduced by:
  echo 2147483647 > /proc/sys/net/ipv4/tcp_min_rtt_wlen

Fixes: f672258391b42 ("tcp: track min RTT using windowed min-filter")
Signed-off-by: ZhangXiaoxu 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 
---
 Documentation/networking/ip-sysctl.txt |1 +
 net/ipv4/sysctl_net_ipv4.c |5 -
 2 files changed, 5 insertions(+), 1 deletion(-)

--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -410,6 +410,7 @@ tcp_min_rtt_wlen - INTEGER
minimum RTT when it is moved to a longer path (e.g., due to traffic
engineering). A longer window makes the filter more resistant to RTT
inflations such as transient congestion. The unit is seconds.
+   Possible values: 0 - 86400 (1 day)
Default: 300
 
 tcp_moderate_rcvbuf - BOOLEAN
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -49,6 +49,7 @@ static int ip_ping_group_range_min[] = {
 static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
 static int comp_sack_nr_max = 255;
 static u32 u32_max_div_HZ = UINT_MAX / HZ;
+static int one_day_secs = 24 * 3600;
 
 /* obsolete */
 static int sysctl_tcp_low_latency __read_mostly;
@@ -1140,7 +1141,9 @@ static struct ctl_table ipv4_net_table[]
.data   = _net.ipv4.sysctl_tcp_min_rtt_wlen,
.maxlen = sizeof(int),
.mode   = 0644,
-   .proc_handler   = proc_dointvec
+   .proc_handler   = proc_dointvec_minmax,
+   .extra1 = ,
+   .extra2 = _day_secs
},
{
.procname   = "tcp_autocorking",




[PATCH 4.19 077/100] aio: keep io_event in aio_kiocb

2019-04-30 Thread Greg Kroah-Hartman
From: Al Viro 

commit a9339b7855094ba11a97e8822ae038135e879e79 upstream.

We want to separate forming the resulting io_event from putting it
into the ring buffer.

Signed-off-by: Al Viro 
Cc: Guenter Roeck 
Signed-off-by: Greg Kroah-Hartman 

---
 fs/aio.c |   31 +--
 1 file changed, 13 insertions(+), 18 deletions(-)

--- a/fs/aio.c
+++ b/fs/aio.c
@@ -198,8 +198,7 @@ struct aio_kiocb {
struct kioctx   *ki_ctx;
kiocb_cancel_fn *ki_cancel;
 
-   struct iocb __user  *ki_user_iocb;  /* user's aiocb */
-   __u64   ki_user_data;   /* user's data for completion */
+   struct io_event ki_res;
 
struct list_headki_list;/* the aio core uses this
 * for cancellation */
@@ -1078,15 +1077,6 @@ static inline void iocb_put(struct aio_k
iocb_destroy(iocb);
 }
 
-static void aio_fill_event(struct io_event *ev, struct aio_kiocb *iocb,
-  long res, long res2)
-{
-   ev->obj = (u64)(unsigned long)iocb->ki_user_iocb;
-   ev->data = iocb->ki_user_data;
-   ev->res = res;
-   ev->res2 = res2;
-}
-
 /* aio_complete
  * Called when the io request on the given iocb is complete.
  */
@@ -1098,6 +1088,8 @@ static void aio_complete(struct aio_kioc
unsigned tail, pos, head;
unsigned long   flags;
 
+   iocb->ki_res.res = res;
+   iocb->ki_res.res2 = res2;
/*
 * Add a completion event to the ring buffer. Must be done holding
 * ctx->completion_lock to prevent other code from messing with the tail
@@ -1114,14 +1106,14 @@ static void aio_complete(struct aio_kioc
ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
event = ev_page + pos % AIO_EVENTS_PER_PAGE;
 
-   aio_fill_event(event, iocb, res, res2);
+   *event = iocb->ki_res;
 
kunmap_atomic(ev_page);
flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
 
-   pr_debug("%p[%u]: %p: %p %Lx %lx %lx\n",
-ctx, tail, iocb, iocb->ki_user_iocb, iocb->ki_user_data,
-res, res2);
+   pr_debug("%p[%u]: %p: %p %Lx %Lx %Lx\n", ctx, tail, iocb,
+(void __user *)(unsigned long)iocb->ki_res.obj,
+iocb->ki_res.data, iocb->ki_res.res, iocb->ki_res.res2);
 
/* after flagging the request as done, we
 * must never even look at it again
@@ -1838,8 +1830,10 @@ static int __io_submit_one(struct kioctx
goto out_put_req;
}
 
-   req->ki_user_iocb = user_iocb;
-   req->ki_user_data = iocb->aio_data;
+   req->ki_res.obj = (u64)(unsigned long)user_iocb;
+   req->ki_res.data = iocb->aio_data;
+   req->ki_res.res = 0;
+   req->ki_res.res2 = 0;
 
switch (iocb->aio_lio_opcode) {
case IOCB_CMD_PREAD:
@@ -2009,6 +2003,7 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t
struct aio_kiocb *kiocb;
int ret = -EINVAL;
u32 key;
+   u64 obj = (u64)(unsigned long)iocb;
 
if (unlikely(get_user(key, >aio_key)))
return -EFAULT;
@@ -2022,7 +2017,7 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t
spin_lock_irq(>ctx_lock);
/* TODO: use a hash or array, this sucks. */
list_for_each_entry(kiocb, >active_reqs, ki_list) {
-   if (kiocb->ki_user_iocb == iocb) {
+   if (kiocb->ki_res.obj == obj) {
ret = kiocb->ki_cancel(>rw);
list_del_init(>ki_list);
break;




[PATCH 4.19 082/100] mm: Fix warning in insert_pfn()

2019-04-30 Thread Greg Kroah-Hartman
From: Jan Kara 

commit f2c57d91b0d96aa13ccff4e3b178038f17b00658 upstream.

In DAX mode a write pagefault can race with write(2) in the following
way:

CPU0CPU1
write fault for mapped zero page (hole)
dax_iomap_rw()
  iomap_apply()
xfs_file_iomap_begin()
  - allocates blocks
dax_iomap_actor()
  invalidate_inode_pages2_range()
- invalidates radix tree entries in given range
dax_iomap_pte_fault()
  grab_mapping_entry()
- no entry found, creates empty
  ...
  xfs_file_iomap_begin()
- finds already allocated block
  ...
  vmf_insert_mixed_mkwrite()
- WARNs and does nothing because there
  is still zero page mapped in PTE
unmap_mapping_pages()

This race results in WARN_ON from insert_pfn() and is occasionally
triggered by fstest generic/344. Note that the race is otherwise
harmless as before write(2) on CPU0 is finished, we will invalidate page
tables properly and thus user of mmap will see modified data from
write(2) from that point on. So just restrict the warning only to the
case when the PFN in PTE is not zero page.

Link: http://lkml.kernel.org/r/20180824154542.26872-1-j...@suse.cz
Signed-off-by: Jan Kara 
Reviewed-by: Andrew Morton 
Cc: Ross Zwisler 
Cc: Dan Williams 
Cc: Dave Jiang 
Signed-off-by: Andrew Morton 
Signed-off-by: Linus Torvalds 
Signed-off-by: Greg Kroah-Hartman 

---
 mm/memory.c |9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1787,10 +1787,15 @@ static int insert_pfn(struct vm_area_str
 * in may not match the PFN we have mapped if the
 * mapped PFN is a writeable COW page.  In the mkwrite
 * case we are creating a writable PTE for a shared
-* mapping and we expect the PFNs to match.
+* mapping and we expect the PFNs to match. If they
+* don't match, we are likely racing with block
+* allocation and mapping invalidation so just skip the
+* update.
 */
-   if (WARN_ON_ONCE(pte_pfn(*pte) != pfn_t_to_pfn(pfn)))
+   if (pte_pfn(*pte) != pfn_t_to_pfn(pfn)) {
+   WARN_ON_ONCE(!is_zero_pfn(pte_pfn(*pte)));
goto out_unlock;
+   }
entry = *pte;
goto out_mkwrite;
} else




[PATCH 4.19 081/100] x86/retpolines: Disable switch jump tables when retpolines are enabled

2019-04-30 Thread Greg Kroah-Hartman
From: Daniel Borkmann 

commit a9d57ef15cbe327fe54416dd194ee0ea66ae53a4 upstream.

Commit ce02ef06fcf7 ("x86, retpolines: Raise limit for generating indirect
calls from switch-case") raised the limit under retpolines to 20 switch
cases where gcc would only then start to emit jump tables, and therefore
effectively disabling the emission of slow indirect calls in this area.

After this has been brought to attention to gcc folks [0], Martin Liska
has then fixed gcc to align with clang by avoiding to generate switch jump
tables entirely under retpolines. This is taking effect in gcc starting
from stable version 8.4.0. Given kernel supports compilation with older
versions of gcc where the fix is not being available or backported anymore,
we need to keep the extra KBUILD_CFLAGS around for some time and generally
set the -fno-jump-tables to align with what more recent gcc is doing
automatically today.

More than 20 switch cases are not expected to be fast-path critical, but
it would still be good to align with gcc behavior for versions < 8.4.0 in
order to have consistency across supported gcc versions. vmlinux size is
slightly growing by 0.27% for older gcc. This flag is only set to work
around affected gcc, no change for clang.

  [0] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=86952

Suggested-by: Martin Liska 
Signed-off-by: Daniel Borkmann 
Signed-off-by: Thomas Gleixner 
Cc: David Woodhouse 
Cc: Linus Torvalds 
Cc: Jesper Dangaard Brouer 
Cc: Björn Töpel
Cc: Magnus Karlsson 
Cc: Alexei Starovoitov 
Cc: H.J. Lu 
Cc: Alexei Starovoitov 
Cc: David S. Miller 
Link: https://lkml.kernel.org/r/20190325135620.14882-1-dan...@iogearbox.net
Signed-off-by: Greg Kroah-Hartman 

---
 arch/x86/Makefile |8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -227,8 +227,12 @@ ifdef CONFIG_RETPOLINE
   # Additionally, avoid generating expensive indirect jumps which
   # are subject to retpolines for small number of switch cases.
   # clang turns off jump table generation by default when under
-  # retpoline builds, however, gcc does not for x86.
-  KBUILD_CFLAGS += $(call cc-option,--param=case-values-threshold=20)
+  # retpoline builds, however, gcc does not for x86. This has
+  # only been fixed starting from gcc stable version 8.4.0 and
+  # onwards, but not for older ones. See gcc bug #86952.
+  ifndef CONFIG_CC_IS_CLANG
+KBUILD_CFLAGS += $(call cc-option,-fno-jump-tables)
+  endif
 endif
 
 archscripts: scripts_basic




Re: [PATCH 2/4] rtc: digicolor: set range

2019-04-30 Thread Alexandre Belloni
On 30/04/2019 14:36:24+0300, Baruch Siach wrote:
> Hi Alexandre,
> 
> On Tue, Apr 30 2019, Alexandre Belloni wrote:
> 
> > While the range of REFERENCE + TIME is actually 33 bits, the counter
> > itself (TIME) is a 32-bits seconds counter.
> >
> > Signed-off-by: Alexandre Belloni 
> > ---
> >  drivers/rtc/rtc-digicolor.c | 1 +
> >  1 file changed, 1 insertion(+)
> >
> > diff --git a/drivers/rtc/rtc-digicolor.c b/drivers/rtc/rtc-digicolor.c
> > index 5bb14c56bc9a..e6e16aaac254 100644
> > --- a/drivers/rtc/rtc-digicolor.c
> > +++ b/drivers/rtc/rtc-digicolor.c
> > @@ -206,6 +206,7 @@ static int __init dc_rtc_probe(struct platform_device 
> > *pdev)
> > platform_set_drvdata(pdev, rtc);
> >  
> > rtc->rtc_dev->ops = _rtc_ops;
> > +   rtc->rtc_dev->range_max = U32_MAX;
> 
> Where can I find documentation on the meaning and usage of the range_max
> value? I could not find anything in the kernel source.
> 

It should be set to the maximum UNIX timestamp the RTC can be set to
while keeping range_min to range_max contiguous.

In the digicolor case, you could go up to 8589934590 (Wed Mar 16
12:56:30 UTC 2242) but the driver only writes DC_RTC_REFERENCE and I'm
not sure it can also update DC_RTC_TIME safely.

-- 
Alexandre Belloni, Bootlin
Embedded Linux and Kernel engineering
https://bootlin.com


[PATCH 4.19 048/100] Revert "drm/i915/fbdev: Actually configure untiled displays"

2019-04-30 Thread Greg Kroah-Hartman
From: Dave Airlie 

commit 9fa246256e09dc30820524401cdbeeaadee94025 upstream.

This reverts commit d179b88deb3bf6fed4991a31fd6f0f2cad21fab5.

This commit is documented to break userspace X.org modesetting driver in 
certain configurations.

The X.org modesetting userspace driver is broken. No fixes are available yet. 
In order for this patch to be applied it either needs a config option or a 
workaround developed.

This has been reported a few times, saying it's a userspace problem is clearly 
against the regression rules.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109806
Signed-off-by: Dave Airlie 
Cc:  # v3.19+
Signed-off-by: Greg Kroah-Hartman 

---
 drivers/gpu/drm/i915/intel_fbdev.c |   12 +---
 1 file changed, 5 insertions(+), 7 deletions(-)

--- a/drivers/gpu/drm/i915/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/intel_fbdev.c
@@ -334,8 +334,8 @@ static bool intel_fb_initial_config(stru
bool *enabled, int width, int height)
 {
struct drm_i915_private *dev_priv = to_i915(fb_helper->dev);
+   unsigned long conn_configured, conn_seq, mask;
unsigned int count = min(fb_helper->connector_count, BITS_PER_LONG);
-   unsigned long conn_configured, conn_seq;
int i, j;
bool *save_enabled;
bool fallback = true, ret = true;
@@ -353,9 +353,10 @@ static bool intel_fb_initial_config(stru
drm_modeset_backoff();
 
memcpy(save_enabled, enabled, count);
-   conn_seq = GENMASK(count - 1, 0);
+   mask = GENMASK(count - 1, 0);
conn_configured = 0;
 retry:
+   conn_seq = conn_configured;
for (i = 0; i < count; i++) {
struct drm_fb_helper_connector *fb_conn;
struct drm_connector *connector;
@@ -368,8 +369,7 @@ retry:
if (conn_configured & BIT(i))
continue;
 
-   /* First pass, only consider tiled connectors */
-   if (conn_seq == GENMASK(count - 1, 0) && !connector->has_tile)
+   if (conn_seq == 0 && !connector->has_tile)
continue;
 
if (connector->status == connector_status_connected)
@@ -473,10 +473,8 @@ retry:
conn_configured |= BIT(i);
}
 
-   if (conn_configured != conn_seq) { /* repeat until no more are found */
-   conn_seq = conn_configured;
+   if ((conn_configured & mask) != mask && conn_configured != conn_seq)
goto retry;
-   }
 
/*
 * If the BIOS didn't enable everything it could, fall back to have the




[PATCH 4.19 054/100] fs/proc/proc_sysctl.c: Fix a NULL pointer dereference

2019-04-30 Thread Greg Kroah-Hartman
From: YueHaibing 

commit 89189557b47b35683a27c80ee78aef18248eefb4 upstream.

Syzkaller report this:

  sysctl could not get directory: /net//bridge -12
  kasan: CONFIG_KASAN_INLINE enabled
  kasan: GPF could be caused by NULL-ptr deref or user memory access
  general protection fault:  [#1] SMP KASAN PTI
  CPU: 1 PID: 7027 Comm: syz-executor.0 Tainted: G C5.1.0-rc3+ 
#8
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 
04/01/2014
  RIP: 0010:__write_once_size include/linux/compiler.h:220 [inline]
  RIP: 0010:__rb_change_child include/linux/rbtree_augmented.h:144 [inline]
  RIP: 0010:__rb_erase_augmented include/linux/rbtree_augmented.h:186 [inline]
  RIP: 0010:rb_erase+0x5f4/0x19f0 lib/rbtree.c:459
  Code: 00 0f 85 60 13 00 00 48 89 1a 48 83 c4 18 5b 5d 41 5c 41 5d 41 5e 41 5f 
c3 48 89 f2 48 b8 00 00 00 00 00 fc ff df 48 c1 ea 03 <80> 3c 02 00 0f 85 75 0c 
00 00 4d 85 ed 4c 89 2e 74 ce 4c 89 ea 48
  RSP: 0018:8881bb507778 EFLAGS: 00010206
  RAX: dc00 RBX: 8881f224b5b8 RCX: 818f3f6a
  RDX: 000a RSI: 0050 RDI: 8881f224b568
  RBP:  R08: ed10376a0ef4 R09: ed10376a0ef4
  R10: 0001 R11: ed10376a0ef4 R12: 8881f224b558
  R13:  R14:  R15: 
  FS:  7f3e7ce13700() GS:8881f730() knlGS:
  CS:  0010 DS:  ES:  CR0: 80050033
  CR2: 7fd60fbe9398 CR3: 0001cb55c001 CR4: 007606e0
  DR0:  DR1:  DR2: 
  DR3:  DR6: fffe0ff0 DR7: 0400
  PKRU: 5554
  Call Trace:
   erase_entry fs/proc/proc_sysctl.c:178 [inline]
   erase_header+0xe3/0x160 fs/proc/proc_sysctl.c:207
   start_unregistering fs/proc/proc_sysctl.c:331 [inline]
   drop_sysctl_table+0x558/0x880 fs/proc/proc_sysctl.c:1631
   get_subdir fs/proc/proc_sysctl.c:1022 [inline]
   __register_sysctl_table+0xd65/0x1090 fs/proc/proc_sysctl.c:1335
   br_netfilter_init+0x68/0x1000 [br_netfilter]
   do_one_initcall+0xbc/0x47d init/main.c:901
   do_init_module+0x1b5/0x547 kernel/module.c:3456
   load_module+0x6405/0x8c10 kernel/module.c:3804
   __do_sys_finit_module+0x162/0x190 kernel/module.c:3898
   do_syscall_64+0x9f/0x450 arch/x86/entry/common.c:290
   entry_SYSCALL_64_after_hwframe+0x49/0xbe
  Modules linked in: br_netfilter(+) backlight comedi(C) hid_sensor_hub max3100 
ti_ads8688 udc_core fddi snd_mona leds_gpio rc_streamzap mtd pata_netcell 
nf_log_common rc_winfast udp_tunnel snd_usbmidi_lib snd_usb_toneport 
snd_usb_line6 snd_rawmidi snd_seq_device snd_hwdep videobuf2_v4l2 
videobuf2_common videodev media videobuf2_vmalloc videobuf2_memops 
rc_gadmei_rm008z 8250_of smm665 hid_tmff hid_saitek hwmon_vid 
rc_ati_tv_wonder_hd_600 rc_core pata_pdc202xx_old dn_rtmsg as3722 ad714x_i2c 
ad714x snd_soc_cs4265 hid_kensington panel_ilitek_ili9322 drm 
drm_panel_orientation_quirks ipack cdc_phonet usbcore phonet hid_jabra hid 
extcon_arizona can_dev industrialio_triggered_buffer kfifo_buf industrialio 
adm1031 i2c_mux_ltc4306 i2c_mux ipmi_msghandler mlxsw_core snd_soc_cs35l34 
snd_soc_core snd_pcm_dmaengine snd_pcm snd_timer ac97_bus snd_compress snd 
soundcore gpio_da9055 uio ecdh_generic mdio_thunder of_mdio fixed_phy libphy 
mdio_cavium iptable_security iptable_raw iptable_mangle
   iptable_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 iptable_filter 
bpfilter ip6_vti ip_vti ip_gre ipip sit tunnel4 ip_tunnel hsr veth netdevsim 
vxcan batman_adv cfg80211 rfkill chnl_net caif nlmon dummy team bonding vcan 
bridge stp llc ip6_gre gre ip6_tunnel tunnel6 tun joydev mousedev ppdev tpm 
kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul crc32c_intel 
ghash_clmulni_intel aesni_intel ide_pci_generic piix aes_x86_64 crypto_simd 
cryptd ide_core glue_helper input_leds psmouse intel_agp intel_gtt serio_raw 
ata_generic i2c_piix4 agpgart pata_acpi parport_pc parport floppy rtc_cmos 
sch_fq_codel ip_tables x_tables sha1_ssse3 sha1_generic ipv6 [last unloaded: 
br_netfilter]
  Dumping ftrace buffer:
 (ftrace buffer empty)
  ---[ end trace 68741688d5fbfe85 ]---

commit 23da9588037e ("fs/proc/proc_sysctl.c: fix NULL pointer
dereference in put_links") forgot to handle start_unregistering() case,
while header->parent is NULL, it calls erase_header() and as seen in the
above syzkaller call trace, accessing &header->parent->root will trigger
a NULL pointer dereference.

As that commit explained, there is also no need to call
start_unregistering() if header->parent is NULL.

Link: http://lkml.kernel.org/r/20190409153622.28112-1-yuehaib...@huawei.com
Fixes: 23da9588037e ("fs/proc/proc_sysctl.c: fix NULL pointer dereference in 
put_links")
Fixes: 0e47c99d7fe25 ("sysctl: Replace root_list with links between 
sysctl_table_sets")
Signed-off-by: YueHaibing 
Reported-by: Hulk Robot 
Reviewed-by: Kees Cook 
Cc: Luis Chamberlain 
Cc: Alexey Dobriyan 
Cc: Al Viro 

[PATCH 4.19 049/100] drm/vc4: Fix compilation error reported by kbuild test bot

2019-04-30 Thread Greg Kroah-Hartman
From: Maarten Lankhorst 

commit 462ce5d963f18b71c63f6b7730a35a2ee5273540 upstream.

A pointer to crtc was missing, resulting in the following build error:
drivers/gpu/drm/vc4/vc4_crtc.c:1045:44: sparse: sparse: incorrect type in 
argument 1 (different base types)
drivers/gpu/drm/vc4/vc4_crtc.c:1045:44: sparse:expected struct drm_crtc 
*crtc
drivers/gpu/drm/vc4/vc4_crtc.c:1045:44: sparse:got struct drm_crtc_state 
*state
drivers/gpu/drm/vc4/vc4_crtc.c:1045:39: sparse: sparse: not enough arguments 
for function vc4_crtc_destroy_state

Signed-off-by: Maarten Lankhorst 
Reported-by: kbuild test robot 
Cc: Eric Anholt 
Link: 
https://patchwork.freedesktop.org/patch/msgid/2b6ed5e6-81b0-4276-8860-870b54ca3...@linux.intel.com
Fixes: d08106796a78 ("drm/vc4: Fix memory leak during gpu reset.")
Cc:  # v4.6+
Acked-by: Daniel Vetter 
Signed-off-by: Greg Kroah-Hartman 

---
 drivers/gpu/drm/vc4/vc4_crtc.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/drivers/gpu/drm/vc4/vc4_crtc.c
+++ b/drivers/gpu/drm/vc4/vc4_crtc.c
@@ -998,7 +998,7 @@ static void
 vc4_crtc_reset(struct drm_crtc *crtc)
 {
if (crtc->state)
-   vc4_crtc_destroy_state(crtc->state);
+   vc4_crtc_destroy_state(crtc, crtc->state);
 
crtc->state = kzalloc(sizeof(struct vc4_crtc_state), GFP_KERNEL);
if (crtc->state)




[PATCH 4.19 084/100] ipv4: add sanity checks in ipv4_link_failure()

2019-04-30 Thread Greg Kroah-Hartman
From: Eric Dumazet 

[ Upstream commit 20ff83f10f113c88d0bb74589389b05250994c16 ]

Before calling __ip_options_compile(), we need to ensure the network
header is an IPv4 one, and that it is already pulled in skb->head.

RAW sockets going through a tunnel can end up calling ipv4_link_failure()
with total garbage in the skb, or arbitrary lengths.

syzbot report :

BUG: KASAN: stack-out-of-bounds in memcpy include/linux/string.h:355 [inline]
BUG: KASAN: stack-out-of-bounds in __ip_options_echo+0x294/0x1120 
net/ipv4/ip_options.c:123
Write of size 69 at addr 888096abf068 by task syz-executor.4/9204

CPU: 0 PID: 9204 Comm: syz-executor.4 Not tainted 5.1.0-rc5+ #77
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 
01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x172/0x1f0 lib/dump_stack.c:113
 print_address_description.cold+0x7c/0x20d mm/kasan/report.c:187
 kasan_report.cold+0x1b/0x40 mm/kasan/report.c:317
 check_memory_region_inline mm/kasan/generic.c:185 [inline]
 check_memory_region+0x123/0x190 mm/kasan/generic.c:191
 memcpy+0x38/0x50 mm/kasan/common.c:133
 memcpy include/linux/string.h:355 [inline]
 __ip_options_echo+0x294/0x1120 net/ipv4/ip_options.c:123
 __icmp_send+0x725/0x1400 net/ipv4/icmp.c:695
 ipv4_link_failure+0x29f/0x550 net/ipv4/route.c:1204
 dst_link_failure include/net/dst.h:427 [inline]
 vti6_xmit net/ipv6/ip6_vti.c:514 [inline]
 vti6_tnl_xmit+0x10d4/0x1c0c net/ipv6/ip6_vti.c:553
 __netdev_start_xmit include/linux/netdevice.h:4414 [inline]
 netdev_start_xmit include/linux/netdevice.h:4423 [inline]
 xmit_one net/core/dev.c:3292 [inline]
 dev_hard_start_xmit+0x1b2/0x980 net/core/dev.c:3308
 __dev_queue_xmit+0x271d/0x3060 net/core/dev.c:3878
 dev_queue_xmit+0x18/0x20 net/core/dev.c:3911
 neigh_direct_output+0x16/0x20 net/core/neighbour.c:1527
 neigh_output include/net/neighbour.h:508 [inline]
 ip_finish_output2+0x949/0x1740 net/ipv4/ip_output.c:229
 ip_finish_output+0x73c/0xd50 net/ipv4/ip_output.c:317
 NF_HOOK_COND include/linux/netfilter.h:278 [inline]
 ip_output+0x21f/0x670 net/ipv4/ip_output.c:405
 dst_output include/net/dst.h:444 [inline]
 NF_HOOK include/linux/netfilter.h:289 [inline]
 raw_send_hdrinc net/ipv4/raw.c:432 [inline]
 raw_sendmsg+0x1d2b/0x2f20 net/ipv4/raw.c:663
 inet_sendmsg+0x147/0x5d0 net/ipv4/af_inet.c:798
 sock_sendmsg_nosec net/socket.c:651 [inline]
 sock_sendmsg+0xdd/0x130 net/socket.c:661
 sock_write_iter+0x27c/0x3e0 net/socket.c:988
 call_write_iter include/linux/fs.h:1866 [inline]
 new_sync_write+0x4c7/0x760 fs/read_write.c:474
 __vfs_write+0xe4/0x110 fs/read_write.c:487
 vfs_write+0x20c/0x580 fs/read_write.c:549
 ksys_write+0x14f/0x2d0 fs/read_write.c:599
 __do_sys_write fs/read_write.c:611 [inline]
 __se_sys_write fs/read_write.c:608 [inline]
 __x64_sys_write+0x73/0xb0 fs/read_write.c:608
 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290
 entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x458c29
Code: ad b8 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 
89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 
7b b8 fb ff c3 66 2e 0f 1f 84 00 00 00 00
RSP: 002b:7f293b44bc78 EFLAGS: 0246 ORIG_RAX: 0001
RAX: ffda RBX: 0003 RCX: 00458c29
RDX: 0014 RSI: 22c0 RDI: 0003
RBP: 0073bf00 R08:  R09: 
R10:  R11: 0246 R12: 7f293b44c6d4
R13: 004c8623 R14: 004ded68 R15: 

The buggy address belongs to the page:
page:ea00025aafc0 count:0 mapcount:0 mapping: index:0x0
flags: 0x1fffc00()
raw: 01fffc00  025a0101 
raw:    
page dumped because: kasan: bad access detected

Memory state around the buggy address:
 888096abef80: 00 00 00 f2 f2 f2 f2 f2 00 00 00 00 00 00 00 f2
 888096abf000: f2 f2 f2 f2 00 00 00 00 00 00 00 00 00 00 00 00
>888096abf080: 00 00 f3 f3 f3 f3 00 00 00 00 00 00 00 00 00 00
 ^
 888096abf100: 00 00 00 00 f1 f1 f1 f1 00 00 f3 f3 00 00 00 00
 888096abf180: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00

Fixes: ed0de45a1008 ("ipv4: recompile ip options in ipv4_link_failure")
Signed-off-by: Eric Dumazet 
Cc: Stephen Suryaputra 
Acked-by: Willem de Bruijn 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 
---
 net/ipv4/route.c |   34 --
 1 file changed, 24 insertions(+), 10 deletions(-)

--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1185,25 +1185,39 @@ static struct dst_entry *ipv4_dst_check(
return dst;
 }
 
-static void ipv4_link_failure(struct sk_buff *skb)
+static void ipv4_send_dest_unreach(struct sk_buff *skb)
 {
struct ip_options opt;
-   struct rtable *rt;
int res;
 
/* Recompile ip options 

[PATCH 4.19 050/100] USB: Add new USB LPM helpers

2019-04-30 Thread Greg Kroah-Hartman
From: Kai-Heng Feng 

commit 7529b2574a7aaf902f1f8159fbc2a7caa74be559 upstream.

Use new helpers to make LPM enabling/disabling more clear.

This is a preparation to subsequent patch.

Signed-off-by: Kai-Heng Feng 
Cc: stable  # after much soaking
Signed-off-by: Greg Kroah-Hartman 

---
 drivers/usb/core/driver.c  |   12 +++-
 drivers/usb/core/hub.c |   12 ++--
 drivers/usb/core/message.c |2 +-
 drivers/usb/core/sysfs.c   |5 -
 drivers/usb/core/usb.h |   10 --
 5 files changed, 30 insertions(+), 11 deletions(-)

--- a/drivers/usb/core/driver.c
+++ b/drivers/usb/core/driver.c
@@ -1899,7 +1899,7 @@ int usb_runtime_idle(struct device *dev)
return -EBUSY;
 }
 
-int usb_set_usb2_hardware_lpm(struct usb_device *udev, int enable)
+static int usb_set_usb2_hardware_lpm(struct usb_device *udev, int enable)
 {
struct usb_hcd *hcd = bus_to_hcd(udev->bus);
int ret = -EPERM;
@@ -1916,6 +1916,16 @@ int usb_set_usb2_hardware_lpm(struct usb
return ret;
 }
 
+int usb_enable_usb2_hardware_lpm(struct usb_device *udev)
+{
+   return usb_set_usb2_hardware_lpm(udev, 1);
+}
+
+int usb_disable_usb2_hardware_lpm(struct usb_device *udev)
+{
+   return usb_set_usb2_hardware_lpm(udev, 0);
+}
+
 #endif /* CONFIG_PM */
 
 struct bus_type usb_bus_type = {
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -3218,7 +3218,7 @@ int usb_port_suspend(struct usb_device *
 
/* disable USB2 hardware LPM */
if (udev->usb2_hw_lpm_enabled == 1)
-   usb_set_usb2_hardware_lpm(udev, 0);
+   usb_disable_usb2_hardware_lpm(udev);
 
if (usb_disable_ltm(udev)) {
dev_err(&udev->dev, "Failed to disable LTM before suspend\n");
@@ -3257,7 +3257,7 @@ int usb_port_suspend(struct usb_device *
  err_ltm:
/* Try to enable USB2 hardware LPM again */
if (udev->usb2_hw_lpm_capable == 1)
-   usb_set_usb2_hardware_lpm(udev, 1);
+   usb_enable_usb2_hardware_lpm(udev);
 
if (udev->do_remote_wakeup)
(void) usb_disable_remote_wakeup(udev);
@@ -3541,7 +3541,7 @@ int usb_port_resume(struct usb_device *u
} else  {
/* Try to enable USB2 hardware LPM */
if (udev->usb2_hw_lpm_capable == 1)
-   usb_set_usb2_hardware_lpm(udev, 1);
+   usb_enable_usb2_hardware_lpm(udev);
 
/* Try to enable USB3 LTM */
usb_enable_ltm(udev);
@@ -4432,7 +4432,7 @@ static void hub_set_initial_usb2_lpm_pol
if ((udev->bos->ext_cap->bmAttributes & cpu_to_le32(USB_BESL_SUPPORT)) 
||
connect_type == USB_PORT_CONNECT_TYPE_HARD_WIRED) {
udev->usb2_hw_lpm_allowed = 1;
-   usb_set_usb2_hardware_lpm(udev, 1);
+   usb_enable_usb2_hardware_lpm(udev);
}
 }
 
@@ -5609,7 +5609,7 @@ static int usb_reset_and_verify_device(s
 * It will be re-enabled by the enumeration process.
 */
if (udev->usb2_hw_lpm_enabled == 1)
-   usb_set_usb2_hardware_lpm(udev, 0);
+   usb_disable_usb2_hardware_lpm(udev);
 
/* Disable LPM while we reset the device and reinstall the alt settings.
 * Device-initiated LPM, and system exit latency settings are cleared
@@ -5712,7 +5712,7 @@ static int usb_reset_and_verify_device(s
 
 done:
/* Now that the alt settings are re-installed, enable LTM and LPM. */
-   usb_set_usb2_hardware_lpm(udev, 1);
+   usb_enable_usb2_hardware_lpm(udev);
usb_unlocked_enable_lpm(udev);
usb_enable_ltm(udev);
usb_release_bos_descriptor(udev);
--- a/drivers/usb/core/message.c
+++ b/drivers/usb/core/message.c
@@ -1244,7 +1244,7 @@ void usb_disable_device(struct usb_devic
}
 
if (dev->usb2_hw_lpm_enabled == 1)
-   usb_set_usb2_hardware_lpm(dev, 0);
+   usb_disable_usb2_hardware_lpm(dev);
usb_unlocked_disable_lpm(dev);
usb_disable_ltm(dev);
 
--- a/drivers/usb/core/sysfs.c
+++ b/drivers/usb/core/sysfs.c
@@ -528,7 +528,10 @@ static ssize_t usb2_hardware_lpm_store(s
 
if (!ret) {
udev->usb2_hw_lpm_allowed = value;
-   ret = usb_set_usb2_hardware_lpm(udev, value);
+   if (value)
+   ret = usb_enable_usb2_hardware_lpm(udev);
+   else
+   ret = usb_disable_usb2_hardware_lpm(udev);
}
 
usb_unlock_device(udev);
--- a/drivers/usb/core/usb.h
+++ b/drivers/usb/core/usb.h
@@ -92,7 +92,8 @@ extern int usb_remote_wakeup(struct usb_
 extern int usb_runtime_suspend(struct device *dev);
 extern int usb_runtime_resume(struct device *dev);
 extern int usb_runtime_idle(struct device *dev);
-extern int usb_set_usb2_hardware_lpm(struct usb_device *udev, int enable);
+extern int 

[PATCH 4.19 053/100] intel_th: gth: Fix an off-by-one in output unassigning

2019-04-30 Thread Greg Kroah-Hartman
From: Alexander Shishkin 

commit 91d3f8a629849968dc91d6ce54f2d46abf4feb7f upstream.

Commit 9ed3f3c3 ("intel_th: Don't reference unassigned outputs")
fixes a NULL dereference for all masters except the last one ("256+"),
which keeps the stale pointer after the output driver had been unassigned.

Fix the off-by-one.

Signed-off-by: Alexander Shishkin 
Fixes: 9ed3f3c3 ("intel_th: Don't reference unassigned outputs")
Signed-off-by: Greg Kroah-Hartman 

---
 drivers/hwtracing/intel_th/gth.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/drivers/hwtracing/intel_th/gth.c
+++ b/drivers/hwtracing/intel_th/gth.c
@@ -616,7 +616,7 @@ static void intel_th_gth_unassign(struct
othdev->output.port = -1;
othdev->output.active = false;
gth->output[port].output = NULL;
-   for (master = 0; master < TH_CONFIGURABLE_MASTERS; master++)
+   for (master = 0; master <= TH_CONFIGURABLE_MASTERS; master++)
if (gth->master[master] == port)
gth->master[master] = -1;
spin_unlock(&gth->gth_lock);




[PATCH 5.0 01/89] netfilter: nf_tables: bogus EBUSY when deleting set after flush

2019-04-30 Thread Greg Kroah-Hartman
[ Upstream commit 273fe3f1006ea5ebc63d6729e43e8e45e32b256a ]

Set deletion after flush coming in the same batch results in EBUSY. Add
set use counter to track the number of references to this set from
rules. We cannot rely on the list of bindings for this since such list
is still populated from the preparation phase.

Reported-by: Václav Zindulka 
Signed-off-by: Pablo Neira Ayuso 
Signed-off-by: Sasha Levin 
---
 include/net/netfilter/nf_tables.h |  6 ++
 net/netfilter/nf_tables_api.c | 28 +++-
 net/netfilter/nft_dynset.c| 13 +
 net/netfilter/nft_lookup.c| 13 +
 net/netfilter/nft_objref.c| 13 +
 5 files changed, 60 insertions(+), 13 deletions(-)

diff --git a/include/net/netfilter/nf_tables.h 
b/include/net/netfilter/nf_tables.h
index 0612439909dc..9e0b9ecb43db 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -382,6 +382,7 @@ void nft_unregister_set(struct nft_set_type *type);
  * @dtype: data type (verdict or numeric type defined by userspace)
  * @objtype: object type (see NFT_OBJECT_* definitions)
  * @size: maximum set size
+ * @use: number of rules references to this set
  * @nelems: number of elements
  * @ndeact: number of deactivated elements queued for removal
  * @timeout: default timeout value in jiffies
@@ -407,6 +408,7 @@ struct nft_set {
u32 dtype;
u32 objtype;
u32 size;
+   u32 use;
atomic_tnelems;
u32 ndeact;
u64 timeout;
@@ -467,6 +469,10 @@ struct nft_set_binding {
u32 flags;
 };
 
+enum nft_trans_phase;
+void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set,
+ struct nft_set_binding *binding,
+ enum nft_trans_phase phase);
 int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
   struct nft_set_binding *binding);
 void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index acb124ce92ec..e2aac80f9b7b 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -3624,6 +3624,9 @@ err1:
 
 static void nft_set_destroy(struct nft_set *set)
 {
+   if (WARN_ON(set->use > 0))
+   return;
+
set->ops->destroy(set);
module_put(to_set_type(set->ops)->owner);
kfree(set->name);
@@ -3664,7 +3667,7 @@ static int nf_tables_delset(struct net *net, struct sock 
*nlsk,
NL_SET_BAD_ATTR(extack, attr);
return PTR_ERR(set);
}
-   if (!list_empty(&set->bindings) ||
+   if (set->use ||
(nlh->nlmsg_flags & NLM_F_NONREC && atomic_read(&set->nelems) > 0)) 
{
NL_SET_BAD_ATTR(extack, attr);
return -EBUSY;
@@ -3694,6 +3697,9 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct 
nft_set *set,
struct nft_set_binding *i;
struct nft_set_iter iter;
 
+   if (set->use == UINT_MAX)
+   return -EOVERFLOW;
+
if (!list_empty(&set->bindings) && nft_set_is_anonymous(set))
return -EBUSY;
 
@@ -3721,6 +3727,7 @@ bind:
binding->chain = ctx->chain;
list_add_tail_rcu(&binding->list, &set->bindings);
nft_set_trans_bind(ctx, set);
+   set->use++;
 
return 0;
 }
@@ -3740,6 +3747,25 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, 
struct nft_set *set,
 }
 EXPORT_SYMBOL_GPL(nf_tables_unbind_set);
 
+void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set,
+ struct nft_set_binding *binding,
+ enum nft_trans_phase phase)
+{
+   switch (phase) {
+   case NFT_TRANS_PREPARE:
+   set->use--;
+   return;
+   case NFT_TRANS_ABORT:
+   case NFT_TRANS_RELEASE:
+   set->use--;
+   /* fall through */
+   default:
+   nf_tables_unbind_set(ctx, set, binding,
+phase == NFT_TRANS_COMMIT);
+   }
+}
+EXPORT_SYMBOL_GPL(nf_tables_deactivate_set);
+
 void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set)
 {
if (list_empty(&set->bindings) && nft_set_is_anonymous(set))
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index f1172f99752b..eb7f9a5f2aeb 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -241,11 +241,15 @@ static void nft_dynset_deactivate(const struct nft_ctx 
*ctx,
 {
struct nft_dynset *priv = nft_expr_priv(expr);
 
-   if (phase == NFT_TRANS_PREPARE)
-   return;
+   

[PATCH 4.19 096/100] mlxsw: spectrum: Put MC TCs into DWRR mode

2019-04-30 Thread Greg Kroah-Hartman
From: Petr Machata 

[ Upstream commit f476b3f809fa02f47af6333ed63715058c3fc348 ]

Both Spectrum-1 and Spectrum-2 chips are currently configured such that
pairs of TC n (which is used for UC traffic) and TC n+8 (which is used
for MC traffic) are feeding into the same subgroup. Strict
prioritization is configured between the two TCs, and by enabling
MC-aware mode on the switch, the lower-numbered (UC) TCs are favored
over the higher-numbered (MC) TCs.

On Spectrum-2 however, there is an issue in configuration of the
MC-aware mode. As a result, MC traffic is prioritized over UC traffic.
To work around the issue, configure the MC TCs with DWRR mode (while
keeping the UC TCs in strict mode).

With this patch, the multicast-unicast arbitration results in the same
behavior on both Spectrum-1 and Spectrum-2 chips.

Fixes: 7b8195306694 ("mlxsw: spectrum: Configure MC-aware mode on mlxsw ports")
Signed-off-by: Petr Machata 
Signed-off-by: Ido Schimmel 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -2783,7 +2783,7 @@ static int mlxsw_sp_port_ets_init(struct
err = mlxsw_sp_port_ets_set(mlxsw_sp_port,
MLXSW_REG_QEEC_HIERARCY_TC,
i + 8, i,
-   false, 0);
+   true, 100);
if (err)
return err;
}




[PATCH 4.19 092/100] stmmac: pci: Adjust IOT2000 matching

2019-04-30 Thread Greg Kroah-Hartman
From: Su Bao Cheng 

[ Upstream commit e0c1d14a1a3211dccf0540a6703ffbd5d2a75bdb ]

Since there are more IOT2040 variants with identical hardware but
different asset tags, the asset tag matching should be adjusted to
support them.

For the board name "SIMATIC IOT2000", currently there are 2 types of
hardware, IOT2020 and IOT2040. The IOT2020 is identified by its unique
asset tag. Match on it first. If we then match on the board name only,
we will catch all IOT2040 variants. In the future there will be no other
devices with the "SIMATIC IOT2000" DMI board name but different
hardware.

Signed-off-by: Su Bao Cheng 
Reviewed-by: Jan Kiszka 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c |8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
@@ -159,6 +159,12 @@ static const struct dmi_system_id quark_
},
.driver_data = (void *)_stmmac_dmi_data,
},
+   /*
+* There are 2 types of SIMATIC IOT2000: IOT2020 and IOT2040.
+* The asset tag "6ES7647-0AA00-0YA2" is only for IOT2020 which
+* has only one pci network device while other asset tags are
+* for IOT2040 which has two.
+*/
{
.matches = {
DMI_EXACT_MATCH(DMI_BOARD_NAME, "SIMATIC IOT2000"),
@@ -170,8 +176,6 @@ static const struct dmi_system_id quark_
{
.matches = {
DMI_EXACT_MATCH(DMI_BOARD_NAME, "SIMATIC IOT2000"),
-   DMI_EXACT_MATCH(DMI_BOARD_ASSET_TAG,
-   "6ES7647-0AA00-1YA2"),
},
.driver_data = (void *)_stmmac_dmi_data,
},




[PATCH 4.19 045/100] ARM: 8857/1: efi: enable CP15 DMB instructions before cleaning the cache

2019-04-30 Thread Greg Kroah-Hartman
From: Ard Biesheuvel 

commit e17b1af96b2afc38e684aa2f1033387e2ed10029 upstream.

The EFI stub is entered with the caches and MMU enabled by the
firmware, and once the stub is ready to hand over to the decompressor,
we clean and disable the caches.

The cache clean routines use CP15 barrier instructions, which can be
disabled via SCTLR. Normally, when using the provided cache handling
routines to enable the caches and MMU, this bit is enabled as well.
However, since we entered the stub with the caches already enabled,
this routine is not executed before we call the cache clean routines,
resulting in undefined instruction exceptions if the firmware never
enabled this bit.

So set the bit explicitly in the EFI entry code, but do so in a way that
guarantees that the resulting code can still run on v6 cores as well
(which are guaranteed to have CP15 barriers enabled)

Cc:  # v4.9+
Acked-by: Marc Zyngier 
Signed-off-by: Ard Biesheuvel 
Signed-off-by: Russell King 
Signed-off-by: Greg Kroah-Hartman 

---
 arch/arm/boot/compressed/head.S |   16 +++-
 1 file changed, 15 insertions(+), 1 deletion(-)

--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -1395,7 +1395,21 @@ ENTRY(efi_stub_entry)
 
@ Preserve return value of efi_entry() in r4
mov r4, r0
-   bl  cache_clean_flush
+
+   @ our cache maintenance code relies on CP15 barrier instructions
+   @ but since we arrived here with the MMU and caches configured
+   @ by UEFI, we must check that the CP15BEN bit is set in SCTLR.
+   @ Note that this bit is RAO/WI on v6 and earlier, so the ISB in
+   @ the enable path will be executed on v7+ only.
+   mrc p15, 0, r1, c1, c0, 0   @ read SCTLR
+   tst r1, #(1 << 5)   @ CP15BEN bit set?
+   bne 0f
+   orr r1, r1, #(1 << 5)   @ CP15 barrier instructions
+   mcr p15, 0, r1, c1, c0, 0   @ write SCTLR
+ ARM(  .inst   0xf57ff06f  @ v7+ isb   )
+ THUMB(isb )
+
+0: bl  cache_clean_flush
bl  cache_off
 
@ Set parameters for booting zImage according to boot protocol




[PATCH 4.19 100/100] net/tls: dont leak IV and record seq when offload fails

2019-04-30 Thread Greg Kroah-Hartman
From: Jakub Kicinski 

[ Upstream commit 12c7686111326148b4b5db189130522a4ad1be4a ]

When device refuses the offload in tls_set_device_offload_rx()
it calls tls_sw_free_resources_rx() to clean up software context
state.

Unfortunately, tls_sw_free_resources_rx() does not free all
the state tls_set_sw_offload() allocated - it leaks IV and
sequence number buffers.  All other code paths which lead to
tls_sw_release_resources_rx() (which tls_sw_free_resources_rx()
calls) free those right before the call.

Avoid the leak by moving freeing of iv and rec_seq into
tls_sw_release_resources_rx().

Fixes: 4799ac81e52a ("tls: Add rx inline crypto offload")
Signed-off-by: Jakub Kicinski 
Reviewed-by: Dirk van der Merwe 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 
---
 net/tls/tls_device.c |2 --
 net/tls/tls_main.c   |5 +
 net/tls/tls_sw.c |3 +++
 3 files changed, 4 insertions(+), 6 deletions(-)

--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -911,8 +911,6 @@ void tls_device_offload_cleanup_rx(struc
}
 out:
up_read(&device_offload_lock);
-   kfree(tls_ctx->rx.rec_seq);
-   kfree(tls_ctx->rx.iv);
tls_sw_release_resources_rx(sk);
 }
 
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -290,11 +290,8 @@ static void tls_sk_proto_close(struct so
tls_sw_free_resources_tx(sk);
}
 
-   if (ctx->rx_conf == TLS_SW) {
-   kfree(ctx->rx.rec_seq);
-   kfree(ctx->rx.iv);
+   if (ctx->rx_conf == TLS_SW)
tls_sw_free_resources_rx(sk);
-   }
 
 #ifdef CONFIG_TLS_DEVICE
if (ctx->rx_conf == TLS_HW)
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -1118,6 +1118,9 @@ void tls_sw_release_resources_rx(struct
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
 
+   kfree(tls_ctx->rx.rec_seq);
+   kfree(tls_ctx->rx.iv);
+
if (ctx->aead_recv) {
kfree_skb(ctx->recv_pkt);
ctx->recv_pkt = NULL;




[PATCH 4.19 099/100] net/tls: avoid potential deadlock in tls_set_device_offload_rx()

2019-04-30 Thread Greg Kroah-Hartman
From: Jakub Kicinski 

[ Upstream commit 62ef81d5632634d5e310ed25b9b940b2b6612b46 ]

If device supports offload, but offload fails tls_set_device_offload_rx()
will call tls_sw_free_resources_rx() which (unhelpfully) releases
and reacquires the socket lock.

For a small fix release and reacquire the device_offload_lock.

Fixes: 4799ac81e52a ("tls: Add rx inline crypto offload")
Signed-off-by: Jakub Kicinski 
Reviewed-by: Dirk van der Merwe 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 
---
 net/tls/tls_device.c |2 ++
 1 file changed, 2 insertions(+)

--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -874,7 +874,9 @@ int tls_set_device_offload_rx(struct soc
goto release_netdev;
 
 free_sw_resources:
+   up_read(&device_offload_lock);
tls_sw_free_resources_rx(sk);
+   down_read(&device_offload_lock);
 release_ctx:
ctx->priv_ctx_rx = NULL;
 release_netdev:




[PATCH 4.19 090/100] net: stmmac: move stmmac_check_ether_addr() to driver probe

2019-04-30 Thread Greg Kroah-Hartman
From: Vinod Koul 

[ Upstream commit b561af36b1841088552464cdc3f6371d92f17710 ]

stmmac_check_ether_addr() checks the MAC address and assigns one in
driver open(). In many cases when we create slave netdevice, the dev
addr is inherited from master but the master dev addr maybe NULL at
that time, so move this call to driver probe so that address is
always valid.

Signed-off-by: Xiaofei Shen 
Tested-by: Xiaofei Shen 
Signed-off-by: Sneh Shah 
Signed-off-by: Vinod Koul 
Reviewed-by: Andrew Lunn 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -2595,8 +2595,6 @@ static int stmmac_open(struct net_device
u32 chan;
int ret;
 
-   stmmac_check_ether_addr(priv);
-
if (priv->hw->pcs != STMMAC_PCS_RGMII &&
priv->hw->pcs != STMMAC_PCS_TBI &&
priv->hw->pcs != STMMAC_PCS_RTBI) {
@@ -4296,6 +4294,8 @@ int stmmac_dvr_probe(struct device *devi
if (ret)
goto error_hw_init;
 
+   stmmac_check_ether_addr(priv);
+
/* Configure real RX and TX queues */
netif_set_real_num_rx_queues(ndev, priv->plat->rx_queues_to_use);
netif_set_real_num_tx_queues(ndev, priv->plat->tx_queues_to_use);




[PATCH 4.19 093/100] team: fix possible recursive locking when add slaves

2019-04-30 Thread Greg Kroah-Hartman
From: Hangbin Liu 

[ Upstream commit 925b0c841e066b488cc3a60272472b2c56300704 ]

If we add a bond device which is already the master of the team interface,
we will hold the team->lock in team_add_slave() first and then request the
lock in team_set_mac_address() again. The functions are called like:

- team_add_slave()
 - team_port_add()
   - team_port_enter()
 - team_modeop_port_enter()
   - __set_port_dev_addr()
 - dev_set_mac_address()
   - bond_set_mac_address()
 - dev_set_mac_address()
   - team_set_mac_address

Although team_upper_dev_link() would check the upper devices but it is
called too late. Fix it by adding a checking before processing the slave.

v2: Do not split the string in netdev_err()

Fixes: 3d249d4ca7d0 ("net: introduce ethernet teaming device")
Acked-by: Jiri Pirko 
Signed-off-by: Hangbin Liu 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/net/team/team.c |7 +++
 1 file changed, 7 insertions(+)

--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -1160,6 +1160,13 @@ static int team_port_add(struct team *te
return -EINVAL;
}
 
+   if (netdev_has_upper_dev(dev, port_dev)) {
+   NL_SET_ERR_MSG(extack, "Device is already an upper device of 
the team interface");
+   netdev_err(dev, "Device %s is already an upper device of the 
team interface\n",
+  portname);
+   return -EBUSY;
+   }
+
if (port_dev->features & NETIF_F_VLAN_CHALLENGED &&
vlan_uses_dev(dev)) {
NL_SET_ERR_MSG(extack, "Device is VLAN challenged and team 
device has VLAN set up");




[PATCH 4.19 095/100] mlxsw: pci: Reincrease PCI reset timeout

2019-04-30 Thread Greg Kroah-Hartman
From: Ido Schimmel 

[ Upstream commit 1ab3030193d25878b3b1409060e1e0a879800c95 ]

During driver initialization the driver sends a reset to the device and
waits for the firmware to signal that it is ready to continue.

Commit d2f372ba0914 ("mlxsw: pci: Increase PCI SW reset timeout")
increased the timeout to 13 seconds due to longer PHY calibration in
Spectrum-2 compared to Spectrum-1.

Recently it became apparent that this timeout is too short and therefore
this patch increases it again to a safer limit that will be reduced in
the future.

Fixes: c3ab435466d5 ("mlxsw: spectrum: Extend to support Spectrum-2 ASIC")
Fixes: d2f372ba0914 ("mlxsw: pci: Increase PCI SW reset timeout")
Signed-off-by: Ido Schimmel 
Acked-by: Jiri Pirko 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/net/ethernet/mellanox/mlxsw/pci_hw.h |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
@@ -27,7 +27,7 @@
 
 #define MLXSW_PCI_SW_RESET 0xF0010
 #define MLXSW_PCI_SW_RESET_RST_BIT BIT(0)
-#define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS   13000
+#define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS   2
 #define MLXSW_PCI_SW_RESET_WAIT_MSECS  100
 #define MLXSW_PCI_FW_READY 0xA1844
 #define MLXSW_PCI_FW_READY_MASK0x




[PATCH 5.0 02/89] netfilter: nf_tables: bogus EBUSY in helper removal from transaction

2019-04-30 Thread Greg Kroah-Hartman
[ Upstream commit 8ffcd32f64633926163cdd07a7d295c500a947d1 ]

Proper use counter updates when activating and deactivating the object,
otherwise, this hits bogus EBUSY error.

Fixes: cd5125d8f518 ("netfilter: nf_tables: split set destruction in deactivate 
and destroy phase")
Reported-by: Laura Garcia 
Signed-off-by: Pablo Neira Ayuso 
Signed-off-by: Sasha Levin 
---
 net/netfilter/nft_objref.c | 19 ---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c
index d8737c115257..bf92a40dd1b2 100644
--- a/net/netfilter/nft_objref.c
+++ b/net/netfilter/nft_objref.c
@@ -64,21 +64,34 @@ nla_put_failure:
return -1;
 }
 
-static void nft_objref_destroy(const struct nft_ctx *ctx,
-  const struct nft_expr *expr)
+static void nft_objref_deactivate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ enum nft_trans_phase phase)
 {
struct nft_object *obj = nft_objref_priv(expr);
 
+   if (phase == NFT_TRANS_COMMIT)
+   return;
+
obj->use--;
 }
 
+static void nft_objref_activate(const struct nft_ctx *ctx,
+   const struct nft_expr *expr)
+{
+   struct nft_object *obj = nft_objref_priv(expr);
+
+   obj->use++;
+}
+
 static struct nft_expr_type nft_objref_type;
 static const struct nft_expr_ops nft_objref_ops = {
.type   = _objref_type,
.size   = NFT_EXPR_SIZE(sizeof(struct nft_object *)),
.eval   = nft_objref_eval,
.init   = nft_objref_init,
-   .destroy= nft_objref_destroy,
+   .activate   = nft_objref_activate,
+   .deactivate = nft_objref_deactivate,
.dump   = nft_objref_dump,
 };
 
-- 
2.19.1





Re: [PATCH v2 1/2] livepatch: Remove duplicate warning about missing reliable stacktrace support

2019-04-30 Thread Kamalesh Babulal
On Tue, Apr 30, 2019 at 11:10:48AM +0200, Petr Mladek wrote:
> WARN_ON_ONCE() could not be called safely under rq lock because
> of console deadlock issues. Fortunately, there is another check
> for the reliable stacktrace support in klp_enable_patch().
> 
> Signed-off-by: Petr Mladek 

Reviewed-by: Kamalesh Babulal 



[PATCH 5.0 07/89] block, bfq: fix use after free in bfq_bfqq_expire

2019-04-30 Thread Greg Kroah-Hartman
[ Upstream commit eed47d19d9362bdd958e4ab56af480b9dbf6b2b6 ]

The function bfq_bfqq_expire() invokes the function
__bfq_bfqq_expire(), and the latter may free the in-service bfq-queue.
If this happens, then no other instruction of bfq_bfqq_expire() must
be executed, or a use-after-free will occur.

Basing on the assumption that __bfq_bfqq_expire() invokes
bfq_put_queue() on the in-service bfq-queue exactly once, the queue is
assumed to be freed if its refcounter is equal to one right before
invoking __bfq_bfqq_expire().

But, since commit 9dee8b3b057e ("block, bfq: fix queue removal from
weights tree") this assumption is false. __bfq_bfqq_expire() may also
invoke bfq_weights_tree_remove() and, since commit 9dee8b3b057e
("block, bfq: fix queue removal from weights tree"), also
the latter function may invoke bfq_put_queue(). So __bfq_bfqq_expire()
may invoke bfq_put_queue() twice, and this is the actual case where
the in-service queue may happen to be freed.

To address this issue, this commit moves the check on the refcounter
of the queue right around the last bfq_put_queue() that may be invoked
on the queue.

Fixes: 9dee8b3b057e ("block, bfq: fix queue removal from weights tree")
Reported-by: Dmitrii Tcvetkov 
Reported-by: Douglas Anderson 
Tested-by: Dmitrii Tcvetkov 
Tested-by: Douglas Anderson 
Signed-off-by: Paolo Valente 
Signed-off-by: Jens Axboe 
Signed-off-by: Sasha Levin 
---
 block/bfq-iosched.c | 15 +++
 block/bfq-iosched.h |  2 +-
 block/bfq-wf2q.c| 17 +++--
 3 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index e5ed28629271..72510c470001 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -2804,7 +2804,7 @@ static void bfq_dispatch_remove(struct request_queue *q, 
struct request *rq)
bfq_remove_request(q, rq);
 }
 
-static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+static bool __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq)
 {
/*
 * If this bfqq is shared between multiple processes, check
@@ -2837,9 +2837,11 @@ static void __bfq_bfqq_expire(struct bfq_data *bfqd, 
struct bfq_queue *bfqq)
/*
 * All in-service entities must have been properly deactivated
 * or requeued before executing the next function, which
-* resets all in-service entites as no more in service.
+* resets all in-service entities as no more in service. This
+* may cause bfqq to be freed. If this happens, the next
+* function returns true.
 */
-   __bfq_bfqd_reset_in_service(bfqd);
+   return __bfq_bfqd_reset_in_service(bfqd);
 }
 
 /**
@@ -3244,7 +3246,6 @@ void bfq_bfqq_expire(struct bfq_data *bfqd,
bool slow;
unsigned long delta = 0;
struct bfq_entity *entity = >entity;
-   int ref;
 
/*
 * Check whether the process is slow (see bfq_bfqq_is_slow).
@@ -3313,10 +3314,8 @@ void bfq_bfqq_expire(struct bfq_data *bfqd,
 * reason.
 */
__bfq_bfqq_recalc_budget(bfqd, bfqq, reason);
-   ref = bfqq->ref;
-   __bfq_bfqq_expire(bfqd, bfqq);
-
-   if (ref == 1) /* bfqq is gone, no more actions on it */
+   if (__bfq_bfqq_expire(bfqd, bfqq))
+   /* bfqq is gone, no more actions on it */
return;
 
bfqq->injected_service = 0;
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index 746bd570b85a..ca98c98a8179 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -993,7 +993,7 @@ bool __bfq_deactivate_entity(struct bfq_entity *entity,
 bool ins_into_idle_tree);
 bool next_queue_may_preempt(struct bfq_data *bfqd);
 struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd);
-void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd);
+bool __bfq_bfqd_reset_in_service(struct bfq_data *bfqd);
 void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 bool ins_into_idle_tree, bool expiration);
 void bfq_activate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq);
diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c
index 4aab1a8191f0..8077bf71d2ac 100644
--- a/block/bfq-wf2q.c
+++ b/block/bfq-wf2q.c
@@ -1599,7 +1599,8 @@ struct bfq_queue *bfq_get_next_queue(struct bfq_data 
*bfqd)
return bfqq;
 }
 
-void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd)
+/* returns true if the in-service queue gets freed */
+bool __bfq_bfqd_reset_in_service(struct bfq_data *bfqd)
 {
struct bfq_queue *in_serv_bfqq = bfqd->in_service_queue;
struct bfq_entity *in_serv_entity = _serv_bfqq->entity;
@@ -1623,8 +1624,20 @@ void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd)
 * service tree either, then release the service reference to
 * the queue it represents (taken with bfq_get_entity).
 */
-   if (!in_serv_entity->on_st)
+   if (!in_serv_entity->on_st) {
+   /*
+ 

[PATCH 5.0 09/89] cifs: fix page reference leak with readv/writev

2019-04-30 Thread Greg Kroah-Hartman
From: Jérôme Glisse 

commit 13f5938d8264b5501368523c4513ff26608a33e8 upstream.

CIFS can leak page references obtained through GUP (get_user_pages*()
through iov_iter_get_pages()). This happens if cifs_send_async_read()
or cifs_write_from_iter() calls fail from within __cifs_readv() and
__cifs_writev() respectively. This patch moves the page unreferencing
to cifs_aio_ctx_release(), which runs on all code paths, so it is
simpler to follow for correctness.

Signed-off-by: Jérôme Glisse 
Cc: Steve French 
Cc: linux-c...@vger.kernel.org
Cc: samba-techni...@lists.samba.org
Cc: Alexander Viro 
Cc: linux-fsde...@vger.kernel.org
Cc: Linus Torvalds 
Cc: Stable 
Signed-off-by: Steve French 
Reviewed-by: Pavel Shilovsky 
Signed-off-by: Greg Kroah-Hartman 

---
 fs/cifs/file.c |   15 +--
 fs/cifs/misc.c |   23 ++-
 2 files changed, 23 insertions(+), 15 deletions(-)

--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2796,7 +2796,6 @@ static void collect_uncached_write_data(
struct cifs_tcon *tcon;
struct cifs_sb_info *cifs_sb;
struct dentry *dentry = ctx->cfile->dentry;
-   unsigned int i;
int rc;
 
tcon = tlink_tcon(ctx->cfile->tlink);
@@ -2860,10 +2859,6 @@ restart_loop:
kref_put(>refcount, cifs_uncached_writedata_release);
}
 
-   if (!ctx->direct_io)
-   for (i = 0; i < ctx->npages; i++)
-   put_page(ctx->bv[i].bv_page);
-
cifs_stats_bytes_written(tcon, ctx->total_len);
set_bit(CIFS_INO_INVALID_MAPPING, _I(dentry->d_inode)->flags);
 
@@ -3472,7 +3467,6 @@ collect_uncached_read_data(struct cifs_a
struct iov_iter *to = >iter;
struct cifs_sb_info *cifs_sb;
struct cifs_tcon *tcon;
-   unsigned int i;
int rc;
 
tcon = tlink_tcon(ctx->cfile->tlink);
@@ -3556,15 +3550,8 @@ again:
kref_put(>refcount, cifs_uncached_readdata_release);
}
 
-   if (!ctx->direct_io) {
-   for (i = 0; i < ctx->npages; i++) {
-   if (ctx->should_dirty)
-   set_page_dirty(ctx->bv[i].bv_page);
-   put_page(ctx->bv[i].bv_page);
-   }
-
+   if (!ctx->direct_io)
ctx->total_len = ctx->len - iov_iter_count(to);
-   }
 
cifs_stats_bytes_read(tcon, ctx->total_len);
 
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -789,6 +789,11 @@ cifs_aio_ctx_alloc(void)
 {
struct cifs_aio_ctx *ctx;
 
+   /*
+* Must use kzalloc to initialize ctx->bv to NULL and ctx->direct_io
+* to false so that we know when we have to unreference pages within
+* cifs_aio_ctx_release()
+*/
ctx = kzalloc(sizeof(struct cifs_aio_ctx), GFP_KERNEL);
if (!ctx)
return NULL;
@@ -807,7 +812,23 @@ cifs_aio_ctx_release(struct kref *refcou
struct cifs_aio_ctx, refcount);
 
cifsFileInfo_put(ctx->cfile);
-   kvfree(ctx->bv);
+
+   /*
+* ctx->bv is only set if setup_aio_ctx_iter() was call successfuly
+* which means that iov_iter_get_pages() was a success and thus that
+* we have taken reference on pages.
+*/
+   if (ctx->bv) {
+   unsigned i;
+
+   for (i = 0; i < ctx->npages; i++) {
+   if (ctx->should_dirty)
+   set_page_dirty(ctx->bv[i].bv_page);
+   put_page(ctx->bv[i].bv_page);
+   }
+   kvfree(ctx->bv);
+   }
+
kfree(ctx);
 }
 




[PATCH 5.0 20/89] mm: do not boost watermarks to avoid fragmentation for the DISCONTIG memory model

2019-04-30 Thread Greg Kroah-Hartman
From: Mel Gorman 

commit 24512228b7a3f412b5a51f189df302616b021c33 upstream.

Mikulas Patocka reported that commit 1c30844d2dfe ("mm: reclaim small
amounts of memory when an external fragmentation event occurs") "broke"
memory management on parisc.

The machine is not NUMA but the DISCONTIG model creates three pgdats
even though it's a UMA machine for the following ranges

0) Start 0x End 0x3fff Size   1024 MB
1) Start 0x0001 End 0x0001bfdf Size   3070 MB
2) Start 0x00404000 End 0x0040 Size   3072 MB

Mikulas reported:

With the patch 1c30844d2, the kernel will incorrectly reclaim the
first zone when it fills up, ignoring the fact that there are two
completely free zones. Basically, it limits cache size to 1GiB.

For example, if I run:
# dd if=/dev/sda of=/dev/null bs=1M count=2048

- with the proper kernel, there should be "Buffers - 2GiB"
when this command finishes. With the patch 1c30844d2, buffers
will consume just 1GiB or slightly more, because the kernel was
incorrectly reclaiming them.

The page allocator and reclaim makes assumptions that pgdats really
represent NUMA nodes and zones represent ranges and makes decisions on
that basis.  Watermark boosting for small pgdats leads to unexpected
results even though this would have behaved reasonably on SPARSEMEM.

DISCONTIG is essentially deprecated and even parisc plans to move to
SPARSEMEM so there is no need to be fancy, this patch simply disables
watermark boosting by default on DISCONTIGMEM.

Link: http://lkml.kernel.org/r/20190419094335.gj18...@techsingularity.net
Fixes: 1c30844d2dfe ("mm: reclaim small amounts of memory when an external 
fragmentation event occurs")
Signed-off-by: Mel Gorman 
Reported-by: Mikulas Patocka 
Tested-by: Mikulas Patocka 
Acked-by: Vlastimil Babka 
Cc: James Bottomley 
Cc: Matthew Wilcox 
Cc: 
Signed-off-by: Andrew Morton 
Signed-off-by: Linus Torvalds 
Signed-off-by: Greg Kroah-Hartman 

---
 Documentation/sysctl/vm.txt |   16 
 mm/page_alloc.c |   13 +
 2 files changed, 21 insertions(+), 8 deletions(-)

--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -866,14 +866,14 @@ The intent is that compaction has less w
 increase the success rate of future high-order allocations such as SLUB
 allocations, THP and hugetlbfs pages.
 
-To make it sensible with respect to the watermark_scale_factor parameter,
-the unit is in fractions of 10,000. The default value of 15,000 means
-that up to 150% of the high watermark will be reclaimed in the event of
-a pageblock being mixed due to fragmentation. The level of reclaim is
-determined by the number of fragmentation events that occurred in the
-recent past. If this value is smaller than a pageblock then a pageblocks
-worth of pages will be reclaimed (e.g.  2MB on 64-bit x86). A boost factor
-of 0 will disable the feature.
+To make it sensible with respect to the watermark_scale_factor
+parameter, the unit is in fractions of 10,000. The default value of
+15,000 on !DISCONTIGMEM configurations means that up to 150% of the high
+watermark will be reclaimed in the event of a pageblock being mixed due
+to fragmentation. The level of reclaim is determined by the number of
+fragmentation events that occurred in the recent past. If this value is
+smaller than a pageblock then a pageblocks worth of pages will be reclaimed
+(e.g.  2MB on 64-bit x86). A boost factor of 0 will disable the feature.
 
 =
 
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -266,7 +266,20 @@ compound_page_dtor * const compound_page
 
 int min_free_kbytes = 1024;
 int user_min_free_kbytes = -1;
+#ifdef CONFIG_DISCONTIGMEM
+/*
+ * DiscontigMem defines memory ranges as separate pg_data_t even if the ranges
+ * are not on separate NUMA nodes. Functionally this works but with
+ * watermark_boost_factor, it can reclaim prematurely as the ranges can be
+ * quite small. By default, do not boost watermarks on discontigmem as in
+ * many cases very high-order allocations like THP are likely to be
+ * unsupported and the premature reclaim offsets the advantage of long-term
+ * fragmentation avoidance.
+ */
+int watermark_boost_factor __read_mostly;
+#else
 int watermark_boost_factor __read_mostly = 15000;
+#endif
 int watermark_scale_factor = 10;
 
 static unsigned long nr_kernel_pages __initdata;




[PATCH 5.0 21/89] arm64: mm: Ensure tail of unaligned initrd is reserved

2019-04-30 Thread Greg Kroah-Hartman
From: Bjorn Andersson 

commit d4d18e3ec6091843f607e8929a56723e28f393a6 upstream.

In the event that the start address of the initrd is not aligned, but
has an aligned size, the base + size will not cover the entire initrd
image and there is a chance that the kernel will corrupt the tail of the
image.

By aligning the end of the initrd to a page boundary and then
subtracting the adjusted start address the memblock reservation will
cover all pages that contains the initrd.

Fixes: c756c592e442 ("arm64: Utilize phys_initrd_start/phys_initrd_size")
Cc: sta...@vger.kernel.org
Acked-by: Will Deacon 
Signed-off-by: Bjorn Andersson 
Signed-off-by: Catalin Marinas 
Signed-off-by: Greg Kroah-Hartman 

---
 arch/arm64/mm/init.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -406,7 +406,7 @@ void __init arm64_memblock_init(void)
 * Otherwise, this is a no-op
 */
u64 base = phys_initrd_start & PAGE_MASK;
-   u64 size = PAGE_ALIGN(phys_initrd_size);
+   u64 size = PAGE_ALIGN(phys_initrd_start + phys_initrd_size) - 
base;
 
/*
 * We can only add back the initrd memory if we don't end up




[PATCH 5.0 11/89] tracing: Fix a memory leak by early error exit in trace_pid_write()

2019-04-30 Thread Greg Kroah-Hartman
From: Wenwen Wang 

commit 91862cc7867bba4ee5c8fcf0ca2f1d30427b6129 upstream.

In trace_pid_write(), the buffer for trace parser is allocated through
kmalloc() in trace_parser_get_init(). Later on, after the buffer is used,
it is then freed through kfree() in trace_parser_put(). However, it is
possible that trace_pid_write() is terminated due to unexpected errors,
e.g., ENOMEM. In that case, the allocated buffer will not be freed, which
is a memory leak bug.

To fix this issue, free the allocated buffer when an error is encountered.

Link: 
http://lkml.kernel.org/r/1555726979-15633-1-git-send-email-wang6...@umn.edu

Fixes: f4d34a87e9c10 ("tracing: Use pid bitmap instead of a pid array for 
set_event_pid")
Cc: sta...@vger.kernel.org
Signed-off-by: Wenwen Wang 
Signed-off-by: Steven Rostedt (VMware) 
Signed-off-by: Greg Kroah-Hartman 

---
 kernel/trace/trace.c |5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -496,8 +496,10 @@ int trace_pid_write(struct trace_pid_lis
 * not modified.
 */
pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
-   if (!pid_list)
+   if (!pid_list) {
+   trace_parser_put();
return -ENOMEM;
+   }
 
pid_list->pid_max = READ_ONCE(pid_max);
 
@@ -507,6 +509,7 @@ int trace_pid_write(struct trace_pid_lis
 
pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
if (!pid_list->pids) {
+   trace_parser_put();
kfree(pid_list);
return -ENOMEM;
}




[PATCH 5.0 29/89] ceph: fix ci->i_head_snapc leak

2019-04-30 Thread Greg Kroah-Hartman
From: Yan, Zheng 

commit 37659182bff1eeaaeadcfc8f853c6d2b6dbc3f47 upstream.

We missed two places that i_wrbuffer_ref_head, i_wr_ref, i_dirty_caps
and i_flushing_caps may change. When they are all zeros, we should free
i_head_snapc.

Cc: sta...@vger.kernel.org
Link: https://tracker.ceph.com/issues/38224
Reported-and-tested-by: Luis Henriques 
Signed-off-by: "Yan, Zheng" 
Signed-off-by: Ilya Dryomov 
Signed-off-by: Greg Kroah-Hartman 

---
 fs/ceph/mds_client.c |9 +
 fs/ceph/snap.c   |7 ++-
 2 files changed, 15 insertions(+), 1 deletion(-)

--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1286,6 +1286,15 @@ static int remove_session_caps_cb(struct
list_add(>i_prealloc_cap_flush->i_list, _remove);
ci->i_prealloc_cap_flush = NULL;
}
+
+   if (drop &&
+  ci->i_wrbuffer_ref_head == 0 &&
+  ci->i_wr_ref == 0 &&
+  ci->i_dirty_caps == 0 &&
+  ci->i_flushing_caps == 0) {
+  ceph_put_snap_context(ci->i_head_snapc);
+  ci->i_head_snapc = NULL;
+   }
}
spin_unlock(>i_ceph_lock);
while (!list_empty(_remove)) {
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -568,7 +568,12 @@ void ceph_queue_cap_snap(struct ceph_ino
old_snapc = NULL;
 
 update_snapc:
-   if (ci->i_head_snapc) {
+   if (ci->i_wrbuffer_ref_head == 0 &&
+   ci->i_wr_ref == 0 &&
+   ci->i_dirty_caps == 0 &&
+   ci->i_flushing_caps == 0) {
+   ci->i_head_snapc = NULL;
+   } else {
ci->i_head_snapc = ceph_get_snap_context(new_snapc);
dout(" new snapc is %p\n", new_snapc);
}




[PATCH 5.0 23/89] RDMA/mlx5: Do not allow the user to write to the clock page

2019-04-30 Thread Greg Kroah-Hartman
From: Jason Gunthorpe 

commit c660133c339f9ab684fdf568c0d51b9ae5e86002 upstream.

The intent of this VMA was to be read-only from user space, but the
VM_MAYWRITE masking was missed, so mprotect could make it writable.

Cc: sta...@vger.kernel.org
Fixes: 5c99eaecb1fc ("IB/mlx5: Mmap the HCA's clock info to user-space")
Signed-off-by: Jason Gunthorpe 
Reviewed-by: Haggai Eran 
Signed-off-by: Leon Romanovsky 
Signed-off-by: Greg Kroah-Hartman 

---
 drivers/infiniband/hw/mlx5/main.c |2 ++
 1 file changed, 2 insertions(+)

--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1982,6 +1982,7 @@ static int mlx5_ib_mmap_clock_info_page(
 
if (vma->vm_flags & VM_WRITE)
return -EPERM;
+   vma->vm_flags &= ~VM_MAYWRITE;
 
if (!dev->mdev->clock_info_page)
return -EOPNOTSUPP;
@@ -2147,6 +2148,7 @@ static int mlx5_ib_mmap(struct ib_uconte
 
if (vma->vm_flags & VM_WRITE)
return -EPERM;
+   vma->vm_flags &= ~VM_MAYWRITE;
 
/* Don't expose to user-space information it shouldn't have */
if (PAGE_SIZE > 4096)




[PATCH 5.0 25/89] RDMA/ucontext: Fix regression with disassociate

2019-04-30 Thread Greg Kroah-Hartman
From: Jason Gunthorpe 

commit 67f269b37f9b4d52c5e7f97acea26c0852e9b8a1 upstream.

When this code was consolidated the intention was that the VMA would
become backed by anonymous zero pages after the zap_vma_pte - however this
very subtly relied on setting the vm_ops = NULL and clearing the VM_SHARED
bits to transform the VMA into an anonymous VMA. Since the vm_ops was
removed this broke.

Now userspace gets a SIGBUS if it touches the vma after disassociation.

Instead of converting the VMA to anonymous provide a fault handler that
puts a zero'd page into the VMA when user-space touches it after
disassociation.

Cc: sta...@vger.kernel.org
Suggested-by: Andrea Arcangeli 
Fixes: 5f9794dc94f5 ("RDMA/ucontext: Add a core API for mmaping driver IO 
memory")
Signed-off-by: Jason Gunthorpe 
Signed-off-by: Leon Romanovsky 
Signed-off-by: Jason Gunthorpe 
Signed-off-by: Greg Kroah-Hartman 

---
 drivers/infiniband/core/uverbs.h  |1 
 drivers/infiniband/core/uverbs_main.c |   52 --
 2 files changed, 50 insertions(+), 3 deletions(-)

--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -160,6 +160,7 @@ struct ib_uverbs_file {
 
struct mutex umap_lock;
struct list_head umaps;
+   struct page *disassociate_page;
 
struct idr  idr;
/* spinlock protects write access to idr */
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -208,6 +208,9 @@ void ib_uverbs_release_file(struct kref
kref_put(>async_file->ref,
 ib_uverbs_release_async_event_file);
put_device(>device->dev);
+
+   if (file->disassociate_page)
+   __free_pages(file->disassociate_page, 0);
kfree(file);
 }
 
@@ -876,9 +879,50 @@ static void rdma_umap_close(struct vm_ar
kfree(priv);
 }
 
+/*
+ * Once the zap_vma_ptes has been called touches to the VMA will come here and
+ * we return a dummy writable zero page for all the pfns.
+ */
+static vm_fault_t rdma_umap_fault(struct vm_fault *vmf)
+{
+   struct ib_uverbs_file *ufile = vmf->vma->vm_file->private_data;
+   struct rdma_umap_priv *priv = vmf->vma->vm_private_data;
+   vm_fault_t ret = 0;
+
+   if (!priv)
+   return VM_FAULT_SIGBUS;
+
+   /* Read only pages can just use the system zero page. */
+   if (!(vmf->vma->vm_flags & (VM_WRITE | VM_MAYWRITE))) {
+   vmf->page = ZERO_PAGE(vmf->vm_start);
+   get_page(vmf->page);
+   return 0;
+   }
+
+   mutex_lock(>umap_lock);
+   if (!ufile->disassociate_page)
+   ufile->disassociate_page =
+   alloc_pages(vmf->gfp_mask | __GFP_ZERO, 0);
+
+   if (ufile->disassociate_page) {
+   /*
+* This VMA is forced to always be shared so this doesn't have
+* to worry about COW.
+*/
+   vmf->page = ufile->disassociate_page;
+   get_page(vmf->page);
+   } else {
+   ret = VM_FAULT_SIGBUS;
+   }
+   mutex_unlock(>umap_lock);
+
+   return ret;
+}
+
 static const struct vm_operations_struct rdma_umap_ops = {
.open = rdma_umap_open,
.close = rdma_umap_close,
+   .fault = rdma_umap_fault,
 };
 
 static struct rdma_umap_priv *rdma_user_mmap_pre(struct ib_ucontext *ucontext,
@@ -888,6 +932,9 @@ static struct rdma_umap_priv *rdma_user_
struct ib_uverbs_file *ufile = ucontext->ufile;
struct rdma_umap_priv *priv;
 
+   if (!(vma->vm_flags & VM_SHARED))
+   return ERR_PTR(-EINVAL);
+
if (vma->vm_end - vma->vm_start != size)
return ERR_PTR(-EINVAL);
 
@@ -991,7 +1038,7 @@ void uverbs_user_mmap_disassociate(struc
 * at a time to get the lock ordering right. Typically there
 * will only be one mm, so no big deal.
 */
-   down_write(>mmap_sem);
+   down_read(>mmap_sem);
if (!mmget_still_valid(mm))
goto skip_mm;
mutex_lock(>umap_lock);
@@ -1005,11 +1052,10 @@ void uverbs_user_mmap_disassociate(struc
 
zap_vma_ptes(vma, vma->vm_start,
 vma->vm_end - vma->vm_start);
-   vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE);
}
mutex_unlock(>umap_lock);
skip_mm:
-   up_write(>mmap_sem);
+   up_read(>mmap_sem);
mmput(mm);
}
 }




Re: Question about sched_setaffinity()

2019-04-30 Thread Peter Zijlstra
On Tue, Apr 30, 2019 at 03:51:30AM -0700, Paul E. McKenney wrote:
> > Then I'm not entirely sure how we can return 0 and not run on the
> > expected CPU. If we look at __set_cpus_allowed_ptr(), the only paths out
> > to 0 are:
> > 
> >  - if the mask didn't change
> >  - if we already run inside the new mask
> >  - if we migrated ourself with the stop-task
> >  - if we're not in fact running
> > 
> > That last case should never trigger in your circumstances, since @p ==
> > current and current is obviously running. But for completeness, the
> > wakeup of @p would do the task placement in that case.
> 
> Are there some diagnostics I could add that would help track this down,
> be it my bug or yours?

Maybe limited function trace combined with the scheduling tracepoints
would give clue.

Trouble is, I forever forget how to set that up properly :/ Maybe
something along these lines:

$ trace-cmd record -p function_graph -g sched_setaffinity -g migration_cpu_stop 
-e
sched_migrate_task -e sched_switch -e sched_wakeup

Also useful would be:

echo 1 > /proc/sys/kernel/traceoff_on_warning

which ensures the trace stops the moment we find fail.


[PATCH 5.0 30/89] nfsd: Don't release the callback slot unless it was actually held

2019-04-30 Thread Greg Kroah-Hartman
From: Trond Myklebust 

commit e6abc8caa6deb14be2a206253f7e1c5e37e9515b upstream.

If there are multiple callbacks queued, waiting for the callback
slot when the callback gets shut down, then they all currently
end up acting as if they hold the slot, and call
nfsd4_cb_sequence_done() resulting in interesting side-effects.

In addition, the 'retry_nowait' path in nfsd4_cb_sequence_done()
causes a loop back to nfsd4_cb_prepare() without first freeing the
slot, which causes a deadlock when nfsd41_cb_get_slot() gets called
a second time.

This patch therefore adds a boolean to track whether or not the
callback did pick up the slot, so that it can do the right thing
in these 2 cases.

Cc: sta...@vger.kernel.org
Signed-off-by: Trond Myklebust 
Signed-off-by: J. Bruce Fields 
Signed-off-by: Greg Kroah-Hartman 

---
 fs/nfsd/nfs4callback.c |8 +++-
 fs/nfsd/state.h|1 +
 2 files changed, 8 insertions(+), 1 deletion(-)

--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -1023,8 +1023,9 @@ static void nfsd4_cb_prepare(struct rpc_
cb->cb_seq_status = 1;
cb->cb_status = 0;
if (minorversion) {
-   if (!nfsd41_cb_get_slot(clp, task))
+   if (!cb->cb_holds_slot && !nfsd41_cb_get_slot(clp, task))
return;
+   cb->cb_holds_slot = true;
}
rpc_call_start(task);
 }
@@ -1051,6 +1052,9 @@ static bool nfsd4_cb_sequence_done(struc
return true;
}
 
+   if (!cb->cb_holds_slot)
+   goto need_restart;
+
switch (cb->cb_seq_status) {
case 0:
/*
@@ -1089,6 +1093,7 @@ static bool nfsd4_cb_sequence_done(struc
cb->cb_seq_status);
}
 
+   cb->cb_holds_slot = false;
clear_bit(0, >cl_cb_slot_busy);
rpc_wake_up_next(>cl_cb_waitq);
dprintk("%s: freed slot, new seqid=%d\n", __func__,
@@ -1296,6 +1301,7 @@ void nfsd4_init_cb(struct nfsd4_callback
cb->cb_seq_status = 1;
cb->cb_status = 0;
cb->cb_need_restart = false;
+   cb->cb_holds_slot = false;
 }
 
 void nfsd4_run_cb(struct nfsd4_callback *cb)
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -70,6 +70,7 @@ struct nfsd4_callback {
int cb_seq_status;
int cb_status;
bool cb_need_restart;
+   bool cb_holds_slot;
 };
 
 struct nfsd4_callback_ops {




[PATCH 5.0 35/89] Input: synaptics-rmi4 - write config register values to the right offset

2019-04-30 Thread Greg Kroah-Hartman
From: Lucas Stach 

commit 3a349763cf11e63534b8f2d302f2d0c790566497 upstream.

Currently any changed config register values don't take effect, as the
function to write them back is called with the wrong register offset.

Fixes: ff8f83708b3e (Input: synaptics-rmi4 - add support for 2D
 sensors and F11)
Signed-off-by: Lucas Stach 
Reviewed-by: Philipp Zabel 
Cc: sta...@vger.kernel.org
Signed-off-by: Dmitry Torokhov 
Signed-off-by: Greg Kroah-Hartman 

---
 drivers/input/rmi4/rmi_f11.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/drivers/input/rmi4/rmi_f11.c
+++ b/drivers/input/rmi4/rmi_f11.c
@@ -1230,7 +1230,7 @@ static int rmi_f11_initialize(struct rmi
}
 
rc = f11_write_control_regs(fn, >sens_query,
-  >dev_controls, fn->fd.query_base_addr);
+  >dev_controls, fn->fd.control_base_addr);
if (rc)
dev_warn(>dev, "Failed to write control registers\n");
 




[PATCH 5.0 13/89] crypto: xts - Fix atomic sleep when walking skcipher

2019-04-30 Thread Greg Kroah-Hartman
From: Herbert Xu 

commit 44427c0fbc09b448b22410978a4ef6ee37599d25 upstream.

When we perform a walk in the completion function, we need to ensure
that it is atomic.

Reported-by: syzbot+6f72c20560060c98b...@syzkaller.appspotmail.com
Fixes: 78105c7e769b ("crypto: xts - Drop use of auxiliary buffer")
Cc: 
Signed-off-by: Herbert Xu 
Acked-by: Ondrej Mosnacek 
Signed-off-by: Herbert Xu 
Signed-off-by: Greg Kroah-Hartman 

---
 crypto/xts.c |6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

--- a/crypto/xts.c
+++ b/crypto/xts.c
@@ -137,8 +137,12 @@ static void crypt_done(struct crypto_asy
 {
struct skcipher_request *req = areq->data;
 
-   if (!err)
+   if (!err) {
+   struct rctx *rctx = skcipher_request_ctx(req);
+
+   rctx->subreq.base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
err = xor_tweak_post(req);
+   }
 
skcipher_request_complete(req, err);
 }




[PATCH 5.0 33/89] sunrpc: don't mark uninitialised items as VALID.

2019-04-30 Thread Greg Kroah-Hartman
From: NeilBrown 

commit d58431eacb226222430940134d97bfd72f292fcd upstream.

A recent commit added a call to cache_fresh_locked()
when an expired item was found.
The call sets the CACHE_VALID flag, so it is important
that the item actually is valid.
There are two ways it could be valid:
1/ If ->update has been called to fill in relevant content
2/ if CACHE_NEGATIVE is set, to say that content doesn't exist.

An expired item that is waiting for an update will be neither.
Setting CACHE_VALID will mean that a subsequent call to cache_put()
will be likely to dereference uninitialised pointers.

So we must make sure the item is valid, and we already have code to do
that in try_to_negate_entry().  This takes the hash lock and so cannot
be used directly, so take out the two lines that we need and use them.

Now cache_fresh_locked() is certain to be called only on
a valid item.

Cc: sta...@kernel.org # 2.6.35
Fixes: 4ecd55ea0742 ("sunrpc: fix cache_head leak due to queued request")
Signed-off-by: NeilBrown 
Signed-off-by: J. Bruce Fields 
Signed-off-by: Greg Kroah-Hartman 

---
 net/sunrpc/cache.c |3 +++
 1 file changed, 3 insertions(+)

--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -54,6 +54,7 @@ static void cache_init(struct cache_head
h->last_refresh = now;
 }
 
+static inline int cache_is_valid(struct cache_head *h);
 static void cache_fresh_locked(struct cache_head *head, time_t expiry,
struct cache_detail *detail);
 static void cache_fresh_unlocked(struct cache_head *head,
@@ -105,6 +106,8 @@ static struct cache_head *sunrpc_cache_a
if (cache_is_expired(detail, tmp)) {
hlist_del_init_rcu(>cache_list);
detail->entries --;
+   if (cache_is_valid(tmp) == -EAGAIN)
+   set_bit(CACHE_NEGATIVE, >flags);
cache_fresh_locked(tmp, 0, detail);
freeme = tmp;
break;




[PATCH 5.0 37/89] dmaengine: sh: rcar-dmac: With cyclic DMA residue 0 is valid

2019-04-30 Thread Greg Kroah-Hartman
From: Dirk Behme 

commit 907bd68a2edc491849e2fdcfe52c4596627bca94 upstream.

Having a cyclic DMA, a residue 0 is not an indication of a completed
DMA. In case of cyclic DMA make sure that dma_set_residue() is called
and with this a residue of 0 is forwarded correctly to the caller.

Fixes: 3544d2878817 ("dmaengine: rcar-dmac: use result of updated get_residue 
in tx_status")
Signed-off-by: Dirk Behme 
Signed-off-by: Achim Dahlhoff 
Signed-off-by: Hiroyuki Yokoyama 
Signed-off-by: Yao Lihua 
Reviewed-by: Yoshihiro Shimoda 
Reviewed-by: Laurent Pinchart 
Cc:  # v4.8+
Signed-off-by: Vinod Koul 
Signed-off-by: Greg Kroah-Hartman 

---
 drivers/dma/sh/rcar-dmac.c |4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

--- a/drivers/dma/sh/rcar-dmac.c
+++ b/drivers/dma/sh/rcar-dmac.c
@@ -1368,6 +1368,7 @@ static enum dma_status rcar_dmac_tx_stat
enum dma_status status;
unsigned long flags;
unsigned int residue;
+   bool cyclic;
 
status = dma_cookie_status(chan, cookie, txstate);
if (status == DMA_COMPLETE || !txstate)
@@ -1375,10 +1376,11 @@ static enum dma_status rcar_dmac_tx_stat
 
spin_lock_irqsave(>lock, flags);
residue = rcar_dmac_chan_get_residue(rchan, cookie);
+   cyclic = rchan->desc.running ? rchan->desc.running->cyclic : false;
spin_unlock_irqrestore(>lock, flags);
 
/* if there's no residue, the cookie is complete */
-   if (!residue)
+   if (!residue && !cyclic)
return DMA_COMPLETE;
 
dma_set_residue(txstate, residue);




[PATCH 5.0 12/89] tracing: Fix buffer_ref pipe ops

2019-04-30 Thread Greg Kroah-Hartman
From: Jann Horn 

commit b987222654f84f7b4ca95b3a55eca784cb30235b upstream.

This fixes multiple issues in buffer_pipe_buf_ops:

 - The ->steal() handler must not return zero unless the pipe buffer has
   the only reference to the page. But generic_pipe_buf_steal() assumes
   that every reference to the pipe is tracked by the page's refcount,
   which isn't true for these buffers - buffer_pipe_buf_get(), which
   duplicates a buffer, doesn't touch the page's refcount.
   Fix it by using generic_pipe_buf_nosteal(), which refuses every
   attempted theft. It should be easy to actually support ->steal, but the
   only current users of pipe_buf_steal() are the virtio console and FUSE,
   and they also only use it as an optimization. So it's probably not worth
   the effort.
 - The ->get() and ->release() handlers can be invoked concurrently on pipe
   buffers backed by the same struct buffer_ref. Make them safe against
   concurrency by using refcount_t.
 - The pointers stored in ->private were only zeroed out when the last
   reference to the buffer_ref was dropped. As far as I know, this
   shouldn't be necessary anyway, but if we do it, let's always do it.

Link: http://lkml.kernel.org/r/20190404215925.253531-1-ja...@google.com

Cc: Ingo Molnar 
Cc: Masami Hiramatsu 
Cc: Al Viro 
Cc: sta...@vger.kernel.org
Fixes: 73a757e63114d ("ring-buffer: Return reader page back into existing ring 
buffer")
Signed-off-by: Jann Horn 
Signed-off-by: Steven Rostedt (VMware) 
Signed-off-by: Greg Kroah-Hartman 

---
 fs/splice.c   |4 ++--
 include/linux/pipe_fs_i.h |1 +
 kernel/trace/trace.c  |   28 ++--
 3 files changed, 17 insertions(+), 16 deletions(-)

--- a/fs/splice.c
+++ b/fs/splice.c
@@ -333,8 +333,8 @@ const struct pipe_buf_operations default
.get = generic_pipe_buf_get,
 };
 
-static int generic_pipe_buf_nosteal(struct pipe_inode_info *pipe,
-   struct pipe_buffer *buf)
+int generic_pipe_buf_nosteal(struct pipe_inode_info *pipe,
+struct pipe_buffer *buf)
 {
return 1;
 }
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -181,6 +181,7 @@ void free_pipe_info(struct pipe_inode_in
 void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *);
 int generic_pipe_buf_confirm(struct pipe_inode_info *, struct pipe_buffer *);
 int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *);
+int generic_pipe_buf_nosteal(struct pipe_inode_info *, struct pipe_buffer *);
 void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *);
 void pipe_buf_mark_unmergeable(struct pipe_buffer *buf);
 
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -6823,19 +6823,23 @@ struct buffer_ref {
struct ring_buffer  *buffer;
void*page;
int cpu;
-   int ref;
+   refcount_t  refcount;
 };
 
+static void buffer_ref_release(struct buffer_ref *ref)
+{
+   if (!refcount_dec_and_test(>refcount))
+   return;
+   ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
+   kfree(ref);
+}
+
 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
 {
struct buffer_ref *ref = (struct buffer_ref *)buf->private;
 
-   if (--ref->ref)
-   return;
-
-   ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
-   kfree(ref);
+   buffer_ref_release(ref);
buf->private = 0;
 }
 
@@ -6844,7 +6848,7 @@ static void buffer_pipe_buf_get(struct p
 {
struct buffer_ref *ref = (struct buffer_ref *)buf->private;
 
-   ref->ref++;
+   refcount_inc(>refcount);
 }
 
 /* Pipe buffer operations for a buffer. */
@@ -6852,7 +6856,7 @@ static const struct pipe_buf_operations
.can_merge  = 0,
.confirm= generic_pipe_buf_confirm,
.release= buffer_pipe_buf_release,
-   .steal  = generic_pipe_buf_steal,
+   .steal  = generic_pipe_buf_nosteal,
.get= buffer_pipe_buf_get,
 };
 
@@ -6865,11 +6869,7 @@ static void buffer_spd_release(struct sp
struct buffer_ref *ref =
(struct buffer_ref *)spd->partial[i].private;
 
-   if (--ref->ref)
-   return;
-
-   ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
-   kfree(ref);
+   buffer_ref_release(ref);
spd->partial[i].private = 0;
 }
 
@@ -6924,7 +6924,7 @@ tracing_buffers_splice_read(struct file
break;
}
 
-   ref->ref = 1;
+   refcount_set(>refcount, 1);
ref->buffer = iter->trace_buffer->buffer;
ref->page = ring_buffer_alloc_read_page(ref->buffer, 
iter->cpu_file);
if 

[PATCH 5.0 15/89] gpio: eic: sprd: Fix incorrect irq type setting for the sync EIC

2019-04-30 Thread Greg Kroah-Hartman
From: Baolin Wang 

commit 102bbe34b31c9159e714432afd64458f6f3876d7 upstream.

When setting sync EIC as IRQ_TYPE_EDGE_BOTH type, we missed to set the
SPRD_EIC_SYNC_INTMODE register to 0, which means detecting edge signals.

Thus this patch fixes the issue.

Fixes: 25518e024e3a ("gpio: Add Spreadtrum EIC driver support")
Cc: 
Signed-off-by: Baolin Wang 
Signed-off-by: Linus Walleij 
Signed-off-by: Greg Kroah-Hartman 

---
 drivers/gpio/gpio-eic-sprd.c |1 +
 1 file changed, 1 insertion(+)

--- a/drivers/gpio/gpio-eic-sprd.c
+++ b/drivers/gpio/gpio-eic-sprd.c
@@ -414,6 +414,7 @@ static int sprd_eic_irq_set_type(struct
irq_set_handler_locked(data, handle_edge_irq);
break;
case IRQ_TYPE_EDGE_BOTH:
+   sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTMODE, 0);
sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTBOTH, 1);
irq_set_handler_locked(data, handle_edge_irq);
break;




[PATCH 5.0 43/89] drm/ttm: fix re-init of global structures

2019-04-30 Thread Greg Kroah-Hartman
From: Christian König 

commit bd4264112f93045704731850c5e4d85db981cd85 upstream.

When a driver unloads without unloading TTM we don't correctly
clear the global structures leading to errors on re-init.

Next step should probably be to remove the global structures and
kobjs all together, but this is tricky since we need to maintain
backward compatibility.

Signed-off-by: Christian König 
Reviewed-by: Karol Herbst 
Tested-by: Karol Herbst 
CC: sta...@vger.kernel.org # 5.0.x
Signed-off-by: Alex Deucher 
Signed-off-by: Greg Kroah-Hartman 

---
 drivers/gpu/drm/ttm/ttm_bo.c |   10 +-
 drivers/gpu/drm/ttm/ttm_memory.c |5 +++--
 include/drm/ttm/ttm_bo_driver.h  |1 -
 3 files changed, 8 insertions(+), 8 deletions(-)

--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -49,9 +49,8 @@ static void ttm_bo_global_kobj_release(s
  * ttm_global_mutex - protecting the global BO state
  */
 DEFINE_MUTEX(ttm_global_mutex);
-struct ttm_bo_global ttm_bo_glob = {
-   .use_count = 0
-};
+unsigned ttm_bo_glob_use_count;
+struct ttm_bo_global ttm_bo_glob;
 
 static struct attribute ttm_bo_count = {
.name = "bo_count",
@@ -1535,12 +1534,13 @@ static void ttm_bo_global_release(void)
struct ttm_bo_global *glob = _bo_glob;
 
mutex_lock(_global_mutex);
-   if (--glob->use_count > 0)
+   if (--ttm_bo_glob_use_count > 0)
goto out;
 
kobject_del(>kobj);
kobject_put(>kobj);
ttm_mem_global_release(_mem_glob);
+   memset(glob, 0, sizeof(*glob));
 out:
mutex_unlock(_global_mutex);
 }
@@ -1552,7 +1552,7 @@ static int ttm_bo_global_init(void)
unsigned i;
 
mutex_lock(_global_mutex);
-   if (++glob->use_count > 1)
+   if (++ttm_bo_glob_use_count > 1)
goto out;
 
ret = ttm_mem_global_init(_mem_glob);
--- a/drivers/gpu/drm/ttm/ttm_memory.c
+++ b/drivers/gpu/drm/ttm/ttm_memory.c
@@ -461,8 +461,8 @@ out_no_zone:
 
 void ttm_mem_global_release(struct ttm_mem_global *glob)
 {
-   unsigned int i;
struct ttm_mem_zone *zone;
+   unsigned int i;
 
/* let the page allocator first stop the shrink work. */
ttm_page_alloc_fini();
@@ -475,9 +475,10 @@ void ttm_mem_global_release(struct ttm_m
zone = glob->zones[i];
kobject_del(>kobj);
kobject_put(>kobj);
-   }
+   }
kobject_del(>kobj);
kobject_put(>kobj);
+   memset(glob, 0, sizeof(*glob));
 }
 
 static void ttm_check_swapping(struct ttm_mem_global *glob)
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -411,7 +411,6 @@ extern struct ttm_bo_global {
/**
 * Protected by ttm_global_mutex.
 */
-   unsigned int use_count;
struct list_head device_list;
 
/**




[PATCH 5.0 57/89] netfilter: ebtables: CONFIG_COMPAT: drop a bogus WARN_ON

2019-04-30 Thread Greg Kroah-Hartman
From: Florian Westphal 

commit 7caa56f006e9d712b44f27b32520c66420d5cbc6 upstream.

It means userspace gave us a ruleset where there is some other
data after the ebtables target but before the beginning of the next rule.

Fixes: 81e675c227ec ("netfilter: ebtables: add CONFIG_COMPAT support")
Reported-by: syzbot+659574e7bcc7f7eb4...@syzkaller.appspotmail.com
Signed-off-by: Florian Westphal 
Signed-off-by: Pablo Neira Ayuso 
Signed-off-by: Greg Kroah-Hartman 

---
 net/bridge/netfilter/ebtables.c |3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -2032,7 +2032,8 @@ static int ebt_size_mwt(struct compat_eb
if (match_kern)
match_kern->match_size = ret;
 
-   if (WARN_ON(type == EBT_COMPAT_TARGET && size_left))
+   /* rule should have no remaining data after target */
+   if (type == EBT_COMPAT_TARGET && size_left)
return -EINVAL;
 
match32 = (struct compat_ebt_entry_mwt *) buf;




[PATCH 5.0 44/89] Revert "drm/i915/fbdev: Actually configure untiled displays"

2019-04-30 Thread Greg Kroah-Hartman
From: Dave Airlie 

commit 9fa246256e09dc30820524401cdbeeaadee94025 upstream.

This reverts commit d179b88deb3bf6fed4991a31fd6f0f2cad21fab5.

This commit is documented to break userspace X.org modesetting driver in 
certain configurations.

The X.org modesetting userspace driver is broken. No fixes are available yet. 
In order for this patch to be applied it either needs a config option or a 
workaround developed.

This has been reported a few times, saying it's a userspace problem is clearly 
against the regression rules.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109806
Signed-off-by: Dave Airlie 
Cc:  # v3.19+
Signed-off-by: Greg Kroah-Hartman 

---
 drivers/gpu/drm/i915/intel_fbdev.c |   12 +---
 1 file changed, 5 insertions(+), 7 deletions(-)

--- a/drivers/gpu/drm/i915/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/intel_fbdev.c
@@ -336,8 +336,8 @@ static bool intel_fb_initial_config(stru
bool *enabled, int width, int height)
 {
struct drm_i915_private *dev_priv = to_i915(fb_helper->dev);
+   unsigned long conn_configured, conn_seq, mask;
unsigned int count = min(fb_helper->connector_count, BITS_PER_LONG);
-   unsigned long conn_configured, conn_seq;
int i, j;
bool *save_enabled;
bool fallback = true, ret = true;
@@ -355,9 +355,10 @@ static bool intel_fb_initial_config(stru
drm_modeset_backoff();
 
memcpy(save_enabled, enabled, count);
-   conn_seq = GENMASK(count - 1, 0);
+   mask = GENMASK(count - 1, 0);
conn_configured = 0;
 retry:
+   conn_seq = conn_configured;
for (i = 0; i < count; i++) {
struct drm_fb_helper_connector *fb_conn;
struct drm_connector *connector;
@@ -370,8 +371,7 @@ retry:
if (conn_configured & BIT(i))
continue;
 
-   /* First pass, only consider tiled connectors */
-   if (conn_seq == GENMASK(count - 1, 0) && !connector->has_tile)
+   if (conn_seq == 0 && !connector->has_tile)
continue;
 
if (connector->status == connector_status_connected)
@@ -475,10 +475,8 @@ retry:
conn_configured |= BIT(i);
}
 
-   if (conn_configured != conn_seq) { /* repeat until no more are found */
-   conn_seq = conn_configured;
+   if ((conn_configured & mask) != mask && conn_configured != conn_seq)
goto retry;
-   }
 
/*
 * If the BIOS didn't enable everything it could, fall back to have the




[PATCH 5.0 46/89] USB: Add new USB LPM helpers

2019-04-30 Thread Greg Kroah-Hartman
From: Kai-Heng Feng 

commit 7529b2574a7aaf902f1f8159fbc2a7caa74be559 upstream.

Use new helpers to make LPM enabling/disabling more clear.

This is a preparation to subsequent patch.

Signed-off-by: Kai-Heng Feng 
Cc: stable  # after much soaking
Signed-off-by: Greg Kroah-Hartman 

---
 drivers/usb/core/driver.c  |   12 +++-
 drivers/usb/core/hub.c |   12 ++--
 drivers/usb/core/message.c |2 +-
 drivers/usb/core/sysfs.c   |5 -
 drivers/usb/core/usb.h |   10 --
 5 files changed, 30 insertions(+), 11 deletions(-)

--- a/drivers/usb/core/driver.c
+++ b/drivers/usb/core/driver.c
@@ -1896,7 +1896,7 @@ int usb_runtime_idle(struct device *dev)
return -EBUSY;
 }
 
-int usb_set_usb2_hardware_lpm(struct usb_device *udev, int enable)
+static int usb_set_usb2_hardware_lpm(struct usb_device *udev, int enable)
 {
struct usb_hcd *hcd = bus_to_hcd(udev->bus);
int ret = -EPERM;
@@ -1913,6 +1913,16 @@ int usb_set_usb2_hardware_lpm(struct usb
return ret;
 }
 
+int usb_enable_usb2_hardware_lpm(struct usb_device *udev)
+{
+   return usb_set_usb2_hardware_lpm(udev, 1);
+}
+
+int usb_disable_usb2_hardware_lpm(struct usb_device *udev)
+{
+   return usb_set_usb2_hardware_lpm(udev, 0);
+}
+
 #endif /* CONFIG_PM */
 
 struct bus_type usb_bus_type = {
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -3221,7 +3221,7 @@ int usb_port_suspend(struct usb_device *
 
/* disable USB2 hardware LPM */
if (udev->usb2_hw_lpm_enabled == 1)
-   usb_set_usb2_hardware_lpm(udev, 0);
+   usb_disable_usb2_hardware_lpm(udev);
 
if (usb_disable_ltm(udev)) {
dev_err(>dev, "Failed to disable LTM before suspend\n");
@@ -3260,7 +3260,7 @@ int usb_port_suspend(struct usb_device *
  err_ltm:
/* Try to enable USB2 hardware LPM again */
if (udev->usb2_hw_lpm_capable == 1)
-   usb_set_usb2_hardware_lpm(udev, 1);
+   usb_enable_usb2_hardware_lpm(udev);
 
if (udev->do_remote_wakeup)
(void) usb_disable_remote_wakeup(udev);
@@ -3544,7 +3544,7 @@ int usb_port_resume(struct usb_device *u
} else  {
/* Try to enable USB2 hardware LPM */
if (udev->usb2_hw_lpm_capable == 1)
-   usb_set_usb2_hardware_lpm(udev, 1);
+   usb_enable_usb2_hardware_lpm(udev);
 
/* Try to enable USB3 LTM */
usb_enable_ltm(udev);
@@ -4435,7 +4435,7 @@ static void hub_set_initial_usb2_lpm_pol
if ((udev->bos->ext_cap->bmAttributes & cpu_to_le32(USB_BESL_SUPPORT)) 
||
connect_type == USB_PORT_CONNECT_TYPE_HARD_WIRED) {
udev->usb2_hw_lpm_allowed = 1;
-   usb_set_usb2_hardware_lpm(udev, 1);
+   usb_enable_usb2_hardware_lpm(udev);
}
 }
 
@@ -5650,7 +5650,7 @@ static int usb_reset_and_verify_device(s
 * It will be re-enabled by the enumeration process.
 */
if (udev->usb2_hw_lpm_enabled == 1)
-   usb_set_usb2_hardware_lpm(udev, 0);
+   usb_disable_usb2_hardware_lpm(udev);
 
/* Disable LPM while we reset the device and reinstall the alt settings.
 * Device-initiated LPM, and system exit latency settings are cleared
@@ -5753,7 +5753,7 @@ static int usb_reset_and_verify_device(s
 
 done:
/* Now that the alt settings are re-installed, enable LTM and LPM. */
-   usb_set_usb2_hardware_lpm(udev, 1);
+   usb_enable_usb2_hardware_lpm(udev);
usb_unlocked_enable_lpm(udev);
usb_enable_ltm(udev);
usb_release_bos_descriptor(udev);
--- a/drivers/usb/core/message.c
+++ b/drivers/usb/core/message.c
@@ -1244,7 +1244,7 @@ void usb_disable_device(struct usb_devic
}
 
if (dev->usb2_hw_lpm_enabled == 1)
-   usb_set_usb2_hardware_lpm(dev, 0);
+   usb_disable_usb2_hardware_lpm(dev);
usb_unlocked_disable_lpm(dev);
usb_disable_ltm(dev);
 
--- a/drivers/usb/core/sysfs.c
+++ b/drivers/usb/core/sysfs.c
@@ -528,7 +528,10 @@ static ssize_t usb2_hardware_lpm_store(s
 
if (!ret) {
udev->usb2_hw_lpm_allowed = value;
-   ret = usb_set_usb2_hardware_lpm(udev, value);
+   if (value)
+   ret = usb_enable_usb2_hardware_lpm(udev);
+   else
+   ret = usb_disable_usb2_hardware_lpm(udev);
}
 
usb_unlock_device(udev);
--- a/drivers/usb/core/usb.h
+++ b/drivers/usb/core/usb.h
@@ -92,7 +92,8 @@ extern int usb_remote_wakeup(struct usb_
 extern int usb_runtime_suspend(struct device *dev);
 extern int usb_runtime_resume(struct device *dev);
 extern int usb_runtime_idle(struct device *dev);
-extern int usb_set_usb2_hardware_lpm(struct usb_device *udev, int enable);
+extern int 

[PATCH 5.0 48/89] ext4: fix some error pointer dereferences

2019-04-30 Thread Greg Kroah-Hartman
From: Dan Carpenter 

commit 7159a986b4202343f6cca3bb8079ecace5816fd6 upstream.

We can't pass error pointers to brelse().

Fixes: fb265c9cb49e ("ext4: add ext4_sb_bread() to disambiguate ENOMEM cases")
Signed-off-by: Dan Carpenter 
Signed-off-by: Theodore Ts'o 
Reviewed-by: Jan Kara 
Signed-off-by: Greg Kroah-Hartman 

---
 fs/ext4/xattr.c |3 +++
 1 file changed, 3 insertions(+)

--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -829,6 +829,7 @@ int ext4_get_inode_usage(struct inode *i
bh = ext4_sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl, 
REQ_PRIO);
if (IS_ERR(bh)) {
ret = PTR_ERR(bh);
+   bh = NULL;
goto out;
}
 
@@ -2903,6 +2904,7 @@ int ext4_xattr_delete_inode(handle_t *ha
if (error == -EIO)
EXT4_ERROR_INODE(inode, "block %llu read error",
 EXT4_I(inode)->i_file_acl);
+   bh = NULL;
goto cleanup;
}
error = ext4_xattr_check_block(inode, bh);
@@ -3059,6 +3061,7 @@ ext4_xattr_block_cache_find(struct inode
if (IS_ERR(bh)) {
if (PTR_ERR(bh) == -ENOMEM)
return NULL;
+   bh = NULL;
EXT4_ERROR_INODE(inode, "block %lu read error",
 (unsigned long)ce->e_value);
} else if (ext4_xattr_cmp(header, BHDR(bh)) == 0) {




[PATCH 5.0 60/89] tipc: check link name with right length in tipc_nl_compat_link_set

2019-04-30 Thread Greg Kroah-Hartman
From: Xin Long 

commit 8c63bf9ab4be8b83bd8c34aacfd2f1d2c8901c8a upstream.

A similar issue as fixed by Patch "tipc: check bearer name with right
length in tipc_nl_compat_bearer_enable" was also found by syzbot in
tipc_nl_compat_link_set().

The length to check with should be 'TLV_GET_DATA_LEN(msg->req) -
offsetof(struct tipc_link_config, name)'.

Reported-by: syzbot+de00a87b8644a582a...@syzkaller.appspotmail.com
Signed-off-by: Xin Long 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 

---
 net/tipc/netlink_compat.c |7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -777,7 +777,12 @@ static int tipc_nl_compat_link_set(struc
 
lc = (struct tipc_link_config *)TLV_DATA(msg->req);
 
-   len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_LINK_NAME);
+   len = TLV_GET_DATA_LEN(msg->req);
+   len -= offsetof(struct tipc_link_config, name);
+   if (len <= 0)
+   return -EINVAL;
+
+   len = min_t(int, len, TIPC_MAX_LINK_NAME);
if (!string_is_valid(lc->name, len))
return -EINVAL;
 




[PATCH 5.0 63/89] rxrpc: fix race condition in rxrpc_input_packet()

2019-04-30 Thread Greg Kroah-Hartman
From: Eric Dumazet 

commit 032be5f19a94de51093851757089133dcc1e92aa upstream.

After commit 5271953cad31 ("rxrpc: Use the UDP encap_rcv hook"),
rxrpc_input_packet() is directly called from lockless UDP receive
path, under rcu_read_lock() protection.

It must therefore use RCU rules :

- udp_sk->sk_user_data can be cleared at any point in this function.
  rcu_dereference_sk_user_data() is what we need here.

- Also, since sk_user_data might have been set in rxrpc_open_socket()
  we must observe a proper RCU grace period before kfree(local) in
  rxrpc_lookup_local()

v4: @local can be NULL in rxrpc_lookup_local() as reported by kbuild test robot 

and Julia Lawall , thanks !

v3,v2 : addressed David Howells feedback, thanks !

syzbot reported :

kasan: CONFIG_KASAN_INLINE enabled
kasan: GPF could be caused by NULL-ptr deref or user memory access
general protection fault:  [#1] PREEMPT SMP KASAN
CPU: 0 PID: 19236 Comm: syz-executor703 Not tainted 5.1.0-rc6 #79
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 
01/01/2011
RIP: 0010:__lock_acquire+0xbef/0x3fb0 kernel/locking/lockdep.c:3573
Code: 00 0f 85 a5 1f 00 00 48 81 c4 10 01 00 00 5b 41 5c 41 5d 41 5e 41 5f 5d 
c3 48 b8 00 00 00 00 00 fc ff df 4c 89 ea 48 c1 ea 03 <80> 3c 02 00 0f 85 4a 21 
00 00 49 81 7d 00 20 54 9c 89 0f 84 cf f4
RSP: 0018:88809d7aef58 EFLAGS: 00010002
RAX: dc00 RBX:  RCX: 
RDX: 0026 RSI:  RDI: 0001
RBP: 88809d7af090 R08: 0001 R09: 0001
R10: ed1015d05bc7 R11: 888089428600 R12: 
R13: 0130 R14: 0001 R15: 0001
FS:  7f059044d700() GS:8880ae80() knlGS:
CS:  0010 DS:  ES:  CR0: 80050033
CR2: 004b6040 CR3: 955ca000 CR4: 001406f0
Call Trace:
 lock_acquire+0x16f/0x3f0 kernel/locking/lockdep.c:4211
 __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline]
 _raw_spin_lock_irqsave+0x95/0xcd kernel/locking/spinlock.c:152
 skb_queue_tail+0x26/0x150 net/core/skbuff.c:2972
 rxrpc_reject_packet net/rxrpc/input.c:1126 [inline]
 rxrpc_input_packet+0x4a0/0x5536 net/rxrpc/input.c:1414
 udp_queue_rcv_one_skb+0xaf2/0x1780 net/ipv4/udp.c:2011
 udp_queue_rcv_skb+0x128/0x730 net/ipv4/udp.c:2085
 udp_unicast_rcv_skb.isra.0+0xb9/0x360 net/ipv4/udp.c:2245
 __udp4_lib_rcv+0x701/0x2ca0 net/ipv4/udp.c:2301
 udp_rcv+0x22/0x30 net/ipv4/udp.c:2482
 ip_protocol_deliver_rcu+0x60/0x8f0 net/ipv4/ip_input.c:208
 ip_local_deliver_finish+0x23b/0x390 net/ipv4/ip_input.c:234
 NF_HOOK include/linux/netfilter.h:289 [inline]
 NF_HOOK include/linux/netfilter.h:283 [inline]
 ip_local_deliver+0x1e9/0x520 net/ipv4/ip_input.c:255
 dst_input include/net/dst.h:450 [inline]
 ip_rcv_finish+0x1e1/0x300 net/ipv4/ip_input.c:413
 NF_HOOK include/linux/netfilter.h:289 [inline]
 NF_HOOK include/linux/netfilter.h:283 [inline]
 ip_rcv+0xe8/0x3f0 net/ipv4/ip_input.c:523
 __netif_receive_skb_one_core+0x115/0x1a0 net/core/dev.c:4987
 __netif_receive_skb+0x2c/0x1c0 net/core/dev.c:5099
 netif_receive_skb_internal+0x117/0x660 net/core/dev.c:5202
 napi_frags_finish net/core/dev.c:5769 [inline]
 napi_gro_frags+0xade/0xd10 net/core/dev.c:5843
 tun_get_user+0x2f24/0x3fb0 drivers/net/tun.c:1981
 tun_chr_write_iter+0xbd/0x156 drivers/net/tun.c:2027
 call_write_iter include/linux/fs.h:1866 [inline]
 do_iter_readv_writev+0x5e1/0x8e0 fs/read_write.c:681
 do_iter_write fs/read_write.c:957 [inline]
 do_iter_write+0x184/0x610 fs/read_write.c:938
 vfs_writev+0x1b3/0x2f0 fs/read_write.c:1002
 do_writev+0x15e/0x370 fs/read_write.c:1037
 __do_sys_writev fs/read_write.c:1110 [inline]
 __se_sys_writev fs/read_write.c:1107 [inline]
 __x64_sys_writev+0x75/0xb0 fs/read_write.c:1107
 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290
 entry_SYSCALL_64_after_hwframe+0x49/0xbe

Fixes: 5271953cad31 ("rxrpc: Use the UDP encap_rcv hook")
Signed-off-by: Eric Dumazet 
Reported-by: syzbot 
Acked-by: David Howells 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 

---
 net/rxrpc/input.c|   12 
 net/rxrpc/local_object.c |3 ++-
 2 files changed, 10 insertions(+), 5 deletions(-)

--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -1155,19 +1155,19 @@ int rxrpc_extract_header(struct rxrpc_sk
  * handle data received on the local endpoint
  * - may be called in interrupt context
  *
- * The socket is locked by the caller and this prevents the socket from being
- * shut down and the local endpoint from going away, thus sk_user_data will not
- * be cleared until this function returns.
+ * [!] Note that as this is called from the encap_rcv hook, the socket is not
+ * held locked by the caller and nothing prevents sk_user_data on the UDP from
+ * being cleared in the middle of processing this function.
  *
  * Called with the RCU read lock held from the IP layer via UDP.
  */
 int 

[PATCH 5.0 66/89] aio: keep io_event in aio_kiocb

2019-04-30 Thread Greg Kroah-Hartman
From: Al Viro 

commit a9339b7855094ba11a97e8822ae038135e879e79 upstream.

We want to separate forming the resulting io_event from putting it
into the ring buffer.

Signed-off-by: Al Viro 
Cc: Guenter Roeck 
Signed-off-by: Greg Kroah-Hartman 

---
 fs/aio.c |   31 +--
 1 file changed, 13 insertions(+), 18 deletions(-)

--- a/fs/aio.c
+++ b/fs/aio.c
@@ -204,8 +204,7 @@ struct aio_kiocb {
struct kioctx   *ki_ctx;
kiocb_cancel_fn *ki_cancel;
 
-   struct iocb __user  *ki_user_iocb;  /* user's aiocb */
-   __u64   ki_user_data;   /* user's data for completion */
+   struct io_event ki_res;
 
struct list_headki_list;/* the aio core uses this
 * for cancellation */
@@ -1084,15 +1083,6 @@ static inline void iocb_put(struct aio_k
iocb_destroy(iocb);
 }
 
-static void aio_fill_event(struct io_event *ev, struct aio_kiocb *iocb,
-  long res, long res2)
-{
-   ev->obj = (u64)(unsigned long)iocb->ki_user_iocb;
-   ev->data = iocb->ki_user_data;
-   ev->res = res;
-   ev->res2 = res2;
-}
-
 /* aio_complete
  * Called when the io request on the given iocb is complete.
  */
@@ -1104,6 +1094,8 @@ static void aio_complete(struct aio_kioc
unsigned tail, pos, head;
unsigned long   flags;
 
+   iocb->ki_res.res = res;
+   iocb->ki_res.res2 = res2;
/*
 * Add a completion event to the ring buffer. Must be done holding
 * ctx->completion_lock to prevent other code from messing with the tail
@@ -1120,14 +1112,14 @@ static void aio_complete(struct aio_kioc
ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
event = ev_page + pos % AIO_EVENTS_PER_PAGE;
 
-   aio_fill_event(event, iocb, res, res2);
+   *event = iocb->ki_res;
 
kunmap_atomic(ev_page);
flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
 
-   pr_debug("%p[%u]: %p: %p %Lx %lx %lx\n",
-ctx, tail, iocb, iocb->ki_user_iocb, iocb->ki_user_data,
-res, res2);
+   pr_debug("%p[%u]: %p: %p %Lx %Lx %Lx\n", ctx, tail, iocb,
+(void __user *)(unsigned long)iocb->ki_res.obj,
+iocb->ki_res.data, iocb->ki_res.res, iocb->ki_res.res2);
 
/* after flagging the request as done, we
 * must never even look at it again
@@ -1844,8 +1836,10 @@ static int __io_submit_one(struct kioctx
goto out_put_req;
}
 
-   req->ki_user_iocb = user_iocb;
-   req->ki_user_data = iocb->aio_data;
+   req->ki_res.obj = (u64)(unsigned long)user_iocb;
+   req->ki_res.data = iocb->aio_data;
+   req->ki_res.res = 0;
+   req->ki_res.res2 = 0;
 
switch (iocb->aio_lio_opcode) {
case IOCB_CMD_PREAD:
@@ -2019,6 +2013,7 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t
struct aio_kiocb *kiocb;
int ret = -EINVAL;
u32 key;
+   u64 obj = (u64)(unsigned long)iocb;
 
if (unlikely(get_user(key, >aio_key)))
return -EFAULT;
@@ -2032,7 +2027,7 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t
spin_lock_irq(>ctx_lock);
/* TODO: use a hash or array, this sucks. */
list_for_each_entry(kiocb, >active_reqs, ki_list) {
-   if (kiocb->ki_user_iocb == iocb) {
+   if (kiocb->ki_res.obj == obj) {
ret = kiocb->ki_cancel(>rw);
list_del_init(>ki_list);
break;




Re: [PATCH v1 1/2] Add polling support to pidfd

2019-04-30 Thread Oleg Nesterov
On 04/29, Joel Fernandes wrote:
>
> On Mon, Apr 29, 2019 at 04:20:30PM +0200, Oleg Nesterov wrote:
> > On 04/29, Joel Fernandes wrote:
> > >
> > > However, in your code above, it is avoided because we get:
> > >
> > > Task A (poller)   Task B (exiting task being polled)
> > > 
> > > poll() called
> > > add_wait_queue()
> > >   exit_state is set to non-zero
> > > read exit_state
> > > remove_wait_queue()
> > >   wake_up_all()
> >
> > just to clarify... No, sys_poll() path doesn't do remove_wait_queue() until
> > it returns to user mode, and that is why we can't race with set-exit_code +
> > wake_up().
>
> I didn't follow what you mean, the removal from the waitqueue happens in
> free_poll_entry() called from poll_freewait() which happens from
> do_sys_poll() which is before the syscall returns to user mode. Could you
> explain more?

Hmm. I do not really understand the question... Sure, do_sys_poll() does
poll_freewait() before sysret or even before return from syscall, but why
does this matter? This is the exit path, it frees the memory, does fput(),
etc, f_op->poll() won't be called after that.

> > pidfd_poll() can race with the exiting task, miss exit_code != 0, and return
> > zero. However, do_poll() won't block after that and pidfd_poll() will be 
> > called
> > again.
>
> Here also I didn't follow what you mean. If exit_code is read as 0 in
> pidfd_poll(), then in do_poll() the count will be 0 and it will block in
> poll_schedule_timeout(). Right?

No. Please note the pwq->triggered check and please read __pollwake().

But if you want to understand this you can forget about poll/select. It is
a bit complicated, in particular because it has to do set_current_state()
right before schedule() and thus it plays games with pwq->triggered. But in
essence this doesn't differ too much from the plain wait_event-like code
(although you can also look at wait_woken/woken_wake_function).

If remove_wait_queue() could happen before wake_up_all() (like in your pseudo-
code above), then pidfd_poll() or any other ->poll() method could miss _both_
the condition and wakeup. But sys_poll() doesn't do this, so it is fine to miss
the condition and rely on wake_up_all() which ensures we won't block and the
next iteration must see condition == T.

Oleg.



[PATCH 5.0 49/89] loop: do not print warn message if partition scan is successful

2019-04-30 Thread Greg Kroah-Hartman
From: Dongli Zhang 

commit 40853d6fc619a6fd3d3177c3973a2eac9b598a80 upstream.

Do not print warn message when the partition scan returns 0.

Fixes: d57f3374ba48 ("loop: Move special partition reread handling in 
loop_clr_fd()")
Signed-off-by: Dongli Zhang 
Reviewed-by: Jan Kara 
Signed-off-by: Jens Axboe 
Signed-off-by: Greg Kroah-Hartman 

---
 drivers/block/loop.c |5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1111,8 +1111,9 @@ out_unlock:
err = __blkdev_reread_part(bdev);
else
err = blkdev_reread_part(bdev);
-   pr_warn("%s: partition scan of loop%d failed (rc=%d)\n",
-   __func__, lo_number, err);
+   if (err)
+   pr_warn("%s: partition scan of loop%d failed (rc=%d)\n",
+   __func__, lo_number, err);
/* Device is gone, no point in returning error */
err = 0;
}




[PATCH 5.0 68/89] Fix aio_poll() races

2019-04-30 Thread Greg Kroah-Hartman
From: Al Viro 

commit af5c72b1fc7a00aa484e90b0c4e0eeb582545634 upstream.

aio_poll() has to cope with several unpleasant problems:
* requests that might stay around indefinitely need to
be made visible for io_cancel(2); that must not be done to
a request already completed, though.
* in cases when ->poll() has placed us on a waitqueue,
wakeup might have happened (and request completed) before ->poll()
returns.
* worse, in some early wakeup cases request might end
up re-added into the queue later - we can't treat "woken up and
currently not in the queue" as "it's not going to stick around
indefinitely"
* ... moreover, ->poll() might have decided not to
put it on any queues to start with, and that needs to be distinguished
from the previous case
* ->poll() might have tried to put us on more than one queue.
Only the first will succeed for aio poll, so we might end up missing
wakeups.  OTOH, we might very well notice that only after the
wakeup hits and request gets completed (all before ->poll() gets
around to the second poll_wait()).  In that case it's too late to
decide that we have an error.

req->woken was an attempt to deal with that.  Unfortunately, it was
broken.  What we need to keep track of is not that wakeup has happened -
the thing might come back after that.  It's that async reference is
already gone and won't come back, so we can't (and needn't) put the
request on the list of cancellables.

The easiest case is "request hadn't been put on any waitqueues"; we
can tell by seeing NULL apt.head, and in that case there won't be
anything async.  We should either complete the request ourselves
(if vfs_poll() reports anything of interest) or return an error.

In all other cases we get exclusion with wakeups by grabbing the
queue lock.

If request is currently on queue and we have something interesting
from vfs_poll(), we can steal it and complete the request ourselves.

If it's on queue and vfs_poll() has not reported anything interesting,
we either put it on the cancellable list, or, if we know that it
hadn't been put on all queues ->poll() wanted it on, we steal it and
return an error.

If it's _not_ on queue, it's either been already dealt with (in which
case we do nothing), or there's aio_poll_complete_work() about to be
executed.  In that case we either put it on the cancellable list,
or, if we know it hadn't been put on all queues ->poll() wanted it on,
simulate what cancel would've done.

It's a lot more convoluted than I'd like it to be.  Single-consumer APIs
suck, and unfortunately aio is not an exception...

Signed-off-by: Al Viro 
Cc: Guenter Roeck 
Signed-off-by: Greg Kroah-Hartman 

---
 fs/aio.c |   90 ---
 1 file changed, 40 insertions(+), 50 deletions(-)

--- a/fs/aio.c
+++ b/fs/aio.c
@@ -181,7 +181,7 @@ struct poll_iocb {
struct file *file;
struct wait_queue_head  *head;
__poll_tevents;
-   boolwoken;
+   booldone;
boolcancelled;
struct wait_queue_entry wait;
struct work_struct  work;
@@ -1606,12 +1606,6 @@ static int aio_fsync(struct fsync_iocb *
return 0;
 }
 
-static inline void aio_poll_complete(struct aio_kiocb *iocb, __poll_t mask)
-{
-   iocb->ki_res.res = mangle_poll(mask);
-   iocb_put(iocb);
-}
-
 static void aio_poll_complete_work(struct work_struct *work)
 {
struct poll_iocb *req = container_of(work, struct poll_iocb, work);
@@ -1637,9 +1631,11 @@ static void aio_poll_complete_work(struc
return;
}
list_del_init(>ki_list);
+   iocb->ki_res.res = mangle_poll(mask);
+   req->done = true;
spin_unlock_irq(>ctx_lock);
 
-   aio_poll_complete(iocb, mask);
+   iocb_put(iocb);
 }
 
 /* assumes we are called with irqs disabled */
@@ -1667,31 +1663,27 @@ static int aio_poll_wake(struct wait_que
__poll_t mask = key_to_poll(key);
unsigned long flags;
 
-   req->woken = true;
-
/* for instances that support it check for an event match first: */
-   if (mask) {
-   if (!(mask & req->events))
-   return 0;
+   if (mask && !(mask & req->events))
+   return 0;
 
+   list_del_init(>wait.entry);
+
+   if (mask && spin_trylock_irqsave(>ki_ctx->ctx_lock, flags)) {
/*
 * Try to complete the iocb inline if we can. Use
 * irqsave/irqrestore because not all filesystems (e.g. fuse)
 * call this function with IRQs disabled and because IRQs
 * have to be disabled before ctx_lock is obtained.
 */
-   if (spin_trylock_irqsave(>ki_ctx->ctx_lock, flags)) {
-   list_del(>ki_list);
-   spin_unlock_irqrestore(>ki_ctx->ctx_lock, flags);
-
-

[PATCH 5.0 73/89] ipv4: set the tcp_min_rtt_wlen range from 0 to one day

2019-04-30 Thread Greg Kroah-Hartman
From: ZhangXiaoxu 

[ Upstream commit 19fad20d15a6494f47f85d869f00b11343ee5c78 ]

There is a UBSAN report as below:
UBSAN: Undefined behaviour in net/ipv4/tcp_input.c:2877:56
signed integer overflow:
2147483647 * 1000 cannot be represented in type 'int'
CPU: 3 PID: 0 Comm: swapper/3 Not tainted 5.1.0-rc4-00058-g582549e #1
Call Trace:
 
 dump_stack+0x8c/0xba
 ubsan_epilogue+0x11/0x60
 handle_overflow+0x12d/0x170
 ? ttwu_do_wakeup+0x21/0x320
 __ubsan_handle_mul_overflow+0x12/0x20
 tcp_ack_update_rtt+0x76c/0x780
 tcp_clean_rtx_queue+0x499/0x14d0
 tcp_ack+0x69e/0x1240
 ? __wake_up_sync_key+0x2c/0x50
 ? update_group_capacity+0x50/0x680
 tcp_rcv_established+0x4e2/0xe10
 tcp_v4_do_rcv+0x22b/0x420
 tcp_v4_rcv+0xfe8/0x1190
 ip_protocol_deliver_rcu+0x36/0x180
 ip_local_deliver+0x15b/0x1a0
 ip_rcv+0xac/0xd0
 __netif_receive_skb_one_core+0x7f/0xb0
 __netif_receive_skb+0x33/0xc0
 netif_receive_skb_internal+0x84/0x1c0
 napi_gro_receive+0x2a0/0x300
 receive_buf+0x3d4/0x2350
 ? detach_buf_split+0x159/0x390
 virtnet_poll+0x198/0x840
 ? reweight_entity+0x243/0x4b0
 net_rx_action+0x25c/0x770
 __do_softirq+0x19b/0x66d
 irq_exit+0x1eb/0x230
 do_IRQ+0x7a/0x150
 common_interrupt+0xf/0xf
 

It can be reproduced by:
  echo 2147483647 > /proc/sys/net/ipv4/tcp_min_rtt_wlen

Fixes: f672258391b42 ("tcp: track min RTT using windowed min-filter")
Signed-off-by: ZhangXiaoxu 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 
---
 Documentation/networking/ip-sysctl.txt |1 +
 net/ipv4/sysctl_net_ipv4.c |5 -
 2 files changed, 5 insertions(+), 1 deletion(-)

--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -422,6 +422,7 @@ tcp_min_rtt_wlen - INTEGER
minimum RTT when it is moved to a longer path (e.g., due to traffic
engineering). A longer window makes the filter more resistant to RTT
inflations such as transient congestion. The unit is seconds.
+   Possible values: 0 - 86400 (1 day)
Default: 300
 
 tcp_moderate_rcvbuf - BOOLEAN
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -49,6 +49,7 @@ static int ip_ping_group_range_min[] = {
 static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
 static int comp_sack_nr_max = 255;
 static u32 u32_max_div_HZ = UINT_MAX / HZ;
+static int one_day_secs = 24 * 3600;
 
 /* obsolete */
 static int sysctl_tcp_low_latency __read_mostly;
@@ -1151,7 +1152,9 @@ static struct ctl_table ipv4_net_table[]
.data   = &init_net.ipv4.sysctl_tcp_min_rtt_wlen,
.maxlen = sizeof(int),
.mode   = 0644,
-   .proc_handler   = proc_dointvec
+   .proc_handler   = proc_dointvec_minmax,
+   .extra1 = &zero,
+   .extra2 = &one_day_secs
},
{
.procname   = "tcp_autocorking",




[PATCH 5.0 82/89] net: socionext: replace napi_alloc_frag with the netdev variant on init

2019-04-30 Thread Greg Kroah-Hartman
From: Ilias Apalodimas 

[ Upstream commit ffbf9870dcf1342592a1a26f4cf70bda39046134 ]

The netdev variant is usable on any context since it disables interrupts.
The napi variant of the call should only be used within softirq context.
Replace napi_alloc_frag on driver init with the correct netdev_alloc_frag
call

Changes since v1:
- Adjusted commit message

Acked-by: Ard Biesheuvel 
Acked-by: Jassi Brar 
Fixes: 4acb20b46214 ("net: socionext: different approach on DMA")
Signed-off-by: Ilias Apalodimas 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/net/ethernet/socionext/netsec.c |   11 +++
 1 file changed, 7 insertions(+), 4 deletions(-)

--- a/drivers/net/ethernet/socionext/netsec.c
+++ b/drivers/net/ethernet/socionext/netsec.c
@@ -673,7 +673,8 @@ static void netsec_process_tx(struct net
 }
 
 static void *netsec_alloc_rx_data(struct netsec_priv *priv,
- dma_addr_t *dma_handle, u16 *desc_len)
+ dma_addr_t *dma_handle, u16 *desc_len,
+ bool napi)
 {
size_t total_len = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
size_t payload_len = NETSEC_RX_BUF_SZ;
@@ -682,7 +683,7 @@ static void *netsec_alloc_rx_data(struct
 
total_len += SKB_DATA_ALIGN(payload_len + NETSEC_SKB_PAD);
 
-   buf = napi_alloc_frag(total_len);
+   buf = napi ? napi_alloc_frag(total_len) : netdev_alloc_frag(total_len);
if (!buf)
return NULL;
 
@@ -765,7 +766,8 @@ static int netsec_process_rx(struct nets
/* allocate a fresh buffer and map it to the hardware.
 * This will eventually replace the old buffer in the hardware
 */
-   buf_addr = netsec_alloc_rx_data(priv, &dma_handle, &desc_len);
+   buf_addr = netsec_alloc_rx_data(priv, &dma_handle, &desc_len,
+   true);
if (unlikely(!buf_addr))
break;
 
@@ -1069,7 +1071,8 @@ static int netsec_setup_rx_dring(struct
void *buf;
u16 len;
 
-   buf = netsec_alloc_rx_data(priv, &dma_handle, &len);
+   buf = netsec_alloc_rx_data(priv, &dma_handle, &len,
+  false);
if (!buf) {
netsec_uninit_pkt_dring(priv, NETSEC_RING_RX);
goto err_out;




[PATCH 5.0 54/89] sched/deadline: Correctly handle active 0-lag timers

2019-04-30 Thread Greg Kroah-Hartman
From: luca abeni 

commit 1b02cd6a2d7f3e2a6a5262887d2cb2912083e42f upstream.

syzbot reported the following warning:

   [ ] WARNING: CPU: 4 PID: 17089 at kernel/sched/deadline.c:255 
task_non_contending+0xae0/0x1950

line 255 of deadline.c is:

WARN_ON(hrtimer_active(&dl_se->inactive_timer));

in task_non_contending().

Unfortunately, in some cases (for example, a deadline task
continuosly blocking and waking immediately) it can happen that
a task blocks (and task_non_contending() is called) while the
0-lag timer is still active.

In this case, the safest thing to do is to immediately decrease
the running bandwidth of the task, without trying to re-arm the 0-lag timer.

Signed-off-by: luca abeni 
Signed-off-by: Peter Zijlstra (Intel) 
Acked-by: Juri Lelli 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: chengjian (D) 
Link: 
https://lkml.kernel.org/r/20190325131530.34706-1-luca.ab...@santannapisa.it
Signed-off-by: Ingo Molnar 
Signed-off-by: Greg Kroah-Hartman 

---
 kernel/sched/deadline.c |3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -252,7 +252,6 @@ static void task_non_contending(struct t
if (dl_entity_is_special(dl_se))
return;
 
-   WARN_ON(hrtimer_active(&dl_se->inactive_timer));
WARN_ON(dl_se->dl_non_contending);
 
zerolag_time = dl_se->deadline -
@@ -269,7 +268,7 @@ static void task_non_contending(struct t
 * If the "0-lag time" already passed, decrease the active
 * utilization now, instead of starting a timer
 */
-   if (zerolag_time < 0) {
+   if ((zerolag_time < 0) || hrtimer_active(&dl_se->inactive_timer)) {
if (dl_task(p))
sub_running_bw(dl_se, dl_rq);
if (!dl_task(p) || p->state == TASK_DEAD) {




[PATCH 5.0 52/89] workqueue: Try to catch flush_work() without INIT_WORK().

2019-04-30 Thread Greg Kroah-Hartman
From: Tetsuo Handa 

commit 4d43d395fed124631ca02356c711facb90185175 upstream.

syzbot found a flush_work() caller who forgot to call INIT_WORK()
because that work_struct was allocated by kzalloc() [1]. But the message

  INFO: trying to register non-static key.
  the code is fine but needs lockdep annotation.
  turning off the locking correctness validator.

by lock_map_acquire() is failing to tell that INIT_WORK() is missing.

Since flush_work() without INIT_WORK() is a bug, and INIT_WORK() should
set ->func field to non-zero, let's warn if ->func field is zero.

[1] 
https://syzkaller.appspot.com/bug?id=a5954455fcfa51c29ca2ab55b203076337e1c770

Signed-off-by: Tetsuo Handa 
Signed-off-by: Tejun Heo 
Signed-off-by: Greg Kroah-Hartman 

---
 kernel/workqueue.c |3 +++
 1 file changed, 3 insertions(+)

--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -2931,6 +2931,9 @@ static bool __flush_work(struct work_str
if (WARN_ON(!wq_online))
return false;
 
+   if (WARN_ON(!work->func))
+   return false;
+
if (!from_cancel) {
lock_map_acquire(&work->lockdep_map);
lock_map_release(&work->lockdep_map);




[PATCH 5.0 87/89] net/mlx5e: Fix use-after-free after xdp_return_frame

2019-04-30 Thread Greg Kroah-Hartman
From: Maxim Mikityanskiy 

[ Upstream commit 12fc512f5741443a03adde2ead20724da8ad550a ]

xdp_return_frame releases the frame. It leads to releasing the page, so
it's not allowed to access xdpi.xdpf->len after that, because xdpi.xdpf
is at xdp->data_hard_start after convert_to_xdp_frame. This patch moves
the memory access to precede the return of the frame.

Fixes: 58b99ee3e3ebe ("net/mlx5e: Add support for XDP_REDIRECT in device-out 
side")
Signed-off-by: Maxim Mikityanskiy 
Signed-off-by: Saeed Mahameed 
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -324,9 +324,9 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq
mlx5e_xdpi_fifo_pop(xdpi_fifo);
 
if (is_redirect) {
-   xdp_return_frame(xdpi.xdpf);
dma_unmap_single(sq->pdev, 
xdpi.dma_addr,
 xdpi.xdpf->len, 
DMA_TO_DEVICE);
+   xdp_return_frame(xdpi.xdpf);
} else {
/* Recycle RX page */
mlx5e_page_release(rq, &xdpi.di, true);
@@ -365,9 +365,9 @@ void mlx5e_free_xdpsq_descs(struct mlx5e
mlx5e_xdpi_fifo_pop(xdpi_fifo);
 
if (is_redirect) {
-   xdp_return_frame(xdpi.xdpf);
dma_unmap_single(sq->pdev, xdpi.dma_addr,
 xdpi.xdpf->len, DMA_TO_DEVICE);
+   xdp_return_frame(xdpi.xdpf);
} else {
/* Recycle RX page */
mlx5e_page_release(rq, &xdpi.di, false);




[PATCH 5.0 88/89] net/tls: avoid potential deadlock in tls_set_device_offload_rx()

2019-04-30 Thread Greg Kroah-Hartman
From: Jakub Kicinski 

[ Upstream commit 62ef81d5632634d5e310ed25b9b940b2b6612b46 ]

If device supports offload, but offload fails tls_set_device_offload_rx()
will call tls_sw_free_resources_rx() which (unhelpfully) releases
and reacquires the socket lock.

For a small fix release and reacquire the device_offload_lock.

Fixes: 4799ac81e52a ("tls: Add rx inline crypto offload")
Signed-off-by: Jakub Kicinski 
Reviewed-by: Dirk van der Merwe 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 
---
 net/tls/tls_device.c |2 ++
 1 file changed, 2 insertions(+)

--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -884,7 +884,9 @@ int tls_set_device_offload_rx(struct soc
goto release_netdev;
 
 free_sw_resources:
+   up_read(&device_offload_lock);
tls_sw_free_resources_rx(sk);
+   down_read(&device_offload_lock);
 release_ctx:
ctx->priv_ctx_rx = NULL;
 release_netdev:




[PATCH 5.0 85/89] mlxsw: spectrum: Put MC TCs into DWRR mode

2019-04-30 Thread Greg Kroah-Hartman
From: Petr Machata 

[ Upstream commit f476b3f809fa02f47af6333ed63715058c3fc348 ]

Both Spectrum-1 and Spectrum-2 chips are currently configured such that
pairs of TC n (which is used for UC traffic) and TC n+8 (which is used
for MC traffic) are feeding into the same subgroup. Strict
prioritization is configured between the two TCs, and by enabling
MC-aware mode on the switch, the lower-numbered (UC) TCs are favored
over the higher-numbered (MC) TCs.

On Spectrum-2 however, there is an issue in configuration of the
MC-aware mode. As a result, MC traffic is prioritized over UC traffic.
To work around the issue, configure the MC TCs with DWRR mode (while
keeping the UC TCs in strict mode).

With this patch, the multicast-unicast arbitration results in the same
behavior on both Spectrum-1 and Spectrum-2 chips.

Fixes: 7b8195306694 ("mlxsw: spectrum: Configure MC-aware mode on mlxsw ports")
Signed-off-by: Petr Machata 
Signed-off-by: Ido Schimmel 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -2961,7 +2961,7 @@ static int mlxsw_sp_port_ets_init(struct
err = mlxsw_sp_port_ets_set(mlxsw_sp_port,
MLXSW_REG_QEEC_HIERARCY_TC,
i + 8, i,
-   false, 0);
+   true, 100);
if (err)
return err;
}




[PATCH 5.0 55/89] mac80211_hwsim: calculate if_combination.max_interfaces

2019-04-30 Thread Greg Kroah-Hartman
From: Johannes Berg 

commit 45fcef8b727b6f171bc5443e8153181a367d7a15 upstream.

If we just set this to 2048, and have multiple limits you
can select from, the total number might run over and cause
a warning in cfg80211. This doesn't make sense, so we just
calculate the total max_interfaces now.

Reported-by: syzbot+8f91bd563bbff230d...@syzkaller.appspotmail.com
Fixes: 99e3a44bac37 ("mac80211_hwsim: allow setting iftype support")
Signed-off-by: Johannes Berg 
Signed-off-by: Greg Kroah-Hartman 

---
 drivers/net/wireless/mac80211_hwsim.c |   19 +++
 1 file changed, 15 insertions(+), 4 deletions(-)

--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -2642,7 +2642,7 @@ static int mac80211_hwsim_new_radio(stru
enum nl80211_band band;
const struct ieee80211_ops *ops = &mac80211_hwsim_ops;
struct net *net;
-   int idx;
+   int idx, i;
int n_limits = 0;
 
if (WARN_ON(param->channels > 1 && !param->use_chanctx))
@@ -2766,12 +2766,23 @@ static int mac80211_hwsim_new_radio(stru
goto failed_hw;
}
 
+   data->if_combination.max_interfaces = 0;
+   for (i = 0; i < n_limits; i++)
+   data->if_combination.max_interfaces +=
+   data->if_limits[i].max;
+
data->if_combination.n_limits = n_limits;
-   data->if_combination.max_interfaces = 2048;
data->if_combination.limits = data->if_limits;
 
-   hw->wiphy->iface_combinations = &data->if_combination;
-   hw->wiphy->n_iface_combinations = 1;
+   /*
+* If we actually were asked to support combinations,
+* advertise them - if there's only a single thing like
+* only IBSS then don't advertise it as combinations.
+*/
+   if (data->if_combination.max_interfaces > 1) {
+   hw->wiphy->iface_combinations = &data->if_combination;
+   hw->wiphy->n_iface_combinations = 1;
+   }
 
if (param->ciphers) {
memcpy(data->ciphers, param->ciphers,




[PATCH 5.0 89/89] net/tls: dont leak IV and record seq when offload fails

2019-04-30 Thread Greg Kroah-Hartman
From: Jakub Kicinski 

[ Upstream commit 12c7686111326148b4b5db189130522a4ad1be4a ]

When device refuses the offload in tls_set_device_offload_rx()
it calls tls_sw_free_resources_rx() to clean up software context
state.

Unfortunately, tls_sw_free_resources_rx() does not free all
the state tls_set_sw_offload() allocated - it leaks IV and
sequence number buffers.  All other code paths which lead to
tls_sw_release_resources_rx() (which tls_sw_free_resources_rx()
calls) free those right before the call.

Avoid the leak by moving freeing of iv and rec_seq into
tls_sw_release_resources_rx().

Fixes: 4799ac81e52a ("tls: Add rx inline crypto offload")
Signed-off-by: Jakub Kicinski 
Reviewed-by: Dirk van der Merwe 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 
---
 net/tls/tls_device.c |2 --
 net/tls/tls_main.c   |5 +
 net/tls/tls_sw.c |3 +++
 3 files changed, 4 insertions(+), 6 deletions(-)

--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -921,8 +921,6 @@ void tls_device_offload_cleanup_rx(struc
}
 out:
up_read(_offload_lock);
-   kfree(tls_ctx->rx.rec_seq);
-   kfree(tls_ctx->rx.iv);
tls_sw_release_resources_rx(sk);
 }
 
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -304,11 +304,8 @@ static void tls_sk_proto_close(struct so
 #endif
}
 
-   if (ctx->rx_conf == TLS_SW) {
-   kfree(ctx->rx.rec_seq);
-   kfree(ctx->rx.iv);
+   if (ctx->rx_conf == TLS_SW)
tls_sw_free_resources_rx(sk);
-   }
 
 #ifdef CONFIG_TLS_DEVICE
if (ctx->rx_conf == TLS_HW)
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -1830,6 +1830,9 @@ void tls_sw_release_resources_rx(struct
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
 
+   kfree(tls_ctx->rx.rec_seq);
+   kfree(tls_ctx->rx.iv);
+
if (ctx->aead_recv) {
kfree_skb(ctx->recv_pkt);
ctx->recv_pkt = NULL;




[PATCH 5.0 86/89] net/mlx5e: Fix the max MTU check in case of XDP

2019-04-30 Thread Greg Kroah-Hartman
From: Maxim Mikityanskiy 

[ Upstream commit d460c2718906252a2a69bc6f89b537071f792e6e ]

MLX5E_XDP_MAX_MTU was calculated incorrectly. It didn't account for
NET_IP_ALIGN and MLX5E_HW2SW_MTU, and it also misused MLX5_SKB_FRAG_SZ.
This commit fixes the calculations and adds a brief explanation for the
formula used.

Fixes: a26a5bdf3ee2d ("net/mlx5e: Restrict the combination of large MTU and 
XDP")
Signed-off-by: Maxim Mikityanskiy 
Signed-off-by: Saeed Mahameed 
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c  |   20 
 drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h  |3 +--
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c |5 +++--
 3 files changed, 24 insertions(+), 4 deletions(-)

--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -33,6 +33,26 @@
 #include 
 #include "en/xdp.h"
 
+int mlx5e_xdp_max_mtu(struct mlx5e_params *params)
+{
+   int hr = NET_IP_ALIGN + XDP_PACKET_HEADROOM;
+
+   /* Let S := SKB_DATA_ALIGN(sizeof(struct skb_shared_info)).
+* The condition checked in mlx5e_rx_is_linear_skb is:
+*   SKB_DATA_ALIGN(sw_mtu + hard_mtu + hr) + S <= PAGE_SIZE (1)
+*   (Note that hw_mtu == sw_mtu + hard_mtu.)
+* What is returned from this function is:
+*   max_mtu = PAGE_SIZE - S - hr - hard_mtu (2)
+* After assigning sw_mtu := max_mtu, the left side of (1) turns to
+* SKB_DATA_ALIGN(PAGE_SIZE - S) + S, which is equal to PAGE_SIZE,
+* because both PAGE_SIZE and S are already aligned. Any number greater
+* than max_mtu would make the left side of (1) greater than PAGE_SIZE,
+* so max_mtu is the maximum MTU allowed.
+*/
+
+   return MLX5E_HW2SW_MTU(params, SKB_MAX_HEAD(hr));
+}
+
 static inline bool
 mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_dma_info *di,
struct xdp_buff *xdp)
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
@@ -34,13 +34,12 @@
 
 #include "en.h"
 
-#define MLX5E_XDP_MAX_MTU ((int)(PAGE_SIZE - \
-MLX5_SKB_FRAG_SZ(XDP_PACKET_HEADROOM)))
 #define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN)
 #define MLX5E_XDP_TX_EMPTY_DS_COUNT \
(sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS)
 #define MLX5E_XDP_TX_DS_COUNT (MLX5E_XDP_TX_EMPTY_DS_COUNT + 1 /* SG DS */)
 
+int mlx5e_xdp_max_mtu(struct mlx5e_params *params);
 bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
  void *va, u16 *rx_headroom, u32 *len);
 bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq);
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3816,7 +3816,7 @@ int mlx5e_change_mtu(struct net_device *
if (params->xdp_prog &&
!mlx5e_rx_is_linear_skb(priv->mdev, &new_channels.params)) {
netdev_err(netdev, "MTU(%d) > %d is not allowed while XDP 
enabled\n",
-  new_mtu, MLX5E_XDP_MAX_MTU);
+  new_mtu, mlx5e_xdp_max_mtu(params));
err = -EINVAL;
goto out;
}
@@ -4280,7 +4280,8 @@ static int mlx5e_xdp_allowed(struct mlx5
 
if (!mlx5e_rx_is_linear_skb(priv->mdev, &new_channels.params)) {
netdev_warn(netdev, "XDP is not allowed with MTU(%d) > %d\n",
-   new_channels.params.sw_mtu, MLX5E_XDP_MAX_MTU);
+   new_channels.params.sw_mtu,
+   mlx5e_xdp_max_mtu(&new_channels.params));
return -EINVAL;
}
 




[PATCH 5.0 56/89] NFS: Forbid setting AF_INET6 to "struct sockaddr_in"->sin_family.

2019-04-30 Thread Greg Kroah-Hartman
From: Tetsuo Handa 

commit 7c2bd9a39845bfb6d72ddb55ce737650271f6f96 upstream.

syzbot is reporting uninitialized value at rpc_sockaddr2uaddr() [1]. This
is because syzbot is setting AF_INET6 to "struct sockaddr_in"->sin_family
(which is embedded into user-visible "struct nfs_mount_data" structure)
despite nfs23_validate_mount_data() cannot pass sizeof(struct sockaddr_in6)
bytes of AF_INET6 address to rpc_sockaddr2uaddr().

Since "struct nfs_mount_data" structure is user-visible, we can't change
"struct nfs_mount_data" to use "struct sockaddr_storage". Therefore,
assuming that everybody is using AF_INET family when passing address via
"struct nfs_mount_data"->addr, reject if its sin_family is not AF_INET.

[1] 
https://syzkaller.appspot.com/bug?id=53614e7cbbf66bc2656a919ab2a95fb5d75c

Reported-by: syzbot 
Signed-off-by: Tetsuo Handa 
Signed-off-by: Trond Myklebust 
Signed-off-by: Greg Kroah-Hartman 

---
 fs/nfs/super.c |3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2041,7 +2041,8 @@ static int nfs23_validate_mount_data(voi
memcpy(sap, &data->addr, sizeof(data->addr));
args->nfs_server.addrlen = sizeof(data->addr);
args->nfs_server.port = ntohs(data->addr.sin_port);
-   if (!nfs_verify_server_address(sap))
+   if (sap->sa_family != AF_INET ||
+   !nfs_verify_server_address(sap))
goto out_no_address;
 
if (!(data->flags & NFS_MOUNT_TCP))




[PATCH 5.0 47/89] USB: Consolidate LPM checks to avoid enabling LPM twice

2019-04-30 Thread Greg Kroah-Hartman
From: Kai-Heng Feng 

commit d7a6c0ce8d26412903c7981503bad9e1cc7c45d2 upstream.

USB Bluetooth controller QCA ROME (0cf3:e007) sometimes stops working
after S3:
[ 165.110742] Bluetooth: hci0: using NVM file: qca/nvm_usb_0302.bin
[ 168.432065] Bluetooth: hci0: Failed to send body at 4 of 1953 (-110)

After some experiments, I found that disabling LPM can workaround the
issue.

On some platforms, the USB power is cut during S3, so the driver uses
reset-resume to resume the device. During port resume, LPM gets enabled
twice, by usb_reset_and_verify_device() and usb_port_resume().

Consolidate all checks into new LPM helpers to make sure LPM only gets
enabled once.

Fixes: de68bab4fa96 ("usb: Don't enable USB 2.0 Link PM by default.”)
Signed-off-by: Kai-Heng Feng 
Cc: stable  # after much soaking
Signed-off-by: Greg Kroah-Hartman 
Signed-off-by: Greg Kroah-Hartman 

---
 drivers/usb/core/driver.c  |   11 ---
 drivers/usb/core/hub.c |   12 
 drivers/usb/core/message.c |3 +--
 3 files changed, 13 insertions(+), 13 deletions(-)

--- a/drivers/usb/core/driver.c
+++ b/drivers/usb/core/driver.c
@@ -1901,9 +1901,6 @@ static int usb_set_usb2_hardware_lpm(str
struct usb_hcd *hcd = bus_to_hcd(udev->bus);
int ret = -EPERM;
 
-   if (enable && !udev->usb2_hw_lpm_allowed)
-   return 0;
-
if (hcd->driver->set_usb2_hw_lpm) {
ret = hcd->driver->set_usb2_hw_lpm(hcd, udev, enable);
if (!ret)
@@ -1915,11 +1912,19 @@ static int usb_set_usb2_hardware_lpm(str
 
 int usb_enable_usb2_hardware_lpm(struct usb_device *udev)
 {
+   if (!udev->usb2_hw_lpm_capable ||
+   !udev->usb2_hw_lpm_allowed ||
+   udev->usb2_hw_lpm_enabled)
+   return 0;
+
return usb_set_usb2_hardware_lpm(udev, 1);
 }
 
 int usb_disable_usb2_hardware_lpm(struct usb_device *udev)
 {
+   if (!udev->usb2_hw_lpm_enabled)
+   return 0;
+
return usb_set_usb2_hardware_lpm(udev, 0);
 }
 
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -3220,8 +3220,7 @@ int usb_port_suspend(struct usb_device *
}
 
/* disable USB2 hardware LPM */
-   if (udev->usb2_hw_lpm_enabled == 1)
-   usb_disable_usb2_hardware_lpm(udev);
+   usb_disable_usb2_hardware_lpm(udev);
 
if (usb_disable_ltm(udev)) {
dev_err(&udev->dev, "Failed to disable LTM before suspend\n");
@@ -3259,8 +3258,7 @@ int usb_port_suspend(struct usb_device *
usb_enable_ltm(udev);
  err_ltm:
/* Try to enable USB2 hardware LPM again */
-   if (udev->usb2_hw_lpm_capable == 1)
-   usb_enable_usb2_hardware_lpm(udev);
+   usb_enable_usb2_hardware_lpm(udev);
 
if (udev->do_remote_wakeup)
(void) usb_disable_remote_wakeup(udev);
@@ -3543,8 +3541,7 @@ int usb_port_resume(struct usb_device *u
hub_port_logical_disconnect(hub, port1);
} else  {
/* Try to enable USB2 hardware LPM */
-   if (udev->usb2_hw_lpm_capable == 1)
-   usb_enable_usb2_hardware_lpm(udev);
+   usb_enable_usb2_hardware_lpm(udev);
 
/* Try to enable USB3 LTM */
usb_enable_ltm(udev);
@@ -5649,8 +5646,7 @@ static int usb_reset_and_verify_device(s
/* Disable USB2 hardware LPM.
 * It will be re-enabled by the enumeration process.
 */
-   if (udev->usb2_hw_lpm_enabled == 1)
-   usb_disable_usb2_hardware_lpm(udev);
+   usb_disable_usb2_hardware_lpm(udev);
 
/* Disable LPM while we reset the device and reinstall the alt settings.
 * Device-initiated LPM, and system exit latency settings are cleared
--- a/drivers/usb/core/message.c
+++ b/drivers/usb/core/message.c
@@ -1243,8 +1243,7 @@ void usb_disable_device(struct usb_devic
dev->actconfig->interface[i] = NULL;
}
 
-   if (dev->usb2_hw_lpm_enabled == 1)
-   usb_disable_usb2_hardware_lpm(dev);
+   usb_disable_usb2_hardware_lpm(dev);
usb_unlocked_disable_lpm(dev);
usb_disable_ltm(dev);
 




[PATCH 5.0 84/89] mlxsw: pci: Reincrease PCI reset timeout

2019-04-30 Thread Greg Kroah-Hartman
From: Ido Schimmel 

[ Upstream commit 1ab3030193d25878b3b1409060e1e0a879800c95 ]

During driver initialization the driver sends a reset to the device and
waits for the firmware to signal that it is ready to continue.

Commit d2f372ba0914 ("mlxsw: pci: Increase PCI SW reset timeout")
increased the timeout to 13 seconds due to longer PHY calibration in
Spectrum-2 compared to Spectrum-1.

Recently it became apparent that this timeout is too short and therefore
this patch increases it again to a safer limit that will be reduced in
the future.

Fixes: c3ab435466d5 ("mlxsw: spectrum: Extend to support Spectrum-2 ASIC")
Fixes: d2f372ba0914 ("mlxsw: pci: Increase PCI SW reset timeout")
Signed-off-by: Ido Schimmel 
Acked-by: Jiri Pirko 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/net/ethernet/mellanox/mlxsw/pci_hw.h |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
@@ -27,7 +27,7 @@
 
 #define MLXSW_PCI_SW_RESET 0xF0010
 #define MLXSW_PCI_SW_RESET_RST_BIT BIT(0)
-#define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS   13000
+#define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS   2
 #define MLXSW_PCI_SW_RESET_WAIT_MSECS  100
 #define MLXSW_PCI_FW_READY 0xA1844
 #define MLXSW_PCI_FW_READY_MASK0x




[PATCH 5.0 53/89] binder: fix handling of misaligned binder object

2019-04-30 Thread Greg Kroah-Hartman
From: Todd Kjos 

commit 26528be6720bb40bc8844e97ee73a37e530e9c5e upstream.

Fixes crash found by syzbot:
kernel BUG at drivers/android/binder_alloc.c:LINE! (2)

Reported-and-tested-by: syzbot+55de1eb4975dec156...@syzkaller.appspotmail.com
Signed-off-by: Todd Kjos 
Reviewed-by: Joel Fernandes (Google) 
Cc: stable  # 5.0, 4.19, 4.14
Signed-off-by: Greg Kroah-Hartman 

---
 drivers/android/binder_alloc.c |   18 --
 1 file changed, 8 insertions(+), 10 deletions(-)

--- a/drivers/android/binder_alloc.c
+++ b/drivers/android/binder_alloc.c
@@ -959,14 +959,13 @@ enum lru_status binder_alloc_free_page(s
 
index = page - alloc->pages;
page_addr = (uintptr_t)alloc->buffer + index * PAGE_SIZE;
+
+   mm = alloc->vma_vm_mm;
+   if (!mmget_not_zero(mm))
+   goto err_mmget;
+   if (!down_write_trylock(&mm->mmap_sem))
+   goto err_down_write_mmap_sem_failed;
vma = binder_alloc_get_vma(alloc);
-   if (vma) {
-   if (!mmget_not_zero(alloc->vma_vm_mm))
-   goto err_mmget;
-   mm = alloc->vma_vm_mm;
-   if (!down_write_trylock(&mm->mmap_sem))
-   goto err_down_write_mmap_sem_failed;
-   }
 
list_lru_isolate(lru, item);
spin_unlock(lock);
@@ -979,10 +978,9 @@ enum lru_status binder_alloc_free_page(s
   PAGE_SIZE);
 
trace_binder_unmap_user_end(alloc, index);
-
-   up_write(&mm->mmap_sem);
-   mmput(mm);
}
+   up_write(&mm->mmap_sem);
+   mmput(mm);
 
trace_binder_unmap_kernel_start(alloc, index);
 




[PATCH 5.0 79/89] net/tls: fix refcount adjustment in fallback

2019-04-30 Thread Greg Kroah-Hartman
From: Jakub Kicinski 

[ Upstream commit 9188d5ca454fd665145904267e726e9e8d122f5c ]

Unlike atomic_add(), refcount_add() does not deal well
with a negative argument.  TLS fallback code reallocates
the skb and is very likely to shrink the truesize, leading to:

[  189.513254] WARNING: CPU: 5 PID: 0 at lib/refcount.c:81 
refcount_add_not_zero_checked+0x15c/0x180
 Call Trace:
  refcount_add_checked+0x6/0x40
  tls_enc_skb+0xb93/0x13e0 [tls]

Once wmem_allocated count saturates the application can no longer
send data on the socket.  This is similar to Eric's fixes for GSO,
TCP:
commit 7ec318feeed1 ("tcp: gso: avoid refcount_t warning from 
tcp_gso_segment()")
and UDP:
commit 575b65bc5bff ("udp: avoid refcount_t saturation in __udp_gso_segment()").

Unlike the GSO case, for TLS fallback it's likely that the skb has
shrunk, so the "likely" annotation is the other way around (likely
branch being "sub").

Fixes: e8f69799810c ("net/tls: Add generic NIC offload infrastructure")
Signed-off-by: Jakub Kicinski 
Reviewed-by: John Hurley 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 
---
 net/tls/tls_device_fallback.c |   13 ++---
 1 file changed, 10 insertions(+), 3 deletions(-)

--- a/net/tls/tls_device_fallback.c
+++ b/net/tls/tls_device_fallback.c
@@ -193,6 +193,9 @@ static void update_chksum(struct sk_buff
 
 static void complete_skb(struct sk_buff *nskb, struct sk_buff *skb, int headln)
 {
+   struct sock *sk = skb->sk;
+   int delta;
+
skb_copy_header(nskb, skb);
 
skb_put(nskb, skb->len);
@@ -200,11 +203,15 @@ static void complete_skb(struct sk_buff
update_chksum(nskb, headln);
 
nskb->destructor = skb->destructor;
-   nskb->sk = skb->sk;
+   nskb->sk = sk;
skb->destructor = NULL;
skb->sk = NULL;
-   refcount_add(nskb->truesize - skb->truesize,
-&nskb->sk->sk_wmem_alloc);
+
+   delta = nskb->truesize - skb->truesize;
+   if (likely(delta < 0))
+   WARN_ON_ONCE(refcount_sub_and_test(-delta, &sk->sk_wmem_alloc));
+   else if (delta)
+   refcount_add(delta, &sk->sk_wmem_alloc);
 }
 
 /* This function may be called after the user socket is already




[PATCH 5.0 51/89] slip: make slhc_free() silently accept an error pointer

2019-04-30 Thread Greg Kroah-Hartman
From: Linus Torvalds 

commit baf76f0c58aec435a3a864075b8f6d8ee5d1f17e upstream.

This way, slhc_free() accepts what slhc_init() returns, whether that is
an error or not.

In particular, the pattern in sl_alloc_bufs() is

slcomp = slhc_init(16, 16);
...
slhc_free(slcomp);

for the error handling path, and rather than complicate that code, just
make it ok to always free what was returned by the init function.

That's what the code used to do before commit 4ab42d78e37a ("ppp, slip:
Validate VJ compression slot parameters completely") when slhc_init()
just returned NULL for the error case, with no actual indication of the
details of the error.

Reported-by: syzbot+45474c076a4927533...@syzkaller.appspotmail.com
Fixes: 4ab42d78e37a ("ppp, slip: Validate VJ compression slot parameters 
completely")
Acked-by: Ben Hutchings 
Cc: David Miller 
Signed-off-by: Linus Torvalds 
Signed-off-by: Greg Kroah-Hartman 

---
 drivers/net/slip/slhc.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/drivers/net/slip/slhc.c
+++ b/drivers/net/slip/slhc.c
@@ -153,7 +153,7 @@ out_fail:
 void
 slhc_free(struct slcompress *comp)
 {
-   if ( comp == NULLSLCOMPR )
+   if ( IS_ERR_OR_NULL(comp) )
return;
 
if ( comp->tstate != NULLSLSTATE )




[PATCH 5.0 78/89] net: stmmac: move stmmac_check_ether_addr() to driver probe

2019-04-30 Thread Greg Kroah-Hartman
From: Vinod Koul 

[ Upstream commit b561af36b1841088552464cdc3f6371d92f17710 ]

stmmac_check_ether_addr() checks the MAC address and assigns one in
driver open(). In many cases when we create slave netdevice, the dev
addr is inherited from master but the master dev addr maybe NULL at
that time, so move this call to driver probe so that address is
always valid.

Signed-off-by: Xiaofei Shen 
Tested-by: Xiaofei Shen 
Signed-off-by: Sneh Shah 
Signed-off-by: Vinod Koul 
Reviewed-by: Andrew Lunn 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -2590,8 +2590,6 @@ static int stmmac_open(struct net_device
u32 chan;
int ret;
 
-   stmmac_check_ether_addr(priv);
-
if (priv->hw->pcs != STMMAC_PCS_RGMII &&
priv->hw->pcs != STMMAC_PCS_TBI &&
priv->hw->pcs != STMMAC_PCS_RTBI) {
@@ -4265,6 +4263,8 @@ int stmmac_dvr_probe(struct device *devi
if (ret)
goto error_hw_init;
 
+   stmmac_check_ether_addr(priv);
+
/* Configure real RX and TX queues */
netif_set_real_num_rx_queues(ndev, priv->plat->rx_queues_to_use);
netif_set_real_num_tx_queues(ndev, priv->plat->tx_queues_to_use);




[PATCH 5.0 81/89] team: fix possible recursive locking when add slaves

2019-04-30 Thread Greg Kroah-Hartman
From: Hangbin Liu 

[ Upstream commit 925b0c841e066b488cc3a60272472b2c56300704 ]

If we add a bond device which is already the master of the team interface,
we will hold the team->lock in team_add_slave() first and then request the
lock in team_set_mac_address() again. The functions are called like:

- team_add_slave()
 - team_port_add()
   - team_port_enter()
 - team_modeop_port_enter()
   - __set_port_dev_addr()
 - dev_set_mac_address()
   - bond_set_mac_address()
 - dev_set_mac_address()
   - team_set_mac_address

Although team_upper_dev_link() would check the upper devices but it is
called too late. Fix it by adding a checking before processing the slave.

v2: Do not split the string in netdev_err()

Fixes: 3d249d4ca7d0 ("net: introduce ethernet teaming device")
Acked-by: Jiri Pirko 
Signed-off-by: Hangbin Liu 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/net/team/team.c |7 +++
 1 file changed, 7 insertions(+)

--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -1157,6 +1157,13 @@ static int team_port_add(struct team *te
return -EINVAL;
}
 
+   if (netdev_has_upper_dev(dev, port_dev)) {
+   NL_SET_ERR_MSG(extack, "Device is already an upper device of 
the team interface");
+   netdev_err(dev, "Device %s is already an upper device of the 
team interface\n",
+  portname);
+   return -EBUSY;
+   }
+
if (port_dev->features & NETIF_F_VLAN_CHALLENGED &&
vlan_uses_dev(dev)) {
NL_SET_ERR_MSG(extack, "Device is VLAN challenged and team 
device has VLAN set up");




[PATCH 5.0 80/89] stmmac: pci: Adjust IOT2000 matching

2019-04-30 Thread Greg Kroah-Hartman
From: Su Bao Cheng 

[ Upstream commit e0c1d14a1a3211dccf0540a6703ffbd5d2a75bdb ]

Since there are more IOT2040 variants with identical hardware but
different asset tags, the asset tag matching should be adjusted to
support them.

For the board name "SIMATIC IOT2000", currently there are 2 types of
hardware, IOT2020 and IOT2040. The IOT2020 is identified by its unique
asset tag. Match on it first. If we then match on the board name only,
we will catch all IOT2040 variants. In the future there will be no other
devices with the "SIMATIC IOT2000" DMI board name but different
hardware.

Signed-off-by: Su Bao Cheng 
Reviewed-by: Jan Kiszka 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c |8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
@@ -159,6 +159,12 @@ static const struct dmi_system_id quark_
},
.driver_data = (void *)_stmmac_dmi_data,
},
+   /*
+* There are 2 types of SIMATIC IOT2000: IOT2020 and IOT2040.
+* The asset tag "6ES7647-0AA00-0YA2" is only for IOT2020 which
+* has only one pci network device while other asset tags are
+* for IOT2040 which has two.
+*/
{
.matches = {
DMI_EXACT_MATCH(DMI_BOARD_NAME, "SIMATIC IOT2000"),
@@ -170,8 +176,6 @@ static const struct dmi_system_id quark_
{
.matches = {
DMI_EXACT_MATCH(DMI_BOARD_NAME, "SIMATIC IOT2000"),
-   DMI_EXACT_MATCH(DMI_BOARD_ASSET_TAG,
-   "6ES7647-0AA00-1YA2"),
},
.driver_data = (void *)_stmmac_dmi_data,
},




[PATCH 5.0 83/89] net/ncsi: handle overflow when incrementing mac address

2019-04-30 Thread Greg Kroah-Hartman
From: Tao Ren 

[ Upstream commit 1c5c12ee308aacf635c8819cd4baa3bd58f8a8b7 ]

Previously BMC's MAC address is calculated by simply adding 1 to the
last byte of network controller's MAC address, and it produces incorrect
result when network controller's MAC address ends with 0xFF.

The problem can be fixed by calling eth_addr_inc() function to increment
MAC address; besides, the MAC address is also validated before assigning
to BMC.

Fixes: cb10c7c0dfd9 ("net/ncsi: Add NCSI Broadcom OEM command")
Signed-off-by: Tao Ren 
Acked-by: Jakub Kicinski 
Acked-by: Samuel Mendoza-Jonas 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 
---
 include/linux/etherdevice.h |   12 
 net/ncsi/ncsi-rsp.c |6 +-
 2 files changed, 17 insertions(+), 1 deletion(-)

--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -448,6 +448,18 @@ static inline void eth_addr_dec(u8 *addr
 }
 
 /**
+ * eth_addr_inc() - Increment the given MAC address.
+ * @addr: Pointer to a six-byte array containing Ethernet address to increment.
+ */
+static inline void eth_addr_inc(u8 *addr)
+{
+   u64 u = ether_addr_to_u64(addr);
+
+   u++;
+   u64_to_ether_addr(u, addr);
+}
+
+/**
  * is_etherdev_addr - Tell if given Ethernet address belongs to the device.
  * @dev: Pointer to a device structure
  * @addr: Pointer to a six-byte array containing the Ethernet address
--- a/net/ncsi/ncsi-rsp.c
+++ b/net/ncsi/ncsi-rsp.c
@@ -11,6 +11,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include 
@@ -667,7 +668,10 @@ static int ncsi_rsp_handler_oem_bcm_gma(
ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
memcpy(saddr.sa_data, >data[BCM_MAC_ADDR_OFFSET], ETH_ALEN);
/* Increase mac address by 1 for BMC's address */
-   saddr.sa_data[ETH_ALEN - 1]++;
+   eth_addr_inc((u8 *)saddr.sa_data);
+   if (!is_valid_ether_addr((const u8 *)saddr.sa_data))
+   return -ENXIO;
+
ret = ops->ndo_set_mac_address(ndev, );
if (ret < 0)
netdev_warn(ndev, "NCSI: 'Writing mac address to device 
failed\n");




[PATCH 5.0 77/89] net/rose: fix unbound loop in rose_loopback_timer()

2019-04-30 Thread Greg Kroah-Hartman
From: Eric Dumazet 

[ Upstream commit 0453c682459583910d611a96de928f4442205493 ]

This patch adds a limit on the number of skbs that fuzzers can queue
into loopback_queue. 1000 packets for rose loopback seems more than enough.

Then, since we now have multiple cpus in most linux hosts,
we also need to limit the number of skbs rose_loopback_timer()
can dequeue at each round.

rose_loopback_queue() can be drop-monitor friendly, calling
consume_skb() or kfree_skb() appropriately.

Finally, use mod_timer() instead of del_timer() + add_timer()

syzbot report was :

rcu: INFO: rcu_preempt self-detected stall on CPU
rcu:0-...!: (10499 ticks this GP) idle=536/1/0x4002 
softirq=103291/103291 fqs=34
rcu: (t=10500 jiffies g=140321 q=323)
rcu: rcu_preempt kthread starved for 10426 jiffies! g140321 f0x0 
RCU_GP_WAIT_FQS(5) ->state=0x402 ->cpu=1
rcu: RCU grace-period kthread stack dump:
rcu_preempt I2916810  2 0x8000
Call Trace:
 context_switch kernel/sched/core.c:2877 [inline]
 __schedule+0x813/0x1cc0 kernel/sched/core.c:3518
 schedule+0x92/0x180 kernel/sched/core.c:3562
 schedule_timeout+0x4db/0xfd0 kernel/time/timer.c:1803
 rcu_gp_fqs_loop kernel/rcu/tree.c:1971 [inline]
 rcu_gp_kthread+0x962/0x17b0 kernel/rcu/tree.c:2128
 kthread+0x357/0x430 kernel/kthread.c:253
 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:352
NMI backtrace for cpu 0
CPU: 0 PID: 7632 Comm: kworker/0:4 Not tainted 5.1.0-rc5+ #172
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 
01/01/2011
Workqueue: events iterate_cleanup_work
Call Trace:
 
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x172/0x1f0 lib/dump_stack.c:113
 nmi_cpu_backtrace.cold+0x63/0xa4 lib/nmi_backtrace.c:101
 nmi_trigger_cpumask_backtrace+0x1be/0x236 lib/nmi_backtrace.c:62
 arch_trigger_cpumask_backtrace+0x14/0x20 arch/x86/kernel/apic/hw_nmi.c:38
 trigger_single_cpu_backtrace include/linux/nmi.h:164 [inline]
 rcu_dump_cpu_stacks+0x183/0x1cf kernel/rcu/tree.c:1223
 print_cpu_stall kernel/rcu/tree.c:1360 [inline]
 check_cpu_stall kernel/rcu/tree.c:1434 [inline]
 rcu_pending kernel/rcu/tree.c:3103 [inline]
 rcu_sched_clock_irq.cold+0x500/0xa4a kernel/rcu/tree.c:2544
 update_process_times+0x32/0x80 kernel/time/timer.c:1635
 tick_sched_handle+0xa2/0x190 kernel/time/tick-sched.c:161
 tick_sched_timer+0x47/0x130 kernel/time/tick-sched.c:1271
 __run_hrtimer kernel/time/hrtimer.c:1389 [inline]
 __hrtimer_run_queues+0x33e/0xde0 kernel/time/hrtimer.c:1451
 hrtimer_interrupt+0x314/0x770 kernel/time/hrtimer.c:1509
 local_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1035 [inline]
 smp_apic_timer_interrupt+0x120/0x570 arch/x86/kernel/apic/apic.c:1060
 apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:807
RIP: 0010:__sanitizer_cov_trace_pc+0x0/0x50 kernel/kcov.c:95
Code: 89 25 b4 6e ec 08 41 bc f4 ff ff ff e8 cd 5d ea ff 48 c7 05 9e 6e ec 08 
00 00 00 00 e9 a4 e9 ff ff 90 90 90 90 90 90 90 90 90 <55> 48 89 e5 48 8b 75 08 
65 48 8b 04 25 00 ee 01 00 65 8b 15 c8 60
RSP: 0018:8880ae807ce0 EFLAGS: 0286 ORIG_RAX: ff13
RAX: 88806fd40640 RBX: dc00 RCX: 863fbc56
RDX: 0100 RSI: 863fbc1d RDI: 88808cf94228
RBP: 8880ae807d10 R08: 88806fd40640 R09: ed1015d00f8b
R10: ed1015d00f8a R11: 0003 R12: 88808cf941c0
R13: f034 R14: 8882166cd840 R15: 
 rose_loopback_timer+0x30d/0x3f0 net/rose/rose_loopback.c:91
 call_timer_fn+0x190/0x720 kernel/time/timer.c:1325
 expire_timers kernel/time/timer.c:1362 [inline]
 __run_timers kernel/time/timer.c:1681 [inline]
 __run_timers kernel/time/timer.c:1649 [inline]
 run_timer_softirq+0x652/0x1700 kernel/time/timer.c:1694
 __do_softirq+0x266/0x95a kernel/softirq.c:293
 do_softirq_own_stack+0x2a/0x40 arch/x86/entry/entry_64.S:1027

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Eric Dumazet 
Reported-by: syzbot 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 
---
 net/rose/rose_loopback.c |   27 ---
 1 file changed, 16 insertions(+), 11 deletions(-)

--- a/net/rose/rose_loopback.c
+++ b/net/rose/rose_loopback.c
@@ -16,6 +16,7 @@
 #include 
 
 static struct sk_buff_head loopback_queue;
+#define ROSE_LOOPBACK_LIMIT 1000
 static struct timer_list loopback_timer;
 
 static void rose_set_loopback_timer(void);
@@ -35,29 +36,27 @@ static int rose_loopback_running(void)
 
 int rose_loopback_queue(struct sk_buff *skb, struct rose_neigh *neigh)
 {
-   struct sk_buff *skbn;
+   struct sk_buff *skbn = NULL;
 
-   skbn = skb_clone(skb, GFP_ATOMIC);
+   if (skb_queue_len(_queue) < ROSE_LOOPBACK_LIMIT)
+   skbn = skb_clone(skb, GFP_ATOMIC);
 
-   kfree_skb(skb);
-
-   if (skbn != NULL) {
+   if (skbn) {
+   consume_skb(skb);
skb_queue_tail(_queue, skbn);
 
if (!rose_loopback_running())
rose_set_loopback_timer();
+   } 

[PATCH 5.0 76/89] net: rds: exchange of 8K and 1M pool

2019-04-30 Thread Greg Kroah-Hartman
From: Zhu Yanjun 

[ Upstream commit 4b9fc7146249a6e0e3175d0acc033fdcd2bfcb17 ]

Before the commit 490ea5967b0d ("RDS: IB: move FMR code to its own file"),
when the dirty_count is greater than 9/10 of max_items of 8K pool,
1M pool is used, and vice versa. After the commit 490ea5967b0d ("RDS: IB: move
FMR code to its own file"), the above is removed. When we make the
following tests.

Server:
  rds-stress -r 1.1.1.16 -D 1M

Client:
  rds-stress -r 1.1.1.14 -s 1.1.1.16 -D 1M

The following will appear.
"
connecting to 1.1.1.16:4000
negotiated options, tasks will start in 2 seconds
Starting up..header from 1.1.1.166:4001 to id 4001 bogus
..
tsks  tx/s  rx/s tx+rx K/s  mbi K/s  mbo K/s tx us/c  rtt us
cpu %
   100 0.00 0.00 0.000.00 0.00 -1.00
   100 0.00 0.00 0.000.00 0.00 -1.00
   100 0.00 0.00 0.000.00 0.00 -1.00
   100 0.00 0.00 0.000.00 0.00 -1.00
   100 0.00 0.00 0.000.00 0.00 -1.00
...
"
So this exchange between 8K and 1M pool is added back.

Fixes: commit 490ea5967b0d ("RDS: IB: move FMR code to its own file")
Signed-off-by: Zhu Yanjun 
Acked-by: Santosh Shilimkar 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 
---
 net/rds/ib_fmr.c  |   11 +++
 net/rds/ib_rdma.c |3 ---
 2 files changed, 11 insertions(+), 3 deletions(-)

--- a/net/rds/ib_fmr.c
+++ b/net/rds/ib_fmr.c
@@ -44,6 +44,17 @@ struct rds_ib_mr *rds_ib_alloc_fmr(struc
else
pool = rds_ibdev->mr_1m_pool;
 
+   if (atomic_read(>dirty_count) >= pool->max_items / 10)
+   queue_delayed_work(rds_ib_mr_wq, >flush_worker, 10);
+
+   /* Switch pools if one of the pool is reaching upper limit */
+   if (atomic_read(>dirty_count) >=  pool->max_items * 9 / 10) {
+   if (pool->pool_type == RDS_IB_MR_8K_POOL)
+   pool = rds_ibdev->mr_1m_pool;
+   else
+   pool = rds_ibdev->mr_8k_pool;
+   }
+
ibmr = rds_ib_try_reuse_ibmr(pool);
if (ibmr)
return ibmr;
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -454,9 +454,6 @@ struct rds_ib_mr *rds_ib_try_reuse_ibmr(
struct rds_ib_mr *ibmr = NULL;
int iter = 0;
 
-   if (atomic_read(>dirty_count) >= pool->max_items_soft / 10)
-   queue_delayed_work(rds_ib_mr_wq, >flush_worker, 10);
-
while (1) {
ibmr = rds_ib_reuse_mr(pool);
if (ibmr)




[PATCH 5.0 75/89] net/mlx5e: ethtool, Remove unsupported SFP EEPROM high pages query

2019-04-30 Thread Greg Kroah-Hartman
From: Erez Alfasi 

[ Upstream commit ace329f4ab3ba434be2adf618073c752d083b524 ]

Querying EEPROM high pages data for SFP module is currently
not supported by our driver and yet queried, resulting in
invalid FW queries.

Setting the EEPROM ethtool data length to 256 for SFP modules will
limit the reading to page 0 only and prevent invalid FW queries.

Fixes: bb64143eee8c ("net/mlx5e: Add ethtool support for dump module EEPROM")
Signed-off-by: Erez Alfasi 
Signed-off-by: Saeed Mahameed 
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c |2 +-
 drivers/net/ethernet/mellanox/mlx5/core/port.c   |4 
 2 files changed, 1 insertion(+), 5 deletions(-)

--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -1470,7 +1470,7 @@ static int mlx5e_get_module_info(struct
break;
case MLX5_MODULE_ID_SFP:
modinfo->type   = ETH_MODULE_SFF_8472;
-   modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN;
+   modinfo->eeprom_len = MLX5_EEPROM_PAGE_LENGTH;
break;
default:
netdev_err(priv->netdev, "%s: cable type not recognized:0x%x\n",
--- a/drivers/net/ethernet/mellanox/mlx5/core/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
@@ -404,10 +404,6 @@ int mlx5_query_module_eeprom(struct mlx5
size -= offset + size - MLX5_EEPROM_PAGE_LENGTH;
 
i2c_addr = MLX5_I2C_ADDR_LOW;
-   if (offset >= MLX5_EEPROM_PAGE_LENGTH) {
-   i2c_addr = MLX5_I2C_ADDR_HIGH;
-   offset -= MLX5_EEPROM_PAGE_LENGTH;
-   }
 
MLX5_SET(mcia_reg, in, l, 0);
MLX5_SET(mcia_reg, in, module, module_num);




[PATCH 5.0 70/89] x86/retpolines: Disable switch jump tables when retpolines are enabled

2019-04-30 Thread Greg Kroah-Hartman
From: Daniel Borkmann 

commit a9d57ef15cbe327fe54416dd194ee0ea66ae53a4 upstream.

Commit ce02ef06fcf7 ("x86, retpolines: Raise limit for generating indirect
calls from switch-case") raised the limit under retpolines to 20 switch
cases where gcc would only then start to emit jump tables, and therefore
effectively disabling the emission of slow indirect calls in this area.

After this has been brought to attention to gcc folks [0], Martin Liska
has then fixed gcc to align with clang by avoiding to generate switch jump
tables entirely under retpolines. This is taking effect in gcc starting
from stable version 8.4.0. Given kernel supports compilation with older
versions of gcc where the fix is not being available or backported anymore,
we need to keep the extra KBUILD_CFLAGS around for some time and generally
set the -fno-jump-tables to align with what more recent gcc is doing
automatically today.

More than 20 switch cases are not expected to be fast-path critical, but
it would still be good to align with gcc behavior for versions < 8.4.0 in
order to have consistency across supported gcc versions. vmlinux size is
slightly growing by 0.27% for older gcc. This flag is only set to work
around affected gcc, no change for clang.

  [0] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=86952

Suggested-by: Martin Liska 
Signed-off-by: Daniel Borkmann 
Signed-off-by: Thomas Gleixner 
Cc: David Woodhouse 
Cc: Linus Torvalds 
Cc: Jesper Dangaard Brouer 
Cc: Björn Töpel
Cc: Magnus Karlsson 
Cc: Alexei Starovoitov 
Cc: H.J. Lu 
Cc: Alexei Starovoitov 
Cc: David S. Miller 
Link: https://lkml.kernel.org/r/20190325135620.14882-1-dan...@iogearbox.net
Signed-off-by: Greg Kroah-Hartman 

---
 arch/x86/Makefile |8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -220,8 +220,12 @@ ifdef CONFIG_RETPOLINE
   # Additionally, avoid generating expensive indirect jumps which
   # are subject to retpolines for small number of switch cases.
   # clang turns off jump table generation by default when under
-  # retpoline builds, however, gcc does not for x86.
-  KBUILD_CFLAGS += $(call cc-option,--param=case-values-threshold=20)
+  # retpoline builds, however, gcc does not for x86. This has
+  # only been fixed starting from gcc stable version 8.4.0 and
+  # onwards, but not for older ones. See gcc bug #86952.
+  ifndef CONFIG_CC_IS_CLANG
+KBUILD_CFLAGS += $(call cc-option,-fno-jump-tables)
+  endif
 endif
 
 archscripts: scripts_basic




[PATCH 5.0 50/89] tipc: handle the err returned from cmd header function

2019-04-30 Thread Greg Kroah-Hartman
From: Xin Long 

commit 2ac695d1d602ce00b12170242f58c3d3a8e36d04 upstream.

Syzbot found a crash:

  BUG: KMSAN: uninit-value in tipc_nl_compat_name_table_dump+0x54f/0xcd0 
net/tipc/netlink_compat.c:872
  Call Trace:
tipc_nl_compat_name_table_dump+0x54f/0xcd0 net/tipc/netlink_compat.c:872
__tipc_nl_compat_dumpit+0x59e/0xda0 net/tipc/netlink_compat.c:215
tipc_nl_compat_dumpit+0x63a/0x820 net/tipc/netlink_compat.c:280
tipc_nl_compat_handle net/tipc/netlink_compat.c:1226 [inline]
tipc_nl_compat_recv+0x1b5f/0x2750 net/tipc/netlink_compat.c:1265
genl_family_rcv_msg net/netlink/genetlink.c:601 [inline]
genl_rcv_msg+0x185f/0x1a60 net/netlink/genetlink.c:626
netlink_rcv_skb+0x431/0x620 net/netlink/af_netlink.c:2477
genl_rcv+0x63/0x80 net/netlink/genetlink.c:637
netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline]
netlink_unicast+0xf3e/0x1020 net/netlink/af_netlink.c:1336
netlink_sendmsg+0x127f/0x1300 net/netlink/af_netlink.c:1917
sock_sendmsg_nosec net/socket.c:622 [inline]
sock_sendmsg net/socket.c:632 [inline]

  Uninit was created at:
__alloc_skb+0x309/0xa20 net/core/skbuff.c:208
alloc_skb include/linux/skbuff.h:1012 [inline]
netlink_alloc_large_skb net/netlink/af_netlink.c:1182 [inline]
netlink_sendmsg+0xb82/0x1300 net/netlink/af_netlink.c:1892
sock_sendmsg_nosec net/socket.c:622 [inline]
sock_sendmsg net/socket.c:632 [inline]

It was supposed to be fixed on commit 974cb0e3e7c9 ("tipc: fix uninit-value
in tipc_nl_compat_name_table_dump") by checking TLV_GET_DATA_LEN(msg->req)
in cmd->header()/tipc_nl_compat_name_table_dump_header(), which is called
ahead of tipc_nl_compat_name_table_dump().

However, tipc_nl_compat_dumpit() doesn't handle the error returned from cmd
header function. It means even when the check added in that fix fails, it
won't stop calling tipc_nl_compat_name_table_dump(), and the issue will be
triggered again.

So this patch is to add the process for the err returned from cmd header
function in tipc_nl_compat_dumpit().

Reported-by: syzbot+3ce8520484b0d4e26...@syzkaller.appspotmail.com
Signed-off-by: Xin Long 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 

---
 net/tipc/netlink_compat.c |   10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -267,8 +267,14 @@ static int tipc_nl_compat_dumpit(struct
if (msg->rep_type)
tipc_tlv_init(msg->rep, msg->rep_type);
 
-   if (cmd->header)
-   (*cmd->header)(msg);
+   if (cmd->header) {
+   err = (*cmd->header)(msg);
+   if (err) {
+   kfree_skb(msg->rep);
+   msg->rep = NULL;
+   return err;
+   }
+   }
 
arg = nlmsg_new(0, GFP_KERNEL);
if (!arg) {




[PATCH 5.0 62/89] net/rds: Check address length before reading address family

2019-04-30 Thread Greg Kroah-Hartman
From: Tetsuo Handa 

commit dd3ac9a684358b8c1d5c432ca8322aaf5e4f28ee upstream.

syzbot is reporting uninitialized value at rds_connect() [1] and
rds_bind() [2]. This is because syzbot is passing ulen == 0 whereas
these functions expect that it is safe to access sockaddr->family field
in order to determine minimal address length for validation.

[1] 
https://syzkaller.appspot.com/bug?id=f4e61c010416c1e6f0fa3ffe247561b60a50ad71
[2] 
https://syzkaller.appspot.com/bug?id=a4bf9e41b7e055c3823fdcd83e8c58ca7270e38f

Reported-by: syzbot 
Reported-by: syzbot 
Signed-off-by: Tetsuo Handa 
Acked-by: Santosh Shilimkar 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 

---
 net/rds/af_rds.c |3 +++
 net/rds/bind.c   |2 ++
 2 files changed, 5 insertions(+)

--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -506,6 +506,9 @@ static int rds_connect(struct socket *so
struct rds_sock *rs = rds_sk_to_rs(sk);
int ret = 0;
 
+   if (addr_len < offsetofend(struct sockaddr, sa_family))
+   return -EINVAL;
+
lock_sock(sk);
 
switch (uaddr->sa_family) {
--- a/net/rds/bind.c
+++ b/net/rds/bind.c
@@ -173,6 +173,8 @@ int rds_bind(struct socket *sock, struct
/* We allow an RDS socket to be bound to either IPv4 or IPv6
 * address.
 */
+   if (addr_len < offsetofend(struct sockaddr, sa_family))
+   return -EINVAL;
if (uaddr->sa_family == AF_INET) {
struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
 




[PATCH 5.0 74/89] mlxsw: spectrum: Fix autoneg status in ethtool

2019-04-30 Thread Greg Kroah-Hartman
From: Amit Cohen 

[ Upstream commit 151f0dddbbfe4c35c9c5b64873115aafd436af9d ]

If link is down and autoneg is set to on/off, the status in ethtool does
not change.

The reason is when the link is down the function returns with zero
before changing autoneg value.

Move the checking of link state (up/down) to be performed after setting
autoneg value, in order to be sure that autoneg will change in any case.

Fixes: 56ade8fe3fe1 ("mlxsw: spectrum: Add initial support for Spectrum ASIC")
Signed-off-by: Amit Cohen 
Signed-off-by: Ido Schimmel 
Acked-by: Jiri Pirko 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -2667,11 +2667,11 @@ mlxsw_sp_port_set_link_ksettings(struct
if (err)
return err;
 
+   mlxsw_sp_port->link.autoneg = autoneg;
+
if (!netif_running(dev))
return 0;
 
-   mlxsw_sp_port->link.autoneg = autoneg;
-
mlxsw_sp_port_admin_status_set(mlxsw_sp_port, false);
mlxsw_sp_port_admin_status_set(mlxsw_sp_port, true);
 




[PATCH 5.0 65/89] aio: fold lookup_kiocb() into its sole caller

2019-04-30 Thread Greg Kroah-Hartman
From: Al Viro 

commit 833f4154ed560232120bc475935ee1d6a20e159f upstream.

Signed-off-by: Al Viro 
Cc: Guenter Roeck 
Signed-off-by: Greg Kroah-Hartman 

---
 fs/aio.c |   29 +++--
 1 file changed, 7 insertions(+), 22 deletions(-)

--- a/fs/aio.c
+++ b/fs/aio.c
@@ -2002,24 +2002,6 @@ COMPAT_SYSCALL_DEFINE3(io_submit, compat
 }
 #endif
 
-/* lookup_kiocb
- * Finds a given iocb for cancellation.
- */
-static struct aio_kiocb *
-lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb)
-{
-   struct aio_kiocb *kiocb;
-
-   assert_spin_locked(>ctx_lock);
-
-   /* TODO: use a hash or array, this sucks. */
-   list_for_each_entry(kiocb, >active_reqs, ki_list) {
-   if (kiocb->ki_user_iocb == iocb)
-   return kiocb;
-   }
-   return NULL;
-}
-
 /* sys_io_cancel:
  * Attempts to cancel an iocb previously passed to io_submit.  If
  * the operation is successfully cancelled, the resulting event is
@@ -2048,10 +2030,13 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t
return -EINVAL;
 
spin_lock_irq(>ctx_lock);
-   kiocb = lookup_kiocb(ctx, iocb);
-   if (kiocb) {
-   ret = kiocb->ki_cancel(>rw);
-   list_del_init(>ki_list);
+   /* TODO: use a hash or array, this sucks. */
+   list_for_each_entry(kiocb, >active_reqs, ki_list) {
+   if (kiocb->ki_user_iocb == iocb) {
+   ret = kiocb->ki_cancel(>rw);
+   list_del_init(>ki_list);
+   break;
+   }
}
spin_unlock_irq(>ctx_lock);
 




[PATCH 5.0 67/89] aio: store event at final iocb_put()

2019-04-30 Thread Greg Kroah-Hartman
From: Al Viro 

commit 2bb874c0d873d13bd9b9b9c6d7b7c4edab18c8b4 upstream.

Instead of having aio_complete() set ->ki_res.{res,res2}, do that
explicitly in its callers, drop the reference (as aio_complete()
used to do) and delay the rest until the final iocb_put().

Signed-off-by: Al Viro 
Cc: Guenter Roeck 
Signed-off-by: Greg Kroah-Hartman 

---
 fs/aio.c |   33 +
 1 file changed, 17 insertions(+), 16 deletions(-)

--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1077,16 +1077,10 @@ static inline void iocb_destroy(struct a
kmem_cache_free(kiocb_cachep, iocb);
 }
 
-static inline void iocb_put(struct aio_kiocb *iocb)
-{
-   if (refcount_dec_and_test(>ki_refcnt))
-   iocb_destroy(iocb);
-}
-
 /* aio_complete
  * Called when the io request on the given iocb is complete.
  */
-static void aio_complete(struct aio_kiocb *iocb, long res, long res2)
+static void aio_complete(struct aio_kiocb *iocb)
 {
struct kioctx   *ctx = iocb->ki_ctx;
struct aio_ring *ring;
@@ -1094,8 +1088,6 @@ static void aio_complete(struct aio_kioc
unsigned tail, pos, head;
unsigned long   flags;
 
-   iocb->ki_res.res = res;
-   iocb->ki_res.res2 = res2;
/*
 * Add a completion event to the ring buffer. Must be done holding
 * ctx->completion_lock to prevent other code from messing with the tail
@@ -1161,7 +1153,14 @@ static void aio_complete(struct aio_kioc
 
if (waitqueue_active(>wait))
wake_up(>wait);
-   iocb_put(iocb);
+}
+
+static inline void iocb_put(struct aio_kiocb *iocb)
+{
+   if (refcount_dec_and_test(>ki_refcnt)) {
+   aio_complete(iocb);
+   iocb_destroy(iocb);
+   }
 }
 
 /* aio_read_events_ring
@@ -1435,7 +1434,9 @@ static void aio_complete_rw(struct kiocb
file_end_write(kiocb->ki_filp);
}
 
-   aio_complete(iocb, res, res2);
+   iocb->ki_res.res = res;
+   iocb->ki_res.res2 = res2;
+   iocb_put(iocb);
 }
 
 static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb)
@@ -1583,11 +1584,10 @@ static ssize_t aio_write(struct kiocb *r
 
 static void aio_fsync_work(struct work_struct *work)
 {
-   struct fsync_iocb *req = container_of(work, struct fsync_iocb, work);
-   int ret;
+   struct aio_kiocb *iocb = container_of(work, struct aio_kiocb, 
fsync.work);
 
-   ret = vfs_fsync(req->file, req->datasync);
-   aio_complete(container_of(req, struct aio_kiocb, fsync), ret, 0);
+   iocb->ki_res.res = vfs_fsync(iocb->fsync.file, iocb->fsync.datasync);
+   iocb_put(iocb);
 }
 
 static int aio_fsync(struct fsync_iocb *req, const struct iocb *iocb,
@@ -1608,7 +1608,8 @@ static int aio_fsync(struct fsync_iocb *
 
 static inline void aio_poll_complete(struct aio_kiocb *iocb, __poll_t mask)
 {
-   aio_complete(iocb, mangle_poll(mask), 0);
+   iocb->ki_res.res = mangle_poll(mask);
+   iocb_put(iocb);
 }
 
 static void aio_poll_complete_work(struct work_struct *work)




[PATCH 5.0 72/89] ipv4: add sanity checks in ipv4_link_failure()

2019-04-30 Thread Greg Kroah-Hartman
From: Eric Dumazet 

[ Upstream commit 20ff83f10f113c88d0bb74589389b05250994c16 ]

Before calling __ip_options_compile(), we need to ensure the network
header is an IPv4 one, and that it is already pulled in skb->head.

RAW sockets going through a tunnel can end up calling ipv4_link_failure()
with total garbage in the skb, or arbitrary lengths.

syzbot report :

BUG: KASAN: stack-out-of-bounds in memcpy include/linux/string.h:355 [inline]
BUG: KASAN: stack-out-of-bounds in __ip_options_echo+0x294/0x1120 
net/ipv4/ip_options.c:123
Write of size 69 at addr 888096abf068 by task syz-executor.4/9204

CPU: 0 PID: 9204 Comm: syz-executor.4 Not tainted 5.1.0-rc5+ #77
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 
01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x172/0x1f0 lib/dump_stack.c:113
 print_address_description.cold+0x7c/0x20d mm/kasan/report.c:187
 kasan_report.cold+0x1b/0x40 mm/kasan/report.c:317
 check_memory_region_inline mm/kasan/generic.c:185 [inline]
 check_memory_region+0x123/0x190 mm/kasan/generic.c:191
 memcpy+0x38/0x50 mm/kasan/common.c:133
 memcpy include/linux/string.h:355 [inline]
 __ip_options_echo+0x294/0x1120 net/ipv4/ip_options.c:123
 __icmp_send+0x725/0x1400 net/ipv4/icmp.c:695
 ipv4_link_failure+0x29f/0x550 net/ipv4/route.c:1204
 dst_link_failure include/net/dst.h:427 [inline]
 vti6_xmit net/ipv6/ip6_vti.c:514 [inline]
 vti6_tnl_xmit+0x10d4/0x1c0c net/ipv6/ip6_vti.c:553
 __netdev_start_xmit include/linux/netdevice.h:4414 [inline]
 netdev_start_xmit include/linux/netdevice.h:4423 [inline]
 xmit_one net/core/dev.c:3292 [inline]
 dev_hard_start_xmit+0x1b2/0x980 net/core/dev.c:3308
 __dev_queue_xmit+0x271d/0x3060 net/core/dev.c:3878
 dev_queue_xmit+0x18/0x20 net/core/dev.c:3911
 neigh_direct_output+0x16/0x20 net/core/neighbour.c:1527
 neigh_output include/net/neighbour.h:508 [inline]
 ip_finish_output2+0x949/0x1740 net/ipv4/ip_output.c:229
 ip_finish_output+0x73c/0xd50 net/ipv4/ip_output.c:317
 NF_HOOK_COND include/linux/netfilter.h:278 [inline]
 ip_output+0x21f/0x670 net/ipv4/ip_output.c:405
 dst_output include/net/dst.h:444 [inline]
 NF_HOOK include/linux/netfilter.h:289 [inline]
 raw_send_hdrinc net/ipv4/raw.c:432 [inline]
 raw_sendmsg+0x1d2b/0x2f20 net/ipv4/raw.c:663
 inet_sendmsg+0x147/0x5d0 net/ipv4/af_inet.c:798
 sock_sendmsg_nosec net/socket.c:651 [inline]
 sock_sendmsg+0xdd/0x130 net/socket.c:661
 sock_write_iter+0x27c/0x3e0 net/socket.c:988
 call_write_iter include/linux/fs.h:1866 [inline]
 new_sync_write+0x4c7/0x760 fs/read_write.c:474
 __vfs_write+0xe4/0x110 fs/read_write.c:487
 vfs_write+0x20c/0x580 fs/read_write.c:549
 ksys_write+0x14f/0x2d0 fs/read_write.c:599
 __do_sys_write fs/read_write.c:611 [inline]
 __se_sys_write fs/read_write.c:608 [inline]
 __x64_sys_write+0x73/0xb0 fs/read_write.c:608
 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290
 entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x458c29
Code: ad b8 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 
89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 
7b b8 fb ff c3 66 2e 0f 1f 84 00 00 00 00
RSP: 002b:7f293b44bc78 EFLAGS: 0246 ORIG_RAX: 0001
RAX: ffda RBX: 0003 RCX: 00458c29
RDX: 0014 RSI: 22c0 RDI: 0003
RBP: 0073bf00 R08:  R09: 
R10:  R11: 0246 R12: 7f293b44c6d4
R13: 004c8623 R14: 004ded68 R15: 

The buggy address belongs to the page:
page:ea00025aafc0 count:0 mapcount:0 mapping: index:0x0
flags: 0x1fffc00()
raw: 01fffc00  025a0101 
raw:    
page dumped because: kasan: bad access detected

Memory state around the buggy address:
 888096abef80: 00 00 00 f2 f2 f2 f2 f2 00 00 00 00 00 00 00 f2
 888096abf000: f2 f2 f2 f2 00 00 00 00 00 00 00 00 00 00 00 00
>888096abf080: 00 00 f3 f3 f3 f3 00 00 00 00 00 00 00 00 00 00
 ^
 888096abf100: 00 00 00 00 f1 f1 f1 f1 00 00 f3 f3 00 00 00 00
 888096abf180: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00

Fixes: ed0de45a1008 ("ipv4: recompile ip options in ipv4_link_failure")
Signed-off-by: Eric Dumazet 
Cc: Stephen Suryaputra 
Acked-by: Willem de Bruijn 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 
---
 net/ipv4/route.c |   34 --
 1 file changed, 24 insertions(+), 10 deletions(-)

--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1183,25 +1183,39 @@ static struct dst_entry *ipv4_dst_check(
return dst;
 }
 
-static void ipv4_link_failure(struct sk_buff *skb)
+static void ipv4_send_dest_unreach(struct sk_buff *skb)
 {
struct ip_options opt;
-   struct rtable *rt;
int res;
 
/* Recompile ip options 

[PATCH 5.0 61/89] net: netrom: Fix error cleanup path of nr_proto_init

2019-04-30 Thread Greg Kroah-Hartman
From: YueHaibing 

commit d3706566ae3d92677b932dd156157fd6c72534b1 upstream.

Syzkaller report this:

BUG: unable to handle kernel paging request at fbfff830524b
PGD 237fe8067 P4D 237fe8067 PUD 237e64067 PMD 1c9716067 PTE 0
Oops:  [#1] SMP KASAN PTI
CPU: 1 PID: 4465 Comm: syz-executor.0 Not tainted 5.0.0+ #5
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 
04/01/2014
RIP: 0010:__list_add_valid+0x21/0xe0 lib/list_debug.c:23
Code: 8b 0c 24 e9 17 fd ff ff 90 55 48 89 fd 48 8d 7a 08 53 48 89 d3 48 b8 00 
00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 48 83 ec 08 <80> 3c 02 00 0f 85 8b 00 
00 00 48 8b 53 08 48 39 f2 75 35 48 89 f2
RSP: 0018:8881ea2278d0 EFLAGS: 00010282
RAX: dc00 RBX: c1829250 RCX: 11103d444ef4
RDX: 1830524b RSI: 85659300 RDI: c1829258
RBP: c1879250 R08: fbfff0acb269 R09: fbfff0acb269
R10: 8881ea2278f0 R11: fbfff0acb268 R12: c1829250
R13: dc00 R14: 0008 R15: c187c830
FS:  7fe0361df700() GS:8881f730() knlGS:
CS:  0010 DS:  ES:  CR0: 80050033
CR2: fbfff830524b CR3: 0001eb39a001 CR4: 007606e0
DR0:  DR1:  DR2: 
DR3:  DR6: fffe0ff0 DR7: 0400
PKRU: 5554
Call Trace:
 __list_add include/linux/list.h:60 [inline]
 list_add include/linux/list.h:79 [inline]
 proto_register+0x444/0x8f0 net/core/sock.c:3375
 nr_proto_init+0x73/0x4b3 [netrom]
 ? 0xc1628000
 ? 0xc1628000
 do_one_initcall+0xbc/0x47d init/main.c:887
 do_init_module+0x1b5/0x547 kernel/module.c:3456
 load_module+0x6405/0x8c10 kernel/module.c:3804
 __do_sys_finit_module+0x162/0x190 kernel/module.c:3898
 do_syscall_64+0x9f/0x450 arch/x86/entry/common.c:290
 entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x462e99
Code: f7 d8 64 89 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 48 89 f8 48 89 f7 48 
89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 
c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48
RSP: 002b:7fe0361dec58 EFLAGS: 0246 ORIG_RAX: 0139
RAX: ffda RBX: 0073bf00 RCX: 00462e99
RDX:  RSI: 2100 RDI: 0003
RBP: 7fe0361dec70 R08:  R09: 
R10:  R11: 0246 R12: 7fe0361df6bc
R13: 004bcefa R14: 006f6fb0 R15: 0004
Modules linked in: netrom(+) ax25 fcrypt pcbc af_alg arizona_ldo1 v4l2_common 
videodev media v4l2_dv_timings hdlc ide_cd_mod snd_soc_sigmadsp_regmap 
snd_soc_sigmadsp intel_spi_platform intel_spi mtd spi_nor snd_usbmidi_lib 
usbcore lcd ti_ads7950 hi6421_regulator snd_soc_kbl_rt5663_max98927 
snd_soc_hdac_hdmi snd_hda_ext_core snd_hda_core snd_soc_rt5663 snd_soc_core 
snd_pcm_dmaengine snd_compress snd_soc_rl6231 mac80211 rtc_rc5t583 
spi_slave_time leds_pwm hid_gt683r hid industrialio_triggered_buffer kfifo_buf 
industrialio ir_kbd_i2c rc_core led_class_flash dwc_xlgmac snd_ymfpci gameport 
snd_mpu401_uart snd_rawmidi snd_ac97_codec snd_pcm ac97_bus snd_opl3_lib 
snd_timer snd_seq_device snd_hwdep snd soundcore iptable_security iptable_raw 
iptable_mangle iptable_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 
iptable_filter bpfilter ip6_vti ip_vti ip_gre ipip sit tunnel4 ip_tunnel hsr 
veth netdevsim vxcan batman_adv cfg80211 rfkill chnl_net caif nlmon dummy team 
bonding vcan
 bridge stp llc ip6_gre gre ip6_tunnel tunnel6 tun joydev mousedev ppdev tpm 
kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul crc32c_intel 
ghash_clmulni_intel ide_pci_generic piix aesni_intel aes_x86_64 crypto_simd 
cryptd glue_helper ide_core psmouse input_leds i2c_piix4 serio_raw intel_agp 
intel_gtt ata_generic agpgart pata_acpi parport_pc rtc_cmos parport floppy 
sch_fq_codel ip_tables x_tables sha1_ssse3 sha1_generic ipv6 [last unloaded: 
rxrpc]
Dumping ftrace buffer:
   (ftrace buffer empty)
CR2: fbfff830524b
---[ end trace 039ab24b305c4b19 ]---

If nr_proto_init failed, it may forget to call proto_unregister,
triggering this issue. This patch rearranges the code of nr_proto_init
to avoid such issues.

Reported-by: Hulk Robot 
Signed-off-by: YueHaibing 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 

---
 include/net/netrom.h   |2 -
 net/netrom/af_netrom.c |   76 +
 net/netrom/nr_loopback.c   |2 -
 net/netrom/nr_route.c  |2 -
 net/netrom/sysctl_net_netrom.c |5 ++
 5 files changed, 61 insertions(+), 26 deletions(-)

--- a/include/net/netrom.h
+++ b/include/net/netrom.h
@@ -266,7 +266,7 @@ void nr_stop_idletimer(struct sock *);
 int nr_t1timer_running(struct sock *);
 
 /* sysctl_net_netrom.c */
-void nr_register_sysctl(void);
+int nr_register_sysctl(void);
 void nr_unregister_sysctl(void);
 
 #endif
--- a/net/netrom/af_netrom.c
+++ 

[PATCH 5.0 71/89] rdma: fix build errors on s390 and MIPS due to bad ZERO_PAGE use

2019-04-30 Thread Greg Kroah-Hartman
From: Linus Torvalds 

commit 6a5c5d26c4c6c3cc486fef0bf04ff9551132611b upstream.

The parameter to ZERO_PAGE() was wrong, but since all architectures
except for MIPS and s390 ignore it, it wasn't noticed until 0-day
reported the build error.

Fixes: 67f269b37f9b ("RDMA/ucontext: Fix regression with disassociate")
Cc: stable@vger.kernel.org
Cc: Andrea Arcangeli 
Cc: Leon Romanovsky 
Cc: Jason Gunthorpe 
Signed-off-by: Linus Torvalds 
Signed-off-by: Greg Kroah-Hartman 

---
 drivers/infiniband/core/uverbs_main.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -894,7 +894,7 @@ static vm_fault_t rdma_umap_fault(struct
 
/* Read only pages can just use the system zero page. */
if (!(vmf->vma->vm_flags & (VM_WRITE | VM_MAYWRITE))) {
-   vmf->page = ZERO_PAGE(vmf->vm_start);
+   vmf->page = ZERO_PAGE(vmf->address);
get_page(vmf->page);
return 0;
}




<    1   2   3   4   5   6   7   8   9   10   >