[PATCH 3.16 111/254] powerpc/perf: Dereference BHRB entries safely

2018-02-28 Thread Ben Hutchings
3.16.55-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Ravi Bangoria 

commit f41d84dddc66b164ac16acf3f584c276146f1c48 upstream.

It's theoretically possible that branch instructions recorded in
BHRB (Branch History Rolling Buffer) entries have already been
unmapped before they are processed by the kernel. Hence, trying to
dereference such memory location will result in a crash. eg:

Unable to handle kernel paging request for data at address 
0xd00019c41764
Faulting instruction address: 0xc0084a14
NIP [c0084a14] branch_target+0x4/0x70
LR [c00eb828] record_and_restart+0x568/0x5c0
Call Trace:
[c00eb3b4] record_and_restart+0xf4/0x5c0 (unreliable)
[c00ec378] perf_event_interrupt+0x298/0x460
[c0027964] performance_monitor_exception+0x54/0x70
[c0009ba4] performance_monitor_common+0x114/0x120

Fix it by dereferencing the addresses safely.

Fixes: 691231846ceb ("powerpc/perf: Fix setting of "to" addresses for BHRB")
Suggested-by: Naveen N. Rao 
Signed-off-by: Ravi Bangoria 
Reviewed-by: Naveen N. Rao 
[mpe: Use probe_kernel_read() which is clearer, tweak change log]
Signed-off-by: Michael Ellerman 
Signed-off-by: Ben Hutchings 
---
 arch/powerpc/perf/core-book3s.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -391,8 +391,12 @@ static __u64 power_pmu_bhrb_to(u64 addr)
int ret;
__u64 target;
 
-   if (is_kernel_addr(addr))
-   return branch_target((unsigned int *)addr);
+   if (is_kernel_addr(addr)) {
+   if (probe_kernel_read(&instr, (void *)addr, sizeof(instr)))
+   return 0;
+
+   return branch_target(&instr);
+   }
 
/* Userspace: need copy instruction here then translate it */
pagefault_disable();



[PATCH v16 11/11] RFC: fw_cfg: do DMA read operation

2018-02-28 Thread Marc-André Lureau
Modify fw_cfg_read_blob() to use DMA if the device supports it.
Return errors, because the operation may fail.

So far, only one call in fw_cfg_register_dir_entries() is using
kmalloc'ed buf and is thus clearly eligible to DMA read.

Initially, I didn't implement DMA read to speed up boot time, but as a
first step before introducing DMA write (since read operations were
already present). Moreover, I didn't realize fw-cfg entries were
being read by the kernel during boot by default. But actually fw-cfg
entries are being populated during module probe. I knew DMA improved
BIOS boot time a lot (the main reason the DMA interface was added,
afaik). Let's see the time it would take to read the whole ACPI
tables (128kb allocated):

 # time cat /sys/firmware/qemu_fw_cfg/by_name/etc/acpi/tables/raw
  - with DMA: sys 0m0.003s
  - without DMA (-global fw_cfg.dma_enabled=off): sys 0m7.674s

FW_CFG_FILE_DIR (0x19) is the only "file" that is read during kernel
boot to populate sysfs qemu_fw_cfg directory, and it is quite
small (1-2kb). Since it does not expose itself, in order to measure
the time it takes to read such a small file, I took a comparably sized
file of 2048 bytes and exposed it (-fw_cfg test,file=file with a
modified read_raw enabling DMA)

 # perf stat -r 100 cat /sys/firmware/qemu_fw_cfg/by_name/test/raw >/dev/null
  - with DMA:
  0.636037  task-clock (msec) #0.141 CPUs utilized  
  ( +-  1.19% )
  - without DMA:
  6.430128  task-clock (msec) #0.622 CPUs utilized  
  ( +-  0.22% )

That's a few msec saved during boot by enabling DMA read (the gain
would be more substantial if other & bigger fw-cfg entries are read by
others from sysfs, unfortunately, it's not clear if we can always
enable DMA there)

Signed-off-by: Marc-André Lureau 
---
 drivers/firmware/qemu_fw_cfg.c | 68 ++
 1 file changed, 55 insertions(+), 13 deletions(-)

diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index 14fedbeca724..db1cba4f99bd 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -66,7 +66,6 @@ static void fw_cfg_sel_endianness(u16 key)
iowrite16(key, fw_cfg_reg_ctrl);
 }
 
-#ifdef CONFIG_CRASH_CORE
 static inline bool fw_cfg_dma_enabled(void)
 {
return (fw_cfg_rev & FW_CFG_VERSION_DMA) && fw_cfg_reg_dma;
@@ -124,14 +123,49 @@ static ssize_t fw_cfg_dma_transfer(void *address, u32 
length, u32 control)
 
return ret;
 }
-#endif
+
+/* with acpi & dev locks taken */
+static ssize_t fw_cfg_read_blob_dma(u16 key,
+   void *buf, loff_t pos, size_t count)
+{
+   ssize_t ret;
+
+   if (pos == 0) {
+   ret = fw_cfg_dma_transfer(buf, count, key << 16
+   | FW_CFG_DMA_CTL_SELECT
+   | FW_CFG_DMA_CTL_READ);
+   } else {
+   fw_cfg_sel_endianness(key);
+   ret = fw_cfg_dma_transfer(NULL, pos, FW_CFG_DMA_CTL_SKIP);
+   if (ret < 0)
+   return ret;
+   ret = fw_cfg_dma_transfer(buf, count,
+   FW_CFG_DMA_CTL_READ);
+   }
+
+   return ret;
+}
+
+/* with acpi & dev locks taken */
+static ssize_t fw_cfg_read_blob_io(u16 key,
+   void *buf, loff_t pos, size_t count)
+{
+   fw_cfg_sel_endianness(key);
+   while (pos-- > 0)
+   ioread8(fw_cfg_reg_data);
+   ioread8_rep(fw_cfg_reg_data, buf, count);
+   return count;
+}
 
 /* read chunk of given fw_cfg blob (caller responsible for sanity-check) */
 static ssize_t fw_cfg_read_blob(u16 key,
-   void *buf, loff_t pos, size_t count)
+   void *buf, loff_t pos, size_t count,
+   ssize_t (*readfn)(u16 key, void *buf,
+   loff_t pos, size_t count))
 {
u32 glk = -1U;
acpi_status status;
+   ssize_t ret;
 
/* If we have ACPI, ensure mutual exclusion against any potential
 * device access by the firmware, e.g. via AML methods:
@@ -145,14 +179,19 @@ static ssize_t fw_cfg_read_blob(u16 key,
}
 
mutex_lock(_cfg_dev_lock);
-   fw_cfg_sel_endianness(key);
-   while (pos-- > 0)
-   ioread8(fw_cfg_reg_data);
-   ioread8_rep(fw_cfg_reg_data, buf, count);
+
+   /* fallback to IO if DMA is not available */
+   if (readfn == fw_cfg_read_blob_dma && !fw_cfg_dma_enabled()) {
+   readfn = fw_cfg_read_blob_io;
+   }
+
+   ret = readfn(key, buf, pos, count);
+
mutex_unlock(_cfg_dev_lock);
 
acpi_release_global_lock(glk);
-   return count;
+
+   return ret;
 }
 
 #ifdef CONFIG_CRASH_CORE
@@ -286,7 +325,7 @@ static int fw_cfg_do_platform_probe(struct platform_device 
*pdev)

[tip:x86/pti] x86/xen: Zero MSR_IA32_SPEC_CTRL before suspend

2018-02-28 Thread tip-bot for Juergen Gross
Commit-ID:  71c208dd54ab971036d83ff6d9837bae4976e623
Gitweb: https://git.kernel.org/tip/71c208dd54ab971036d83ff6d9837bae4976e623
Author: Juergen Gross 
AuthorDate: Mon, 26 Feb 2018 15:08:18 +0100
Committer:  Thomas Gleixner 
CommitDate: Wed, 28 Feb 2018 16:03:19 +0100

x86/xen: Zero MSR_IA32_SPEC_CTRL before suspend

Older Xen versions (4.5 and before) might have problems migrating pv
guests with MSR_IA32_SPEC_CTRL having a non-zero value. So before
suspending zero that MSR and restore it after being resumed.

Signed-off-by: Juergen Gross 
Signed-off-by: Thomas Gleixner 
Reviewed-by: Jan Beulich 
Cc: sta...@vger.kernel.org
Cc: xen-de...@lists.xenproject.org
Cc: boris.ostrov...@oracle.com
Link: https://lkml.kernel.org/r/20180226140818.4849-1-jgr...@suse.com

---
 arch/x86/xen/suspend.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index d9f96cc5d743..1d83152c761b 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -1,12 +1,15 @@
 // SPDX-License-Identifier: GPL-2.0
 #include 
 #include 
+#include 
 
 #include 
 #include 
 #include 
 #include 
 
+#include 
+#include 
 #include 
 #include 
 #include 
@@ -15,6 +18,8 @@
 #include "mmu.h"
 #include "pmu.h"
 
+static DEFINE_PER_CPU(u64, spec_ctrl);
+
 void xen_arch_pre_suspend(void)
 {
xen_save_time_memory_area();
@@ -35,6 +40,9 @@ void xen_arch_post_suspend(int cancelled)
 
 static void xen_vcpu_notify_restore(void *data)
 {
+   if (xen_pv_domain() && boot_cpu_has(X86_FEATURE_SPEC_CTRL))
+   wrmsrl(MSR_IA32_SPEC_CTRL, this_cpu_read(spec_ctrl));
+
/* Boot processor notified via generic timekeeping_resume() */
if (smp_processor_id() == 0)
return;
@@ -44,7 +52,15 @@ static void xen_vcpu_notify_restore(void *data)
 
 static void xen_vcpu_notify_suspend(void *data)
 {
+   u64 tmp;
+
tick_suspend_local();
+
+   if (xen_pv_domain() && boot_cpu_has(X86_FEATURE_SPEC_CTRL)) {
+   rdmsrl(MSR_IA32_SPEC_CTRL, tmp);
+   this_cpu_write(spec_ctrl, tmp);
+   wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+   }
 }
 
 void xen_arch_resume(void)


[PATCH v5 02/12] usb: typec: Start using ERR_PTR

2018-02-28 Thread Hans de Goede
From: Heikki Krogerus 

In order to allow the USB Type-C Class driver to take care of
things like muxes and other possible dependencies for the
port drivers, return ERR_PTR instead of NULL from the
registration functions in case of failure.

The reason for taking over control of the muxes for example
is because handling them in the port drivers would be just
boilerplate.

Signed-off-by: Heikki Krogerus 
Reviewed-by: Hans de Goede 
Reviewed-by: Guenter Roeck 
Reviewed-by: Andy Shevchenko 
Signed-off-by: Hans de Goede 
---
Changes in v4:
-Add Andy's Reviewed-by

Changes in v3:
-Add Guenter's Reviewed-by

Changes in v2:
-Add IS_ERR_OR_NULL() checks to the unregister functions
---
 drivers/usb/typec/tcpm.c  | 16 +---
 drivers/usb/typec/tps6598x.c  | 15 ---
 drivers/usb/typec/typec.c | 44 +--
 drivers/usb/typec/ucsi/ucsi.c | 31 ++
 4 files changed, 58 insertions(+), 48 deletions(-)

diff --git a/drivers/usb/typec/tcpm.c b/drivers/usb/typec/tcpm.c
index f4d563ee7690..7cd28b700a7f 100644
--- a/drivers/usb/typec/tcpm.c
+++ b/drivers/usb/typec/tcpm.c
@@ -1044,7 +1044,7 @@ static int tcpm_pd_svdm(struct tcpm_port *port, const 
__le32 *payload, int cnt,
break;
case CMDT_RSP_ACK:
/* silently drop message if we are not connected */
-   if (!port->partner)
+   if (IS_ERR_OR_NULL(port->partner))
break;
 
switch (cmd) {
@@ -3743,8 +3743,8 @@ struct tcpm_port *tcpm_register_port(struct device *dev, 
struct tcpc_dev *tcpc)
port->port_type = tcpc->config->type;
 
port->typec_port = typec_register_port(port->dev, >typec_caps);
-   if (!port->typec_port) {
-   err = -ENOMEM;
+   if (IS_ERR(port->typec_port)) {
+   err = PTR_ERR(port->typec_port);
goto out_destroy_wq;
}
 
@@ -3753,15 +3753,17 @@ struct tcpm_port *tcpm_register_port(struct device 
*dev, struct tcpc_dev *tcpc)
 
i = 0;
while (paltmode->svid && i < ARRAY_SIZE(port->port_altmode)) {
-   port->port_altmode[i] =
- typec_port_register_altmode(port->typec_port,
- paltmode);
-   if (!port->port_altmode[i]) {
+   struct typec_altmode *alt;
+
+   alt = typec_port_register_altmode(port->typec_port,
+ paltmode);
+   if (IS_ERR(alt)) {
tcpm_log(port,
 "%s: failed to register port alternate 
mode 0x%x",
 dev_name(dev), paltmode->svid);
break;
}
+   port->port_altmode[i] = alt;
i++;
paltmode++;
}
diff --git a/drivers/usb/typec/tps6598x.c b/drivers/usb/typec/tps6598x.c
index 2719f5d382f7..37a15c14a6c6 100644
--- a/drivers/usb/typec/tps6598x.c
+++ b/drivers/usb/typec/tps6598x.c
@@ -158,15 +158,15 @@ static int tps6598x_connect(struct tps6598x *tps, u32 
status)
desc.identity = >partner_identity;
}
 
-   tps->partner = typec_register_partner(tps->port, );
-   if (!tps->partner)
-   return -ENODEV;
-
typec_set_pwr_opmode(tps->port, mode);
typec_set_pwr_role(tps->port, TPS_STATUS_PORTROLE(status));
typec_set_vconn_role(tps->port, TPS_STATUS_VCONN(status));
typec_set_data_role(tps->port, TPS_STATUS_DATAROLE(status));
 
+   tps->partner = typec_register_partner(tps->port, );
+   if (IS_ERR(tps->partner))
+   return PTR_ERR(tps->partner);
+
if (desc.identity)
typec_partner_set_identity(tps->partner);
 
@@ -175,7 +175,8 @@ static int tps6598x_connect(struct tps6598x *tps, u32 
status)
 
 static void tps6598x_disconnect(struct tps6598x *tps, u32 status)
 {
-   typec_unregister_partner(tps->partner);
+   if (!IS_ERR(tps->partner))
+   typec_unregister_partner(tps->partner);
tps->partner = NULL;
typec_set_pwr_opmode(tps->port, TYPEC_PWR_MODE_USB);
typec_set_pwr_role(tps->port, TPS_STATUS_PORTROLE(status));
@@ -418,8 +419,8 @@ static int tps6598x_probe(struct i2c_client *client)
tps->typec_cap.prefer_role = TYPEC_NO_PREFERRED_ROLE;
 
tps->port = typec_register_port(>dev, >typec_cap);
-   if (!tps->port)
-   return -ENODEV;
+   if (IS_ERR(tps->port))
+   return PTR_ERR(tps->port);
 
if (status & TPS_STATUS_PLUG_PRESENT) {
ret = 

[PATCH v5 00/12] USB Type-C device-connection, mux and switch support

2018-02-28 Thread Hans de Goede
Hi All,

Here is version 5 of Heikki's and my USB Type-C device-connection, mux and
switch support series. Versions 2 - 5 bring various small code and style
fixes based on review (no major changes).

Here is the original cover-letter of v1:

Some devices with a USB Type-C connector have a bunch of muxes
behind that connector which need to be controlled by the kernel (rather
than having them controlled by firmware as on most devices).

Quite a while back I submitted a patch-series to tie together these muxes
and the Type-C Port Manager (tcpm) code, using the then new drivers/mux
framework. But the way I used the mux framework went against what it was
designed for, so in the end that series got nowhere.

Heikki Krogerus from Intel, who maintains the USB TYPEC subsystem, has
recently been working on solving the same problem for some boards he is
doing hardware-enablement for.

Heikki has come up with a number of infrastructure patches for this.
The first one is a new device-connection framework. This solves the
problem of describing non bus device-links on x86 in what in my experience
with this problematic area is a really nice simple, clean and *generic*
way. This could for example in the near future also replace the custom
lookup code in the pwm subsys and the custom pwm_add_table() /
pwm_remove_table() functions.

The other 3 patches add a framework for the different types of Type-C /
USB "muxes".

Heikki and I have gone through a number of iterations of these patches
together and we believe these are now ready for merging. Since merging
infrastructure patches without users is not done and Heikki's own use-case
for these is not yet ready for merging, the rest of this series consists
of patches by me to make the Type-C connector found on some Cherry Trail
devices (finally) be able to actually work as an USB port and not just
a charge port.

The last patch uses the new usb-role-switch framework to also do proper
device / host switching on CHT devices with a USB micro AB connector.
This is also a big feature for CHT users, because before this they had
to do a reboot to get an OTG-host cable recognized (on some devices).

Part of this series is an usb-role-switch driver for the role-switch
found inside the xhci controller on e.g. CHT devices, this is currently
implemented as the generic xhci controller instantiating a platform
child-device for this, since this really is a separate chunk of HW
which happens to sit in the XHCI mmio space. This approach may not be
universally liked, given that in this new series the role-switch driver
is much smaller and does not have any external deps anymore we could
just integrate it into the xhci code if that is preferred.

About merging this series (once everything is reviewed, etc.), there are
quite some interdependencies in it esp. a lot of the patches depend on
the first patch. Luckily patches 1-10 all apply to subsystems which are
maintained by Greg (most to the USB subsys). Which just leaves patches
11 and 12 once 1-10 are merged. Greg, can you create an immutable branch
for the platform/x86 and extcon maintainers to merge once this is done?

Regards,

Hans


[PATCH v5 06/12] usb: typec: tcpm: Use new Type-C switch/mux and usb-role-switch functions

2018-02-28 Thread Hans de Goede
Remove the unused (not implemented anywhere) tcpc_mux_dev abstraction
and replace it with calling the new typec_set_orientation,
usb_role_switch_set and typec_set_mode functions.

Reviewed-by: Heikki Krogerus 
Reviewed-by: Guenter Roeck 
Reviewed-by: Andy Shevchenko 
Signed-off-by: Hans de Goede 
---
Changes in v4:
-Add Andy's Reviewed-by

Changes in v3:
-Add Guenter's Reviewed-by

Changes in v2:
-Added Heikki's Reviewed-by
---
 drivers/usb/typec/Kconfig   |  1 +
 drivers/usb/typec/fusb302/fusb302.c |  1 -
 drivers/usb/typec/tcpm.c| 46 -
 include/linux/usb/tcpm.h| 10 
 4 files changed, 36 insertions(+), 22 deletions(-)

diff --git a/drivers/usb/typec/Kconfig b/drivers/usb/typec/Kconfig
index bcb2744c5977..a2a0684e7c82 100644
--- a/drivers/usb/typec/Kconfig
+++ b/drivers/usb/typec/Kconfig
@@ -48,6 +48,7 @@ if TYPEC
 config TYPEC_TCPM
tristate "USB Type-C Port Controller Manager"
depends on USB
+   select USB_ROLE_SWITCH
help
  The Type-C Port Controller Manager provides a USB PD and USB Type-C
  state machine for use with Type-C Port Controllers.
diff --git a/drivers/usb/typec/fusb302/fusb302.c 
b/drivers/usb/typec/fusb302/fusb302.c
index dcd8ef085b30..a7b06053a538 100644
--- a/drivers/usb/typec/fusb302/fusb302.c
+++ b/drivers/usb/typec/fusb302/fusb302.c
@@ -1249,7 +1249,6 @@ static void init_tcpc_dev(struct tcpc_dev 
*fusb302_tcpc_dev)
fusb302_tcpc_dev->set_roles = tcpm_set_roles;
fusb302_tcpc_dev->start_drp_toggling = tcpm_start_drp_toggling;
fusb302_tcpc_dev->pd_transmit = tcpm_pd_transmit;
-   fusb302_tcpc_dev->mux = NULL;
 }
 
 static const char * const cc_polarity_name[] = {
diff --git a/drivers/usb/typec/tcpm.c b/drivers/usb/typec/tcpm.c
index 00ca2822432f..bfcaf6618a1f 100644
--- a/drivers/usb/typec/tcpm.c
+++ b/drivers/usb/typec/tcpm.c
@@ -20,6 +20,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -176,6 +177,7 @@ struct tcpm_port {
struct typec_port *typec_port;
 
struct tcpc_dev *tcpc;
+   struct usb_role_switch *role_sw;
 
enum typec_role vconn_role;
enum typec_role pwr_role;
@@ -618,18 +620,25 @@ void tcpm_pd_transmit_complete(struct tcpm_port *port,
 EXPORT_SYMBOL_GPL(tcpm_pd_transmit_complete);
 
 static int tcpm_mux_set(struct tcpm_port *port, enum tcpc_mux_mode mode,
-   enum usb_role usb_role)
+   enum usb_role usb_role,
+   enum typec_orientation orientation)
 {
-   int ret = 0;
+   int ret;
 
-   tcpm_log(port, "Requesting mux mode %d, usb-role %d, polarity %d",
-mode, usb_role, port->polarity);
+   tcpm_log(port, "Requesting mux mode %d, usb-role %d, orientation %d",
+mode, usb_role, orientation);
 
-   if (port->tcpc->mux)
-   ret = port->tcpc->mux->set(port->tcpc->mux, mode, usb_role,
-  port->polarity);
+   ret = typec_set_orientation(port->typec_port, orientation);
+   if (ret)
+   return ret;
 
-   return ret;
+   if (port->role_sw) {
+   ret = usb_role_switch_set_role(port->role_sw, usb_role);
+   if (ret)
+   return ret;
+   }
+
+   return typec_set_mode(port->typec_port, mode);
 }
 
 static int tcpm_set_polarity(struct tcpm_port *port,
@@ -742,15 +751,21 @@ static int tcpm_set_attached_state(struct tcpm_port 
*port, bool attached)
 static int tcpm_set_roles(struct tcpm_port *port, bool attached,
  enum typec_role role, enum typec_data_role data)
 {
+   enum typec_orientation orientation;
enum usb_role usb_role;
int ret;
 
+   if (port->polarity == TYPEC_POLARITY_CC1)
+   orientation = TYPEC_ORIENTATION_NORMAL;
+   else
+   orientation = TYPEC_ORIENTATION_REVERSE;
+
if (data == TYPEC_HOST)
usb_role = USB_ROLE_HOST;
else
usb_role = USB_ROLE_DEVICE;
 
-   ret = tcpm_mux_set(port, TYPEC_MUX_USB, usb_role);
+   ret = tcpm_mux_set(port, TYPEC_MUX_USB, usb_role, orientation);
if (ret < 0)
return ret;
 
@@ -2097,7 +2112,8 @@ static int tcpm_src_attach(struct tcpm_port *port)
 out_disable_pd:
port->tcpc->set_pd_rx(port->tcpc, false);
 out_disable_mux:
-   tcpm_mux_set(port, TYPEC_MUX_NONE, USB_ROLE_NONE);
+   tcpm_mux_set(port, TYPEC_MUX_NONE, USB_ROLE_NONE,
+TYPEC_ORIENTATION_NONE);
return ret;
 }
 
@@ -2141,7 +2157,8 @@ static void tcpm_reset_port(struct tcpm_port *port)
tcpm_init_vconn(port);
tcpm_set_current_limit(port, 0, 0);
tcpm_set_polarity(port, TYPEC_POLARITY_CC1);
-   tcpm_mux_set(port, 

Re: [PATCH v5 08/13] iommu/rockchip: Control clocks needed to access the IOMMU

2018-02-28 Thread Robin Murphy

On 28/02/18 13:00, JeffyChen wrote:

Hi Robin,

Thanks for your reply.

On 02/28/2018 12:59 AM, Robin Murphy wrote:

the rockchip IOMMU is part of the master block in hardware, so it needs
to control the master's power domain and some of the master's clocks
when accessing its registers.

and the number of clocks needed here, might be different between each
IOMMUs(according to which master block it belongs), it's a little like
our power domain:
https://elixir.free-electrons.com/linux/latest/source/arch/arm64/boot/dts/rockchip/rk3399.dtsi#L935 





i'm not sure how to describe this correctly, is it ok use something 
like

"the same as it's master block"?


would it make sense to add a property to specify the master who owns
the iommu, and we can get all clocks(only some of those clocks are
actually needed) from it in the of_xlate()? and we can also reuse the
clock-names of that master to build clk_bulk_data and log errors in
clk_bulk_get.


I'm inclined to agree with Rob here - if we're to add anything to the
binding, it should only be whatever clock inputs are defined for the
IOMMU IP block itself. If Linux doesn't properly handle the interconnect
clock hierarchy external to a particular integration, that's a separate
issue and it's not the binding's problem.

I actually quite like the hack of "borrowing" the clocks from
dev->of_node in of_xlate() - you shouldn't need any DT changes for that,
because you already know that each IOMMU instance only has the one
master device anyway.


Thanks:) but actually we are going to support sharing IOMMU between 
multiple masters(one of them is the main master i think) in the newer 
chips(not yet supported on upstream kernel)...


Ha! OK, fair enough, back to the first point then...

So we might have to get all clocks from all masters, or find a way to 
specify the main master...and for the multiple masters case, do it in 
of_xlate() turns out to be a little racy...maybe we can add a property 
to specify main master, and get it's clocks in probe()?


I notice that the 4.4 BSP kernel consistently specifies "aclk" and 
"hclk" for the IOMMU instances - it feels unusual to say "why don't we 
follow the downstream binding?", but it does look a lot like what I 
would expect (I'd guess at one for the register slave interface and one 
for the master interface/general operation?)


If we can implement conceptually-correct clock handling based on an 
accurate binding, which should cover most cases, and *then* look at 
hacking around those where it doesn't quite work in practice due to 
shortcomings elsewhere, that would be ideal, and of course a lot nicer 
than just jumping straight into piles of hacks.


Robin.


[PATCH v16 01/11] fw_cfg: fix sparse warnings in fw_cfg_sel_endianness()

2018-02-28 Thread Marc-André Lureau
Dispatch to the appropriate iowrite() instead of casting restricted
type to u16.

- if fw_cfg_is_mmio:
  before: iowrite16(cpu_to_be16(key))
  after: iowrite16be(key)
- if !fw_cfg_is_mmio:
  before: iowrite16(cpu_to_le16(key))
  after: iowrite16(key)
  which is equivalent on little-endian systems, where fw_cfg IO is supported.

Fixes:
$ make C=1 CF=-D__CHECK_ENDIAN__ drivers/firmware/qemu_fw_cfg.o

drivers/firmware/qemu_fw_cfg.c:55:33: warning: restricted __be16 degrades to 
integer
drivers/firmware/qemu_fw_cfg.c:55:52: warning: restricted __le16 degrades to 
integer

Signed-off-by: Marc-André Lureau 
---
 drivers/firmware/qemu_fw_cfg.c | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index a41b572eeeb1..e7ea2b3b1d11 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -68,9 +68,12 @@ static void __iomem *fw_cfg_reg_data;
 static DEFINE_MUTEX(fw_cfg_dev_lock);
 
 /* pick appropriate endianness for selector key */
-static inline u16 fw_cfg_sel_endianness(u16 key)
+static void fw_cfg_sel_endianness(u16 key)
 {
-   return fw_cfg_is_mmio ? cpu_to_be16(key) : cpu_to_le16(key);
+   if (fw_cfg_is_mmio)
+   iowrite16be(key, fw_cfg_reg_ctrl);
+   else
+   iowrite16(key, fw_cfg_reg_ctrl);
 }
 
 /* read chunk of given fw_cfg blob (caller responsible for sanity-check) */
@@ -92,7 +95,7 @@ static inline void fw_cfg_read_blob(u16 key,
}
 
mutex_lock(_cfg_dev_lock);
-   iowrite16(fw_cfg_sel_endianness(key), fw_cfg_reg_ctrl);
+   fw_cfg_sel_endianness(key);
while (pos-- > 0)
ioread8(fw_cfg_reg_data);
ioread8_rep(fw_cfg_reg_data, buf, count);
-- 
2.16.1.73.g5832b7e9f2



[PATCH v16 06/11] fw_cfg: handle fw_cfg_read_blob() error

2018-02-28 Thread Marc-André Lureau
fw_cfg_read_blob() may fail, but does not return error. This may lead
to surprising behaviours, like populating zero file entries (in
register_file() or during read). Return an error if ACPI locking
failed. Also, the following DMA read/write extension will add more
error paths that should be handled appropriately.

Signed-off-by: Marc-André Lureau 
---
 drivers/firmware/qemu_fw_cfg.c | 32 ++--
 1 file changed, 22 insertions(+), 10 deletions(-)

diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index 0cc71d028ae3..45bfc389b226 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -77,8 +77,8 @@ static void fw_cfg_sel_endianness(u16 key)
 }
 
 /* read chunk of given fw_cfg blob (caller responsible for sanity-check) */
-static void fw_cfg_read_blob(u16 key,
-   void *buf, loff_t pos, size_t count)
+static ssize_t fw_cfg_read_blob(u16 key,
+   void *buf, loff_t pos, size_t count)
 {
u32 glk = -1U;
acpi_status status;
@@ -91,7 +91,7 @@ static void fw_cfg_read_blob(u16 key,
/* Should never get here */
WARN(1, "fw_cfg_read_blob: Failed to lock ACPI!\n");
memset(buf, 0, count);
-   return;
+   return -EINVAL;
}
 
mutex_lock(_cfg_dev_lock);
@@ -102,6 +102,7 @@ static void fw_cfg_read_blob(u16 key,
mutex_unlock(_cfg_dev_lock);
 
acpi_release_global_lock(glk);
+   return count;
 }
 
 /* clean up fw_cfg device i/o */
@@ -183,8 +184,9 @@ static int fw_cfg_do_platform_probe(struct platform_device 
*pdev)
}
 
/* verify fw_cfg device signature */
-   fw_cfg_read_blob(FW_CFG_SIGNATURE, sig, 0, FW_CFG_SIG_SIZE);
-   if (memcmp(sig, "QEMU", FW_CFG_SIG_SIZE) != 0) {
+   if (fw_cfg_read_blob(FW_CFG_SIGNATURE, sig,
+   0, FW_CFG_SIG_SIZE) < 0 ||
+   memcmp(sig, "QEMU", FW_CFG_SIG_SIZE) != 0) {
fw_cfg_io_cleanup();
return -ENODEV;
}
@@ -344,8 +346,7 @@ static ssize_t fw_cfg_sysfs_read_raw(struct file *filp, 
struct kobject *kobj,
if (count > entry->size - pos)
count = entry->size - pos;
 
-   fw_cfg_read_blob(entry->select, buf, pos, count);
-   return count;
+   return fw_cfg_read_blob(entry->select, buf, pos, count);
 }
 
 static struct bin_attribute fw_cfg_sysfs_attr_raw = {
@@ -501,7 +502,11 @@ static int fw_cfg_register_dir_entries(void)
struct fw_cfg_file *dir;
size_t dir_size;
 
-   fw_cfg_read_blob(FW_CFG_FILE_DIR, _count, 0, sizeof(files_count));
+   ret = fw_cfg_read_blob(FW_CFG_FILE_DIR, _count,
+   0, sizeof(files_count));
+   if (ret < 0)
+   return ret;
+
count = be32_to_cpu(files_count);
dir_size = count * sizeof(struct fw_cfg_file);
 
@@ -509,7 +514,10 @@ static int fw_cfg_register_dir_entries(void)
if (!dir)
return -ENOMEM;
 
-   fw_cfg_read_blob(FW_CFG_FILE_DIR, dir, sizeof(files_count), dir_size);
+   ret = fw_cfg_read_blob(FW_CFG_FILE_DIR, dir,
+   sizeof(files_count), dir_size);
+   if (ret < 0)
+   goto end;
 
for (i = 0; i < count; i++) {
ret = fw_cfg_register_file([i]);
@@ -517,6 +525,7 @@ static int fw_cfg_register_dir_entries(void)
break;
}
 
+end:
kfree(dir);
return ret;
 }
@@ -557,7 +566,10 @@ static int fw_cfg_sysfs_probe(struct platform_device *pdev)
goto err_probe;
 
/* get revision number, add matching top-level attribute */
-   fw_cfg_read_blob(FW_CFG_ID, , 0, sizeof(rev));
+   err = fw_cfg_read_blob(FW_CFG_ID, , 0, sizeof(rev));
+   if (err < 0)
+   goto err_probe;
+
fw_cfg_rev = le32_to_cpu(rev);
err = sysfs_create_file(fw_cfg_top_ko, _cfg_rev_attr.attr);
if (err)
-- 
2.16.1.73.g5832b7e9f2



[PATCH v16 04/11] fw_cfg: fix sparse warnings around FW_CFG_FILE_DIR read

2018-02-28 Thread Marc-André Lureau
Use struct fw_cfg_files to read the directory size, fixing the sparse
warnings:

drivers/firmware/qemu_fw_cfg.c:485:17: warning: cast to restricted __be32

Signed-off-by: Marc-André Lureau 
---
 drivers/firmware/qemu_fw_cfg.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index 0eb155fdfb35..00ad9b862414 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -496,19 +496,20 @@ static int fw_cfg_register_file(const struct fw_cfg_file 
*f)
 static int fw_cfg_register_dir_entries(void)
 {
int ret = 0;
+   __be32 files_count;
u32 count, i;
struct fw_cfg_file *dir;
size_t dir_size;
 
-   fw_cfg_read_blob(FW_CFG_FILE_DIR, , 0, sizeof(count));
-   count = be32_to_cpu(count);
+   fw_cfg_read_blob(FW_CFG_FILE_DIR, _count, 0, sizeof(files_count));
+   count = be32_to_cpu(files_count);
dir_size = count * sizeof(struct fw_cfg_file);
 
dir = kmalloc(dir_size, GFP_KERNEL);
if (!dir)
return -ENOMEM;
 
-   fw_cfg_read_blob(FW_CFG_FILE_DIR, dir, sizeof(count), dir_size);
+   fw_cfg_read_blob(FW_CFG_FILE_DIR, dir, sizeof(files_count), dir_size);
 
for (i = 0; i < count; i++) {
ret = fw_cfg_register_file([i]);
-- 
2.16.1.73.g5832b7e9f2



Re: [PATCH 3/3] x86/kvm/hyper-v: inject #GP only when invalid SINTx vector is unmasked

2018-02-28 Thread Roman Kagan
On Wed, Feb 28, 2018 at 02:44:01PM +0100, Vitaly Kuznetsov wrote:
> Hyper-V 2016 on KVM with SynIC enabled doesn't boot with the following
> trace:
> 
> kvm_entry:vcpu 0
> kvm_exit: reason MSR_WRITE rip 0xf8000131c1e5 info 0 0
> kvm_hv_synic_set_msr: vcpu_id 0 msr 0x4090 data 0x1 host 0
> kvm_msr:  msr_write 4090 = 0x1 (#GP)
> kvm_inj_exception:#GP (0x0)

I don't remember having seen this...  Does this happen with the mainline
QEMU, which doesn't set the SintPollingModeAvailable (17) bit in cpuid
0x4003:edx?

> 
> KVM acts according to the following statement from TLFS:
> 
> "
> 11.8.4 SINTx Registers
> ...
> Valid values for vector are 16-255 inclusive. Specifying an invalid
> vector number results in #GP.
> "
> 
> However, I checked and genuine Hyper-V doesn't #GP when we write 0x1
> to SINTx. I checked with Microsoft and they confirmed that if either the
> Masked bit (bit 16) or the Polling bit (bit 18) is set to 1, then they
> ignore the value of Vector. Make KVM act accordingly.

I wonder if that cpuid setting affects this behavior?  Also curious what
exactly the guest is trying to achieve writing this bogus value?

> 
> Signed-off-by: Vitaly Kuznetsov 
> ---
>  arch/x86/include/uapi/asm/hyperv.h | 1 +
>  arch/x86/kvm/hyperv.c  | 7 ++-
>  2 files changed, 7 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/x86/include/uapi/asm/hyperv.h 
> b/arch/x86/include/uapi/asm/hyperv.h
> index 62c778a303a1..a492dc357bd7 100644
> --- a/arch/x86/include/uapi/asm/hyperv.h
> +++ b/arch/x86/include/uapi/asm/hyperv.h
> @@ -326,6 +326,7 @@ typedef struct _HV_REFERENCE_TSC_PAGE {
>  #define HV_SYNIC_SIEFP_ENABLE(1ULL << 0)
>  #define HV_SYNIC_SINT_MASKED (1ULL << 16)
>  #define HV_SYNIC_SINT_AUTO_EOI   (1ULL << 17)
> +#define HV_SYNIC_SINT_POLLING(1ULL << 18)
>  #define HV_SYNIC_SINT_VECTOR_MASK(0xFF)
>  
>  #define HV_SYNIC_STIMER_COUNT(4)
> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
> index 6d14f808145d..d3d866c32976 100644
> --- a/arch/x86/kvm/hyperv.c
> +++ b/arch/x86/kvm/hyperv.c
> @@ -95,9 +95,14 @@ static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, 
> int sint,
> u64 data, bool host)
>  {
>   int vector, old_vector;
> + bool masked, polling;
>  
>   vector = data & HV_SYNIC_SINT_VECTOR_MASK;
> - if (vector < HV_SYNIC_FIRST_VALID_VECTOR && !host)
> + masked = data & HV_SYNIC_SINT_MASKED;
> + polling = data & HV_SYNIC_SINT_POLLING;
> +
> + if (vector < HV_SYNIC_FIRST_VALID_VECTOR &&
> + !host && !masked && !polling)
>   return 1;
>   /*
>* Guest may configure multiple SINTs to use the same vector, so

I'm not sure this is enough to implement the polling mode: per spec,

> Setting the polling bit will have the effect of unmasking an interrupt
> source, except that an actual interrupt is not generated.

However, if the guest sets a valid vector and the masked bit cleared,
we'll consider it a usual SINT and add to masks and inject interrupts,
etc, regardless of the polling bit.

I must admit I'm confused by the above quote from the spec: is the
polling bit supposed to come together with the masked bit?  If so, then
we probably should validate it here (but your logs indicate otherwise).
In general I'm missing the utility of this mode: why should an interrupt
controller be involved in polling at all?

Roman.


[PATCH 3.16 075/254] X.509: fix buffer overflow detection in sprint_oid()

2018-02-28 Thread Ben Hutchings
3.16.55-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Eric Biggers 

commit 47e0a208fb9d91e3f3c86309e752b13a36470ae8 upstream.

In sprint_oid(), if the input buffer were to be more than 1 byte too
small for the first snprintf(), 'bufsize' would underflow, causing a
buffer overflow when printing the remainder of the OID.

Fortunately this cannot actually happen currently, because no users pass
in a buffer that can be too small for the first snprintf().

Regardless, fix it by checking the snprintf() return value correctly.

For consistency also tweak the second snprintf() check to look the same.

Fixes: 4f73175d0375 ("X.509: Add utility functions to render OIDs as strings")
Cc: Takashi Iwai 
Signed-off-by: Eric Biggers 
Signed-off-by: David Howells 
Reviewed-by: James Morris 
Signed-off-by: Ben Hutchings 
---
 lib/oid_registry.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

--- a/lib/oid_registry.c
+++ b/lib/oid_registry.c
@@ -120,10 +120,10 @@ int sprint_oid(const void *data, size_t
 
n = *v++;
ret = count = snprintf(buffer, bufsize, "%u.%u", n / 40, n % 40);
+   if (count >= bufsize)
+   return -ENOBUFS;
buffer += count;
bufsize -= count;
-   if (bufsize == 0)
-   return -ENOBUFS;
 
while (v < end) {
num = 0;
@@ -141,9 +141,9 @@ int sprint_oid(const void *data, size_t
} while (n & 0x80);
}
ret += count = snprintf(buffer, bufsize, ".%lu", num);
-   buffer += count;
-   if (bufsize <= count)
+   if (count >= bufsize)
return -ENOBUFS;
+   buffer += count;
bufsize -= count;
}
 



[PATCH 3.2 053/140] net: bridge: fix early call to br_stp_change_bridge_id and plug newlink leaks

2018-02-28 Thread Ben Hutchings
3.2.100-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Nikolay Aleksandrov 

commit 84aeb437ab98a2bce3d4b2111c79723aedfceb33 upstream.

The early call to br_stp_change_bridge_id in bridge's newlink can cause
a memory leak if an error occurs during the newlink because the fdb
entries are not cleaned up if a different lladdr was specified, also
another minor issue is that it generates fdb notifications with
ifindex = 0. Another unrelated memory leak is the bridge sysfs entries
which get added on NETDEV_REGISTER event, but are not cleaned up in the
newlink error path. To remove this special case the call to
br_stp_change_bridge_id is done after netdev register and we cleanup the
bridge on changelink error via br_dev_delete to plug all leaks.

This patch makes netlink bridge destruction on newlink error the same as
dellink and ioctl del which is necessary since at that point we have a
fully initialized bridge device.

To reproduce the issue:
$ ip l add br0 address 00:11:22:33:44:55 type bridge group_fwd_mask 1
RTNETLINK answers: Invalid argument

$ rmmod bridge
[ 1822.142525] 
=
[ 1822.143640] BUG bridge_fdb_cache (Tainted: G   O): Objects 
remaining in bridge_fdb_cache on __kmem_cache_shutdown()
[ 1822.144821] 
-

[ 1822.145990] Disabling lock debugging due to kernel taint
[ 1822.146732] INFO: Slab 0x92a844b2 objects=32 used=2 
fp=0xfef011b0 flags=0x1800100
[ 1822.147700] CPU: 2 PID: 13584 Comm: rmmod Tainted: GB  O 
4.15.0-rc2+ #87
[ 1822.148578] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
1.7.5-20140531_083030-gandalf 04/01/2014
[ 1822.150008] Call Trace:
[ 1822.150510]  dump_stack+0x78/0xa9
[ 1822.151156]  slab_err+0xb1/0xd3
[ 1822.151834]  ? __kmalloc+0x1bb/0x1ce
[ 1822.152546]  __kmem_cache_shutdown+0x151/0x28b
[ 1822.153395]  shutdown_cache+0x13/0x144
[ 1822.154126]  kmem_cache_destroy+0x1c0/0x1fb
[ 1822.154669]  SyS_delete_module+0x194/0x244
[ 1822.155199]  ? trace_hardirqs_on_thunk+0x1a/0x1c
[ 1822.155773]  entry_SYSCALL_64_fastpath+0x23/0x9a
[ 1822.156343] RIP: 0033:0x7f929bd38b17
[ 1822.156859] RSP: 002b:7ffd160e9a98 EFLAGS: 0202 ORIG_RAX: 
00b0
[ 1822.157728] RAX: ffda RBX: 5578316ba090 RCX: 7f929bd38b17
[ 1822.158422] RDX: 7f929bd9ec60 RSI: 0800 RDI: 5578316ba0f0
[ 1822.159114] RBP: 0003 R08: 7f929bff5f20 R09: 7ffd160e8a11
[ 1822.159808] R10: 7ffd160e9860 R11: 0202 R12: 7ffd160e8a80
[ 1822.160513] R13:  R14:  R15: 5578316ba090
[ 1822.161278] INFO: Object 0x7645de29 @offset=0
[ 1822.161666] INFO: Object 0xd5df2ab5 @offset=128

Fixes: 30313a3d5794 ("bridge: Handle IFLA_ADDRESS correctly when creating 
bridge device")
Fixes: 5b8d5429daa0 ("bridge: netlink: register netdevice before executing 
changelink")
Signed-off-by: Nikolay Aleksandrov 
Signed-off-by: David S. Miller 
[bwh: Backported to 3.2: register_netdevice() was the last thing done in
 br_dev_newlink(), so no extra cleanup is needed]
Signed-off-by: Ben Hutchings 
---
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -215,6 +215,11 @@ static int br_dev_newlink(struct net *sr
  struct nlattr *tb[], struct nlattr *data[])
 {
struct net_bridge *br = netdev_priv(dev);
+   int err;
+
+   err = register_netdevice(dev);
+   if (err)
+   return err;
 
if (tb[IFLA_ADDRESS]) {
spin_lock_bh(>lock);
@@ -222,7 +227,7 @@ static int br_dev_newlink(struct net *sr
spin_unlock_bh(>lock);
}
 
-   return register_netdevice(dev);
+   return 0;
 }
 
 struct rtnl_link_ops br_link_ops __read_mostly = {



[PATCH 3.2 011/140] USB: serial: option: add Quectel BG96 id

2018-02-28 Thread Ben Hutchings
3.2.100-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Sebastian Sjoholm 

commit c654b21ede93845863597de9ad774fd30db5f2ab upstream.

Quectel BG96 is a Qualcomm MDM9206 based IoT modem, supporting both
CAT-M and NB-IoT. Tested hardware is BG96 mounted on Quectel
development board (EVB). The USB id is added to option.c to allow
DIAG,GPS,AT and modem communication with the BG96.

Signed-off-by: Sebastian Sjoholm 
Signed-off-by: Johan Hovold 
Signed-off-by: Ben Hutchings 
---
 drivers/usb/serial/option.c | 3 +++
 1 file changed, 3 insertions(+)

--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -242,6 +242,7 @@ static void option_instat_callback(struc
 /* These Quectel products use Quectel's vendor ID */
 #define QUECTEL_PRODUCT_EC21   0x0121
 #define QUECTEL_PRODUCT_EC25   0x0125
+#define QUECTEL_PRODUCT_BG96   0x0296
 
 #define CMOTECH_VENDOR_ID  0x16d8
 #define CMOTECH_PRODUCT_6001   0x6001
@@ -1165,6 +1166,8 @@ static const struct usb_device_id option
  .driver_info = (kernel_ulong_t)_intf4_blacklist },
{ USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC25),
  .driver_info = (kernel_ulong_t)_intf4_blacklist },
+   { USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_BG96),
+ .driver_info = (kernel_ulong_t)_intf4_blacklist },
{ USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6001) },
{ USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CMU_300) },
{ USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6003),



[PATCH 3.2 002/140] KVM: VMX: do not try to reexecute failed instruction while emulating invalid guest state

2018-02-28 Thread Ben Hutchings
3.2.100-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Gleb Natapov 

commit 991eebf9f8e523e7ff1e4d31ac80641582b2e57a upstream.

During invalid guest state emulation the vcpu cannot enter guest mode to
try to reexecute the instruction that the emulator failed to emulate, so
emulation will happen again and again.  Prevent that by telling the
emulator that instruction reexecution should not be attempted.

Signed-off-by: Gleb Natapov 
[bwh: Backported to 3.2: adjust context]
Signed-off-by: Ben Hutchings 
---
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -694,6 +694,7 @@ enum emulation_result {
 #define EMULTYPE_NO_DECODE (1 << 0)
 #define EMULTYPE_TRAP_UD   (1 << 1)
 #define EMULTYPE_SKIP  (1 << 2)
+#define EMULTYPE_NO_REEXECUTE  (1 << 4)
 int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
int emulation_type, void *insn, int insn_len);
 
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4891,7 +4891,7 @@ static int handle_invalid_guest_state(st
&& (kvm_get_rflags(>vcpu) & X86_EFLAGS_IF))
return handle_interrupt_window(>vcpu);
 
-   err = emulate_instruction(vcpu, 0);
+   err = emulate_instruction(vcpu, EMULTYPE_NO_REEXECUTE);
 
if (err == EMULATE_DO_MMIO) {
ret = 0;
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4888,10 +4888,14 @@ static int handle_emulation_failure(stru
return r;
 }
 
-static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
+static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva,
+ int emulation_type)
 {
gpa_t gpa;
 
+   if (emulation_type & EMULTYPE_NO_REEXECUTE)
+   return false;
+
if (tdp_enabled)
return false;
 
@@ -4942,7 +4946,7 @@ int x86_emulate_instruction(struct kvm_v
if (r != EMULATION_OK)  {
if (emulation_type & EMULTYPE_TRAP_UD)
return EMULATE_FAIL;
-   if (reexecute_instruction(vcpu, cr2))
+   if (reexecute_instruction(vcpu, cr2, emulation_type))
return EMULATE_DONE;
if (emulation_type & EMULTYPE_SKIP)
return EMULATE_FAIL;
@@ -4969,7 +4973,7 @@ restart:
return EMULATE_DONE;
 
if (r == EMULATION_FAILED) {
-   if (reexecute_instruction(vcpu, cr2))
+   if (reexecute_instruction(vcpu, cr2, emulation_type))
return EMULATE_DONE;
 
return handle_emulation_failure(vcpu);



[PATCH 3.16 065/254] btrfs: Fix possible off-by-one in btrfs_search_path_in_tree

2018-02-28 Thread Ben Hutchings
3.16.55-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Nikolay Borisov 

commit c8bcbfbd239ed60a6562964b58034ac8a25f4c31 upstream.

The name char array passed to btrfs_search_path_in_tree is of size
BTRFS_INO_LOOKUP_PATH_MAX (4080). So the actual accessible char indexes
are in the range of [0, 4079]. Currently the code uses the define but this
represents an off-by-one.

Implications:

Size of btrfs_ioctl_ino_lookup_args is 4096, so the new byte will be
written to extra space, not some padding that could be provided by the
allocator.

btrfs-progs stores the arguments on the stack, but the kernel makes its own
copy of the ioctl buffer and the off-by-one overwrite does not affect
userspace, but the ending 0 might be lost.

Kernel ioctl buffer is allocated dynamically so we're overwriting
somebody else's memory, and the ioctl is privileged if args.objectid is
not 256. Which is in most cases, but resolving a subvolume stored in
another directory will trigger that path.

Before this patch the buffer was one byte larger, but then the -1 was
not added.

Fixes: ac8e9819d71f907 ("Btrfs: add search and inode lookup ioctls")
Signed-off-by: Nikolay Borisov 
Reviewed-by: David Sterba 
[ added implications ]
Signed-off-by: David Sterba 
Signed-off-by: Ben Hutchings 
---
 fs/btrfs/ioctl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2253,7 +2253,7 @@ static noinline int btrfs_search_path_in
if (!path)
return -ENOMEM;
 
-   ptr = [BTRFS_INO_LOOKUP_PATH_MAX];
+   ptr = [BTRFS_INO_LOOKUP_PATH_MAX - 1];
 
key.objectid = tree_id;
key.type = BTRFS_ROOT_ITEM_KEY;



[PATCH 3.2 031/140] net_sched: red: Avoid illegal values

2018-02-28 Thread Ben Hutchings
3.2.100-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Nogah Frankel 

commit 8afa10cbe281b10371fee5a87ab266e48d71a7f9 upstream.

Check that the qmin & qmax values don't overflow for the given Wlog value.
Check that qmin <= qmax.

Fixes: a783474591f2 ("[PKT_SCHED]: Generic RED layer")
Signed-off-by: Nogah Frankel 
Signed-off-by: David S. Miller 
[bwh: Backported to 3.2:
 - Drop changes in sch_sfq
 - Adjust context]
Signed-off-by: Ben Hutchings 
---
--- a/include/net/red.h
+++ b/include/net/red.h
@@ -124,6 +124,17 @@ static inline u32 red_rmask(u8 Plog)
return Plog < 32 ? ((1 << Plog) - 1) : ~0UL;
 }
 
+static inline bool red_check_params(u32 qth_min, u32 qth_max, u8 Wlog)
+{
+   if (fls(qth_min) + Wlog > 32)
+   return false;
+   if (fls(qth_max) + Wlog > 32)
+   return false;
+   if (qth_max < qth_min)
+   return false;
+   return true;
+}
+
 static inline void red_set_parms(struct red_parms *p,
 u32 qth_min, u32 qth_max, u8 Wlog, u8 Plog,
 u8 Scell_log, u8 *stab)
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -479,6 +479,9 @@ static int choke_change(struct Qdisc *sc
 
ctl = nla_data(tb[TCA_CHOKE_PARMS]);
 
+   if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
+   return -EINVAL;
+
if (ctl->limit > CHOKE_MAX_QUEUE)
return -EINVAL;
 
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -384,6 +384,9 @@ static inline int gred_change_vq(struct
struct gred_sched *table = qdisc_priv(sch);
struct gred_sched_data *q;
 
+   if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
+   return -EINVAL;
+
if (table->tab[dp] == NULL) {
table->tab[dp] = kzalloc(sizeof(*q), GFP_ATOMIC);
if (table->tab[dp] == NULL)
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -189,6 +189,8 @@ static int red_change(struct Qdisc *sch,
return -EINVAL;
 
ctl = nla_data(tb[TCA_RED_PARMS]);
+   if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
+   return -EINVAL;
 
if (ctl->limit > 0) {
child = fifo_create_dflt(sch, _qdisc_ops, ctl->limit);



[PATCH 3.2 035/140] can: ems_usb: cancel urb on -EPIPE and -EPROTO

2018-02-28 Thread Ben Hutchings
3.2.100-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Martin Kelly 

commit bd352e1adfe0d02d3ea7c8e3fb19183dc317e679 upstream.

In mcba_usb, we have observed that when you unplug the device, the driver will
endlessly resubmit failing URBs, which can cause CPU stalls. This issue
is fixed in mcba_usb by catching the codes seen on device disconnect
(-EPIPE and -EPROTO).

This driver also resubmits in the case of -EPIPE and -EPROTO, so fix it
in the same way.

Signed-off-by: Martin Kelly 
Signed-off-by: Marc Kleine-Budde 
Signed-off-by: Ben Hutchings 
---
 drivers/net/can/usb/ems_usb.c | 2 ++
 1 file changed, 2 insertions(+)

--- a/drivers/net/can/usb/ems_usb.c
+++ b/drivers/net/can/usb/ems_usb.c
@@ -292,6 +292,8 @@ static void ems_usb_read_interrupt_callb
 
case -ECONNRESET: /* unlink */
case -ENOENT:
+   case -EPIPE:
+   case -EPROTO:
case -ESHUTDOWN:
return;
 



[PATCH 3.2 064/140] n_tty: fix EXTPROC vs ICANON interaction with TIOCINQ (aka FIONREAD)

2018-02-28 Thread Ben Hutchings
3.2.100-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Linus Torvalds 

commit 966031f340185eddd05affcf72b740549f056348 upstream.

We added support for EXTPROC back in 2010 in commit 26df6d13406d ("tty:
Add EXTPROC support for LINEMODE") and the intent was to allow it to
override some (all?) ICANON behavior.  Quoting from that original commit
message:

 There is a new bit in the termios local flag word, EXTPROC.
 When this bit is set, several aspects of the terminal driver
 are disabled.  Input line editing, character echo, and mapping
 of signals are all disabled.  This allows the telnetd to turn
 off these functions when in linemode, but still keep track of
 what state the user wants the terminal to be in.

but the problem turns out that "several aspects of the terminal driver
are disabled" is a bit ambiguous, and you can really confuse the n_tty
layer by setting EXTPROC and then causing some of the ICANON invariants
to no longer be maintained.

This fixes at least one such case (TIOCINQ) becoming unhappy because of
the confusion over whether ICANON really means ICANON when EXTPROC is set.

This basically makes TIOCINQ match the case of read: if EXTPROC is set,
we ignore ICANON.  Also, make sure to reset the ICANON state if EXTPROC
changes, not just if ICANON changes.

Fixes: 26df6d13406d ("tty: Add EXTPROC support for LINEMODE")
Reported-by: Tetsuo Handa 
Reported-by: syzkaller 
Cc: Jiri Slaby 
Signed-off-by: Linus Torvalds 
Signed-off-by: Greg Kroah-Hartman 
[bwh: Backported to 3.2: adjust context]
Signed-off-by: Ben Hutchings 
---
 drivers/tty/n_tty.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

--- a/drivers/tty/n_tty.c
+++ b/drivers/tty/n_tty.c
@@ -1459,7 +1459,7 @@ static void n_tty_set_termios(struct tty
BUG_ON(!tty);
 
if (old)
-   canon_change = (old->c_lflag ^ tty->termios->c_lflag) & ICANON;
+   canon_change = (old->c_lflag ^ tty->termios->c_lflag) & (ICANON 
| EXTPROC);
if (canon_change) {
memset(>read_flags, 0, sizeof tty->read_flags);
tty->canon_head = tty->read_tail;
@@ -2096,7 +2096,7 @@ static int n_tty_ioctl(struct tty_struct
case TIOCINQ:
/* FIXME: Locking */
retval = tty->read_cnt;
-   if (L_ICANON(tty))
+   if (L_ICANON(tty) && !L_EXTPROC(tty))
retval = inq_canon(tty);
return put_user(retval, (unsigned int __user *) arg);
default:



[PATCH 3.2 041/140] net: ipv4: fix for a race condition in raw_sendmsg

2018-02-28 Thread Ben Hutchings
3.2.100-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Mohamed Ghannam 

commit 8f659a03a0ba9289b9aeb9b4470e6fb263d6f483 upstream.

inet->hdrincl is racy, and could lead to uninitialized stack pointer
usage, so its value should be read only once.

Fixes: c008ba5bdc9f ("ipv4: Avoid reading user iov twice after 
raw_probe_proto_opt")
Signed-off-by: Mohamed Ghannam 
Reviewed-by: Eric Dumazet 
Signed-off-by: David S. Miller 
[bwh: Backported to 3.2:
 - flowi4 flags don't depend on hdrincl
 - Adjust context]
Signed-off-by: Ben Hutchings 
---
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -484,11 +484,16 @@ static int raw_sendmsg(struct kiocb *ioc
int err;
struct ip_options_data opt_copy;
struct raw_frag_vec rfv;
+   int hdrincl;
 
err = -EMSGSIZE;
if (len > 0x)
goto out;
 
+   /* hdrincl should be READ_ONCE(inet->hdrincl)
+* but READ_ONCE() doesn't work with bit fields
+*/
+   hdrincl = inet->hdrincl;
/*
 *  Check the flags.
 */
@@ -564,7 +569,7 @@ static int raw_sendmsg(struct kiocb *ioc
/* Linux does not mangle headers on raw sockets,
 * so that IP options + IP_HDRINCL is non-sense.
 */
-   if (inet->hdrincl)
+   if (hdrincl)
goto done;
if (ipc.opt->opt.srr) {
if (!daddr)
@@ -585,11 +590,11 @@ static int raw_sendmsg(struct kiocb *ioc
 
flowi4_init_output(, ipc.oif, sk->sk_mark, tos,
   RT_SCOPE_UNIVERSE,
-  inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
+  hdrincl ? IPPROTO_RAW : sk->sk_protocol,
   inet_sk_flowi_flags(sk) | FLOWI_FLAG_CAN_SLEEP,
   daddr, saddr, 0, 0);
 
-   if (!inet->hdrincl) {
+   if (!hdrincl) {
rfv.iov = msg->msg_iov;
rfv.hlen = 0;
 
@@ -614,7 +619,7 @@ static int raw_sendmsg(struct kiocb *ioc
goto do_confirm;
 back_from_confirm:
 
-   if (inet->hdrincl)
+   if (hdrincl)
err = raw_send_hdrinc(sk, , msg->msg_iov, len,
  , msg->msg_flags);
 



[PATCH 3.2 039/140] ipv4: Use standard iovec primitive in raw_probe_proto_opt

2018-02-28 Thread Ben Hutchings
3.2.100-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Herbert Xu 

commit 32b5913a931fd753faf3d4e1124b2bc2edb364da upstream.

The function raw_probe_proto_opt tries to extract the first two
bytes from the user input in order to seed the IPsec lookup for
ICMP packets.  In doing so it's processing iovec by hand and
overcomplicating things.

This patch replaces the manual iovec processing with a call to
memcpy_fromiovecend.

Signed-off-by: Herbert Xu 
Signed-off-by: David S. Miller 
Signed-off-by: Ben Hutchings 
---
 net/ipv4/raw.c | 50 +++---
 1 file changed, 11 insertions(+), 39 deletions(-)

--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -411,48 +411,20 @@ error:
 
 static int raw_probe_proto_opt(struct flowi4 *fl4, struct msghdr *msg)
 {
-   struct iovec *iov;
-   u8 __user *type = NULL;
-   u8 __user *code = NULL;
-   int probed = 0;
-   unsigned int i;
+   struct icmphdr icmph;
+   int err;
 
-   if (!msg->msg_iov)
+   if (fl4->flowi4_proto != IPPROTO_ICMP)
return 0;
 
-   for (i = 0; i < msg->msg_iovlen; i++) {
-   iov = >msg_iov[i];
-   if (!iov)
-   continue;
-
-   switch (fl4->flowi4_proto) {
-   case IPPROTO_ICMP:
-   /* check if one-byte field is readable or not. */
-   if (iov->iov_base && iov->iov_len < 1)
-   break;
-
-   if (!type) {
-   type = iov->iov_base;
-   /* check if code field is readable or not. */
-   if (iov->iov_len > 1)
-   code = type + 1;
-   } else if (!code)
-   code = iov->iov_base;
-
-   if (type && code) {
-   if (get_user(fl4->fl4_icmp_type, type) ||
-   get_user(fl4->fl4_icmp_code, code))
-   return -EFAULT;
-   probed = 1;
-   }
-   break;
-   default:
-   probed = 1;
-   break;
-   }
-   if (probed)
-   break;
-   }
+   /* We only need the first two bytes. */
+   err = memcpy_fromiovecend((void *), msg->msg_iov, 0, 2);
+   if (err)
+   return err;
+
+   fl4->fl4_icmp_type = icmph.type;
+   fl4->fl4_icmp_code = icmph.code;
+
return 0;
 }
 



Re: [PATCH v4 4/6] vfio/type1: check dma map request is within a valid iova range

2018-02-28 Thread Auger Eric
Hi Shameer,

On 28/02/18 14:39, Shameerali Kolothum Thodi wrote:
> Hi Eric,
> 
>> -Original Message-
>> From: Auger Eric [mailto:eric.au...@redhat.com]
>> Sent: Wednesday, February 28, 2018 11:53 AM
>> To: Shameerali Kolothum Thodi ;
>> Alex Williamson 
>> Cc: pmo...@linux.vnet.ibm.com; k...@vger.kernel.org; linux-
>> ker...@vger.kernel.org; Linuxarm ; John Garry
>> ; xuwei (O) ; Robin Murphy
>> 
>> Subject: Re: [PATCH v4 4/6] vfio/type1: check dma map request is within a 
>> valid
>> iova range
>>
>> Hi Shameer,
>>
>> On 28/02/18 10:25, Shameerali Kolothum Thodi wrote:
>>>
>>>
 -Original Message-
 From: Auger Eric [mailto:eric.au...@redhat.com]
 Sent: Wednesday, February 28, 2018 9:02 AM
 To: Shameerali Kolothum Thodi ;
 Alex Williamson 
 Cc: pmo...@linux.vnet.ibm.com; k...@vger.kernel.org; linux-
 ker...@vger.kernel.org; Linuxarm ; John Garry
 ; xuwei (O) ; Robin
>> Murphy
 
 Subject: Re: [PATCH v4 4/6] vfio/type1: check dma map request is within a
>> valid
 iova range

 Hi Shameer,

 On 27/02/18 10:57, Shameerali Kolothum Thodi wrote:
>
>
>> -Original Message-
>> From: Auger Eric [mailto:eric.au...@redhat.com]
>> Sent: Tuesday, February 27, 2018 8:27 AM
>> To: Alex Williamson 
>> Cc: Shameerali Kolothum Thodi
>> ;
>> pmo...@linux.vnet.ibm.com; k...@vger.kernel.org; linux-
>> ker...@vger.kernel.org; Linuxarm ; John Garry
>> ; xuwei (O) ; Robin
 Murphy
>> 
>> Subject: Re: [PATCH v4 4/6] vfio/type1: check dma map request is within a
 valid
>> iova range
>>
>> Hi,
>> On 27/02/18 00:13, Alex Williamson wrote:
>>> On Mon, 26 Feb 2018 23:05:43 +0100
>>> Auger Eric  wrote:
>>>
 Hi Shameer,

 [Adding Robin in CC]
 On 21/02/18 13:22, Shameer Kolothum wrote:
> This checks and rejects any dma map request outside valid iova
> range.
>
> Signed-off-by: Shameer Kolothum
>> 
> ---
>  drivers/vfio/vfio_iommu_type1.c | 22 ++
>  1 file changed, 22 insertions(+)
>
> diff --git a/drivers/vfio/vfio_iommu_type1.c
>> b/drivers/vfio/vfio_iommu_type1.c
> index a80884e..3049393 100644
> --- a/drivers/vfio/vfio_iommu_type1.c
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -970,6 +970,23 @@ static int vfio_pin_map_dma(struct
>> vfio_iommu
>> *iommu, struct vfio_dma *dma,
>   return ret;
>  }
>
> +/*
> + * Check dma map request is within a valid iova range
> + */
> +static bool vfio_iommu_iova_dma_valid(struct vfio_iommu *iommu,
> + dma_addr_t start, dma_addr_t end)
> +{
> + struct list_head *iova = >iova_list;
> + struct vfio_iova *node;
> +
> + list_for_each_entry(node, iova, list) {
> + if ((start >= node->start) && (end <= node->end))
> + return true;
 I am now confused by the fact this change will prevent existing QEMU
 from working with this series on some platforms. For instance QEMU
>> virt
 machine GPA space collides with Seattle PCI host bridge windows. On
 ARM
 the smmu and smmuv3 drivers report the PCI host bridge windows as
 reserved regions which does not seem to be the case on other
>> platforms.
 The change happened in commit
>> 273df9635385b2156851c7ee49f40658d7bcb29d
 (iommu/dma: Make PCI window reservation generic).

 For background, we already discussed the topic after LPC 2016. See
 https://www.spinics.net/lists/kernel/msg2379607.html.

 So is it the right choice to expose PCI host bridge windows as reserved
 regions? If yes shouldn't we make a difference between those and MSI
 windows in this series and do not reject any user space DMA_MAP
 attempt
 within PCI host bridge windows.
>>>
>>> If the QEMU machine GPA collides with a reserved region today, then
>>> either:
>>>
>>> a) The mapping through the IOMMU works and the reserved region is
 wrong
>>>
>>> or
>>>
>>> b) The mapping doesn't actually work, QEMU is at risk of data loss by
>>> being told that it worked, and we're justified in changing that
>>> 

RE: [PATCH 4.9 09/66] KVM: arm/arm64: Check pagesize when allocating a hugepage at Stage 2

2018-02-28 Thread Ioana Ciornei
> 4.9-stable review patch.  If anyone has any objections, please let me know.

I know that my response comes late but I just found out that the version of the 
patch applied in 4.9-stable, the one in this email, is different than the one 
applied on 4.14-stable.
This is the one applied on 4.14: https://patchwork.kernel.org/patch/10177715/

As you can see, the check against PMD_SIZE which is present in 4.14 is not 
present in this version of the patch.

Am I missing something or should I send a patch to rectify this?

Thanks,
Ioana Ciornei

> 
> --
> 
> From: Punit Agrawal 
> 
> commit c507babf10ead4d5c8cca704539b170752a8ac84 upstream.
> 
> KVM only supports PMD hugepages at stage 2 but doesn't actually check that
> the provided hugepage memory pagesize is PMD_SIZE before populating
> stage 2 entries.
> 
> In cases where the backing hugepage size is smaller than PMD_SIZE (such as
> when using contiguous hugepages), KVM can end up creating stage 2
> mappings that extend beyond the supplied memory.
> 
> Fix this by checking for the pagesize of userspace vma before creating PMD
> hugepage at stage 2.
> 
> Fixes: 66b3923a1a0f77a ("arm64: hugetlb: add support for PTE contiguous
> bit")
> Signed-off-by: Punit Agrawal 
> Cc: Marc Zyngier 
> Reviewed-by: Christoffer Dall 
> Signed-off-by: Christoffer Dall 
> Signed-off-by: Greg Kroah-Hartman 
> 
> ---
>  arch/arm/kvm/mmu.c |2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> --- a/arch/arm/kvm/mmu.c
> +++ b/arch/arm/kvm/mmu.c
> @@ -1284,7 +1284,7 @@ static int user_mem_abort(struct kvm_vcp
>   return -EFAULT;
>   }
> 
> - if (is_vm_hugetlb_page(vma) && !logging_active) {
> + if (vma_kernel_pagesize(vma) && !logging_active) {
>   hugetlb = true;
>   gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
>   } else {
> 



[PATCH 3.2 056/140] USB: serial: option: adding support for YUGA CLM920-NC5

2018-02-28 Thread Ben Hutchings
3.2.100-rc1 review patch.  If anyone has any objections, please let me know.

--

From: SZ Lin (林上智)
 

commit 3920bb713038810f25770e7545b79f204685c8f2 upstream.

This patch adds support for YUGA CLM920-NC5 PID 0x9625 USB modem to option
driver.

Interface layout:
0: QCDM/DIAG
1: ADB
2: MODEM
3: AT
4: RMNET

Signed-off-by: Taiyi Wu 
Signed-off-by: SZ Lin (林上智) 
Signed-off-by: Johan Hovold 
Signed-off-by: Ben Hutchings 
---
 drivers/usb/serial/option.c | 9 +
 1 file changed, 9 insertions(+)

--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -237,6 +237,8 @@ static void option_instat_callback(struc
 /* These Quectel products use Qualcomm's vendor ID */
 #define QUECTEL_PRODUCT_UC20   0x9003
 #define QUECTEL_PRODUCT_UC15   0x9090
+/* These Yuga products use Qualcomm's vendor ID */
+#define YUGA_PRODUCT_CLM920_NC5   0x9625
 
 #define QUECTEL_VENDOR_ID  0x2c7c
 /* These Quectel products use Quectel's vendor ID */
@@ -657,6 +659,10 @@ static const struct option_blacklist_inf
.reserved = BIT(1) | BIT(2) | BIT(3),
 };
 
+static const struct option_blacklist_info yuga_clm920_nc5_blacklist = {
+   .reserved = BIT(1) | BIT(4),
+};
+
 static const struct usb_device_id option_ids[] = {
{ USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_COLT) },
{ USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_RICOLA) },
@@ -1161,6 +1167,9 @@ static const struct usb_device_id option
{ USB_DEVICE(QUALCOMM_VENDOR_ID, QUECTEL_PRODUCT_UC15)},
{ USB_DEVICE(QUALCOMM_VENDOR_ID, QUECTEL_PRODUCT_UC20),
  .driver_info = (kernel_ulong_t)_intf4_blacklist },
+   /* Yuga products use Qualcomm vendor ID */
+   { USB_DEVICE(QUALCOMM_VENDOR_ID, YUGA_PRODUCT_CLM920_NC5),
+ .driver_info = (kernel_ulong_t)_clm920_nc5_blacklist },
/* Quectel products using Quectel vendor ID */
{ USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC21),
  .driver_info = (kernel_ulong_t)_intf4_blacklist },



Re: [PATCH v15 08/11] fw_cfg: handle fw_cfg_read_blob() error

2018-02-28 Thread Michael S. Tsirkin
On Wed, Feb 28, 2018 at 12:49:35PM +0100, Marc-André Lureau wrote:
> Hi
> 
> On Tue, Feb 27, 2018 at 1:20 AM, Michael S. Tsirkin  wrote:
> > On Thu, Feb 15, 2018 at 10:33:09PM +0100, Marc-André Lureau wrote:
> >> fw_cfg_read_blob() may fail, but does not return error. This may lead
> >> to undefined behaviours, such as a memcmp(sig, "QEMU") on uninitialized
> >> memory.
> >
> > I don't think that's true - there's a memset there that
> > will initialize the memory. probe is likely the only
> > case where it returns a slightly incorrect data.
> 
> Right, I'll update the commit message.
> 
> >> Return an error if ACPI locking failed. Also, the following
> >> DMA read/write extension will add more error paths that should be
> >> handled appropriately.
> >>
> >> Signed-off-by: Marc-André Lureau 
> >> ---
> >>  drivers/firmware/qemu_fw_cfg.c | 32 ++--
> >>  1 file changed, 22 insertions(+), 10 deletions(-)
> >>
> >> diff --git a/drivers/firmware/qemu_fw_cfg.c 
> >> b/drivers/firmware/qemu_fw_cfg.c
> >> index f6f90bef604c..5e6e5ac71dab 100644
> >> --- a/drivers/firmware/qemu_fw_cfg.c
> >> +++ b/drivers/firmware/qemu_fw_cfg.c
> >> @@ -59,8 +59,8 @@ static void fw_cfg_sel_endianness(u16 key)
> >>  }
> >>
> >>  /* read chunk of given fw_cfg blob (caller responsible for sanity-check) 
> >> */
> >> -static void fw_cfg_read_blob(u16 key,
> >> - void *buf, loff_t pos, size_t count)
> >> +static ssize_t fw_cfg_read_blob(u16 key,
> >> + void *buf, loff_t pos, size_t count)
> >>  {
> >>   u32 glk = -1U;
> >>   acpi_status status;
> >> @@ -73,7 +73,7 @@ static void fw_cfg_read_blob(u16 key,
> >>   /* Should never get here */
> >>   WARN(1, "fw_cfg_read_blob: Failed to lock ACPI!\n");
> >>   memset(buf, 0, count);
> >> - return;
> >> + return -EINVAL;
> >>   }
> >>
> >>   mutex_lock(_cfg_dev_lock);
> >
> > Wouldn't something like -EBUSY be more appropriate?
> 
> In theory, it would be a general failure right? I don't think we want
> the caller to retry. I think EINVAL fits better, but I don't think
> it matters much this or EBUSY.
> 
> >> @@ -84,6 +84,7 @@ static void fw_cfg_read_blob(u16 key,
> >>   mutex_unlock(_cfg_dev_lock);
> >>
> >>   acpi_release_global_lock(glk);
> >> + return count;
> >>  }
> >>
> >>  /* clean up fw_cfg device i/o */
> >> @@ -165,8 +166,9 @@ static int fw_cfg_do_platform_probe(struct 
> >> platform_device *pdev)
> >>   }
> >>
> >>   /* verify fw_cfg device signature */
> >> - fw_cfg_read_blob(FW_CFG_SIGNATURE, sig, 0, FW_CFG_SIG_SIZE);
> >> - if (memcmp(sig, "QEMU", FW_CFG_SIG_SIZE) != 0) {
> >> + if (fw_cfg_read_blob(FW_CFG_SIGNATURE, sig,
> >> + 0, FW_CFG_SIG_SIZE) < 0 ||
> >> + memcmp(sig, "QEMU", FW_CFG_SIG_SIZE) != 0) {
> >>   fw_cfg_io_cleanup();
> >>   return -ENODEV;
> >>   }
> >> @@ -326,8 +328,7 @@ static ssize_t fw_cfg_sysfs_read_raw(struct file 
> >> *filp, struct kobject *kobj,
> >>   if (count > entry->size - pos)
> >>   count = entry->size - pos;
> >>
> >> - fw_cfg_read_blob(entry->select, buf, pos, count);
> >> - return count;
> >> + return fw_cfg_read_blob(entry->select, buf, pos, count);
> >>  }
> >>
> >>  static struct bin_attribute fw_cfg_sysfs_attr_raw = {
> >> @@ -483,7 +484,11 @@ static int fw_cfg_register_dir_entries(void)
> >>   struct fw_cfg_file *dir;
> >>   size_t dir_size;
> >>
> >> - fw_cfg_read_blob(FW_CFG_FILE_DIR, _count, 0, 
> >> sizeof(files_count));
> >> + ret = fw_cfg_read_blob(FW_CFG_FILE_DIR, _count,
> >> + 0, sizeof(files_count));
> >> + if (ret < 0)
> >> + return ret;
> >> +
> >>   count = be32_to_cpu(files_count);
> >>   dir_size = count * sizeof(struct fw_cfg_file);
> >>
> >> @@ -491,7 +496,10 @@ static int fw_cfg_register_dir_entries(void)
> >>   if (!dir)
> >>   return -ENOMEM;
> >>
> >> - fw_cfg_read_blob(FW_CFG_FILE_DIR, dir, sizeof(files_count), 
> >> dir_size);
> >> + ret = fw_cfg_read_blob(FW_CFG_FILE_DIR, dir,
> >> + sizeof(files_count), dir_size);
> >> + if (ret < 0)
> >> + goto end;
> >>
> >>   for (i = 0; i < count; i++) {
> >>   ret = fw_cfg_register_file([i]);
> >> @@ -499,6 +507,7 @@ static int fw_cfg_register_dir_entries(void)
> >>   break;
> >>   }
> >>
> >> +end:
> >>   kfree(dir);
> >>   return ret;
> >>  }
> >> @@ -539,7 +548,10 @@ static int fw_cfg_sysfs_probe(struct platform_device 
> >> *pdev)
> >>   goto err_probe;
> >>
> >>   /* get revision number, add matching top-level attribute */
> >> - fw_cfg_read_blob(FW_CFG_ID, , 0, sizeof(rev));
> >> + err = fw_cfg_read_blob(FW_CFG_ID, , 0, sizeof(rev));
> >> + if (err < 0)
> 

[PATCH 3.16 006/254] iommu/vt-d: Fix scatterlist offset handling

2018-02-28 Thread Ben Hutchings
3.16.55-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Robin Murphy 

commit 29a90b70893817e2f2bb3cea40a29f5308e21b21 upstream.

The intel-iommu DMA ops fail to correctly handle scatterlists where
sg->offset is greater than PAGE_SIZE - the IOVA allocation is computed
appropriately based on the page-aligned portion of the offset, but the
mapping is set up relative to sg->page, which means it fails to actually
cover the whole buffer (and in the worst case doesn't cover it at all):

(sg->dma_address + sg->dma_len) +
sg->dma_address -+  |
iov_pfn--+   |  |
 |   |  |
 v   v  v
iova:   abcdef
||||||
  <...calculated>
 [_mapped__]
pfn:012345
||||||
 ^   ^  ^
 |   |  |
sg->page +   |  |
sg->offset --+  |
(sg->offset + sg->length) --+

As a result, the caller ends up overrunning the mapping into whatever
lies beyond, which usually goes badly:

[  429.645492] DMAR: DRHD: handling fault status reg 2
[  429.650847] DMAR: [DMA Write] Request device [02:00.4] fault addr f2682000 
...

Whilst this is a fairly rare occurrence, it can happen from the result
of intermediate scatterlist processing such as scatterwalk_ffwd() in the
crypto layer. Whilst that particular site could be fixed up, it still
seems worthwhile to bring intel-iommu in line with other DMA API
implementations in handling this robustly.

To that end, fix the intel_map_sg() path to line up the mapping
correctly (in units of MM pages rather than VT-d pages to match the
aligned_nrpages() calculation) regardless of the offset, and use
sg_phys() consistently for clarity.

Reported-by: Harsh Jain 
Signed-off-by: Robin Murphy 
Reviewed by: Ashok Raj 
Tested by: Jacob Pan 
Signed-off-by: Alex Williamson 
Signed-off-by: Ben Hutchings 
---
 drivers/iommu/intel-iommu.c | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -2008,10 +2008,12 @@ static int __domain_mapping(struct dmar_
uint64_t tmp;
 
if (!sg_res) {
+   unsigned int pgoff = sg->offset & ~PAGE_MASK;
+
sg_res = aligned_nrpages(sg->offset, sg->length);
-   sg->dma_address = ((dma_addr_t)iov_pfn << 
VTD_PAGE_SHIFT) + sg->offset;
+   sg->dma_address = ((dma_addr_t)iov_pfn << 
VTD_PAGE_SHIFT) + pgoff;
sg->dma_length = sg->length;
-   pteval = page_to_phys(sg_page(sg)) | prot;
+   pteval = (sg_phys(sg) - pgoff) | prot;
phys_pfn = pteval >> VTD_PAGE_SHIFT;
}
 
@@ -3345,7 +3347,7 @@ static int intel_nontranslate_map_sg(str
 
for_each_sg(sglist, sg, nelems, i) {
BUG_ON(!sg_page(sg));
-   sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
+   sg->dma_address = sg_phys(sg);
sg->dma_length = sg->length;
}
return nelems;



[PATCH 3.2 038/140] xhci: Don't add a virt_dev to the devs array before it's fully allocated

2018-02-28 Thread Ben Hutchings
3.2.100-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Mathias Nyman 

commit 5d9b70f7d52eb14bb37861c663bae44de9521c35 upstream.

Avoid null pointer dereference if some function is walking through the
devs array accessing members of a new virt_dev that is mid allocation.

Add the virt_dev to xhci->devs[i] _after_ the virt_device and all its
members are properly allocated.

issue found by KASAN: null-ptr-deref in xhci_find_slot_id_by_port

"Quick analysis suggests that xhci_alloc_virt_device() is not mutex
protected. If so, there is a time frame where xhci->devs[slot_id] is set
but not fully initialized. Specifically, xhci->devs[i]->udev can be NULL."

Signed-off-by: Mathias Nyman 
Signed-off-by: Greg Kroah-Hartman 
[bwh: Backported to 3.2: There is an extra failure path, so we may need to
 free dev->eps[0].ring] 
Signed-off-by: Ben Hutchings 
---
--- a/drivers/usb/host/xhci-mem.c
+++ b/drivers/usb/host/xhci-mem.c
@@ -873,10 +873,9 @@ int xhci_alloc_virt_device(struct xhci_h
return 0;
}
 
-   xhci->devs[slot_id] = kzalloc(sizeof(*xhci->devs[slot_id]), flags);
-   if (!xhci->devs[slot_id])
+   dev = kzalloc(sizeof(*dev), flags);
+   if (!dev)
return 0;
-   dev = xhci->devs[slot_id];
 
/* Allocate the (output) device context that will be used in the HC. */
dev->out_ctx = xhci_alloc_container_ctx(xhci, XHCI_CTX_TYPE_DEVICE, 
flags);
@@ -925,9 +924,19 @@ int xhci_alloc_virt_device(struct xhci_h
 >dcbaa->dev_context_ptrs[slot_id],
 le64_to_cpu(xhci->dcbaa->dev_context_ptrs[slot_id]));
 
+   xhci->devs[slot_id] = dev;
+
return 1;
 fail:
-   xhci_free_virt_device(xhci, slot_id);
+
+   if (dev->eps[0].ring)
+   xhci_ring_free(xhci, dev->eps[0].ring);
+   if (dev->in_ctx)
+   xhci_free_container_ctx(xhci, dev->in_ctx);
+   if (dev->out_ctx)
+   xhci_free_container_ctx(xhci, dev->out_ctx);
+   kfree(dev);
+
return 0;
 }
 



[PATCH 3.16 007/254] btrfs: clear space cache inode generation always

2018-02-28 Thread Ben Hutchings
3.16.55-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Josef Bacik 

commit 8e138e0d92c6c9d3d481674fb14e3439b495be37 upstream.

We discovered a box that had double allocations, and suspected the space
cache may be to blame.  While auditing the write out path I noticed that
if we've already setup the space cache we will just carry on.  This
means that any error we hit after cache_save_setup before we go to
actually write the cache out we won't reset the inode generation, so
whatever was already written will be considered correct, except it'll be
stale.  Fix this by _always_ resetting the generation on the block group
inode, this way we only ever have valid or invalid cache.

With this patch I was no longer able to reproduce cache corruption with
dm-log-writes and my bpf error injection tool.

Signed-off-by: Josef Bacik 
Signed-off-by: David Sterba 
Signed-off-by: Ben Hutchings 
---
 fs/btrfs/extent-tree.c | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3236,13 +3236,6 @@ again:
goto again;
}
 
-   /* We've already setup this transaction, go ahead and exit */
-   if (block_group->cache_generation == trans->transid &&
-   i_size_read(inode)) {
-   dcs = BTRFS_DC_SETUP;
-   goto out_put;
-   }
-
/*
 * We want to set the generation to 0, that way if anything goes wrong
 * from here on out we know not to trust this cache when we load up next
@@ -3252,6 +3245,13 @@ again:
ret = btrfs_update_inode(trans, root, inode);
WARN_ON(ret);
 
+   /* We've already setup this transaction, go ahead and exit */
+   if (block_group->cache_generation == trans->transid &&
+   i_size_read(inode)) {
+   dcs = BTRFS_DC_SETUP;
+   goto out_put;
+   }
+
if (i_size_read(inode) > 0) {
ret = btrfs_check_trunc_cache_free_space(root,
>fs_info->global_block_rsv);



[PATCH 3.2 032/140] ALSA: pcm: prevent UAF in snd_pcm_info

2018-02-28 Thread Ben Hutchings
3.2.100-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Robb Glasser 

commit 362bca57f5d78220f8b5907b875961af9436e229 upstream.

When the device descriptor is closed, the `substream->runtime` pointer
is freed. But another thread may be in the ioctl handler, case
SNDRV_CTL_IOCTL_PCM_INFO. This case calls snd_pcm_info_user() which
calls snd_pcm_info() which accesses the now freed `substream->runtime`.

Note: this fixes CVE-2017-0861

Signed-off-by: Robb Glasser 
Signed-off-by: Nick Desaulniers 
Signed-off-by: Takashi Iwai 
Signed-off-by: Ben Hutchings 
---
 sound/core/pcm.c | 2 ++
 1 file changed, 2 insertions(+)

--- a/sound/core/pcm.c
+++ b/sound/core/pcm.c
@@ -145,7 +145,9 @@ static int snd_pcm_control_ioctl(struct
err = -ENXIO;
goto _error;
}
+   mutex_lock(>open_mutex);
err = snd_pcm_info_user(substream, info);
+   mutex_unlock(>open_mutex);
_error:
mutex_unlock(_mutex);
return err;



[PATCH 3.16 040/254] media: dvb: i2c transfers over usb cannot be done from stack

2018-02-28 Thread Ben Hutchings
3.16.55-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Laurent Caumont 

commit 6d33377f2abbf9f0e561b116dd468d1c3ff36a6a upstream.

Signed-off-by: Laurent Caumont 
Signed-off-by: Sean Young 
Signed-off-by: Mauro Carvalho Chehab 
Signed-off-by: Ben Hutchings 
---
 drivers/media/usb/dvb-usb/dibusb-common.c | 16 ++--
 1 file changed, 14 insertions(+), 2 deletions(-)

--- a/drivers/media/usb/dvb-usb/dibusb-common.c
+++ b/drivers/media/usb/dvb-usb/dibusb-common.c
@@ -179,8 +179,20 @@ EXPORT_SYMBOL(dibusb_i2c_algo);
 
 int dibusb_read_eeprom_byte(struct dvb_usb_device *d, u8 offs, u8 *val)
 {
-   u8 wbuf[1] = { offs };
-   return dibusb_i2c_msg(d, 0x50, wbuf, 1, val, 1);
+   u8 *buf;
+   int rc;
+
+   buf = kmalloc(2, GFP_KERNEL);
+   if (!buf)
+   return -ENOMEM;
+
+   buf[0] = offs;
+
+   rc = dibusb_i2c_msg(d, 0x50, [0], 1, [1], 1);
+   *val = buf[1];
+   kfree(buf);
+
+   return rc;
 }
 EXPORT_SYMBOL(dibusb_read_eeprom_byte);
 



Re: [PATCH 3/3] x86/kvm/hyper-v: inject #GP only when invalid SINTx vector is unmasked

2018-02-28 Thread Vitaly Kuznetsov
Roman Kagan  writes:

> On Wed, Feb 28, 2018 at 02:44:01PM +0100, Vitaly Kuznetsov wrote:
>> Hyper-V 2016 on KVM with SynIC enabled doesn't boot with the following
>> trace:
>> 
>> kvm_entry:vcpu 0
>> kvm_exit: reason MSR_WRITE rip 0xf8000131c1e5 info 0 0
>> kvm_hv_synic_set_msr: vcpu_id 0 msr 0x4090 data 0x1 host 0
>> kvm_msr:  msr_write 4090 = 0x1 (#GP)
>> kvm_inj_exception:#GP (0x0)
>
> I don't remember having seen this...  Does this happen with the mainline
> QEMU, which doesn't set the SintPollingModeAvailable (17) bit in cpuid
> 0x4003:edx?

Yes, you need to have Hyper-V role enabled, kvm-intel module needs to
be loaded with 'nesting' support enabled.

>
>> 
>> KVM acts according to the following statement from TLFS:
>> 
>> "
>> 11.8.4 SINTx Registers
>> ...
>> Valid values for vector are 16-255 inclusive. Specifying an invalid
>> vector number results in #GP.
>> "
>> 
>> However, I checked and genuine Hyper-V doesn't #GP when we write 0x1
>> to SINTx. I checked with Microsoft and they confirmed that if either the
>> Masked bit (bit 16) or the Polling bit (bit 18) is set to 1, then they
>> ignore the value of Vector. Make KVM act accordingly.
>
> I wonder if that cpuid setting affects this behavior?  Also curious what
> exactly the guest is trying to achieve writing this bogus value?

The value is actually the default value which is supposed to be there:

"At virtual processor creation time, the default value of all SINTx
(synthetic interrupt source) registers is 0x0001." so I
guess this is just an initialization procedure.

>
>> 
>> Signed-off-by: Vitaly Kuznetsov 
>> ---
>>  arch/x86/include/uapi/asm/hyperv.h | 1 +
>>  arch/x86/kvm/hyperv.c  | 7 ++-
>>  2 files changed, 7 insertions(+), 1 deletion(-)
>> 
>> diff --git a/arch/x86/include/uapi/asm/hyperv.h 
>> b/arch/x86/include/uapi/asm/hyperv.h
>> index 62c778a303a1..a492dc357bd7 100644
>> --- a/arch/x86/include/uapi/asm/hyperv.h
>> +++ b/arch/x86/include/uapi/asm/hyperv.h
>> @@ -326,6 +326,7 @@ typedef struct _HV_REFERENCE_TSC_PAGE {
>>  #define HV_SYNIC_SIEFP_ENABLE   (1ULL << 0)
>>  #define HV_SYNIC_SINT_MASKED(1ULL << 16)
>>  #define HV_SYNIC_SINT_AUTO_EOI  (1ULL << 17)
>> +#define HV_SYNIC_SINT_POLLING   (1ULL << 18)
>>  #define HV_SYNIC_SINT_VECTOR_MASK   (0xFF)
>>  
>>  #define HV_SYNIC_STIMER_COUNT   (4)
>> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
>> index 6d14f808145d..d3d866c32976 100644
>> --- a/arch/x86/kvm/hyperv.c
>> +++ b/arch/x86/kvm/hyperv.c
>> @@ -95,9 +95,14 @@ static int synic_set_sint(struct kvm_vcpu_hv_synic 
>> *synic, int sint,
>>u64 data, bool host)
>>  {
>>  int vector, old_vector;
>> +bool masked, polling;
>>  
>>  vector = data & HV_SYNIC_SINT_VECTOR_MASK;
>> -if (vector < HV_SYNIC_FIRST_VALID_VECTOR && !host)
>> +masked = data & HV_SYNIC_SINT_MASKED;
>> +polling = data & HV_SYNIC_SINT_POLLING;
>> +
>> +if (vector < HV_SYNIC_FIRST_VALID_VECTOR &&
>> +!host && !masked && !polling)
>>  return 1;
>>  /*
>>   * Guest may configure multiple SINTs to use the same vector, so
>
> I'm not sure this is enough to implement the polling mode: per spec,
>

Oh, no, I wasn't trying to -- and by the way we don't currently announce
SintPollingModeAvailable so guests are not supposed to do that. This is
rather a future proof to 'not forget'.

>> Setting the polling bit will have the effect of unmasking an interrupt
>> source, except that an actual interrupt is not generated.
>
> However, if the guest sets a valid vector and the masked bit cleared,
> we'll consider it a usual SINT and add to masks and inject interrupts,
> etc, regardless of the polling bit.
>
> I must admit I'm confused by the above quote from the spec: is the
> polling bit supposed to come together with the masked bit?  If so, then
> we probably should validate it here (but your logs indicate otherwise).
> In general I'm missing the utility of this mode: why should an interrupt
> controller be involved in polling at all?

"Setting the polling bit will have the effect of unmasking an interrupt
source, except that an actual interrupt is not generated."

So, as I understand it, setting polling bit makes Vector value
irrelevant - the interrupt is not generated so I *assume* we may see
writes with zero Vector and polling bit set. But again, we're not
implementing polling mode for now, I can just drop it from the patch if
you think it is confusing.

-- 
  Vitaly


[PATCH 3.16 021/254] USB: usbfs: Filter flags passed in from user space

2018-02-28 Thread Ben Hutchings
3.16.55-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Oliver Neukum 

commit 446f666da9f019ce2ffd03800995487e79a91462 upstream.

USBDEVFS_URB_ISO_ASAP must be accepted only for ISO endpoints.
Improve sanity checking.

Reported-by: Andrey Konovalov 
Signed-off-by: Oliver Neukum 
Acked-by: Alan Stern 
Signed-off-by: Greg Kroah-Hartman 
[bwh: Backported to 3.16: adjust context]
Signed-off-by: Ben Hutchings 
---
 drivers/usb/core/devio.c | 14 +-
 1 file changed, 9 insertions(+), 5 deletions(-)

--- a/drivers/usb/core/devio.c
+++ b/drivers/usb/core/devio.c
@@ -1295,14 +1295,18 @@ static int proc_do_submiturb(struct usb_
int number_of_packets = 0;
unsigned int stream_id = 0;
void *buf;
-
-   if (uurb->flags & ~(USBDEVFS_URB_ISO_ASAP |
-   USBDEVFS_URB_SHORT_NOT_OK |
+   unsigned long mask =USBDEVFS_URB_SHORT_NOT_OK |
USBDEVFS_URB_BULK_CONTINUATION |
USBDEVFS_URB_NO_FSBR |
USBDEVFS_URB_ZERO_PACKET |
-   USBDEVFS_URB_NO_INTERRUPT))
-   return -EINVAL;
+   USBDEVFS_URB_NO_INTERRUPT;
+   /* USBDEVFS_URB_ISO_ASAP is a special case */
+   if (uurb->type == USBDEVFS_URB_TYPE_ISO)
+   mask |= USBDEVFS_URB_ISO_ASAP;
+
+   if (uurb->flags & ~mask)
+   return -EINVAL;
+
if (uurb->buffer_length > 0 && !uurb->buffer)
return -EINVAL;
if (!(uurb->type == USBDEVFS_URB_TYPE_CONTROL &&



Re: [PATCH v15 11/11] RFC: fw_cfg: do DMA read operation

2018-02-28 Thread Marc-André Lureau
On Wed, Feb 28, 2018 at 4:35 PM, Michael S. Tsirkin  wrote:
> On Wed, Feb 28, 2018 at 01:27:02PM +0100, Marc-André Lureau wrote:
>> Hi
>>
>> On Tue, Feb 27, 2018 at 1:04 AM, Michael S. Tsirkin  wrote:
>> > On Thu, Feb 15, 2018 at 10:33:12PM +0100, Marc-André Lureau wrote:
>> >> Modify fw_cfg_read_blob() to use DMA if the device supports it.
>> >> Return errors, because the operation may fail.
>> >>
>> >> So far, only one call in fw_cfg_register_dir_entries() is using
>> >> kmalloc'ed buf and is thus clearly eligible to DMA read.
>> >>
>> >> Initially, I didn't implement DMA read to speed up boot time, but as a
>> >> first step before introducing DMA write (since read operations were
>> >> already present). Even more, I didn't realize fw-cfg entries were
>> >> being read by the kernel during boot by default. But actually fw-cfg
>> >> entries are being populated during module probe. I knew DMA improved a
>> >> lot bios boot time (the main reason the DMA interface was added
>> >> afaik). Let's see the time it would take to read the whole ACPI
>> >> tables (128kb allocated)
>> >>
>> >>  # time cat /sys/firmware/qemu_fw_cfg/by_name/etc/acpi/tables/raw
>> >>   - with DMA: sys 0m0.003s
>> >>   - without DMA (-global fw_cfg.dma_enabled=off): sys 0m7.674s
>> >>
>> >> FW_CFG_FILE_DIR (0x19) is the only "file" that is read during kernel
>> >> boot to populate sysfs qemu_fw_cfg directory, and it is quite
>> >> small (1-2kb). Since it does not expose itself, in order to measure
>> >> the time it takes to read such small file, I took a comparable sized
>> >> file of 2048 bytes and exposed it (-fw_cfg test,file=file with a
>> >> modified read_raw enabling DMA)
>> >>
>> >>  # perf stat -r 100 cat /sys/firmware/qemu_fw_cfg/by_name/test/raw 
>> >> >/dev/null
>> >>   - with DMA:
>> >>   0.636037  task-clock (msec) #0.141 CPUs 
>> >> utilized( +-  1.19% )
>> >>   - without DMA:
>> >>   6.430128  task-clock (msec) #0.622 CPUs 
>> >> utilized( +-  0.22% )
>> >>
>> >> That's a few msec saved during boot by enabling DMA read (the gain
>> >> would be more substantial if other & bigger fw-cfg entries are read by
>> >> others from sysfs, unfortunately, it's not clear if we can always
>> >> enable DMA there)
>> >>
>> >> Signed-off-by: Marc-André Lureau 
>> >> ---
>> >>  drivers/firmware/qemu_fw_cfg.c | 61 
>> >> ++
>> >>  1 file changed, 50 insertions(+), 11 deletions(-)
>> >>
>> >> diff --git a/drivers/firmware/qemu_fw_cfg.c 
>> >> b/drivers/firmware/qemu_fw_cfg.c
>> >> index 3015e77aebca..94df57e9be66 100644
>> >> --- a/drivers/firmware/qemu_fw_cfg.c
>> >> +++ b/drivers/firmware/qemu_fw_cfg.c
>> >> @@ -124,12 +124,47 @@ static ssize_t fw_cfg_dma_transfer(void *address, 
>> >> u32 length, u32 control)
>> >>   return ret;
>> >>  }
>> >>
>> >> +/* with acpi & dev locks taken */
>> >> +static ssize_t fw_cfg_read_blob_dma(u16 key,
>> >> + void *buf, loff_t pos, size_t count)
>> >> +{
>> >> + ssize_t ret;
>> >> +
>> >> + if (pos == 0) {
>> >> + ret = fw_cfg_dma_transfer(buf, count, key << 16
>> >> + | FW_CFG_DMA_CTL_SELECT
>> >> + | FW_CFG_DMA_CTL_READ);
>> >> + } else {
>> >> + fw_cfg_sel_endianness(key);
>> >> + ret = fw_cfg_dma_transfer(NULL, pos, FW_CFG_DMA_CTL_SKIP);
>> >> + if (ret < 0)
>> >> + return ret;
>> >> + ret = fw_cfg_dma_transfer(buf, count,
>> >> + FW_CFG_DMA_CTL_READ);
>> >> + }
>> >> +
>> >> + return ret;
>> >> +}
>> >> +
>> >> +/* with acpi & dev locks taken */
>> >> +static ssize_t fw_cfg_read_blob_io(u16 key,
>> >> + void *buf, loff_t pos, size_t count)
>> >> +{
>> >> + fw_cfg_sel_endianness(key);
>> >> + while (pos-- > 0)
>> >> + ioread8(fw_cfg_reg_data);
>> >> + ioread8_rep(fw_cfg_reg_data, buf, count);
>> >> + return count;
>> >> +}
>> >> +
>> >>  /* read chunk of given fw_cfg blob (caller responsible for sanity-check) 
>> >> */
>> >>  static ssize_t fw_cfg_read_blob(u16 key,
>> >> - void *buf, loff_t pos, size_t count)
>> >> + void *buf, loff_t pos, size_t count,
>> >> + bool dma)
>> >>  {
>> >>   u32 glk = -1U;
>> >>   acpi_status status;
>> >> + ssize_t ret;
>> >>
>> >>   /* If we have ACPI, ensure mutual exclusion against any potential
>> >>* device access by the firmware, e.g. via AML methods:
>> >
>> > so this adds a dma flag to fw_cfg_read_blob.
>> >
>> >
>> >
>> >> @@ -143,14 +178,17 @@ static ssize_t fw_cfg_read_blob(u16 key,
>> >>   }
>> >>
>> >>   mutex_lock(_cfg_dev_lock);
>> >> - fw_cfg_sel_endianness(key);
>> >> - 

Re: [PATCH 2/2] serial: stm32: update interrupt initialization

2018-02-28 Thread Andy Shevchenko
On Wed, Feb 28, 2018 at 5:40 PM, Andy Shevchenko
 wrote:
> On Wed, Feb 28, 2018 at 12:51 PM, Bich HEMON  wrote:

>> -   port->irq   = platform_get_irq(pdev, 0);
>> -   stm32port->wakeirq = platform_get_irq(pdev, 1);

>> +   stm32port->wakeirq = platform_get_irq_byname(pdev, "wakeup");
>
> But this one is redundant since Tony's patch:
> https://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git/commit/?h=bleeding-edge=da997b22c40473b7db60bde6ea188d35565d10c8

Oh, it's about getting a resource by name...

Sorry, discard my previous comment.


-- 
With Best Regards,
Andy Shevchenko


[PATCH 3.16 042/254] can: kvaser_usb: Fix comparison bug in kvaser_usb_read_bulk_callback()

2018-02-28 Thread Ben Hutchings
3.16.55-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Jimmy Assarsson 

commit e84f44eb5523401faeb9cc1c97895b68e3cfb78d upstream.

The condition in the while-loop becomes true when actual_length is less than
2 (MSG_HEADER_LEN). In best case we end up with a former, already
dispatched msg, that got msg->len greater than actual_length. This will
result in a "Format error" error printout.

Problem seen when unplugging a Kvaser USB device connected to a vbox guest.

warning: comparison between signed and unsigned integer expressions
[-Wsign-compare]

Signed-off-by: Jimmy Assarsson 
Signed-off-by: Marc Kleine-Budde 
Signed-off-by: Ben Hutchings 
---
 drivers/net/can/usb/kvaser_usb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/drivers/net/can/usb/kvaser_usb.c
+++ b/drivers/net/can/usb/kvaser_usb.c
@@ -983,7 +983,7 @@ static void kvaser_usb_read_bulk_callbac
goto resubmit_urb;
}
 
-   while (pos <= urb->actual_length - MSG_HEADER_LEN) {
+   while (pos <= (int)(urb->actual_length - MSG_HEADER_LEN)) {
msg = urb->transfer_buffer + pos;
 
/* The Kvaser firmware can only read and write messages that



[PATCH 3.2 006/140] scsi: dma-mapping: always provide dma_get_cache_alignment

2018-02-28 Thread Ben Hutchings
3.2.100-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Christoph Hellwig 

commit 860dd4424f344400b491b212ee4acb3a358ba9d9 upstream.

Provide the dummy version of dma_get_cache_alignment that always returns
1 even if CONFIG_HAS_DMA is not set, so that drivers and subsystems can
use it without ifdefs.

Signed-off-by: Christoph Hellwig 
Signed-off-by: Martin K. Petersen 
[bwh: Backported to 3.2: Also delete the conflicting declaration in
 <asm-generic/dma-mapping-broken.h>]
Signed-off-by: Ben Hutchings 
---
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -130,7 +130,6 @@ static inline void *dma_zalloc_coherent(
return ret;
 }
 
-#ifdef CONFIG_HAS_DMA
 static inline int dma_get_cache_alignment(void)
 {
 #ifdef ARCH_DMA_MINALIGN
@@ -138,7 +137,6 @@ static inline int dma_get_cache_alignmen
 #endif
return 1;
 }
-#endif
 
 /* flags for the coherent memory api */
 #defineDMA_MEMORY_MAP  0x01
--- a/include/asm-generic/dma-mapping-broken.h
+++ b/include/asm-generic/dma-mapping-broken.h
@@ -69,9 +69,6 @@ dma_supported(struct device *dev, u64 ma
 extern int
 dma_set_mask(struct device *dev, u64 mask);
 
-extern int
-dma_get_cache_alignment(void);
-
 extern void
 dma_cache_sync(struct device *dev, void *vaddr, size_t size,
   enum dma_data_direction direction);



Re: [PATCH v2 4/6] x86: Consolidate PCI_MMCONFIG configs

2018-02-28 Thread Andy Shevchenko
On Wed, Feb 28, 2018 at 8:34 AM, Jan Kiszka  wrote:
> From: Jan Kiszka 
>
> Not sure if those two worked by design or just by chance so far. In any
> case, it's at least cleaner and clearer to express this in a single
> config statement.

I would add a reference to the commit which brought that in the first place.

>
> Signed-off-by: Jan Kiszka 
> ---
>  arch/x86/Kconfig | 9 +++--
>  1 file changed, 3 insertions(+), 6 deletions(-)
>
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index eb7f43f23521..63e85e7da12e 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -2641,8 +2641,9 @@ config PCI_DIRECT
> depends on PCI && (X86_64 || (PCI_GODIRECT || PCI_GOANY || PCI_GOOLPC 
> || PCI_GOMMCONFIG))
>
>  config PCI_MMCONFIG
> -   def_bool y
> -   depends on X86_32 && PCI && (ACPI || SFI) && (PCI_GOMMCONFIG || 
> PCI_GOANY)
> +   bool "Support mmconfig PCI config space access" if X86_64
> +   default y
> +   depends on PCI && (ACPI || SFI) && (PCI_GOMMCONFIG || PCI_GOANY || 
> X86_64)

Looking to the above context I would rather put it like

depends on PCI && (ACPI || SFI) && (X86_64 || (PCI_GOANY || PCI_GOMMCONFIG))

-- 
With Best Regards,
Andy Shevchenko


[PATCH 3.2 055/140] xfrm: Reinject transport-mode packets through tasklet

2018-02-28 Thread Ben Hutchings
3.2.100-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Herbert Xu 

commit acf568ee859f098279eadf551612f103afdacb4e upstream.

This is an old bugbear of mine:

https://www.mail-archive.com/netdev@vger.kernel.org/msg03894.html

By crafting special packets, it is possible to cause recursion
in our kernel when processing transport-mode packets at levels
that are only limited by packet size.

The easiest one is with DNAT, but an even worse one is where
UDP encapsulation is used in which case you just have to insert
an UDP encapsulation header in between each level of recursion.

This patch avoids this problem by reinjecting transport-mode packets
through a tasklet.

Fixes: b05e106698d9 ("[IPV4/6]: Netfilter IPsec input hooks")
Signed-off-by: Herbert Xu 
Signed-off-by: Steffen Klassert 
[bwh: Backported to 3.2:
 - netfilter finish callbacks only receive an sk_buff pointer
 - Adjust context]
Signed-off-by: Ben Hutchings 
---
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1439,6 +1439,7 @@ extern int xfrm_prepare_input(struct xfr
 extern int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi,
  int encap_type);
 extern int xfrm_input_resume(struct sk_buff *skb, int nexthdr);
+int xfrm_trans_queue(struct sk_buff *skb, int (*finish)(struct sk_buff *));
 extern int xfrm_output_resume(struct sk_buff *skb, int err);
 extern int xfrm_output(struct sk_buff *skb);
 extern int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff 
*skb);
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -22,6 +22,11 @@ int xfrm4_extract_input(struct xfrm_stat
return xfrm4_extract_header(skb);
 }
 
+static int xfrm4_rcv_encap_finish2(struct sk_buff *skb)
+{
+   return dst_input(skb);
+}
+
 static inline int xfrm4_rcv_encap_finish(struct sk_buff *skb)
 {
if (skb_dst(skb) == NULL) {
@@ -31,7 +36,11 @@ static inline int xfrm4_rcv_encap_finish
 iph->tos, skb->dev))
goto drop;
}
-   return dst_input(skb);
+
+   if (xfrm_trans_queue(skb, xfrm4_rcv_encap_finish2))
+   goto drop;
+
+   return 0;
 drop:
kfree_skb(skb);
return NET_RX_DROP;
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -29,6 +29,13 @@ int xfrm6_rcv_spi(struct sk_buff *skb, i
 }
 EXPORT_SYMBOL(xfrm6_rcv_spi);
 
+static int xfrm6_transport_finish2(struct sk_buff *skb)
+{
+   if (xfrm_trans_queue(skb, ip6_rcv_finish))
+   __kfree_skb(skb);
+   return -1;
+}
+
 int xfrm6_transport_finish(struct sk_buff *skb, int async)
 {
skb_network_header(skb)[IP6CB(skb)->nhoff] =
@@ -43,7 +50,7 @@ int xfrm6_transport_finish(struct sk_buf
__skb_push(skb, skb->data - skb_network_header(skb));
 
NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, skb, skb->dev, NULL,
-   ip6_rcv_finish);
+   xfrm6_transport_finish2);
return -1;
 }
 
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -7,15 +7,31 @@
  *
  */
 
+#include 
+#include 
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
 
+struct xfrm_trans_tasklet {
+   struct tasklet_struct tasklet;
+   struct sk_buff_head queue;
+};
+
+struct xfrm_trans_cb {
+   int (*finish)(struct sk_buff *skb);
+};
+
+#define XFRM_TRANS_SKB_CB(__skb) ((struct xfrm_trans_cb *)&((__skb)->cb[0]))
+
 static struct kmem_cache *secpath_cachep __read_mostly;
 
+static DEFINE_PER_CPU(struct xfrm_trans_tasklet, xfrm_trans_tasklet);
+
 void __secpath_destroy(struct sec_path *sp)
 {
int i;
@@ -285,10 +301,50 @@ int xfrm_input_resume(struct sk_buff *sk
 }
 EXPORT_SYMBOL(xfrm_input_resume);
 
+static void xfrm_trans_reinject(unsigned long data)
+{
+   struct xfrm_trans_tasklet *trans = (void *)data;
+   struct sk_buff_head queue;
+   struct sk_buff *skb;
+
+   __skb_queue_head_init();
+   skb_queue_splice_init(>queue, );
+
+   while ((skb = __skb_dequeue()))
+   XFRM_TRANS_SKB_CB(skb)->finish(skb);
+}
+
+int xfrm_trans_queue(struct sk_buff *skb, int (*finish)(struct sk_buff *))
+{
+   struct xfrm_trans_tasklet *trans;
+
+   trans = this_cpu_ptr(_trans_tasklet);
+
+   if (skb_queue_len(>queue) >= netdev_max_backlog)
+   return -ENOBUFS;
+
+   XFRM_TRANS_SKB_CB(skb)->finish = finish;
+   skb_queue_tail(>queue, skb);
+   tasklet_schedule(>tasklet);
+   return 0;
+}
+EXPORT_SYMBOL(xfrm_trans_queue);
+
 void __init xfrm_input_init(void)
 {
+   int i;
+
secpath_cachep = kmem_cache_create("secpath_cache",
   sizeof(struct sec_path),
   0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
   NULL);
+
+   for_each_possible_cpu(i) {
+  

Re: [PATCH 4.9 09/66] KVM: arm/arm64: Check pagesize when allocating a hugepage at Stage 2

2018-02-28 Thread Punit Agrawal
Hi Ioana,

Ioana Ciornei  writes:

>> 4.9-stable review patch.  If anyone has any objections, please let me know.
>
> I know that my response comes late but I just found out that the version of 
> the patch applied in 4.9-stable, the one in this email, is different than the 
> one applied on 4.14-stable.
> This is the one applied on 4.14: https://patchwork.kernel.org/patch/10177715/
>
> As you can see, the check against PMD_SIZE which is present in 4.14 is not 
> present in this version of the patch.
>
> Am I missing something or should I send a patch to rectify this?

You're right. The check against PMD_SIZE was lost during the rebase to
v4.9. I should've noticed this before it got sent.

Please send a patch to fix this. Alternately, I can fix this with your
reported by.

Thanks,
Punit

>
> Thanks,
> Ioana Ciornei
>
>> 
>> --
>> 
>> From: Punit Agrawal 
>> 
>> commit c507babf10ead4d5c8cca704539b170752a8ac84 upstream.
>> 
>> KVM only supports PMD hugepages at stage 2 but doesn't actually check that
>> the provided hugepage memory pagesize is PMD_SIZE before populating
>> stage 2 entries.
>> 
>> In cases where the backing hugepage size is smaller than PMD_SIZE (such as
>> when using contiguous hugepages), KVM can end up creating stage 2
>> mappings that extend beyond the supplied memory.
>> 
>> Fix this by checking for the pagesize of userspace vma before creating PMD
>> hugepage at stage 2.
>> 
>> Fixes: 66b3923a1a0f77a ("arm64: hugetlb: add support for PTE contiguous
>> bit")
>> Signed-off-by: Punit Agrawal 
>> Cc: Marc Zyngier 
>> Reviewed-by: Christoffer Dall 
>> Signed-off-by: Christoffer Dall 
>> Signed-off-by: Greg Kroah-Hartman 
>> 
>> ---
>>  arch/arm/kvm/mmu.c |2 +-
>>  1 file changed, 1 insertion(+), 1 deletion(-)
>> 
>> --- a/arch/arm/kvm/mmu.c
>> +++ b/arch/arm/kvm/mmu.c
>> @@ -1284,7 +1284,7 @@ static int user_mem_abort(struct kvm_vcp
>>  return -EFAULT;
>>  }
>> 
>> -if (is_vm_hugetlb_page(vma) && !logging_active) {
>> +if (vma_kernel_pagesize(vma) && !logging_active) {
>>  hugetlb = true;
>>  gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
>>  } else {
>> 
>

-- 
 Being overloaded is the sign of a true Debian maintainer.


[PATCH 3.2 062/140] usb: add RESET_RESUME for ELSA MicroLink 56K

2018-02-28 Thread Ben Hutchings
3.2.100-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Oliver Neukum 

commit b9096d9f15c142574ebebe8fbb137012bb9d99c2 upstream.

This modem needs this quirk to operate. It produces timeouts when
resumed without reset.

Signed-off-by: Oliver Neukum 
Signed-off-by: Greg Kroah-Hartman 
Signed-off-by: Ben Hutchings 
---
 drivers/usb/core/quirks.c | 3 +++
 1 file changed, 3 insertions(+)

--- a/drivers/usb/core/quirks.c
+++ b/drivers/usb/core/quirks.c
@@ -128,6 +128,9 @@ static const struct usb_device_id usb_qu
/* appletouch */
{ USB_DEVICE(0x05ac, 0x021a), .driver_info = USB_QUIRK_RESET_RESUME },
 
+   /* ELSA MicroLink 56K */
+   { USB_DEVICE(0x05cc, 0x2267), .driver_info = USB_QUIRK_RESET_RESUME },
+
/* Avision AV600U */
{ USB_DEVICE(0x0638, 0x0a13), .driver_info =
  USB_QUIRK_STRING_FETCH_255 },



[PATCH 3.16 041/254] can: kvaser_usb: free buf in error paths

2018-02-28 Thread Ben Hutchings
3.16.55-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Jimmy Assarsson 

commit 435019b48033138581a6171093b181fc6b4d3d30 upstream.

The allocated buffer was not freed if usb_submit_urb() failed.

Signed-off-by: Jimmy Assarsson 
Signed-off-by: Marc Kleine-Budde 
Signed-off-by: Ben Hutchings 
---
 drivers/net/can/usb/kvaser_usb.c | 2 ++
 1 file changed, 2 insertions(+)

--- a/drivers/net/can/usb/kvaser_usb.c
+++ b/drivers/net/can/usb/kvaser_usb.c
@@ -608,6 +608,7 @@ static int kvaser_usb_simple_msg_async(s
if (err) {
netdev_err(netdev, "Error transmitting URB\n");
usb_unanchor_urb(urb);
+   kfree(buf);
usb_free_urb(urb);
return err;
}
@@ -1406,6 +1407,7 @@ static netdev_tx_t kvaser_usb_start_xmit
spin_unlock_irqrestore(>tx_contexts_lock, flags);
 
usb_unanchor_urb(urb);
+   kfree(buf);
 
stats->tx_dropped++;
 



Re: [PATCH v15 11/11] RFC: fw_cfg: do DMA read operation

2018-02-28 Thread Michael S. Tsirkin
On Wed, Feb 28, 2018 at 04:41:51PM +0100, Marc-André Lureau wrote:
> I don't know if it's always safe to enable dma in read_raw(), how
> could we know? Is there a check we could use to choose one or the
> other (and thus avoiding explicit dma/readfn argument)?

I'm not sure - but does it really matter? Is anyone reading large files
like this in production where speed matters?
Why even bother with DMA?

-- 
MST


[PATCH 3.16 238/254] media: v4l2-compat-ioctl32.c: add capabilities field to, v4l2_input32

2018-02-28 Thread Ben Hutchings
3.16.55-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Hans Verkuil 

commit 037e0865c2ecbaa4558feba239ece08d7e457ec0 upstream.

The v4l2_input32 struct wasn't updated when this field was added.
It didn't cause a failure in the compat code, but it is better to
keep it in sync with v4l2_input to avoid confusion.

Signed-off-by: Hans Verkuil 
Signed-off-by: Mauro Carvalho Chehab 
Signed-off-by: Ben Hutchings 
---
 drivers/media/v4l2-core/v4l2-compat-ioctl32.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

--- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c
+++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c
@@ -576,7 +576,8 @@ struct v4l2_input32 {
__u32tuner; /*  Associated tuner */
compat_u64   std;
__u32status;
-   __u32reserved[4];
+   __u32capabilities;
+   __u32reserved[3];
 };
 
 /* The 64-bit v4l2_input struct has extra padding at the end of the struct.



[PATCH 3.16 247/254] media: v4l2-compat-ioctl32.c: fix ctrl_is_pointer

2018-02-28 Thread Ben Hutchings
3.16.55-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Hans Verkuil 

commit b8c601e8af2d08f733d74defa8465303391bb930 upstream.

ctrl_is_pointer just hardcoded two known string controls, but that
caused problems when using e.g. custom controls that use a pointer
for the payload.

Reimplement this function: it now finds the v4l2_ctrl (if the driver
uses the control framework) or it calls vidioc_query_ext_ctrl (if the
driver implements that directly).

In both cases it can now check if the control is a pointer control
or not.

Signed-off-by: Hans Verkuil 
Acked-by: Sakari Ailus 
Signed-off-by: Mauro Carvalho Chehab 
[bwh: Rebased on top of some earlier fixes]
Signed-off-by: Ben Hutchings 
---
--- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c
+++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c
@@ -18,6 +18,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 #include 
 
 static long native_ioctl(struct file *file, unsigned int cmd, unsigned long 
arg)
@@ -571,24 +573,32 @@ struct v4l2_ext_control32 {
};
 } __attribute__ ((packed));
 
-/* The following function really belong in v4l2-common, but that causes
-   a circular dependency between modules. We need to think about this, but
-   for now this will do. */
-
-/* Return non-zero if this control is a pointer type. Currently only
-   type STRING is a pointer type. */
-static inline int ctrl_is_pointer(u32 id)
-{
-   switch (id) {
-   case V4L2_CID_RDS_TX_PS_NAME:
-   case V4L2_CID_RDS_TX_RADIO_TEXT:
-   return 1;
-   default:
-   return 0;
+/* Return true if this control is a pointer type. */
+static inline bool ctrl_is_pointer(struct file *file, u32 id)
+{
+   struct video_device *vdev = video_devdata(file);
+   struct v4l2_fh *fh = NULL;
+   struct v4l2_ctrl_handler *hdl = NULL;
+
+   if (test_bit(V4L2_FL_USES_V4L2_FH, &vdev->flags))
+   fh = file->private_data;
+
+   if (fh && fh->ctrl_handler)
+   hdl = fh->ctrl_handler;
+   else if (vdev->ctrl_handler)
+   hdl = vdev->ctrl_handler;
+
+   if (hdl) {
+   struct v4l2_ctrl *ctrl = v4l2_ctrl_find(hdl, id);
+
+   return ctrl && ctrl->type == V4L2_CTRL_TYPE_STRING;
}
+   return false;
 }
 
-static int get_v4l2_ext_controls32(struct v4l2_ext_controls *kp, struct 
v4l2_ext_controls32 __user *up)
+static int get_v4l2_ext_controls32(struct file *file,
+  struct v4l2_ext_controls *kp,
+  struct v4l2_ext_controls32 __user *up)
 {
struct v4l2_ext_control32 __user *ucontrols;
struct v4l2_ext_control __user *kcontrols;
@@ -620,7 +630,7 @@ static int get_v4l2_ext_controls32(struc
return -EFAULT;
if (get_user(id, >id))
return -EFAULT;
-   if (ctrl_is_pointer(id)) {
+   if (ctrl_is_pointer(file, id)) {
void __user *s;
 
if (get_user(p, >string))
@@ -635,7 +645,9 @@ static int get_v4l2_ext_controls32(struc
return 0;
 }
 
-static int put_v4l2_ext_controls32(struct v4l2_ext_controls *kp, struct 
v4l2_ext_controls32 __user *up)
+static int put_v4l2_ext_controls32(struct file *file,
+  struct v4l2_ext_controls *kp,
+  struct v4l2_ext_controls32 __user *up)
 {
struct v4l2_ext_control32 __user *ucontrols;
struct v4l2_ext_control __user *kcontrols =
@@ -667,7 +679,7 @@ static int put_v4l2_ext_controls32(struc
/* Do not modify the pointer when copying a pointer control.
   The contents of the pointer was changed, not the pointer
   itself. */
-   if (ctrl_is_pointer(id))
+   if (ctrl_is_pointer(file, id))
size -= sizeof(ucontrols->value64);
if (copy_in_user(ucontrols, kcontrols, size))
return -EFAULT;
@@ -881,7 +893,7 @@ static long do_video_ioctl(struct file *
case VIDIOC_G_EXT_CTRLS:
case VIDIOC_S_EXT_CTRLS:
case VIDIOC_TRY_EXT_CTRLS:
-   err = get_v4l2_ext_controls32(, up);
+   err = get_v4l2_ext_controls32(file, , up);
compatible_arg = 0;
break;
case VIDIOC_DQEVENT:
@@ -908,7 +920,7 @@ static long do_video_ioctl(struct file *
case VIDIOC_G_EXT_CTRLS:
case VIDIOC_S_EXT_CTRLS:
case VIDIOC_TRY_EXT_CTRLS:
-   if (put_v4l2_ext_controls32(, up))
+   if (put_v4l2_ext_controls32(file, , up))
err = -EFAULT;
break;
}



Re: [PATCH 2/2] arm64: dts: juno: Describe the full GICv2m region

2018-02-28 Thread Sudeep Holla


On 12/02/18 19:17, Robin Murphy wrote:
> On 12/02/18 18:27, Marc Zyngier wrote:
>> Hi Sudeep,
>>
>> On 12/02/18 18:17, Sudeep Holla wrote:
>>>
>>>
>>> On 07/02/18 14:32, Marc Zyngier wrote:
 From: Robin Murphy 

 Juno's GICv2m implementation consists of four frames providing 32
 interrupts each. Since it is possible to plug in enough PCIe endpoints
 to consume more than 32 MSIs, and the driver already has a bodge to
 handle multiple frames, let's expose the other three as well.

>>>
>>> Change on it own looks good. So if you want to merge via your tree:
>>>
>>> Acked-by: Sudeep Holla 
>>>
>>> Let me know if you decide not to take it via your tree and you want me
>>> to send it to arm-soc.
>>
>> If this would usually go via arm-soc, feel free to take it via this
>> route. I'll drop the patch from my tree.
>>
>>> On the side note I just noticed the Juno TRM[1] has 64k for each of
>>> these MSI frames(page 3-24 section 3.3.5 Application memory map summary)
>>>
>>> I am not sure if TRM is wrong. This patch is just copying the 4k size
>>> from frame 0 which got added with initial Juno DTS.
>>
>> I can't see why the TRM would be wrong. This is actually consistent with
>> the expected practice of aligning all devices on a 64kB boundary and
>> size so that you don't get any nasty surprise when passing the device to
>> a VM (*cough* GIC400 *cough*).
>>
>> Robin, any chance you could check this?
> 
> Well, the engineering spec for the v2m widget does claim that only the
> bottom 12 bits of AxADDR are used, but on the other hand it also implies
> that the "real" endpoint here is a single monolithic block of 4 such
> widgets, so a third truth is that there is only a single 256KB region...
> 
> As usual, I've completely forgotten about virtualisation when it comes
> to hardware :) On reflection I do of course appreciate that whilst 60KB
> of RAZ/WI space isn't significant in terms of "a device", it is rather
> more so in terms of "not a device" - if the only reasonable way to
> communicate that is to describe the v2m devices each owning 64KB, then
> I'm quite happy for you to fix up the patch that way if you want.
> 

I have applied this patch as is [1] and added another patch to fix the
size to 64kB for all the frames on top as per Juno TRM. Sorry, I forgot
to send that out, will do that shortly.

-- 
Regards,
Sudeep

[1] https://git.kernel.org/sudeep.holla/linux/h/for-next/juno


[PATCH] arm64: dts: juno: fix size of GICv2m MSI frames

2018-02-28 Thread Sudeep Holla
Currently the size of GICv2m MSI frames is listed as 4kB while the
Juno TRM specifies 64kB for each of these MSI frames.

Though the devices connected themselves might just use the first 4kB,
to be consistent with the general practice of 64kB boundary alignment
to all the devices, let's keep the size as 64kB. This might also help
in avoiding any surprise when passing the device to a VM.

This patch increases the size of each GICv2m MSI frame from 4kB to 64kB
as per the specification.

Cc: Liviu Dudau 
Cc: Robin Murphy 
Cc: Marc Zyngier 
Signed-off-by: Sudeep Holla 
---
 arch/arm64/boot/dts/arm/juno-base.dtsi | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/boot/dts/arm/juno-base.dtsi 
b/arch/arm64/boot/dts/arm/juno-base.dtsi
index f8088c45b060..eb749c50a736 100644
--- a/arch/arm64/boot/dts/arm/juno-base.dtsi
+++ b/arch/arm64/boot/dts/arm/juno-base.dtsi
@@ -72,25 +72,25 @@
v2m_0: v2m@0 {
compatible = "arm,gic-v2m-frame";
msi-controller;
-   reg = <0 0 0 0x1000>;
+   reg = <0 0 0 0x10000>;
};
 
v2m@10000 {
compatible = "arm,gic-v2m-frame";
msi-controller;
-   reg = <0 0x10000 0 0x1000>;
+   reg = <0 0x10000 0 0x10000>;
};
 
v2m@20000 {
compatible = "arm,gic-v2m-frame";
msi-controller;
-   reg = <0 0x20000 0 0x1000>;
+   reg = <0 0x20000 0 0x10000>;
};
 
v2m@30000 {
compatible = "arm,gic-v2m-frame";
msi-controller;
-   reg = <0 0x30000 0 0x1000>;
+   reg = <0 0x30000 0 0x10000>;
};
};
 
-- 
2.7.4



[PATCH 3.16 220/254] can: af_can: canfd_rcv(): replace WARN_ONCE by pr_warn_once

2018-02-28 Thread Ben Hutchings
3.16.55-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Marc Kleine-Budde 

commit d4689846881d160a4d12a514e991a740bcb5d65a upstream.

If an invalid CANFD frame is received, from a driver or from a tun
interface, a Kernel warning is generated.

This patch replaces the WARN_ONCE by a simple pr_warn_once, so that a
kernel, booted with panic_on_warn, does not panic. A printk seems to be
more appropriate here.

Reported-by: syzbot+e3b775f40babeff6e...@syzkaller.appspotmail.com
Suggested-by: Dmitry Vyukov 
Acked-by: Oliver Hartkopp 
Signed-off-by: Marc Kleine-Budde 
[bwh: Backported to 3.16:
 - Keep using the 'drop' label, as it has another user
 - Adjust context]
Signed-off-by: Ben Hutchings 
---
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -742,13 +742,12 @@ static int canfd_rcv(struct sk_buff *skb
if (unlikely(!net_eq(dev_net(dev), _net)))
goto drop;
 
-   if (WARN_ONCE(dev->type != ARPHRD_CAN ||
- skb->len != CANFD_MTU ||
- cfd->len > CANFD_MAX_DLEN,
- "PF_CAN: dropped non conform CAN FD skbuf: "
- "dev type %d, len %d, datalen %d\n",
- dev->type, skb->len, cfd->len))
+   if (unlikely(dev->type != ARPHRD_CAN || skb->len != CANFD_MTU ||
+cfd->len > CANFD_MAX_DLEN)) {
+   pr_warn_once("PF_CAN: dropped non conform CAN FD skbuf: dev 
type %d, len %d, datalen %d\n",
+dev->type, skb->len, cfd->len);
goto drop;
+   }
 
can_receive(skb, dev);
return NET_RX_SUCCESS;



[PATCH 3.16 009/254] scsi: use dma_get_cache_alignment() as minimum DMA alignment

2018-02-28 Thread Ben Hutchings
3.16.55-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Huacai Chen 

commit 90addc6b3c9cda0146fbd62a08e234c2b224a80c upstream.

In non-coherent DMA mode, kernel uses cache flushing operations to
maintain I/O coherency, so scsi's block queue should be aligned to the
value returned by dma_get_cache_alignment().  Otherwise, If a DMA buffer
and a kernel structure share a same cache line, and if the kernel
structure has dirty data, cache_invalidate (no writeback) will cause
data corruption.

Signed-off-by: Huacai Chen 
[hch: rebased and updated the comment and changelog]
Signed-off-by: Christoph Hellwig 
Signed-off-by: Martin K. Petersen 
[bwh: Backported to 3.16: adjust context]
Signed-off-by: Ben Hutchings 
---
 drivers/scsi/scsi_lib.c | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1671,11 +1671,13 @@ struct request_queue *__scsi_alloc_queue
q->limits.cluster = 0;
 
/*
-* set a reasonable default alignment on word boundaries: the
-* host and device may alter it using
-* blk_queue_update_dma_alignment() later.
+* Set a reasonable default alignment:  The larger of 32-bit (dword),
+* which is a common minimum for HBAs, and the minimum DMA alignment,
+* which is set by the platform.
+*
+* Devices that require a bigger alignment can increase it later.
 */
-   blk_queue_dma_alignment(q, 0x03);
+   blk_queue_dma_alignment(q, max(4, dma_get_cache_alignment()) - 1);
 
return q;
 }



Re: [PATCH] nvme-pci: assign separate irq vectors for adminq and ioq0

2018-02-28 Thread Keith Busch
On Wed, Feb 28, 2018 at 11:46:20PM +0800, jianchao.wang wrote:
> 
> the irqbalance may migrate the adminq irq away from cpu0.

No, irqbalance can't touch managed IRQs. See irq_can_set_affinity_usr().


Re: [PATCH] nvme-pci: assign separate irq vectors for adminq and ioq0

2018-02-28 Thread jianchao.wang


On 02/28/2018 11:42 PM, jianchao.wang wrote:
> Hi Keith
> 
> Thanks for your kindly response and directive
> 
> On 02/28/2018 11:27 PM, Keith Busch wrote:
>> On Wed, Feb 28, 2018 at 10:53:31AM +0800, jianchao.wang wrote:
>>> On 02/27/2018 11:13 PM, Keith Busch wrote:
 On Tue, Feb 27, 2018 at 04:46:17PM +0800, Jianchao Wang wrote:
> Currently, adminq and ioq0 share the same irq vector. This is
> unfair for both adminq and ioq0.
>  - For adminq, its completion irq has to be bound on cpu0.
>  - For ioq0, when the irq fires for io completion, the adminq irq
>action has to be checked also.

 This change log could use some improvements. Why is it bad if admin
 interrupts affinity is with cpu0?
>>>
>>> adminq interrupts should be able to fire everywhere.
>>> do we have any reason to bound it on cpu0 ?
>>
>> Your patch will have the admin vector CPU affinity mask set to
>> 0xff..ff. The first set bit for an online CPU is the one the IRQ handler
>> will run on, so the admin queue will still only run on CPU 0.
> 
> hmmm...yes.
> When I test there is only one irq vector, I get following result:
>  124:  0  0 253541  0  0  0   
>0  0  IR-PCI-MSI 1048576-edge  nvme0q0, nvme0q1
> 

the irqbalance may migrate the adminq irq away from cpu0.

>>  
 Are you able to measure _any_ performance difference on IO queue 1 vs IO
 queue 2 that you can attribute to IO queue 1's sharing vector 0?
>>>
>>> Actually, I didn't get any performance improving on my own NVMe card.
>>> But it may be needed on some enterprise card, especially the media is 
>>> persist memory.
>>> nvme_irq will be invoked twice when ioq0 irq fires, this will introduce 
>>> another unnecessary DMA
>>> accessing on cq entry.
>>
>> A CPU reading its own memory isn't a DMA. It's just a cheap memory read.
> 
> Oh sorry, my bad, I mean it is operation on DMA address, it is uncached.
> nvme_irq
>   -> nvme_process_cq
> -> nvme_read_cqe
>   -> nvme_cqe_valid
> 
> static inline bool nvme_cqe_valid(struct nvme_queue *nvmeq, u16 head,
>   u16 phase)
> {
>   return (le16_to_cpu(nvmeq->cqes[head].status) & 1) == phase;
> }
> 
> Sincerely
> Jianchao
> 


[PATCH 3.16 022/254] usb: host: fix incorrect updating of offset

2018-02-28 Thread Ben Hutchings
3.16.55-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Colin Ian King 

commit 1d5a31582ef046d3b233f0da1a68ae26519b2f0a upstream.

The variable temp is incorrectly being updated, instead it should
be offset otherwise the loop just reads the same capability value
and loops forever.  Thanks to Alan Stern for pointing out the
correct fix to my original fix.  Fix also cleans up clang warning:

drivers/usb/host/ehci-dbg.c:840:4: warning: Value stored to 'temp'
is never read

Fixes: d49d43174400 ("USB: misc ehci updates")
Signed-off-by: Colin Ian King 
Acked-by: Alan Stern 
Signed-off-by: Greg Kroah-Hartman 
Signed-off-by: Ben Hutchings 
---
 drivers/usb/host/ehci-dbg.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/drivers/usb/host/ehci-dbg.c
+++ b/drivers/usb/host/ehci-dbg.c
@@ -850,7 +850,7 @@ static ssize_t fill_registers_buffer(str
default:/* unknown */
break;
}
-   temp = (cap >> 8) & 0xff;
+   offset = (cap >> 8) & 0xff;
}
}
 #endif



[PATCH 3.16 236/254] [media] V4L2: fix VIDIOC_CREATE_BUFS 32-bit compatibility mode data copy-back

2018-02-28 Thread Ben Hutchings
3.16.55-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Guennadi Liakhovetski 

commit 6ed9b28504326f8cf542e6b68245b2f7ce009216 upstream.

Similar to an earlier patch, fixing reading user-space data for the
VIDIOC_CREATE_BUFS ioctl() in 32-bit compatibility mode, this patch fixes
writing back of the possibly modified struct to the user. However, unlike
the former bug, this one is much less harmful, because it only results in
the kernel failing to write the .type field back to the user, but in fact
this is likely unneeded, because the kernel will hardly want to change
that field. Therefore this bug is more of a theoretical nature.

Signed-off-by: Guennadi Liakhovetski 
Acked-by: Laurent Pinchart 
Signed-off-by: Mauro Carvalho Chehab 
Signed-off-by: Ben Hutchings 
---
 drivers/media/v4l2-core/v4l2-compat-ioctl32.c | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

--- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c
+++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c
@@ -222,6 +222,9 @@ static int get_v4l2_create32(struct v4l2
 
 static int __put_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 
__user *up)
 {
+   if (put_user(kp->type, &up->type))
+   return -EFAULT;
+
switch (kp->type) {
case V4L2_BUF_TYPE_VIDEO_CAPTURE:
case V4L2_BUF_TYPE_VIDEO_OUTPUT:
@@ -248,8 +251,7 @@ static int __put_v4l2_format32(struct v4
 
 static int put_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 
__user *up)
 {
-   if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_format32)) ||
-   put_user(kp->type, &up->type))
+   if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_format32)))
return -EFAULT;
return __put_v4l2_format32(kp, up);
 }
@@ -257,8 +259,8 @@ static int put_v4l2_format32(struct v4l2
 static int put_v4l2_create32(struct v4l2_create_buffers *kp, struct 
v4l2_create_buffers32 __user *up)
 {
if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_create_buffers32)) 
||
-   copy_to_user(up, kp, offsetof(struct v4l2_create_buffers32, 
format.fmt)))
-   return -EFAULT;
+   copy_to_user(up, kp, offsetof(struct v4l2_create_buffers32, 
format)))
+   return -EFAULT;
return __put_v4l2_format32(&kp->format, &up->format);
 }
 



Re: [PATCH] arm64: dts: juno: fix size of GICv2m MSI frames

2018-02-28 Thread Sudeep Holla


On 28/02/18 15:51, Robin Murphy wrote:
> Hi Sudeep,
> 
> Cheers for sorting it out...
> 
> On 28/02/18 15:48, Sudeep Holla wrote:
>> Currently the size of GICv2m MSI frames are listed as 4kB while the
>> Juno TRM specifies 64kB for each of these MSI frames.
>>
>> Though the devices connected themself might just use the first 4kB,
> 
> s/themself/themselves/
> 
>> to be consistent with the genaral practice of 64kB boundary alignment
> 
> s/genaral/general/
> 

Fixed those now.

>> to all the devices, lets keep the size as 64kB. This might also help
>> in avoiding any surprise when passing the device to a VM.
>>
>> This patch increase the size of each GICv2m MSI frames from 4kB to 64kB
>> as per the specification.
> 
> Reviewed-by: Robin Murphy 
> 

Thanks.

-- 
Regards,
Sudeep


[PATCH 3.16 054/254] dm: fix various targets to dm_register_target after module __init resources created

2018-02-28 Thread Ben Hutchings
3.16.55-rc1 review patch.  If anyone has any objections, please let me know.

--

From: "monty_pa...@sina.com" 

commit 7e6358d244e4706fe612a77b9c36519a33600ac0 upstream.

A NULL pointer is seen if two concurrent "vgchange -ay -K "
processes race to load the dm-thin-pool module:

 PID: 25992 TASK: 883cd7d23500 CPU: 4 COMMAND: "vgchange"
  #0 [883cd743d600] machine_kexec at 81038fa9
  001 [883cd743d660] crash_kexec at 810c5992
  002 [883cd743d730] oops_end at 81515c90
  003 [883cd743d760] no_context at 81049f1b
  004 [883cd743d7b0] __bad_area_nosemaphore at 8104a1a5
  005 [883cd743d800] bad_area at 8104a2ce
  006 [883cd743d830] __do_page_fault at 8104aa6f
  007 [883cd743d950] do_page_fault at 81517bae
  008 [883cd743d980] page_fault at 81514f95
 [exception RIP: kmem_cache_alloc+108]
 RIP: 8116ef3c RSP: 883cd743da38 RFLAGS: 00010046
 RAX: 0004 RBX: 81121b90 RCX: 881bf1e78cc0
 RDX:  RSI: 00d0 RDI: 
 RBP: 883cd743da68 R8: 881bf1a4eb00 R9: 80042000
 R10: 2000 R11:  R12: 00d0
 R13:  R14: 00d0 R15: 0246
 ORIG_RAX:  CS: 0010 SS: 0018
  009 [883cd743da70] mempool_alloc_slab at 81121ba5
 010 [883cd743da80] mempool_create_node at 81122083
 011 [883cd743dad0] mempool_create at 811220f4
 012 [883cd743dae0] pool_ctr at a08de049 [dm_thin_pool]
 013 [883cd743dbd0] dm_table_add_target at a0005f2f [dm_mod]
 014 [883cd743dc30] table_load at a0008ba9 [dm_mod]
 015 [883cd743dc90] ctl_ioctl at a0009dc4 [dm_mod]

The race results in a NULL pointer because:

Process A (vgchange -ay -K):
a. send DM_LIST_VERSIONS_CMD ioctl;
b. pool_target not registered;
c. modprobe dm_thin_pool and wait until end.

Process B (vgchange -ay -K):
a. send DM_LIST_VERSIONS_CMD ioctl;
b. pool_target registered;
c. table_load->dm_table_add_target->pool_ctr;
d. _new_mapping_cache is NULL and panic.
Note:
1. process A and process B are two concurrent processes.
2. pool_target can be detected by process B but
_new_mapping_cache initialization has not ended.

To fix dm-thin-pool, and other targets (cache, multipath, and snapshot)
with the same problem, simply dm_register_target() after all resources
created during module init (as labelled with __init) are finished.

Signed-off-by: monty 
Signed-off-by: Mike Snitzer 
[bwh: Backported to 3.16: adjust context]
Signed-off-by: Ben Hutchings 
---
 drivers/md/dm-cache-target.c | 12 +--
 drivers/md/dm-mpath.c| 18 -
 drivers/md/dm-snap.c | 48 ++--
 drivers/md/dm-thin.c | 22 +---
 4 files changed, 49 insertions(+), 51 deletions(-)

--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -3108,18 +3108,18 @@ static int __init dm_cache_init(void)
 {
int r;
 
-   r = dm_register_target(_target);
-   if (r) {
-   DMERR("cache target registration failed: %d", r);
-   return r;
-   }
-
migration_cache = KMEM_CACHE(dm_cache_migration, 0);
if (!migration_cache) {
dm_unregister_target(_target);
return -ENOMEM;
}
 
+   r = dm_register_target(_target);
+   if (r) {
+   DMERR("cache target registration failed: %d", r);
+   return r;
+   }
+
return 0;
 }
 
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -1688,13 +1688,6 @@ static int __init dm_multipath_init(void
if (!_mpio_cache)
return -ENOMEM;
 
-   r = dm_register_target(_target);
-   if (r < 0) {
-   DMERR("register failed %d", r);
-   r = -EINVAL;
-   goto bad_register_target;
-   }
-
kmultipathd = alloc_workqueue("kmpathd", WQ_MEM_RECLAIM, 0);
if (!kmultipathd) {
DMERR("failed to create workqueue kmpathd");
@@ -1716,17 +1709,24 @@ static int __init dm_multipath_init(void
goto bad_alloc_kmpath_handlerd;
}
 
+   r = dm_register_target(_target);
+   if (r < 0) {
+   DMERR("register failed %d", r);
+   r = -EINVAL;
+   goto bad_register_target;
+   }
+
DMINFO("version %u.%u.%u loaded",
   multipath_target.version[0], multipath_target.version[1],
   multipath_target.version[2]);
 
return 0;
 
+bad_register_target:
+   

[PATCH 3.16 048/254] ext4: fix fdatasync(2) after fallocate(2) operation

2018-02-28 Thread Ben Hutchings
3.16.55-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Eryu Guan 

commit c894aa97577e47d3066b27b32499ecf899bfa8b0 upstream.

Currently, fallocate(2) with KEEP_SIZE followed by a fdatasync(2)
then crash, we'll see wrong allocated block number (stat -c %b), the
blocks allocated beyond EOF are all lost. fstests generic/468
exposes this bug.

Commit 67a7d5f561f4 ("ext4: fix fdatasync(2) after extent
manipulation operations") fixed all the other extent manipulation
operation paths such as hole punch, zero range, collapse range etc.,
but forgot the fallocate case.

So similarly, fix it by recording the correct journal tid in ext4
inode in fallocate(2) path, so that ext4_sync_file() will wait for
the right tid to be committed on fdatasync(2).

This addresses the test failure in xfstests test generic/468.

Signed-off-by: Eryu Guan 
Signed-off-by: Theodore Ts'o 
Signed-off-by: Ben Hutchings 
---
 fs/ext4/extents.c | 1 +
 1 file changed, 1 insertion(+)

--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4726,6 +4726,7 @@ retry:
EXT4_INODE_EOFBLOCKS);
}
ext4_mark_inode_dirty(handle, inode);
+   ext4_update_inode_fsync_trans(handle, inode, 1);
ret2 = ext4_journal_stop(handle);
if (ret2)
break;



[PATCH V2] lightnvm: pblk: remove unused variable

2018-02-28 Thread Javier González
# Changes since V1:
 - Rebase on top of latest 2.0 changes


Javier González (1):
  lightnvm: pblk: remove unused variable

 drivers/lightnvm/pblk-core.c | 3 ---
 1 file changed, 3 deletions(-)

-- 
2.7.4



[PATCH 3.16 118/254] parisc: Hide Diva-built-in serial aux and graphics card

2018-02-28 Thread Ben Hutchings
3.16.55-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Helge Deller 

commit bcf3f1752a622f1372d3252d0fea8855d89812e7 upstream.

Diva GSP card has built-in serial AUX port and ATI graphic card which simply
don't work and which both don't have external connectors.  User Guides even
mention that those devices shouldn't be used.
So, prevent that Linux drivers try to enable those devices.

Signed-off-by: Helge Deller 
Signed-off-by: Ben Hutchings 
---
 drivers/parisc/lba_pci.c | 33 +
 1 file changed, 33 insertions(+)

--- a/drivers/parisc/lba_pci.c
+++ b/drivers/parisc/lba_pci.c
@@ -1652,3 +1652,36 @@ void lba_set_iregs(struct parisc_device
iounmap(base_addr);
 }
 
+
+/*
+ * The design of the Diva management card in rp34x0 machines (rp3410, rp3440)
+ * seems rushed, so that many built-in components simply don't work.
+ * The following quirks disable the serial AUX port and the built-in ATI RV100
+ * Radeon 7000 graphics card which both don't have any external connectors and
+ * thus are useless, and even worse, e.g. the AUX port occupies ttyS0 and as
+ * such makes those machines the only PARISC machines on which we can't use
+ * ttyS0 as boot console.
+ */
+static void quirk_diva_ati_card(struct pci_dev *dev)
+{
+   if (dev->subsystem_vendor != PCI_VENDOR_ID_HP ||
+   dev->subsystem_device != 0x1292)
+   return;
+
+   dev_info(&dev->dev, "Hiding Diva built-in ATI card");
+   dev->device = 0;
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_RADEON_QY,
+   quirk_diva_ati_card);
+
+static void quirk_diva_aux_disable(struct pci_dev *dev)
+{
+   if (dev->subsystem_vendor != PCI_VENDOR_ID_HP ||
+   dev->subsystem_device != 0x1291)
+   return;
+
+   dev_info(&dev->dev, "Hiding Diva built-in AUX serial device");
+   dev->device = 0;
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_DIVA_AUX,
+   quirk_diva_aux_disable);



[PATCH 3.16 203/254] usb: misc: usb3503: make sure reset is low for at least 100us

2018-02-28 Thread Ben Hutchings
3.16.55-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Stefan Agner 

commit b8626f1dc29d3eee444bfaa92146ec7b291ef41c upstream.

When using a GPIO which is high by default, and initialize the
driver in USB Hub mode, initialization fails with:
  [  111.757794] usb3503 0-0008: SP_ILOCK failed (-5)

The reason seems to be that the chip is not properly reset.
Probe does initialize reset low, however some lines later the
code already sets it back high, which is not long enough.

Make sure reset is asserted for at least 100us by inserting a
delay after initializing the reset pin during probe.

Signed-off-by: Stefan Agner 
Signed-off-by: Greg Kroah-Hartman 
Signed-off-by: Ben Hutchings 
---
 drivers/usb/misc/usb3503.c | 2 ++
 1 file changed, 2 insertions(+)

--- a/drivers/usb/misc/usb3503.c
+++ b/drivers/usb/misc/usb3503.c
@@ -292,6 +292,8 @@ static int usb3503_probe(struct usb3503
if (gpio_is_valid(hub->gpio_reset)) {
err = devm_gpio_request_one(dev, hub->gpio_reset,
GPIOF_OUT_INIT_LOW, "usb3503 reset");
+   /* Datasheet defines a hardware reset to be at least 100us */
+   usleep_range(100, 10000);
if (err) {
dev_err(dev,
"unable to request GPIO %d as reset pin (%d)\n",



[PATCH 3.16 140/254] ALSA: hda - Add MIC_NO_PRESENCE fixup for 2 HP machines

2018-02-28 Thread Ben Hutchings
3.16.55-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Hui Wang 

commit 322f74ede933b3e2cb78768b6a6fdbfbf478a0c1 upstream.

There is a headset jack on the front panel, when we plug a headset
into it, the headset mic can't trigger unsol events, and
read_pin_sense() can't detect its presence too. So add this fixup
to fix this issue.

Signed-off-by: Hui Wang 
Signed-off-by: Takashi Iwai 
[bwh: Backported to 3.16: adjust context]
Signed-off-by: Ben Hutchings 
---
 sound/pci/hda/patch_conexant.c | 29 +
 1 file changed, 29 insertions(+)

--- a/sound/pci/hda/patch_conexant.c
+++ b/sound/pci/hda/patch_conexant.c
@@ -2845,6 +2845,8 @@ enum {
CXT_FIXUP_MUTE_LED_EAPD,
CXT_FIXUP_HP_SPECTRE,
CXT_FIXUP_HP_GATE_MIC,
+   CXT_FIXUP_HEADSET_MIC,
+   CXT_FIXUP_HP_MIC_NO_PRESENCE,
 };
 
 /* for hda_fixup_thinkpad_acpi() */
@@ -2923,6 +2925,18 @@ static void cxt_fixup_headphone_mic(stru
}
 }
 
+static void cxt_fixup_headset_mic(struct hda_codec *codec,
+   const struct hda_fixup *fix, int action)
+{
+   struct conexant_spec *spec = codec->spec;
+
+   switch (action) {
+   case HDA_FIXUP_ACT_PRE_PROBE:
+   spec->parse_flags |= HDA_PINCFG_HEADSET_MIC;
+   break;
+   }
+}
+
 /* OPLC XO 1.5 fixup */
 
 /* OLPC XO-1.5 supports DC input mode (e.g. for use with analog sensors)
@@ -3374,6 +3388,19 @@ static const struct hda_fixup cxt_fixups
.type = HDA_FIXUP_FUNC,
.v.func = cxt_fixup_hp_gate_mic_jack,
},
+   [CXT_FIXUP_HEADSET_MIC] = {
+   .type = HDA_FIXUP_FUNC,
+   .v.func = cxt_fixup_headset_mic,
+   },
+   [CXT_FIXUP_HP_MIC_NO_PRESENCE] = {
+   .type = HDA_FIXUP_PINS,
+   .v.pins = (const struct hda_pintbl[]) {
+   { 0x1a, 0x02a1113c },
+   { }
+   },
+   .chained = true,
+   .chain_id = CXT_FIXUP_HEADSET_MIC,
+   },
 };
 
 static const struct snd_pci_quirk cxt5045_fixups[] = {
@@ -3425,6 +3452,8 @@ static const struct snd_pci_quirk cxt506
SND_PCI_QUIRK(0x1025, 0x054f, "Acer Aspire 4830T", 
CXT_FIXUP_ASPIRE_DMIC),
SND_PCI_QUIRK(0x103c, 0x8174, "HP Spectre x360", CXT_FIXUP_HP_SPECTRE),
SND_PCI_QUIRK(0x103c, 0x8115, "HP Z1 Gen3", CXT_FIXUP_HP_GATE_MIC),
+   SND_PCI_QUIRK(0x103c, 0x8299, "HP 800 G3 SFF", 
CXT_FIXUP_HP_MIC_NO_PRESENCE),
+   SND_PCI_QUIRK(0x103c, 0x829a, "HP 800 G3 DM", 
CXT_FIXUP_HP_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1043, 0x138d, "Asus", CXT_FIXUP_HEADPHONE_MIC_PIN),
SND_PCI_QUIRK(0x152d, 0x0833, "OLPC XO-1.5", CXT_FIXUP_OLPC_XO),
SND_PCI_QUIRK(0x17aa, 0x20f2, "Lenovo T400", CXT_PINCFG_LENOVO_TP410),



[PATCH 3.16 114/254] kernel: make groups_sort calling a responsibility group_info allocators

2018-02-28 Thread Ben Hutchings
3.16.55-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Thiago Rafael Becker 

commit bdcf0a423ea1c40bbb40e7ee483b50fc8aa3d758 upstream.

In testing, we found that nfsd threads may call set_groups in parallel
for the same entry cached in auth.unix.gid, racing in the call of
groups_sort, corrupting the groups for that entry and leading to
permission denials for the client.

This patch:
 - Make groups_sort globally visible.
 - Move the call to groups_sort to the modifiers of group_info
 - Remove the call to groups_sort from set_groups

Link: http://lkml.kernel.org/r/20171211151420.18655-1-thiago.bec...@gmail.com
Signed-off-by: Thiago Rafael Becker 
Reviewed-by: Matthew Wilcox 
Reviewed-by: NeilBrown 
Acked-by: "J. Bruce Fields" 
Cc: Al Viro 
Cc: Martin Schwidefsky 
Signed-off-by: Andrew Morton 
Signed-off-by: Linus Torvalds 
[bwh: Backported to 3.16: adjust context]
Signed-off-by: Ben Hutchings 
---
 arch/s390/kernel/compat_linux.c   | 1 +
 fs/nfsd/auth.c| 3 +++
 include/linux/cred.h  | 1 +
 kernel/groups.c   | 5 +++--
 kernel/uid16.c| 1 +
 net/sunrpc/auth_gss/gss_rpc_xdr.c | 1 +
 net/sunrpc/auth_gss/svcauth_gss.c | 1 +
 net/sunrpc/svcauth_unix.c | 2 ++
 8 files changed, 13 insertions(+), 2 deletions(-)

--- a/arch/s390/kernel/compat_linux.c
+++ b/arch/s390/kernel/compat_linux.c
@@ -263,6 +263,7 @@ COMPAT_SYSCALL_DEFINE2(s390_setgroups16,
return retval;
}
 
+   groups_sort(group_info);
retval = set_current_groups(group_info);
put_group_info(group_info);
 
--- a/fs/nfsd/auth.c
+++ b/fs/nfsd/auth.c
@@ -59,6 +59,9 @@ int nfsd_setuser(struct svc_rqst *rqstp,
GROUP_AT(gi, i) = exp->ex_anon_gid;
else
GROUP_AT(gi, i) = GROUP_AT(rqgi, i);
+
+   /* Each thread allocates its own gi, no race */
+   groups_sort(gi);
}
} else {
gi = get_group_info(rqgi);
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -69,6 +69,7 @@ extern int set_current_groups(struct gro
 extern void set_groups(struct cred *, struct group_info *);
 extern int groups_search(const struct group_info *, kgid_t);
 extern bool may_setgroups(void);
+extern void groups_sort(struct group_info *);
 
 /* access the groups "array" with this macro */
 #define GROUP_AT(gi, i) \
--- a/kernel/groups.c
+++ b/kernel/groups.c
@@ -104,7 +104,7 @@ static int groups_from_user(struct group
 }
 
 /* a simple Shell sort */
-static void groups_sort(struct group_info *group_info)
+void groups_sort(struct group_info *group_info)
 {
int base, max, stride;
int gidsetsize = group_info->ngroups;
@@ -131,6 +131,7 @@ static void groups_sort(struct group_inf
stride /= 3;
}
 }
+EXPORT_SYMBOL(groups_sort);
 
 /* a simple bsearch */
 int groups_search(const struct group_info *group_info, kgid_t grp)
@@ -162,7 +163,6 @@ int groups_search(const struct group_inf
 void set_groups(struct cred *new, struct group_info *group_info)
 {
put_group_info(new->group_info);
-   groups_sort(group_info);
get_group_info(group_info);
new->group_info = group_info;
 }
@@ -246,6 +246,7 @@ SYSCALL_DEFINE2(setgroups, int, gidsetsi
return retval;
}
 
+   groups_sort(group_info);
retval = set_current_groups(group_info);
put_group_info(group_info);
 
--- a/kernel/uid16.c
+++ b/kernel/uid16.c
@@ -190,6 +190,7 @@ SYSCALL_DEFINE2(setgroups16, int, gidset
return retval;
}
 
+   groups_sort(group_info);
retval = set_current_groups(group_info);
put_group_info(group_info);
 
--- a/net/sunrpc/auth_gss/gss_rpc_xdr.c
+++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c
@@ -231,6 +231,7 @@ static int gssx_dec_linux_creds(struct x
goto out_free_groups;
GROUP_AT(creds->cr_group_info, i) = kgid;
}
+   groups_sort(creds->cr_group_info);
 
return 0;
 out_free_groups:
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -479,6 +479,7 @@ static int rsc_parse(struct cache_detail
goto out;
GROUP_AT(rsci.cred.cr_group_info, i) = kgid;
}
+   groups_sort(rsci.cred.cr_group_info);
 
/* mech name */
len = qword_get(, buf, mlen);
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -520,6 +520,7 @@ static int unix_gid_parse(struct cache_d
GROUP_AT(ug.gi, i) = kgid;
}
 
+   

[PATCH 3.16 137/254] n_tty: fix EXTPROC vs ICANON interaction with TIOCINQ (aka FIONREAD)

2018-02-28 Thread Ben Hutchings
3.16.55-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Linus Torvalds 

commit 966031f340185eddd05affcf72b740549f056348 upstream.

We added support for EXTPROC back in 2010 in commit 26df6d13406d ("tty:
Add EXTPROC support for LINEMODE") and the intent was to allow it to
override some (all?) ICANON behavior.  Quoting from that original commit
message:

 There is a new bit in the termios local flag word, EXTPROC.
 When this bit is set, several aspects of the terminal driver
 are disabled.  Input line editing, character echo, and mapping
 of signals are all disabled.  This allows the telnetd to turn
 off these functions when in linemode, but still keep track of
 what state the user wants the terminal to be in.

but the problem turns out that "several aspects of the terminal driver
are disabled" is a bit ambiguous, and you can really confuse the n_tty
layer by setting EXTPROC and then causing some of the ICANON invariants
to no longer be maintained.

This fixes at least one such case (TIOCINQ) becoming unhappy because of
the confusion over whether ICANON really means ICANON when EXTPROC is set.

This basically makes TIOCINQ match the case of read: if EXTPROC is set,
we ignore ICANON.  Also, make sure to reset the ICANON state if EXTPROC
changes, not just if ICANON changes.

Fixes: 26df6d13406d ("tty: Add EXTPROC support for LINEMODE")
Reported-by: Tetsuo Handa 
Reported-by: syzkaller 
Cc: Jiri Slaby 
Signed-off-by: Linus Torvalds 
Signed-off-by: Greg Kroah-Hartman 
Signed-off-by: Ben Hutchings 
---
 drivers/tty/n_tty.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

--- a/drivers/tty/n_tty.c
+++ b/drivers/tty/n_tty.c
@@ -1809,7 +1809,7 @@ static void n_tty_set_termios(struct tty
 {
struct n_tty_data *ldata = tty->disc_data;
 
-   if (!old || (old->c_lflag ^ tty->termios.c_lflag) & ICANON) {
+   if (!old || (old->c_lflag ^ tty->termios.c_lflag) & (ICANON | EXTPROC)) 
{
bitmap_zero(ldata->read_flags, N_TTY_BUF_SIZE);
ldata->line_start = ldata->read_tail;
if (!L_ICANON(tty) || !read_cnt(ldata)) {
@@ -2520,7 +2520,7 @@ static int n_tty_ioctl(struct tty_struct
return put_user(tty_chars_in_buffer(tty), (int __user *) arg);
case TIOCINQ:
down_write(>termios_rwsem);
-   if (L_ICANON(tty))
+   if (L_ICANON(tty) && !L_EXTPROC(tty))
retval = inq_canon(ldata);
else
retval = read_cnt(ldata);



[PATCH 3.16 130/254] usbip: fix usbip bind writing random string after command in match_busid

2018-02-28 Thread Ben Hutchings
3.16.55-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Juan Zea 

commit 544c4605acc5ae4afe7dd5914147947db182f2fb upstream.

usbip bind writes commands followed by random string when writing to
match_busid attribute in sysfs, caused by using full variable size
instead of string length.

Signed-off-by: Juan Zea 
Acked-by: Shuah Khan 
Signed-off-by: Greg Kroah-Hartman 
[bwh: Backported to 3.16: adjust context]
Signed-off-by: Ben Hutchings 
---
 drivers/staging/usbip/userspace/src/utils.c | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

--- a/drivers/staging/usbip/userspace/src/utils.c
+++ b/drivers/staging/usbip/userspace/src/utils.c
@@ -30,6 +30,7 @@ int modify_match_busid(char *busid, int
char command[SYSFS_BUS_ID_SIZE + 4];
char match_busid_attr_path[SYSFS_PATH_MAX];
int rc;
+   int cmd_size;
 
snprintf(match_busid_attr_path, sizeof(match_busid_attr_path),
 "%s/%s/%s/%s/%s/%s", SYSFS_MNT_PATH, SYSFS_BUS_NAME,
@@ -37,12 +38,14 @@ int modify_match_busid(char *busid, int
 attr_name);
 
if (add)
-   snprintf(command, SYSFS_BUS_ID_SIZE + 4, "add %s", busid);
+   cmd_size = snprintf(command, SYSFS_BUS_ID_SIZE + 4, "add %s",
+   busid);
else
-   snprintf(command, SYSFS_BUS_ID_SIZE + 4, "del %s", busid);
+   cmd_size = snprintf(command, SYSFS_BUS_ID_SIZE + 4, "del %s",
+   busid);
 
rc = write_sysfs_attribute(match_busid_attr_path, command,
-  sizeof(command));
+  cmd_size);
if (rc < 0) {
dbg("failed to write match_busid: %s", strerror(errno));
return -1;



[PATCH 3.16 174/254] x86/alternatives: Add missing '\n' at end of ALTERNATIVE inline asm

2018-02-28 Thread Ben Hutchings
3.16.55-rc1 review patch.  If anyone has any objections, please let me know.

--

From: David Woodhouse 

commit b9e705ef7cfaf22db0daab91ad3cd33b0fa32eb9 upstream.

Where an ALTERNATIVE is used in the middle of an inline asm block, this
would otherwise lead to the following instruction being appended directly
to the trailing ".popsection", and a failed compile.

Fixes: 9cebed423c84 ("x86, alternative: Use .pushsection/.popsection")
Signed-off-by: David Woodhouse 
Signed-off-by: Thomas Gleixner 
Cc: gno...@lxorguk.ukuu.org.uk
Cc: Rik van Riel 
Cc: a...@linux.intel.com
Cc: Tim Chen 
Cc: Peter Zijlstra 
Cc: Paul Turner 
Cc: Jiri Kosina 
Cc: Andy Lutomirski 
Cc: Dave Hansen 
Cc: Kees Cook 
Cc: Linus Torvalds 
Cc: Greg Kroah-Hartman 
Link: https://lkml.kernel.org/r/20180104143710.8961-8-d...@amazon.co.uk
Signed-off-by: Ben Hutchings 
---
 arch/x86/include/asm/alternative.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -124,7 +124,7 @@ static inline int alternatives_text_rese
".popsection\n" \
".pushsection .altinstr_replacement, \"ax\"\n"  \
ALTINSTR_REPLACEMENT(newinstr, feature, 1)  \
-   ".popsection"
+   ".popsection\n"
 
 #define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\
OLDINSTR_2(oldinstr, 1, 2)  \
@@ -135,7 +135,7 @@ static inline int alternatives_text_rese
".pushsection .altinstr_replacement, \"ax\"\n"  \
ALTINSTR_REPLACEMENT(newinstr1, feature1, 1)\
ALTINSTR_REPLACEMENT(newinstr2, feature2, 2)\
-   ".popsection"
+   ".popsection\n"
 
 /*
  * This must be included *after* the definition of ALTERNATIVE due to



[PATCH 3.16 135/254] net/mlx5: Stay in polling mode when command EQ destroy fails

2018-02-28 Thread Ben Hutchings
3.16.55-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Moshe Shemesh 

commit a2fba188fd5eadd6061bef4f2f2577a43231ebf3 upstream.

During unload, on mlx5_stop_eqs we move command interface from events
mode to polling mode, but if command interface EQ destroy fails we move
back to events mode.
That's wrong since even if we fail to destroy command interface EQ, we
do release its irq, so no interrupts will be received.

Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters")
Signed-off-by: Moshe Shemesh 
Signed-off-by: Saeed Mahameed 
Signed-off-by: Ben Hutchings 
---
 drivers/net/ethernet/mellanox/mlx5/core/eq.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -502,11 +502,9 @@ void mlx5_stop_eqs(struct mlx5_core_dev
mlx5_cmd_use_polling(dev);
 
err = mlx5_destroy_unmap_eq(dev, >cmd_eq);
-   if (err) {
+   if (err)
mlx5_core_err(dev, "failed to destroy command eq, err(%d)\n",
  err);
-   mlx5_cmd_use_events(dev);
-   }
 }
 
 int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,



[PATCH 3.16 061/254] efi: Move some sysfs files to be read-only by root

2018-02-28 Thread Ben Hutchings
3.16.55-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Greg Kroah-Hartman 

commit af97a77bc01ce49a466f9d4c0125479e2e2230b6 upstream.

Thanks to the scripts/leaking_addresses.pl script, it was found that
some EFI values should not be readable by non-root users.

So make them root-only, and to do that, add a __ATTR_RO_MODE() macro to
make this easier, and use it in other places at the same time.

Reported-by: Linus Torvalds 
Tested-by: Dave Young 
Signed-off-by: Greg Kroah-Hartman 
Signed-off-by: Ard Biesheuvel 
Cc: H. Peter Anvin 
Cc: Matt Fleming 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: linux-...@vger.kernel.org
Link: http://lkml.kernel.org/r/20171206095010.24170-2-ard.biesheu...@linaro.org
Signed-off-by: Ingo Molnar 
[bwh: Backported to 3.16: drop changes in esrt.c]
Signed-off-by: Ben Hutchings 
---
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -72,8 +72,7 @@ static ssize_t systab_show(struct kobjec
return str - buf;
 }
 
-static struct kobj_attribute efi_attr_systab =
-   __ATTR(systab, 0400, systab_show, NULL);
+static struct kobj_attribute efi_attr_systab = __ATTR_RO_MODE(systab, 0400);
 
 #define EFI_FIELD(var) efi.var
 
--- a/drivers/firmware/efi/runtime-map.c
+++ b/drivers/firmware/efi/runtime-map.c
@@ -67,11 +67,11 @@ static ssize_t map_attr_show(struct kobj
return map_attr->show(entry, buf);
 }
 
-static struct map_attribute map_type_attr = __ATTR_RO(type);
-static struct map_attribute map_phys_addr_attr   = __ATTR_RO(phys_addr);
-static struct map_attribute map_virt_addr_attr  = __ATTR_RO(virt_addr);
-static struct map_attribute map_num_pages_attr  = __ATTR_RO(num_pages);
-static struct map_attribute map_attribute_attr  = __ATTR_RO(attribute);
+static struct map_attribute map_type_attr = __ATTR_RO_MODE(type, 0400);
+static struct map_attribute map_phys_addr_attr = __ATTR_RO_MODE(phys_addr, 
0400);
+static struct map_attribute map_virt_addr_attr = __ATTR_RO_MODE(virt_addr, 
0400);
+static struct map_attribute map_num_pages_attr = __ATTR_RO_MODE(num_pages, 
0400);
+static struct map_attribute map_attribute_attr = __ATTR_RO_MODE(attribute, 
0400);
 
 /*
  * These are default attributes that are added for every memmap entry.
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -82,6 +82,12 @@ struct attribute_group {
.show   = _name##_show, \
 }
 
+#define __ATTR_RO_MODE(_name, _mode) { \
+   .attr   = { .name = __stringify(_name), \
+   .mode = VERIFY_OCTAL_PERMISSIONS(_mode) },  \
+   .show   = _name##_show, \
+}
+
 #define __ATTR_WO(_name) { \
.attr   = { .name = __stringify(_name), .mode = S_IWUSR },  \
.store  = _name##_store,\



[PATCH 3.16 099/254] MIPS: math-emu: Define IEEE 754-2008 feature control bits

2018-02-28 Thread Ben Hutchings
3.16.55-rc1 review patch.  If anyone has any objections, please let me know.

--

From: "Maciej W. Rozycki" 

commit f1f3b7ebac08161761c352fd070cfa07b7b94c54 upstream.

Define IEEE 754-2008 feature control bits: FIR.HAS2008, FCSR.ABS2008 and
FCSR.NAN2008, and update the `_ieee754_csr' structure accordingly.

For completeness define FIR.UFRP too.

Signed-off-by: Maciej W. Rozycki 
Cc: linux-m...@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/9709/
Signed-off-by: Ralf Baechle 
[bwh: Backported to 3.16: In cop1Emulate(), keep converting the rounding mode]
Signed-off-by: Ben Hutchings 
---
--- a/arch/mips/include/asm/mipsregs.h
+++ b/arch/mips/include/asm/mipsregs.h
@@ -136,10 +136,13 @@
 #define FPU_CSR_COND7  0x8000  /* $fcc7 */
 
 /*
- * Bits 18 - 20 of the FPU Status Register will be read as 0,
+ * Bits 22:20 of the FPU Status Register will be read as 0,
  * and should be written as zero.
  */
-#define FPU_CSR_RSVD   0x001c
+#define FPU_CSR_RSVD   (_ULCAST_(7) << 20)
+
+#define FPU_CSR_ABS2008(_ULCAST_(1) << 19)
+#define FPU_CSR_NAN2008(_ULCAST_(1) << 18)
 
 /*
  * X the exception cause indicator
@@ -687,6 +690,8 @@
 #define MIPS_FPIR_W(_ULCAST_(1) << 20)
 #define MIPS_FPIR_L(_ULCAST_(1) << 21)
 #define MIPS_FPIR_F64  (_ULCAST_(1) << 22)
+#define MIPS_FPIR_HAS2008  (_ULCAST_(1) << 23)
+#define MIPS_FPIR_UFRP (_ULCAST_(1) << 28)
 
 /*
  * Bits in the MIPS32 Memory Segmentation registers.
--- a/arch/mips/math-emu/cp1emu.c
+++ b/arch/mips/math-emu/cp1emu.c
@@ -929,10 +929,12 @@ emul:
 MIPSInst_RT(ir), value);
 
/*
-* Don't write reserved bits,
+* Don't write unsupported bits,
 * and convert to ieee library modes
 */
-   ctx->fcr31 = (value & ~(FPU_CSR_RSVD | 
FPU_CSR_RM)) |
+   ctx->fcr31 = (value &
+ ~(FPU_CSR_RSVD | FPU_CSR_ABS2008 |
+   FPU_CSR_NAN2008 | FPU_CSR_RM)) |
 modeindex(value);
}
if ((ctx->fcr31 >> 5) & ctx->fcr31 & FPU_CSR_ALL_E) {
--- a/arch/mips/math-emu/ieee754.h
+++ b/arch/mips/math-emu/ieee754.h
@@ -195,15 +195,17 @@ static inline int ieee754dp_ge(union iee
  * The control status register
  */
 struct _ieee754_csr {
-   __BITFIELD_FIELD(unsigned pad0:7,
-   __BITFIELD_FIELD(unsigned nod:1,/* set 1 for no denormalised 
numbers */
-   __BITFIELD_FIELD(unsigned c:1,  /* condition */
-   __BITFIELD_FIELD(unsigned pad1:5,
+   __BITFIELD_FIELD(unsigned fcc:7,/* condition[7:1] */
+   __BITFIELD_FIELD(unsigned nod:1,/* set 1 for no denormals */
+   __BITFIELD_FIELD(unsigned c:1,  /* condition[0] */
+   __BITFIELD_FIELD(unsigned pad0:3,
+   __BITFIELD_FIELD(unsigned abs2008:1,/* IEEE 754-2008 ABS/NEG.fmt */
+   __BITFIELD_FIELD(unsigned nan2008:1,/* IEEE 754-2008 NaN mode */
__BITFIELD_FIELD(unsigned cx:6, /* exceptions this operation */
__BITFIELD_FIELD(unsigned mx:5, /* exception enable  mask */
__BITFIELD_FIELD(unsigned sx:5, /* exceptions total */
__BITFIELD_FIELD(unsigned rm:2, /* current rounding mode */
-   ;
+   ;))
 };
 #define ieee754_csr (*(struct _ieee754_csr *)(>thread.fpu.fcr31))
 



[PATCH 3.16 234/254] blk-mq: fix race between timeout and freeing request

2018-02-28 Thread Ben Hutchings
3.16.55-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Ming Lei 

commit 0048b4837affd153897ed183492070027aa9 upstream.

Inside timeout handler, blk_mq_tag_to_rq() is called
to retrieve the request from one tag. This way is obviously
wrong because the request can be freed any time and some
fields of the request can't be trusted, then kernel oops
might be triggered[1].

Currently wrt. blk_mq_tag_to_rq(), the only special case is
that the flush request can share same tag with the request
cloned from, and the two requests can't be active at the same
time, so this patch fixes the above issue by updating tags->rqs[tag]
with the active request(either flush rq or the request cloned
from) of the tag.

Also blk_mq_tag_to_rq() gets much simplified with this patch.

Given blk_mq_tag_to_rq() is mainly for drivers and the caller must
make sure the request can't be freed, so in bt_for_each() this
helper is replaced with tags->rqs[tag].

[1] kernel oops log
[  439.696220] BUG: unable to handle kernel NULL pointer dereference at 
0158^M
[  439.697162] IP: [] blk_mq_tag_to_rq+0x21/0x6e^M
[  439.700653] PGD 7ef765067 PUD 7ef764067 PMD 0 ^M
[  439.700653] Oops:  [#1] PREEMPT SMP DEBUG_PAGEALLOC ^M
[  439.700653] Dumping ftrace buffer:^M
[  439.700653](ftrace buffer empty)^M
[  439.700653] Modules linked in: nbd ipv6 kvm_intel kvm serio_raw^M
[  439.700653] CPU: 6 PID: 2779 Comm: stress-ng-sigfd Not tainted 
4.2.0-rc5-next-20150805+ #265^M
[  439.730500] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
Bochs 01/01/2011^M
[  439.730500] task: 880605308000 ti: 88060530c000 task.ti: 
88060530c000^M
[  439.730500] RIP: 0010:[]  [] 
blk_mq_tag_to_rq+0x21/0x6e^M
[  439.730500] RSP: 0018:880819203da0  EFLAGS: 00010283^M
[  439.730500] RAX: 880811b0e000 RBX: 8800bb465f00 RCX: 
0002^M
[  439.730500] RDX:  RSI: 0202 RDI: 
^M
[  439.730500] RBP: 880819203db0 R08: 0002 R09: 
^M
[  439.730500] R10:  R11:  R12: 
0202^M
[  439.730500] R13: 880814104800 R14: 0002 R15: 
880811a2ea00^M
[  439.730500] FS:  7f165b3f5740() GS:88081920() 
knlGS:^M
[  439.730500] CS:  0010 DS:  ES:  CR0: 8005003b^M
[  439.730500] CR2: 0158 CR3: 0007ef766000 CR4: 
06e0^M
[  439.730500] Stack:^M
[  439.730500]  0008 8808114eed90 880819203e00 
812dc104^M
[  439.755663]  880819203e40 812d9f5e 0200 
8808114eed80^M
[  439.755663] Call Trace:^M
[  439.755663]   ^M
[  439.755663]  [] bt_for_each+0x6e/0xc8^M
[  439.755663]  [] ? blk_mq_rq_timed_out+0x6a/0x6a^M
[  439.755663]  [] ? blk_mq_rq_timed_out+0x6a/0x6a^M
[  439.755663]  [] blk_mq_tag_busy_iter+0x55/0x5e^M
[  439.755663]  [] ? blk_mq_bio_to_request+0x38/0x38^M
[  439.755663]  [] blk_mq_rq_timer+0x5d/0xd4^M
[  439.755663]  [] call_timer_fn+0xf7/0x284^M
[  439.755663]  [] ? call_timer_fn+0x5/0x284^M
[  439.755663]  [] ? blk_mq_bio_to_request+0x38/0x38^M
[  439.755663]  [] run_timer_softirq+0x1ce/0x1f8^M
[  439.755663]  [] __do_softirq+0x181/0x3a4^M
[  439.755663]  [] irq_exit+0x40/0x94^M
[  439.755663]  [] smp_apic_timer_interrupt+0x33/0x3e^M
[  439.755663]  [] apic_timer_interrupt+0x84/0x90^M
[  439.755663]   ^M
[  439.755663]  [] ? _raw_spin_unlock_irq+0x32/0x4a^M
[  439.755663]  [] finish_task_switch+0xe0/0x163^M
[  439.755663]  [] ? finish_task_switch+0xa2/0x163^M
[  439.755663]  [] __schedule+0x469/0x6cd^M
[  439.755663]  [] schedule+0x82/0x9a^M
[  439.789267]  [] signalfd_read+0x186/0x49a^M
[  439.790911]  [] ? wake_up_q+0x47/0x47^M
[  439.790911]  [] __vfs_read+0x28/0x9f^M
[  439.790911]  [] ? __fget_light+0x4d/0x74^M
[  439.790911]  [] vfs_read+0x7a/0xc6^M
[  439.790911]  [] SyS_read+0x49/0x7f^M
[  439.790911]  [] entry_SYSCALL_64_fastpath+0x12/0x6f^M
[  439.790911] Code: 48 89 e5 e8 a9 b8 e7 ff 5d c3 0f 1f 44 00 00 55 89
f2 48 89 e5 41 54 41 89 f4 53 48 8b 47 60 48 8b 1c d0 48 8b 7b 30 48 8b
53 38 <48> 8b 87 58 01 00 00 48 85 c0 75 09 48 8b 97 88 0c 00 00 eb 10
^M
[  439.790911] RIP  [] blk_mq_tag_to_rq+0x21/0x6e^M
[  439.790911]  RSP ^M
[  439.790911] CR2: 0158^M
[  439.790911] ---[ end trace d40af58949325661 ]---^M

Signed-off-by: Ming Lei 
Signed-off-by: Jens Axboe 
[bwh: Backported to 3.16:
 - Flush state is in struct request_queue, not struct blk_flush_queue
 - Flush request cloning is done in blk_mq_clone_flush_request() rather
   than blk_kick_flush()
 - Drop changes in bt{,_tags}_for_each()
 - Adjust filename, context]
Signed-off-by: Ben Hutchings 
---
 block/blk-flush.c  | 15 ++-
 block/blk-mq-tag.c |  4 ++--
 block/blk-mq-tag.h | 12 
 block/blk-mq.c | 16 +---
 block/blk.h  

[PATCH 3.16 110/254] MIPS: Disallow outsized PTRACE_SETREGSET NT_PRFPREG regset accesses

2018-02-28 Thread Ben Hutchings
3.16.55-rc1 review patch.  If anyone has any objections, please let me know.

--

From: "Maciej W. Rozycki" 

commit c8c5a3a24d395b14447a9a89d61586a913840a3b upstream.

Complement commit c23b3d1a5311 ("MIPS: ptrace: Change GP regset to use
correct core dump register layout") and also reject outsized
PTRACE_SETREGSET requests to the NT_PRFPREG regset, like with the
NT_PRSTATUS regset.

Signed-off-by: Maciej W. Rozycki 
Fixes: c23b3d1a5311 ("MIPS: ptrace: Change GP regset to use correct core dump 
register layout")
Cc: James Hogan 
Cc: Paul Burton 
Cc: Alex Smith 
Cc: Dave Martin 
Cc: linux-m...@linux-mips.org
Cc: linux-kernel@vger.kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/17930/
Signed-off-by: Ralf Baechle 
Signed-off-by: Ben Hutchings 
---
 arch/mips/kernel/ptrace.c | 3 +++
 1 file changed, 3 insertions(+)

--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -568,6 +568,9 @@ static int fpr_set(struct task_struct *t
 
BUG_ON(count % sizeof(elf_fpreg_t));
 
+   if (pos + count > sizeof(elf_fpregset_t))
+   return -EIO;
+
init_fp_ctx(target);
 
if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t))



[PATCH 3.16 104/254] MIPS: MSA: bugfix - disable MSA correctly for new threads/processes.

2018-02-28 Thread Ben Hutchings
3.16.55-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Ralf Baechle 

commit 9cc719ab3f4f639d629ac8ff09e9b998bc006f68 upstream.

Due to the slightly odd way that new threads and processes start execution
when scheduled for the very first time they were bypassing the required
disable_msa call.

Signed-off-by: Ralf Baechle 
Signed-off-by: Ben Hutchings 
---
 arch/mips/include/asm/switch_to.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/arch/mips/include/asm/switch_to.h
+++ b/arch/mips/include/asm/switch_to.h
@@ -101,7 +101,6 @@ do {
\
if (test_and_clear_tsk_thread_flag(prev, TIF_USEDMSA))  \
__fpsave = FP_SAVE_VECTOR;  \
(last) = resume(prev, next, task_thread_info(next), __fpsave);  \
-   disable_msa();  \
 } while (0)
 
 #define finish_arch_switch(prev)   \
@@ -119,6 +118,7 @@ do {
\
if (cpu_has_userlocal)  \
write_c0_userlocal(current_thread_info()->tp_value);\
__restore_watch();  \
+   disable_msa();  \
 } while (0)
 
 #endif /* _ASM_SWITCH_TO_H */



[PATCH 3.16 170/254] mm/mprotect: add a cond_resched() inside change_pmd_range()

2018-02-28 Thread Ben Hutchings
3.16.55-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Anshuman Khandual 

commit 4991c09c7c812dba13ea9be79a68b4565bb1fa4e upstream.

While testing on a large CPU system, detected the following RCU stall
many times over the span of the workload.  This problem is solved by
adding a cond_resched() in the change_pmd_range() function.

  INFO: rcu_sched detected stalls on CPUs/tasks:
   154-: (670 ticks this GP) idle=022/140/0 softirq=2825/2825 
fqs=612
   (detected by 955, t=6002 jiffies, g=4486, c=4485, q=90864)
  Sending NMI from CPU 955 to CPUs 154:
  NMI backtrace for cpu 154
  CPU: 154 PID: 147071 Comm: workload Not tainted 4.15.0-rc3+ #3
  NIP:  c00b3f64 LR: c00b33d4 CTR: aa18
  REGS: a4b0fb44 TRAP: 0501   Not tainted  (4.15.0-rc3+)
  MSR:  80009033   CR: 22422082  XER: 
  CFAR: 006cf8f0 SOFTE: 1
  GPR00: 0010 c3ef9b1cb8c0 c10cc600 
  GPR04: 8e018c32b200 40017b3858fd6e00 8e018c32b208 40017b3858fd6e00
  GPR08: 8e018c32b210 40017b3858fd6e00 8e018c32b218 40017b3858fd6e00
  GPR12:  cfb25100
  NIP [c00b3f64] plpar_hcall9+0x44/0x7c
  LR [c00b33d4] pSeries_lpar_flush_hash_range+0x384/0x420
  Call Trace:
flush_hash_range+0x48/0x100
__flush_tlb_pending+0x44/0xd0
hpte_need_flush+0x408/0x470
change_protection_range+0xaac/0xf10
change_prot_numa+0x30/0xb0
task_numa_work+0x2d0/0x3e0
task_work_run+0x130/0x190
do_notify_resume+0x118/0x120
ret_from_except_lite+0x70/0x74
  Instruction dump:
  6000 f8810028 7ca42b78 7cc53378 7ce63b78 7d074378 7d284b78 7d495378
  e9410060 e9610068 e9810070 4422 <7d806378> e9810028 f88c f8ac0008

Link: http://lkml.kernel.org/r/20171214140551.5794-1-khand...@linux.vnet.ibm.com
Signed-off-by: Anshuman Khandual 
Suggested-by: Nicholas Piggin 
Acked-by: Michal Hocko 
Signed-off-by: Andrew Morton 
Signed-off-by: Linus Torvalds 
[bwh: Backported to 3.16: adjust context]
Signed-off-by: Ben Hutchings 
---
 mm/mprotect.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -152,7 +152,7 @@ static inline unsigned long change_pmd_r
 
next = pmd_addr_end(addr, end);
if (!pmd_trans_huge(*pmd) && pmd_none_or_clear_bad(pmd))
-   continue;
+   goto next;
 
/* invoke the mmu notifier if the pmd is populated */
if (!mni_start) {
@@ -174,7 +174,7 @@ static inline unsigned long change_pmd_r
}
 
/* huge pmd was handled */
-   continue;
+   goto next;
}
}
/* fall through, the trans huge pmd just split */
@@ -182,6 +182,8 @@ static inline unsigned long change_pmd_r
this_pages = change_pte_range(vma, pmd, addr, next, newprot,
 dirty_accountable, prot_numa);
pages += this_pages;
+next:
+   cond_resched();
} while (pmd++, addr = next, addr != end);
 
if (mni_start)



RE: [PATCH] qlogic/qed: Constify *pkt_type_str[]

2018-02-28 Thread Kalderon, Michal
> From: Hernán Gonzalez [mailto:her...@vanguardiasur.com.ar]
> Sent: Wednesday, February 28, 2018 12:32 AM
> 
> Note: This is compile only tested as I have no access to the hw.
> Constifying and declaring as static saves 24 bytes.
> 
> add/remove: 0/1 grow/shrink: 0/0 up/down: 0/-24 (-24)
> Function old new   delta
> pkt_type_str  24   - -24
> Total: Before=3599256, After=3599232, chg -0.00%
> 
> Signed-off-by: Hernán Gonzalez 
> ---
>  drivers/net/ethernet/qlogic/qed/qed_iwarp.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
> b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
> index ca4a81d..03ad4ee 100644
> --- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
> +++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
> @@ -1784,7 +1784,7 @@ enum qed_iwarp_mpa_pkt_type {
>  /* fpdu can be fragmented over maximum 3 bds: header, partial mpa,
> unaligned */  #define QED_IWARP_MAX_BDS_PER_FPDU 3
> 
> -char *pkt_type_str[] = {
> +static const char * const pkt_type_str[] = {
>   "QED_IWARP_MPA_PKT_PACKED",
>   "QED_IWARP_MPA_PKT_PARTIAL",
>   "QED_IWARP_MPA_PKT_UNALIGNED"
> --
> 2.7.4

Thanks

Acked-by: Michal Kalderon 


Kontakt: Nachricht

2018-02-28 Thread MK Kontakt
Sehr geehrte Damen und Herren, 

nach unserem Besuch Ihrer Homepage möchten wir Ihnen ein Angebot von Produkten 
vorstellen, das Ihnen ermöglichen wird, den Verkauf Ihrer Produkte sowie 
Dienstleistungen deutlich zu erhöhen.

Die Datenbanken der Firmen sind in für Sie interessante und relevante 
Zielgruppen untergliedert.

Die Firmenangaben beinhalten:
Name der Firma, Ansprechpartner, E-mail Adresse, Tel. + Fax-Nr., PLZ, Ort, 
Straße etc.

*** 
1. Schweiz 2018 ( 187 764 ) - 149 EUR ( bis zum 28.02.2018 )
***

Die Verwendungsmöglichkeiten der Datenbanken sind praktisch unbegrenzt und Sie 
können durch Verwendung 
der von uns entwickelten Programme des personalisierten Versendens von 
Angeboten u.ä. mittels
E-mailing bzw. Fax effektive und sichere Werbekampagnen damit durchführen.
Bitte informieren Sie sich über die weiteren Details einmal unverbindlich auf 
unseren Webseite:

http://www.dbkontakt-ch.net/?page=catalog


MfG
Martin Kiefer
http://www.dbkontakt-ch.net/?page=catalog 




[PATCH v2] 8250-men-mcb: add support for 16z025 and 16z057

2018-02-28 Thread Michael Moese
Add support for two MEN UARTs (16z025 and 16z057) to the
8250_men_mcb driver.
The 16z025 consists of up to four ports, the 16z057 has
exactly four ports. Apart from that, all of them share the
Port settings.

Signed-off-by: Michael Moese 
Reported-by: Ben Turner 
Tested-by: Ben Turner 

---
Changes to v1:
Deduplicated the two switch() blocks into a function returning
the number of ports.
Minor style corrections.
---
 drivers/tty/serial/8250/8250_men_mcb.c | 124 -
 drivers/tty/serial/8250/Kconfig|   5 +-
 2 files changed, 93 insertions(+), 36 deletions(-)

diff --git a/drivers/tty/serial/8250/8250_men_mcb.c 
b/drivers/tty/serial/8250/8250_men_mcb.c
index 308977807994..98b6b919bca6 100644
--- a/drivers/tty/serial/8250/8250_men_mcb.c
+++ b/drivers/tty/serial/8250/8250_men_mcb.c
@@ -7,6 +7,12 @@
 #include 
 #include 
 
+#define MEN_UART_ID_Z025 0x19
+#define MEN_UART_ID_Z057 0x39
+#define MEN_UART_ID_Z125 0x7d
+
+#define MEN_UART_MEM_SIZE 0x10
+
 struct serial_8250_men_mcb_data {
struct uart_8250_port uart;
int line;
@@ -18,7 +24,7 @@ struct serial_8250_men_mcb_data {
  * parameter in order to really set the correct baudrate, and
  * do so if possible without user interaction
  */
-static u32 men_z125_lookup_uartclk(struct mcb_device *mdev)
+static u32 men_lookup_uartclk(struct mcb_device *mdev)
 {
/* use default value if board is not available below */
u32 clkval = 1041666;
@@ -28,10 +34,12 @@ static u32 men_z125_lookup_uartclk(struct mcb_device *mdev)
mdev->bus->name);
if  (strncmp(mdev->bus->name, "F075", 4) == 0)
clkval = 1041666;
-   else if  (strncmp(mdev->bus->name, "F216", 4) == 0)
+   else if (strncmp(mdev->bus->name, "F216", 4) == 0)
clkval = 1843200;
else if (strncmp(mdev->bus->name, "G215", 4) == 0)
clkval = 1843200;
+   else if (strncmp(mdev->bus->name, "F210", 4) == 0)
+   clkval = 115200;
else
dev_info(>dev,
 "board not detected, using default uartclk\n");
@@ -41,62 +49,108 @@ static u32 men_z125_lookup_uartclk(struct mcb_device *mdev)
return clkval;
 }
 
+static unsigned int get_num_ports(struct mcb_device *mdev,
+ void __iomem *membase)
+{
+   switch (mdev->id) {
+   case MEN_UART_ID_Z125:
+   return 1U;
+   case MEN_UART_ID_Z025:
+   return readb(membase) >> 4;
+   case MEN_UART_ID_Z057:
+   return 4U;
+   default:
+   dev_err(>dev, "no supported device!\n");
+   return -ENODEV;
+   }
+}
+
 static int serial_8250_men_mcb_probe(struct mcb_device *mdev,
 const struct mcb_device_id *id)
 {
struct serial_8250_men_mcb_data *data;
struct resource *mem;
-
-   data = devm_kzalloc(>dev,
-   sizeof(struct serial_8250_men_mcb_data),
-   GFP_KERNEL);
-   if (!data)
-   return -ENOMEM;
-
-   mcb_set_drvdata(mdev, data);
-   data->uart.port.dev = mdev->dma_dev;
-   spin_lock_init(>uart.port.lock);
-
-   data->uart.port.type = PORT_16550;
-   data->uart.port.flags = UPF_SKIP_TEST | UPF_SHARE_IRQ | UPF_FIXED_TYPE;
-   data->uart.port.iotype = UPIO_MEM;
-   data->uart.port.uartclk = men_z125_lookup_uartclk(mdev);
-   data->uart.port.regshift = 0;
-   data->uart.port.fifosize = 60;
+   unsigned int num_ports;
+   unsigned int i;
+   void __iomem *membase;
 
mem = mcb_get_resource(mdev, IORESOURCE_MEM);
if (mem == NULL)
return -ENXIO;
+   membase = devm_ioremap_resource(>dev, mem);
+   if (IS_ERR(membase))
+   return PTR_ERR_OR_ZERO(membase);
 
-   data->uart.port.irq = mcb_get_irq(mdev);
+   num_ports = get_num_ports(mdev, membase);
 
-   data->uart.port.membase = devm_ioremap_resource(>dev, mem);
-   if (IS_ERR(data->uart.port.membase))
-   return PTR_ERR_OR_ZERO(data->uart.port.membase);
+   dev_dbg(>dev, "found a 16z%03u with %u ports\n",
+   mdev->id, num_ports);
 
-   data->uart.port.mapbase = (unsigned long) mem->start;
-   data->uart.port.iobase = data->uart.port.mapbase;
+   if (num_ports == 0 || num_ports > 4) {
+   dev_err(>dev, "unexpected number of ports: %u\n",
+   num_ports);
+   return -ENODEV;
+   }
 
-   /* ok, register the port */
-   data->line = serial8250_register_8250_port(>uart);
-   if (data->line < 0)
-   return data->line;
+   data = devm_kcalloc(>dev, num_ports,
+   sizeof(struct serial_8250_men_mcb_data),
+   GFP_KERNEL);
+   if (!data)
+   return -ENOMEM;
 
-   

Re: [PATCH] dma-buf/reservation: shouldn't kfree staged when slot available

2018-02-28 Thread Christian König

Am 28.02.2018 um 07:44 schrieb Monk Liu:

under below scenario the obj->fence would refer to a wild pointer:

1,call reservation_object_reserved_shared
2,call reservation_object_add_shared_fence
3,call reservation_object_reserved_shared
4,call reservation_object_add_shared_fence

in step 1, staged is allocated,

in step 2, code path will go reservation_object_add_shared_replace()
and obj->fence would be assigned as staged (through RCU_INIT_POINTER)

in step 3, obj->staged will be freed(by simple kfree),
which make obj->fence point to a wild pointer...



Well that explanation is still nonsense. See 
reservation_object_add_shared_fence:

    obj->staged = NULL;


Among the first things reservation_object_add_shared_fence() does is it 
sets obj->staged to NULL.


So step 3 will not free anything and we never have a wild pointer.

Regards,
Christian.



in step 4, code path will go reservation_object_add_shared_inplace()
and inside it the @fobj (which equals to @obj->staged, set by above steps)
is already a wild pointer

should remove the kfree on staged in reservation_object_reserve_shared()

Change-Id: If7c01f1b4be3d3d8a81efa90216841f79ab1fc1c
Signed-off-by: Monk Liu 
---
  drivers/dma-buf/reservation.c | 7 ++-
  1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/dma-buf/reservation.c b/drivers/dma-buf/reservation.c
index 375de41..b473ccc 100644
--- a/drivers/dma-buf/reservation.c
+++ b/drivers/dma-buf/reservation.c
@@ -74,12 +74,9 @@ int reservation_object_reserve_shared(struct 
reservation_object *obj)
old = reservation_object_get_list(obj);
  
  	if (old && old->shared_max) {

-   if (old->shared_count < old->shared_max) {
-   /* perform an in-place update */
-   kfree(obj->staged);
-   obj->staged = NULL;
+   if (old->shared_count < old->shared_max)
return 0;
-   } else
+   else
max = old->shared_max * 2;
} else
max = 4;




Re: [PATCH 3/3] userfaultfd: non-cooperative: allow synchronous EVENT_REMOVE

2018-02-28 Thread Mike Rapoport
On Wed, Feb 28, 2018 at 11:21:02AM +0300, Pavel Emelyanov wrote:
> 
> > @@ -52,6 +53,7 @@
> >  #define _UFFDIO_WAKE   (0x02)
> >  #define _UFFDIO_COPY   (0x03)
> >  #define _UFFDIO_ZEROPAGE   (0x04)
> > +#define _UFFDIO_WAKE_SYNC_EVENT(0x05)
> 
> Excuse my ignorance, but what's the difference between UFFDIO_WAKE and 
> UFFDIO_WAKE_SYNC_EVENT?

UFFDIO_WAKE is used when UFFDIO_COPY/UFFDIO_ZEROPAGE are used with the
UFFDIO_*_MODE_DONTWAKE flag set, and it presumes a 'struct uffdio_range'
argument to the ioctl(). Since waking up a non-page-fault event requires
different parameters, I've added a new ioctl to keep backwards compatibility.
 
> -- Pavel
> 

-- 
Sincerely yours,
Mike.



Re: [PATCH v2 2/2] xen: events: free irqs in error condition

2018-02-28 Thread Juergen Gross
On 28/02/18 09:25, Shah, Amit wrote:
> 
> On Mi, 2018-02-28 at 08:16 +, Roger Pau Monné wrote:
>> On Tue, Feb 27, 2018 at 05:32:53PM +, Shah, Amit wrote:
>>>
>>>
>>> On Di, 2018-02-27 at 17:07 +, Roger Pau Monné wrote:

 On Tue, Feb 27, 2018 at 03:55:58PM +, Amit Shah wrote:
>
>
> In case of errors in irq setup for MSI, free up the allocated
> irqs.
>
> Fixes: 4892c9b4ada9f9 ("xen: add support for MSI message
> groups")
> Reported-by: Hooman Mirhadi 
> CC: 
> CC: Roger Pau Monné 
> CC: Boris Ostrovsky 
> CC: Eduardo Valentin 
> CC: Juergen Gross 
> CC: Thomas Gleixner 
> CC: "K. Y. Srinivasan" 
> CC: Liu Shuo 
> CC: Anoob Soman 
> Signed-off-by: Amit Shah 
> ---
>  drivers/xen/events/events_base.c | 5 -
>  1 file changed, 4 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/xen/events/events_base.c
> b/drivers/xen/events/events_base.c
> index c86d10e..a299586 100644
> --- a/drivers/xen/events/events_base.c
> +++ b/drivers/xen/events/events_base.c
> @@ -750,11 +750,14 @@ int xen_bind_pirq_msi_to_irq(struct
> pci_dev
> *dev, struct msi_desc *msidesc,
>  
>   ret = irq_set_msi_desc(irq, msidesc);
>   if (ret < 0)
> - goto error_irq;
> + goto error_desc;
>  out:
>   mutex_unlock(_mapping_update_lock);
>   return irq;
>  error_irq:
> + while (--nvec >= i)
> + xen_free_irq(irq + nvec);
> +error_desc:
>   while (i > 0) {
>   i--;
>   __unbind_from_irq(irq + i);
 It seems pointless to introduce another label and another loop to
 fix
 something that can be fixed with a single label and a single
 loop,
 this just makes the code more complex for no reason.
>>> I disagree, just because there are two different cleanups to be
>>> made
>>> for two different issues; it's not as if the if.. and else
>>> conditions
>>> are going to be interleaved.
>> Oh, I don't mind so much whether it ends up being two patches or a
>> single one, but IMHO the code should end up looking similar to what I
>> proposed, I would like to avoid having two loops and two labels.
>>
>> Could you rework the series so that the end result uses a single loop
>> (and label)?
> 
> That was the part I didn't like much, so it would be better if the
> patch came from you :)

I'd prefer Roger's solution, too.

Roger, in case you don't want to write the patch, I can do it.


Juergen


[PATCH] perf tools: Fix trigger class trigger_on()

2018-02-28 Thread Adrian Hunter
trigger_on() means that the trigger is available but not ready, however
trigger_on() was making it ready. That can segfault if the signal comes
before trigger_ready(). e.g. (USR2 signal delivery not shown)

  $ perf record -e intel_pt//u -S sleep 1
  perf: Segmentation fault
  Obtained 16 stack frames.
  /home/ahunter/bin/perf(sighandler_dump_stack+0x40) [0x4ec550]
  /lib/x86_64-linux-gnu/libc.so.6(+0x36caf) [0x7fa76411acaf]
  /home/ahunter/bin/perf(perf_evsel__disable+0x26) [0x4b9dd6]
  /home/ahunter/bin/perf() [0x43a45b]
  /lib/x86_64-linux-gnu/libc.so.6(+0x36caf) [0x7fa76411acaf]
  /lib/x86_64-linux-gnu/libc.so.6(__xstat64+0x15) [0x7fa7641d2cc5]
  /home/ahunter/bin/perf() [0x4ec6c9]
  /home/ahunter/bin/perf() [0x4ec73b]
  /home/ahunter/bin/perf() [0x4ec73b]
  /home/ahunter/bin/perf() [0x4ec73b]
  /home/ahunter/bin/perf() [0x4eca15]
  /home/ahunter/bin/perf(machine__create_kernel_maps+0x257) [0x4f0b77]
  /home/ahunter/bin/perf(perf_session__new+0xc0) [0x4f86f0]
  /home/ahunter/bin/perf(cmd_record+0x722) [0x43c132]
  /home/ahunter/bin/perf() [0x4a11ae]
  /home/ahunter/bin/perf(main+0x5d4) [0x427fb4]

Note, for testing purposes, this is hard to hit unless you add some sleep()
in builtin-record.c before record__open().

Fixes: 3dcc4436fa6f ("perf tools: Introduce trigger class")
Cc: 
Signed-off-by: Adrian Hunter 
---
 tools/perf/util/trigger.h | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/tools/perf/util/trigger.h b/tools/perf/util/trigger.h
index 370138e7e35c..88223bc7c82b 100644
--- a/tools/perf/util/trigger.h
+++ b/tools/perf/util/trigger.h
@@ -12,7 +12,7 @@
  * States and transits:
  *
  *
- *  OFF--(on)--> READY --(hit)--> HIT
+ *  OFF--> ON --> READY --(hit)--> HIT
  * ^   |
  * |(ready)
  * |   |
@@ -27,8 +27,9 @@ struct trigger {
volatile enum {
TRIGGER_ERROR   = -2,
TRIGGER_OFF = -1,
-   TRIGGER_READY   = 0,
-   TRIGGER_HIT = 1,
+   TRIGGER_ON  = 0,
+   TRIGGER_READY   = 1,
+   TRIGGER_HIT = 2,
} state;
const char *name;
 };
@@ -50,7 +51,7 @@ static inline bool trigger_is_error(struct trigger *t)
 static inline void trigger_on(struct trigger *t)
 {
TRIGGER_WARN_ONCE(t, TRIGGER_OFF);
-   t->state = TRIGGER_READY;
+   t->state = TRIGGER_ON;
 }
 
 static inline void trigger_ready(struct trigger *t)
-- 
1.9.1



Re: [v2] [media] Use common error handling code in 20 functions

2018-02-28 Thread SF Markus Elfring
>> +put_isp:
>> +omap3isp_put(video->isp);
>> +delete_fh:
>> +v4l2_fh_del(>vfh);
>> +v4l2_fh_exit(>vfh);
>> +kfree(handle);
> 
> Please prefix the error labels with error_.

How often do you really need such an extra prefix?


>> +++ b/drivers/media/usb/uvc/uvc_v4l2.c
>> @@ -994,10 +994,8 @@ static int uvc_ioctl_g_ext_ctrls(struct file *file,
>> void *fh, struct v4l2_queryctrl qc = { .id = ctrl->id };
>>
>>  ret = uvc_query_v4l2_ctrl(chain, );
>> -if (ret < 0) {
>> -ctrls->error_idx = i;
>> -return ret;
>> -}
>> +if (ret < 0)
>> +goto set_index;
>>
>>  ctrl->value = qc.default_value;
>>  }
>> @@ -1013,14 +1011,17 @@ static int uvc_ioctl_g_ext_ctrls(struct file *file,
>> void *fh, ret = uvc_ctrl_get(chain, ctrl);
>>  if (ret < 0) {
>>  uvc_ctrl_rollback(handle);
>> -ctrls->error_idx = i;
>> -return ret;
>> +goto set_index;
>>  }
>>  }
>>
>>  ctrls->error_idx = 0;
>>
>>  return uvc_ctrl_rollback(handle);
>> +
>> +set_index:
>> +ctrls->error_idx = i;
>> +return ret;
>>  }
> 
> For uvcvideo I find this to hinder readability

I got an other development view.


> without adding much added value.

There can be a small effect for such a function implementation.


> Please drop the uvcvideo change from this patch.

Would it be nice if this source code adjustment could be integrated also?

Regards,
Markus


[PATCH V2] scsi: core: use blk_mq_requeue_request in __scsi_queue_insert

2018-02-28 Thread Jianchao Wang
In scsi core, __scsi_queue_insert should just put request back on
the queue and retry using the same command as before. However, for
blk-mq, scsi_mq_requeue_cmd is employed here which will unprepare
the request. To align with the semantics of __scsi_queue_insert,
use blk_mq_requeue_request with kick_requeue_list == true and put
the reference of scsi_device.

V1 -> V2:
 - add put_device on scsi_device->sdev_gendev

Cc: Christoph Hellwig 
Signed-off-by: Jianchao Wang 
---
 drivers/scsi/scsi_lib.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index a86df9c..6fa7b0c 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -191,7 +191,8 @@ static void __scsi_queue_insert(struct scsi_cmnd *cmd, int 
reason, bool unbusy)
 */
cmd->result = 0;
if (q->mq_ops) {
-   scsi_mq_requeue_cmd(cmd);
+   blk_mq_requeue_request(cmd->request, true);
+   put_device(>sdev_gendev);
return;
}
spin_lock_irqsave(q->queue_lock, flags);
-- 
2.7.4



Re: Removing architectures without upstream gcc support

2018-02-28 Thread Florian Weimer

On 02/23/2018 12:37 PM, Arnd Bergmann wrote:

That makes more sense, yes. I'm still unsure about this one though. Chris in
fact made the suggestion to remove the architecture from both glibc and kernel
as with his departure from Mellanox there is nobody left to maintain it.

I suggested keeping it as 'Orphaned' for the time being, given that the port
is still in a good shape, much better than many other ports.

The known customers that sold TileGX based appliances (Cisco, Brocade,
Checkpoint, Mikrotik, ...) tend to have long support cycles, and there have
been attempts at getting Debian, OpenWRT and Centos distro support
at least a few years ago.


Note that there is tilegx-*-linux-gnu and tilepro-*-linux-gnu.  Only the 
latter was removed from glibc.


Thanks,
Florian


Re: 4.16 regression: s2ram broken on non-PAE i686

2018-02-28 Thread Thomas Gleixner
Woody,

On Tue, 27 Feb 2018, Woody Suwalski wrote:

> There is a problem with s2ram on 4.16, and it has now been propagated
> to 4.15 and 4.14 stable updates.
> 
> It originates from
> 
> commit 62c00e6122a6b5aa7b1350023967a2d7a12b54c9
> Author: William Grant  >
> Date:   Tue Jan 30 22:22:55 2018 +1100
> 
> x86/mm: Fix overlap of i386 CPU_ENTRY_AREA with FIX_BTMAP
> 
> s2ram works OK on PAE kernels, breaks badly on non-PAE. I do not think
> that the problem can be duplicated in VMPlayer, but it is 100%
> reproducible on a "real" hardware.
> System goes to sleep OK, but when woken - it reboots the PC.
> 
> The issue is tracked in Bugzilla bug 198763
> [https://bugzilla.kernel.org/show_bug.cgi?id=198763]

Thanks for digging into this so far. Can you please provide dmesg output
from a PAE=y and PAE=n kernel after boot?

Thanks,

tglx


Possible usb_request leak in the function dwc2_gadget_complete_isoc_request_ddma

2018-02-28 Thread Zengtao (B)
Hi johnyoun:

I found a suspected bug, and I am writing to confirm with you.

In the function 
dwc2_gadget_complete_isoc_request_ddma(drivers/usb/dwc2/gadget.c).
Only the first request from the eq queue is processed while maybe there are 
more than one descriptors done by the HW.

1. Each usb request is associated with a DMA descriptor, but this is not 
reflected in the driver, so when one DMA descriptor is done, 
we don't know which usb request is done, but I think if only one DMA descriptor 
is done, we can know that the first USB request in 
eq queue is done, because the HW DMA descriptor and SW usb request are both in 
sequence.

2. In the function dwc2_gadget_complete_isoc_request_ddma, we may complete more 
than one DMA descriptor but only the first
Usb request is processed, but in fact, we should process all the usb requests 
associated with all the done DMA descriptors.

3. I noticed that each DMA descriptor is configured to report an interrupt, and 
if each DMA descriptor generate an interrupt, the above
Flow should be ok, but the interrupts can merge and we have used the depdma to 
figure out the largest finished DMA descriptor index.

Looking forward to your reply.

Thank you. 

Regards
Zengtao 


Re: [PATCH v4 4/6] vfio/type1: check dma map request is within a valid iova range

2018-02-28 Thread Auger Eric
Hi Shameer,

On 27/02/18 10:57, Shameerali Kolothum Thodi wrote:
> 
> 
>> -Original Message-
>> From: Auger Eric [mailto:eric.au...@redhat.com]
>> Sent: Tuesday, February 27, 2018 8:27 AM
>> To: Alex Williamson 
>> Cc: Shameerali Kolothum Thodi ;
>> pmo...@linux.vnet.ibm.com; k...@vger.kernel.org; linux-
>> ker...@vger.kernel.org; Linuxarm ; John Garry
>> ; xuwei (O) ; Robin Murphy
>> 
>> Subject: Re: [PATCH v4 4/6] vfio/type1: check dma map request is within a 
>> valid
>> iova range
>>
>> Hi,
>> On 27/02/18 00:13, Alex Williamson wrote:
>>> On Mon, 26 Feb 2018 23:05:43 +0100
>>> Auger Eric  wrote:
>>>
 Hi Shameer,

 [Adding Robin in CC]
 On 21/02/18 13:22, Shameer Kolothum wrote:
> This checks and rejects any dma map request outside valid iova
> range.
>
> Signed-off-by: Shameer Kolothum
>> 
> ---
>  drivers/vfio/vfio_iommu_type1.c | 22 ++
>  1 file changed, 22 insertions(+)
>
> diff --git a/drivers/vfio/vfio_iommu_type1.c
>> b/drivers/vfio/vfio_iommu_type1.c
> index a80884e..3049393 100644
> --- a/drivers/vfio/vfio_iommu_type1.c
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -970,6 +970,23 @@ static int vfio_pin_map_dma(struct vfio_iommu
>> *iommu, struct vfio_dma *dma,
>   return ret;
>  }
>
> +/*
> + * Check dma map request is within a valid iova range
> + */
> +static bool vfio_iommu_iova_dma_valid(struct vfio_iommu *iommu,
> + dma_addr_t start, dma_addr_t end)
> +{
> + struct list_head *iova = >iova_list;
> + struct vfio_iova *node;
> +
> + list_for_each_entry(node, iova, list) {
> + if ((start >= node->start) && (end <= node->end))
> + return true;
 I am now confused by the fact this change will prevent existing QEMU
 from working with this series on some platforms. For instance QEMU virt
 machine GPA space collides with Seattle PCI host bridge windows. On ARM
 the smmu and smmuv3 drivers report the PCI host bridge windows as
 reserved regions which does not seem to be the case on other platforms.
 The change happened in commit
>> 273df9635385b2156851c7ee49f40658d7bcb29d
 (iommu/dma: Make PCI window reservation generic).

 For background, we already discussed the topic after LPC 2016. See
 https://www.spinics.net/lists/kernel/msg2379607.html.

 So is it the right choice to expose PCI host bridge windows as reserved
 regions? If yes shouldn't we make a difference between those and MSI
 windows in this series and do not reject any user space DMA_MAP attempt
 within PCI host bridge windows.
>>>
>>> If the QEMU machine GPA collides with a reserved region today, then
>>> either:
>>>
>>> a) The mapping through the IOMMU works and the reserved region is wrong
>>>
>>> or
>>>
>>> b) The mapping doesn't actually work, QEMU is at risk of data loss by
>>> being told that it worked, and we're justified in changing that
>>> behavior.
>>>
>>> Without knowing the specifics of SMMU, it doesn't particularly make
>>> sense to me to mark the entire PCI hierarchy MMIO range as reserved,
>>> unless perhaps the IOMMU is incapable of translating those IOVAs.
>> to me the limitation does not come from the smmu itself, which is a
>> separate HW block sitting between the root complex and the interconnect.
>> If ACS is not enforced by the PCIe subsystem, the transaction will never
>> reach the IOMMU.
> 
> True. And we do have one such platform where ACS is not enforced but 
> reserving the regions and possibly creating holes while launching VM will
> make it secure. But I do wonder how we will solve the device grouping
> in such cases. 
> 
> The Seattle PCI host bridge windows case you mentioned has any pci quirk 
> to claim that they support ACS?
No there is none to my knowledge. I am applying Alex' not upstream ACS
overwrite patch.

Thanks

Eric
>  
>> In the case of such overlap, shouldn't we just warn the end-user that
>> this situation is dangerous instead of forbidding the use case which
>> worked "in most cases" until now.
> 
> Yes, may be something similar to the allow_unsafe_interrupts case, if
> that is acceptable.
> 
> Thanks,
> Shameer
>  
>>> Are we trying to prevent untranslated p2p with this reserved range?
>>> That's not necessarily a terrible idea, but it seems that doing it for
>>> that purpose would need to be a lot smarter, taking into account ACS
>>> and precisely selecting ranges within the peer address space that would
>>> be untranslated.  Perhaps only populated MMIO within non-ACS
>>> hierarchies.  Thanks,
>>
>> Indeed taking into account the ACS capability would refine the
>> situations where a risk exists.
>>
>> Thanks
>>
>> Eric
>>>

[PATCH] rcu: Clean up rcu_init_nohz() by removing unnecessary statements

2018-02-28 Thread Byungchul Park
Since the commit 44c65ff2e3b0(rcu: Eliminate NOCBs CPU-state Kconfig
options) made nocb-cpus identified only through the rcu_nocbs= boot
parameter, we don't have to care about the NOCBs CPU-state Kconfig options
anymore, which means we can now just rely on rcu_nocb_mask to
decide whether to go ahead in rcu_init_nohz().

Remove the deprecated code.

Signed-off-by: Byungchul Park 
---
 kernel/rcu/tree_plugin.h | 10 +-
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index b0d7f9b..510a6af 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -2313,22 +2313,14 @@ static void do_nocb_deferred_wakeup(struct rcu_data 
*rdp)
 void __init rcu_init_nohz(void)
 {
int cpu;
-   bool need_rcu_nocb_mask = true;
struct rcu_state *rsp;
 
-#if defined(CONFIG_NO_HZ_FULL)
-   if (tick_nohz_full_running && cpumask_weight(tick_nohz_full_mask))
-   need_rcu_nocb_mask = true;
-#endif /* #if defined(CONFIG_NO_HZ_FULL) */
-
-   if (!cpumask_available(rcu_nocb_mask) && need_rcu_nocb_mask) {
+   if (!cpumask_available(rcu_nocb_mask)) {
if (!zalloc_cpumask_var(_nocb_mask, GFP_KERNEL)) {
pr_info("rcu_nocb_mask allocation failed, callback 
offloading disabled.\n");
return;
}
}
-   if (!cpumask_available(rcu_nocb_mask))
-   return;
 
 #if defined(CONFIG_NO_HZ_FULL)
if (tick_nohz_full_running)
-- 
1.9.1



RE: [PATCH] pvcalls-front: 64-bit align flags

2018-02-28 Thread David Laight
From: Stefano Stabellini
> Sent: 27 February 2018 19:55
> 
> We are using test_and_* operations on the status and flag fields of
> struct sock_mapping. However, these functions require the operand to be
> 64-bit aligned on arm64. Currently, only status is 64-bit aligned.

That sounds like a big accident just waiting to happen...
If the operand is required to be 64bit aligned why isn't it a type
that requires 64bit alignment?

> Make flags 64-bit aligned by introducing an explicit padding field.

At some point such padding will always go wrong...

David




Re: [PATCH] selftest: fix kselftest-merge depend on 'RUNTIME_TESTING_MENU'

2018-02-28 Thread Zong Li
2018-02-28 6:32 GMT+08:00 Anders Roxell :
>
> On 23 February 2018 at 08:05, Zong Li  wrote:
> > 2018-02-23 3:57 GMT+08:00 Anders Roxell :
> >> On 22 February 2018 at 12:53, Zong Li  wrote:
> >>> Since the 'commit d3deafaa8b5c ("lib/: make RUNTIME_TESTS a menuconfig
> >>> to ease disabling it all")', the make kselftest-merge cannot merge the
> >>> config dependencies of kselftest to the existing .config file.
> >>>
> >>> These config dependencies of kselftest need to enable the
> >>> 'CONFIG_RUNTIME_TESTING_MENU=y' at the same time.
> >>
> >> Is this patch needed when patch sha 'f29c79906064 ("lib/Kconfig.debug: 
> >> enable
> >> RUNTIME_TESTING_MENU")' find its way into the kernel ?
> >> I think it's in linux-next now.
> >
> > Thanks. The patch sha 'f29c79906064 ("lib/Kconfig.debug: enable
> > RUNTIME_TESTING_MENU")' can resolve the make kselftest-merge
> > on default situation, but I think the kselftest-merge should resolve
> > the dependencies
> > without the prerequisite, it should enable the 'RUNTIME_TESTING_MENU'
> > when merging
> > the config of some selftests if RUNTIME_TESTING_MENU is not set.
>
> I see your point, but should this be enabled in a common place that
> gets included per
> default if running kselftest-merge and not in all config fragments?
>
> Cheers,
> Anders

I agree with you. It is better for the config fragment of each
selftest to be responsible for
enabling only the specific configs it needs.
Thanks for your reply.

Best Regard,
Zong Li


Re: [PATCH v5] reset: add support for non-DT systems

2018-02-28 Thread Philipp Zabel
On Tue, 2018-02-27 at 19:07 +0100, Bartosz Golaszewski wrote:
> 2018-02-27 17:10 GMT+01:00 Philipp Zabel :
> > Hi Bartosz,
> > 
> > thank you for the update.
> > 
> > On Fri, 2018-02-23 at 12:39 +0100, Bartosz Golaszewski wrote:
> > > From: Bartosz Golaszewski 
> > > 
> > > The reset framework only supports device-tree. There are some platforms
> > > however, which need to use it even in legacy, board-file based mode.
> > > 
> > > An example of such architecture is the DaVinci family of SoCs which
> > > supports both device tree and legacy boot modes and we don't want to
> > > introduce any regressions.
> > > 
> > > We're currently working on converting the platform from its hand-crafted
> > > clock API to using the common clock framework. Part of the overhaul will
> > > be representing the chip's power sleep controller's reset lines using
> > > the reset framework.
> > > 
> > > This changeset extends the core reset code with a new reset lookup
> > > entry structure. It contains data allowing the reset core to associate
> > > reset lines with devices by comparing the dev_id and con_id strings.
> > > 
> > > It also provides a function allowing drivers to register lookup entries
> > > with the framework.
> > > 
> > > The new lookup function is only called as a fallback in case the
> > > of_node field is NULL and doesn't change anything for current users.
> > > 
> > > Tested with a dummy reset driver with several lookup entries.
> > > 
> > > An example lookup table registration from a driver can be found below:
> > > 
> > > static struct reset_control_lookup foobar_reset_lookup[] = {
> > >   RESET_LOOKUP("foo.0", "foo", 15),
> > >   RESET_LOOKUP("bar.0", NULL,   5),
> > > };
> > > 
> > > foobar_probe()
> > > {
> > > ...
> > > 
> > > reset_controller_add_lookup(, foobar_reset_lookup,
> > > ARRAY_SIZE(foobar_reset_lookup));
> > > 
> > > ...
> > > }
> > > 
> > > Cc: Sekhar Nori 
> > > Cc: Kevin Hilman 
> > > Cc: David Lechner 
> > > Signed-off-by: Bartosz Golaszewski 
> > > ---
> > > v1 -> v2:
> > > - renamed the new function to __reset_control_get_from_lookup()
> > > - added a missing break; when a matching entry is found
> > > - rearranged the code in __reset_control_get() - we can no longer get to 
> > > the
> > >   return at the bottom, so remove it and return from
> > >   __reset_control_get_from_lookup() if __of_reset_control_get() fails
> > > > - return -ENOENT from reset_control_get() if we can't find a matching 
> > > > entry,
> > > >   previously returned -EINVAL referred to the fact that we passed a device
> > >   without the of_node which is no longer an error condition
> > > - add a comment about needing a sentinel in the lookup table
> > > 
> > > v2 -> v3:
> > > - added the reset id number field to the lookup struct so that we don't 
> > > need
> > >   to rely on the array index
> > > 
> > > v3 -> v4:
> > > - separated the driver and lookup table registration logic by adding a
> > >   function meant to be called by machine-specific code that adds a lookup
> > >   table to the internal list
> > > - the correct reset controller is now found by first finding the lookup
> > >   table associated with it, then finding the actual reset controller by
> > >   the associated device
> > > 
> > > v4 -> v5:
> > > - since the first user of this will be the davinci clk driver and it
> > >   already registers clock lookup from within the driver code - allow
> > >   drivers to register lookups with the assumption that the code can be
> > >   extended to make it possible to register entries from machine code as
> > >   well
> > 
> > How do you imagine this may be extended? By adding an rddev_devid field
> > to the lookup, similarly to the pwm_lookup?
> > I suppose reset_controller_add_lookup could then be called with a NULL
> > rcdev to register lookups by id.
> > 
> 
> Yes, this is what I was thinking about more or less.

Ok. I just want to avoid having to change all users when somebody needs
that functionality, even though hopefully there won't be that many.

> > > - simplify the code - only expose a single lookup structure and a simply
> > >   registration function
> > > - add the RESET_LOOKUP macro for brevity
> > > 
> > >  drivers/reset/core.c | 65 
> > > +++-
> > >  include/linux/reset-controller.h | 28 +
> > >  2 files changed, 92 insertions(+), 1 deletion(-)
> > > 
> > > diff --git a/drivers/reset/core.c b/drivers/reset/core.c
> > > index da4292e9de97..75e54a05147a 100644
> > > --- a/drivers/reset/core.c
> > > +++ b/drivers/reset/core.c
> > > @@ -23,6 +23,9 @@
> > >  static DEFINE_MUTEX(reset_list_mutex);
> > >  static LIST_HEAD(reset_controller_list);
> > > 
> > > +static DEFINE_MUTEX(reset_lookup_mutex);
> > > +static LIST_HEAD(reset_lookup_list);
> > > +
> > >  

Re: [PATCH v2] tracing/power: Polish the tracepoints cpu_idle and cpu_frequency

2018-02-28 Thread Du, Changbin
On Wed, Feb 28, 2018 at 10:14:41AM +0100, Rafael J. Wysocki wrote:
> On 2/28/2018 3:45 AM, Du, Changbin wrote:
> > On Tue, Feb 27, 2018 at 05:39:38PM -0500, Steven Rostedt wrote:
> > > On Tue, 27 Feb 2018 17:35:27 +0800
> > > "Du, Changbin"  wrote:
> > > 
> > > > >  From the tracing perspective:
> > > > > 
> > > > > Acked-by: Steven Rostedt (VMware) 
> > > > > 
> > > > > -- Steve
> > > > > Hi Steve, will you pick this or someone else?
> > > I maintain the tracing infrastructure, but the tracing use cases are
> > > maintained by the maintainers of the users of the trace events. That
> > > is, who added these trace events? They are the ones most affected by
> > > these changes.
> > > 
> > > For example, it looks like Rafael J. Wysocki, is the one that added
> > > trace_cpu_frequency(). He's the one that is affected by this change,
> > > and is the one that you need to have take it.
> > > 
> > Got it, thanks!
> > 
> > Hi Wysocki, could you take a look?
> 
> Please send the patch(es) to linux...@vger.kernel.org with a CC to me and I
> will take care of them.
> 
sure~

> Thanks,
> Rafael
> 

-- 
Thanks,
Changbin Du


[PATCH] checkpatch: avoid error report caused by syzbot

2018-02-28 Thread Yisheng Xie
syzbot requests that its Reported-by tag be added when fixing a bug found by
syzbot. However, this triggers an error when running checkpatch:

  ERROR: Unrecognized email address: 
'syzbot+d7a918a7a8e1c952b...@syzkaller.appspotmail.com'

This is because the address includes a '+', which makes the regular expression
misbehave. Fix it by adding a check for syzbot.

Signed-off-by: Yisheng Xie 
---
 scripts/checkpatch.pl | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 3d40403..19f3e26 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -1075,7 +1075,11 @@ sub parse_email {
} elsif ($formatted_email =~ /(\S+\@\S+)(.*)$/) {
$address = $1;
$comment = $2 if defined $2;
-   $formatted_email =~ s/$address.*$//;
+   if ($address =~ /^syzbot.*\@syzkaller\.appspotmail\.com$/) {
+   $formatted_email = "";
+   } else {
+   $formatted_email =~ s/^($address).*$//;
+   }
$name = $formatted_email;
$name = trim($name);
$name =~ s/^\"|\"$//g;
-- 
1.7.12.4



RE: [PATCH v4 4/6] vfio/type1: check dma map request is within a valid iova range

2018-02-28 Thread Shameerali Kolothum Thodi


> -Original Message-
> From: Auger Eric [mailto:eric.au...@redhat.com]
> Sent: Wednesday, February 28, 2018 9:02 AM
> To: Shameerali Kolothum Thodi ;
> Alex Williamson 
> Cc: pmo...@linux.vnet.ibm.com; k...@vger.kernel.org; linux-
> ker...@vger.kernel.org; Linuxarm ; John Garry
> ; xuwei (O) ; Robin Murphy
> 
> Subject: Re: [PATCH v4 4/6] vfio/type1: check dma map request is within a 
> valid
> iova range
> 
> Hi Shameer,
> 
> On 27/02/18 10:57, Shameerali Kolothum Thodi wrote:
> >
> >
> >> -Original Message-
> >> From: Auger Eric [mailto:eric.au...@redhat.com]
> >> Sent: Tuesday, February 27, 2018 8:27 AM
> >> To: Alex Williamson 
> >> Cc: Shameerali Kolothum Thodi ;
> >> pmo...@linux.vnet.ibm.com; k...@vger.kernel.org; linux-
> >> ker...@vger.kernel.org; Linuxarm ; John Garry
> >> ; xuwei (O) ; Robin
> Murphy
> >> 
> >> Subject: Re: [PATCH v4 4/6] vfio/type1: check dma map request is within a
> valid
> >> iova range
> >>
> >> Hi,
> >> On 27/02/18 00:13, Alex Williamson wrote:
> >>> On Mon, 26 Feb 2018 23:05:43 +0100
> >>> Auger Eric  wrote:
> >>>
>  Hi Shameer,
> 
>  [Adding Robin in CC]
>  On 21/02/18 13:22, Shameer Kolothum wrote:
> > This checks and rejects any dma map request outside valid iova
> > range.
> >
> > Signed-off-by: Shameer Kolothum
> >> 
> > ---
> >  drivers/vfio/vfio_iommu_type1.c | 22 ++
> >  1 file changed, 22 insertions(+)
> >
> > diff --git a/drivers/vfio/vfio_iommu_type1.c
> >> b/drivers/vfio/vfio_iommu_type1.c
> > index a80884e..3049393 100644
> > --- a/drivers/vfio/vfio_iommu_type1.c
> > +++ b/drivers/vfio/vfio_iommu_type1.c
> > @@ -970,6 +970,23 @@ static int vfio_pin_map_dma(struct vfio_iommu
> >> *iommu, struct vfio_dma *dma,
> > return ret;
> >  }
> >
> > +/*
> > + * Check dma map request is within a valid iova range
> > + */
> > +static bool vfio_iommu_iova_dma_valid(struct vfio_iommu *iommu,
> > +   dma_addr_t start, dma_addr_t end)
> > +{
> > > +   struct list_head *iova = &iommu->iova_list;
> > +   struct vfio_iova *node;
> > +
> > +   list_for_each_entry(node, iova, list) {
> > +   if ((start >= node->start) && (end <= node->end))
> > +   return true;
>  I am now confused by the fact this change will prevent existing QEMU
>  from working with this series on some platforms. For instance QEMU virt
>  machine GPA space collides with Seattle PCI host bridge windows. On
> ARM
>  the smmu and smmuv3 drivers report the PCI host bridge windows as
>  reserved regions which does not seem to be the case on other platforms.
>  The change happened in commit
> >> 273df9635385b2156851c7ee49f40658d7bcb29d
>  (iommu/dma: Make PCI window reservation generic).
> 
>  For background, we already discussed the topic after LPC 2016. See
>  https://www.spinics.net/lists/kernel/msg2379607.html.
> 
>  So is it the right choice to expose PCI host bridge windows as reserved
>  regions? If yes shouldn't we make a difference between those and MSI
>  windows in this series and do not reject any user space DMA_MAP
> attempt
>  within PCI host bridge windows.
> >>>
> >>> If the QEMU machine GPA collides with a reserved region today, then
> >>> either:
> >>>
> >>> a) The mapping through the IOMMU works and the reserved region is
> wrong
> >>>
> >>> or
> >>>
> >>> b) The mapping doesn't actually work, QEMU is at risk of data loss by
> >>> being told that it worked, and we're justified in changing that
> >>> behavior.
> >>>
> >>> Without knowing the specifics of SMMU, it doesn't particularly make
> >>> sense to me to mark the entire PCI hierarchy MMIO range as reserved,
> >>> unless perhaps the IOMMU is incapable of translating those IOVAs.
> >> to me the limitation does not come from the smmu itself, which is a
> >> separate HW block sitting between the root complex and the interconnect.
> >> If ACS is not enforced by the PCIe subsystem, the transaction will never
> >> reach the IOMMU.
> >
> > True. And we do have one such platform where ACS is not enforced but
> > reserving the regions and possibly creating holes while launching VM will
> > make it secure. But I do wonder how we will solve the device grouping
> > in such cases.
> >
> > The Seattle PCI host bridge windows case you mentioned has any pci quirk
> > to claim that they support ACS?
> No there is none to my knowledge. I am applying Alex' not upstream ACS
> overwrite patch.

Ok. But isn't that 

Re: [PATCH 1/5 v2] f2fs: add mount option for segment allocation policy

2018-02-28 Thread Chao Yu
On 2018/2/28 13:09, Jaegeuk Kim wrote:
> Change log from v1:
>  - add doc :)
> 
> This patch adds a mount option, "alloc_mode=%s" having two options, "default"
> and "reuse".
> 
> In "alloc_mode=reuse" case, f2fs starts to allocate segments from 0'th segment
> all the time to reassign segments. It'd be useful for small-sized eMMC parts.
> 
> Signed-off-by: Jaegeuk Kim 

Reviewed-by: Chao Yu 

Thanks,



Re: [PATCH] clk: tegra: fix pllu rate configuration

2018-02-28 Thread Peter De Schrijver
On Tue, Feb 27, 2018 at 02:59:11PM +0300, Dmitry Osipenko wrote:
> On 27.02.2018 02:04, Marcel Ziswiler wrote:
> > On Mon, 2018-02-26 at 15:42 +0300, Dmitry Osipenko wrote:
> >> On 23.02.2018 02:04, Marcel Ziswiler wrote:
> >>> Turns out latest upstream U-Boot does not configure/enable pllu
> >>> which
> >>> leaves it at some default rate of 500 kHz:
> >>>
> >>> root@apalis-t30:~# cat /sys/kernel/debug/clk/clk_summary | grep
> >>> pll_u
> >>>pll_u  330  50  
> >>> 0
> >>>
> >>> Of course this won't quite work leading to the following messages:
> >>>
> >>> [6.559593] usb 2-1: new full-speed USB device number 2 using
> >>> tegra-
> >>> ehci
> >>> [   11.759173] usb 2-1: device descriptor read/64, error -110
> >>> [   27.119453] usb 2-1: device descriptor read/64, error -110
> >>> [   27.389217] usb 2-1: new full-speed USB device number 3 using
> >>> tegra-
> >>> ehci
> >>> [   32.559454] usb 2-1: device descriptor read/64, error -110
> >>> [   47.929777] usb 2-1: device descriptor read/64, error -110
> >>> [   48.049658] usb usb2-port1: attempt power cycle
> >>> [   48.759475] usb 2-1: new full-speed USB device number 4 using
> >>> tegra-
> >>> ehci
> >>> [   59.349457] usb 2-1: device not accepting address 4, error -110
> >>> [   59.509449] usb 2-1: new full-speed USB device number 5 using
> >>> tegra-
> >>> ehci
> >>> [   70.069457] usb 2-1: device not accepting address 5, error -110
> >>> [   70.079721] usb usb2-port1: unable to enumerate USB device
> >>>
> >>> Fix this by actually allowing the rate also being set from within
> >>> the Linux kernel.

I think the best solution to this problem would be to make pll_u a fixed
clock and enable it and program the rate if it's not enabled at boot.
This is how it's done for Tegra210. The reason is that the USB IP blocks
can control the pll_u state in hw. This means that if sw would disable
and then re-enable the pll_u clock, but there is no USB activity, pll_u
will still be disabled and therefore not lock, causing an error. Today
this is worked around by not polling the lock bit for pll_u, but a better
solution would be to just remove all sw controls for pll_u.

Peter.


Re: [PATCH v2 08/15] KVM: s390: interface to enable AP execution mode

2018-02-28 Thread David Hildenbrand
On 27.02.2018 15:28, Tony Krowiak wrote:
> Introduces a new interface to enable AP interpretive
> execution (IE) mode for the KVM guest. When running
> with IE mode enabled, AP instructions executed on the
> KVM guest will be interpreted by the firmware and
> passed directly through to an AP device installed on
> the system. The CPU model feature for AP must
> be enabled for the KVM guest in order to enable
> interpretive execution mode.
> 
> This interface will be used in a subsequent patch
> by the VFIO AP device driver.
> 
> Signed-off-by: Tony Krowiak 
> ---
>  arch/s390/include/asm/kvm-ap.h   |2 ++
>  arch/s390/include/asm/kvm_host.h |1 +
>  arch/s390/kvm/kvm-ap.c   |   27 +++
>  arch/s390/kvm/kvm-s390.h |1 +
>  4 files changed, 31 insertions(+), 0 deletions(-)
> 
> diff --git a/arch/s390/include/asm/kvm-ap.h b/arch/s390/include/asm/kvm-ap.h
> index 46e7c5b..6bd6bfb 100644
> --- a/arch/s390/include/asm/kvm-ap.h
> +++ b/arch/s390/include/asm/kvm-ap.h
> @@ -51,4 +51,6 @@ struct kvm_ap_matrix {
>  
>  void kvm_ap_deconfigure_matrix(struct kvm *kvm);
>  
> +int kvm_ap_enable_ie_mode(struct kvm *kvm);
> +
>  #endif /* _ASM_KVM_AP */
> diff --git a/arch/s390/include/asm/kvm_host.h 
> b/arch/s390/include/asm/kvm_host.h
> index a4c77d3..1eebdd6 100644
> --- a/arch/s390/include/asm/kvm_host.h
> +++ b/arch/s390/include/asm/kvm_host.h
> @@ -186,6 +186,7 @@ struct kvm_s390_sie_block {
>  #define ECA_AIV  0x0020
>  #define ECA_VX   0x0002
>  #define ECA_PROTEXCI 0x2000
> +#define ECA_APIE 0x0008
>  #define ECA_SII  0x0001
>   __u32   eca;/* 0x004c */
>  #define ICPT_INST0x04
> diff --git a/arch/s390/kvm/kvm-ap.c b/arch/s390/kvm/kvm-ap.c
> index bb29045..862e54b 100644
> --- a/arch/s390/kvm/kvm-ap.c
> +++ b/arch/s390/kvm/kvm-ap.c
> @@ -307,3 +307,30 @@ void kvm_ap_deconfigure_matrix(struct kvm *kvm)
>   kvm_ap_clear_crycb_masks(kvm);
>  }
>  EXPORT_SYMBOL(kvm_ap_deconfigure_matrix);
> +
> +/**
> + * kvm_ap_enable_ie_mode
> + *
> + * Enable interpretive execution of AP instructions for the guest. When
> + * enabled, AP instructions executed on the guest will be interpreted and
> + * passed through to an AP installed on the host system.
> + *
> + * Returns 0 if interpretive execution is enabled. Returns -EOPNOTSUPP
> + * if AP facilities are not installed for the guest.
> + *
> + * @kvm: the guest's kvm structure
> + */
> +int kvm_ap_enable_ie_mode(struct kvm *kvm)
> +{
> + int i;
> + struct kvm_vcpu *vcpu;
> +
> + if (!test_kvm_cpu_feat(kvm, KVM_S390_VM_CPU_FEAT_AP))
> + return -EOPNOTSUPP;
> +
> + kvm_for_each_vcpu(i, vcpu, kvm)
> + vcpu->arch.sie_block->eca |= ECA_APIE;
> +
> + return 0;
> +}
> +EXPORT_SYMBOL(kvm_ap_enable_ie_mode);
> diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
> index 1b5621f..3142541 100644
> --- a/arch/s390/kvm/kvm-s390.h
> +++ b/arch/s390/kvm/kvm-s390.h
> @@ -18,6 +18,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  /* Transactional Memory Execution related macros */
>  #define IS_TE_ENABLED(vcpu)  ((vcpu->arch.sie_block->ecb & ECB_TE))
> 

What about VSIE?

-- 

Thanks,

David / dhildenb


[PATCH v3 1/1] x86: Fix Intel Edison module stuck reboot

2018-02-28 Thread Sebastian Panceac
When powering Intel Edison module with 3.3V, the "reboot" command makes
the module stuck.
If the module is powered at a greater voltage, like 4.4V (as the Edison
Mini Breakout board does), reboot works OK.

This patch copies the behaviour from the official Intel Edison BSP,
where "IPCMSG_COLD_RESET" message was sent to SCU by default at
reboot time.

In the official BSP, sending the "IPCMSG_COLD_BOOT" message instead of
"IPCMSG_COLD_RESET" was possible, by using a kernel command
line parameter.

Signed-off-by: Sebastian Panceac 
---

Hi,

I have resubmitted the patch, taking Andy's suggestions into consideration.

Regards!

 arch/x86/platform/intel-mid/intel-mid.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/platform/intel-mid/intel-mid.c 
b/arch/x86/platform/intel-mid/intel-mid.c
index 2c67bae..fb1df94 100644
--- a/arch/x86/platform/intel-mid/intel-mid.c
+++ b/arch/x86/platform/intel-mid/intel-mid.c
@@ -79,7 +79,7 @@ static void intel_mid_power_off(void)
 
 static void intel_mid_reboot(void)
 {
-   intel_scu_ipc_simple_command(IPCMSG_COLD_BOOT, 0);
+   intel_scu_ipc_simple_command(IPCMSG_COLD_RESET, 0);
 }
 
 static unsigned long __init intel_mid_calibrate_tsc(void)
-- 
2.7.4


Re: linux-next: manual merge of the net-next tree with the net tree

2018-02-28 Thread Petr Machata
Stephen Rothwell  writes:

> Today's linux-next merge of the net-next tree got a conflict in:
>
>   net/ipv4/ip_tunnel.c
>
> between commit:
>
>   4e994776e7bd ("ip_tunnel: Do not use mark in skb by default")
>
> from the net tree and commit:
>
>   b0066da52ea5 ("ip_tunnel: Rename & publish init_tunnel_flow")
>
> from the net-next tree.
>
> I fixed it up (see below) and can carry the fix as necessary.

Looks good, thanks!

Petr


Re: [PATCH v2 04/15] KVM: s390: CPU model support for AP virtualization

2018-02-28 Thread David Hildenbrand
On 27.02.2018 15:28, Tony Krowiak wrote:
> Introduces a new CPU model feature and two CPU model
> facilities to support AP virtualization for KVM guests.
> 
> CPU model feature:
> 
> The KVM_S390_VM_CPU_FEAT_AP feature indicates that the
> AP facilities are installed on the KVM guest. This
> feature will be enabled by the kernel only if the AP
> facilities are installed on the linux host. This feature
> must be specifically turned on for the KVM guest from
> userspace to allow guest access to AP devices installed
> on the linux host.
> 
> CPU model facilities:
> 
> 1. AP Query Configuration Information (QCI) facility is installed.
> 
>This is indicated by setting facilities bit 12 for
>the guest. The kernel will not enable this facility
>for the guest if it is not set on the host. This facility
>must not be set by userspace if the KVM_S390_VM_CPU_FEAT_AP
>feature is not installed.
> 
> 2. AP Facilities Test facility (APFT) is installed.
> 
>This is indicated by setting facilities bit 15 for
>the guest. The kernel will not enable this facility for
>the guest if it is not set on the host. This facility
>must not be set by userspace if the KVM_S390_VM_CPU_FEAT_AP
>feature is not installed.
> 
> Reviewed-by: Christian Borntraeger 
> Reviewed-by: Halil Pasic 
> Signed-off-by: Tony Krowiak 
> ---
>  arch/s390/include/uapi/asm/kvm.h |1 +
>  arch/s390/kvm/kvm-s390.c |4 
>  arch/s390/tools/gen_facilities.c |2 ++
>  3 files changed, 7 insertions(+), 0 deletions(-)
> 
> diff --git a/arch/s390/include/uapi/asm/kvm.h 
> b/arch/s390/include/uapi/asm/kvm.h
> index 4cdaa55..a580dec 100644
> --- a/arch/s390/include/uapi/asm/kvm.h
> +++ b/arch/s390/include/uapi/asm/kvm.h
> @@ -130,6 +130,7 @@ struct kvm_s390_vm_cpu_machine {
>  #define KVM_S390_VM_CPU_FEAT_PFMFI   11
>  #define KVM_S390_VM_CPU_FEAT_SIGPIF  12
>  #define KVM_S390_VM_CPU_FEAT_KSS 13
> +#define KVM_S390_VM_CPU_FEAT_AP  14
>  struct kvm_s390_vm_cpu_feat {
>   __u64 feat[16];
>  };
> diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
> index de1e299..c68ca86 100644
> --- a/arch/s390/kvm/kvm-s390.c
> +++ b/arch/s390/kvm/kvm-s390.c
> @@ -347,6 +347,10 @@ static void kvm_s390_cpu_feat_init(void)
>  
>   if (MACHINE_HAS_ESOP)
>   allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
> +
> + if (ap_instructions_installed()) /* AP instructions installed on host */
> + allow_cpu_feat(KVM_S390_VM_CPU_FEAT_AP);

Don't we have a SIE specific AP feature? So is it true, that once we
have AP instructions, we are allowed to use them for SIE? Isn't there a
"AP interpretation facility" or anything like that? (that unlocks ECA_APIE)

> +
>   /*
>* We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
>* 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
> diff --git a/arch/s390/tools/gen_facilities.c 
> b/arch/s390/tools/gen_facilities.c
> index 90a8c9e..a52290b 100644
> --- a/arch/s390/tools/gen_facilities.c
> +++ b/arch/s390/tools/gen_facilities.c
> @@ -106,6 +106,8 @@ struct facility_def {
>  
>   .name = "FACILITIES_KVM_CPUMODEL",
>   .bits = (int[]){
> + 12, /* AP Query Configuration Information */
> + 15, /* AP Facilities Test */
>   -1  /* END */
>   }
>   },
> 

So only if this feature is enabled, we later on allow to
kvm_ap_enable_ie_mode, right?

So basically, without this feature:

1. We will never set the execution control ECA_APIE.
2. The masks will always be 0.

Which also results in VSIE never having masks set.


-- 

Thanks,

David / dhildenb


Re: [PATCH] mei: remove dev_err message on an unsupported ioctl

2018-02-28 Thread Greg Kroah-Hartman
On Tue, Feb 27, 2018 at 09:44:41PM +, Winkler, Tomas wrote:
> > 
> > On Tue, Feb 27, 2018 at 05:26:22PM +, Winkler, Tomas wrote:
> > > >
> > > > From: Colin Ian King 
> > > >
> > > > Currently the driver spams the kernel log on unsupported ioctls
> > > > which is unnecessary as the ioctl returns -ENOIOCTLCMD to indicate this
> > anyway.
> > > > I suspect this was originally for debugging purposes but it really
> > > > is not required so remove it.
> > > >
> > >
> > > > This is rather strange as most of the legitimate applications are using
> > > > one IOCTL from kernel 3.0. Do you have any reference where this call is
> > > originated from? Frankly, this is the first time I got such a complaint.
> > >
> > > > In any case, it might be better to change it to a warn-once call.
> > 
> > Just drop the message, it should not be this easy for userspace to spam the
> > kernel log.  The patch looks fine to me.
> > 
> Fair enough, add my ack too.
> 
> Still I'm curious if this comes from the field.

fuzzers are "in the field" :)


Re: [PATCH v2 13/15] KVM: s390: Configure the guest's CRYCB

2018-02-28 Thread David Hildenbrand

> +static int vfio_ap_mdev_open(struct mdev_device *mdev)
> +{
> + struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
> + unsigned long events;
> + int ret;
> +
> + matrix_mdev->group_notifier.notifier_call = vfio_ap_mdev_group_notifier;
> + events = VFIO_GROUP_NOTIFY_SET_KVM;
> + ret = vfio_register_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY,
> +  &events, &matrix_mdev->group_notifier);
> +
> + ret = kvm_ap_configure_matrix(matrix_mdev->kvm,
> +   matrix_mdev->matrix);
> + if (ret)
> + return ret;
> +
> + ret = kvm_ap_enable_ie_mode(matrix_mdev->kvm);

Can't this happen while the guest is already running? Or what hinders us
from doing that?

> +
> + return ret;
> +}
> +
> +static void vfio_ap_mdev_release(struct mdev_device *mdev)

Thanks,

David / dhildenb


[PATCH] net: iucv: Free memory obtained by kzalloc

2018-02-28 Thread Arvind Yadav
Free the memory if afiucv_iucv_init is not successful, and also when
removing the IUCV driver.

Signed-off-by: Arvind Yadav 
---
 net/iucv/af_iucv.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 1e8cc7b..eb0995a 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -2433,9 +2433,11 @@ static int afiucv_iucv_init(void)
af_iucv_dev->driver = &af_iucv_driver;
err = device_register(af_iucv_dev);
if (err)
-   goto out_driver;
+   goto out_iucv_dev;
return 0;
 
+out_iucv_dev:
+   kfree(af_iucv_dev);
 out_driver:
driver_unregister(&af_iucv_driver);
 out_iucv:
@@ -2496,6 +2498,7 @@ static void __exit afiucv_exit(void)
 {
if (pr_iucv) {
device_unregister(af_iucv_dev);
+   kfree(af_iucv_dev);
driver_unregister(&af_iucv_driver);
pr_iucv->iucv_unregister(&af_iucv_handler, 0);
symbol_put(iucv_if);
-- 
1.9.1



Re: HRTimer causing rtctest to fail

2018-02-28 Thread Thomas Gleixner
On Wed, 28 Feb 2018, Felipe Balbi wrote:
> Thomas Gleixner  writes:
> > Enable the hrtimer and scheduling tracepoints. That should give you a hint
> > what's going on.
> 
> Thanks, that does give me a lot more information. So here you go:
> 
>  rtctest-1348  [003] d..2   313.766141: hrtimer_start: 
> hrtimer=667ce595 function=rtc_pie_update_irq expires=313573983010 
> softexpires=313573983010 mode=REL
>   -0 [003] d.h1   313.767189: hrtimer_expire_entry: 
> hrtimer=667ce595 function=rtc_pie_update_irq now=313574053764
> 
> We still have a 70754 nS deviation. After changing to absolute time,
> the deviation remains:
>
>   -0 [000] dNh229.303251: hrtimer_start: 
> hrtimer=6858b496 function=rtc_pie_update_irq expires=28765551360 
> softexpires=28765551360 mode=ABS
>   -0 [000] d.h129.303565: hrtimer_expire_entry: 
> hrtimer=6858b496 function=rtc_pie_update_irq now=28765621916

Changing REL/ABS in the kernel does not make a difference because periodic
mode just forwards by period so even if the first timer is started with REL
it results in an absolute timeline. What I meant is the user space
measurement as it cannot figure out when the first event was supposed to
happen so it's hard to calculate latency information.

The interesting information is that the timer fires late and the system is
idle. Now the question is in which idle state did the machine go?

Wake up from deeper C-states can be slow. On my laptop the wakeup latencies
are:

POLL   0
C1 2
C1E   10
C333
C6   133
C7S  166
C8   300
C9   600
C10 2600

All numbers in microseconds! What happens if you load the system or
restrict C-States?

Thanks,

tglx



<    4   5   6   7   8   9   10   11   12   13   >