[RFC PATCH] usb: host: xhci: plat: add support for otg_set_host() call

2016-12-14 Thread Manish Narani
This patch will add support for OTG host initialization. This will
help OTG drivers to populate their host subsystem.

Signed-off-by: Manish Narani 
---
 drivers/usb/host/xhci-plat.c | 39 +++
 1 file changed, 39 insertions(+)

diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c
index ddfab30..aa08bdd 100644
--- a/drivers/usb/host/xhci-plat.c
+++ b/drivers/usb/host/xhci-plat.c
@@ -19,6 +19,7 @@
 #include 
 #include 
 #include 
+#include <linux/usb/otg.h>
 
 #include "xhci.h"
 #include "xhci-plat.h"
@@ -144,6 +145,37 @@ static const struct of_device_id usb_xhci_of_match[] = {
 MODULE_DEVICE_TABLE(of, usb_xhci_of_match);
 #endif
 
+static int usb_otg_set_host(struct device *dev, struct usb_hcd *hcd, bool yes)
+{
+   int ret = 0;
+
+   hcd->usb_phy = usb_get_phy(USB_PHY_TYPE_USB3);
+   if (!IS_ERR_OR_NULL(hcd->usb_phy) && hcd->usb_phy->otg) {
+   dev_dbg(dev, "%s otg support available\n", __func__);
+   if (yes) {
+   if (otg_set_host(hcd->usb_phy->otg, &hcd->self)) {
+   dev_err(dev, "%s otg_set_host failed\n",
+   __func__);
+   usb_put_phy(hcd->usb_phy);
+   goto disable_phy;
+   }
+   } else {
+   ret = otg_set_host(hcd->usb_phy->otg, NULL);
+   usb_put_phy(hcd->usb_phy);
+   goto disable_phy;
+   }
+
+   } else
+   goto disable_phy;
+
+   return 0;
+
+disable_phy:
+   hcd->usb_phy = NULL;
+
+   return ret;
+}
+
 static int xhci_plat_probe(struct platform_device *pdev)
 {
const struct of_device_id *match;
@@ -255,6 +287,11 @@ static int xhci_plat_probe(struct platform_device *pdev)
if (ret)
goto dealloc_usb2_hcd;
 
+   ret = usb_otg_set_host(&pdev->dev, hcd, 1);
+   if (ret)
+   goto dealloc_usb2_hcd;
+
+
return 0;
 
 
@@ -283,6 +320,8 @@ static int xhci_plat_remove(struct platform_device *dev)
struct xhci_hcd *xhci = hcd_to_xhci(hcd);
struct clk *clk = xhci->clk;
 
+   usb_otg_set_host(&dev->dev, hcd, 0);
+
usb_remove_hcd(xhci->shared_hcd);
usb_phy_shutdown(hcd->usb_phy);
 
-- 
2.1.1



[RFC PATCH] usb: host: xhci: plat: add support for otg_set_host() call

2016-12-14 Thread Manish Narani
This patch will add support for OTG host initialization. This will
help OTG drivers to populate their host subsystem.

Signed-off-by: Manish Narani 
---
 drivers/usb/host/xhci-plat.c | 39 +++
 1 file changed, 39 insertions(+)

diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c
index ddfab30..aa08bdd 100644
--- a/drivers/usb/host/xhci-plat.c
+++ b/drivers/usb/host/xhci-plat.c
@@ -19,6 +19,7 @@
 #include 
 #include 
 #include 
+#include <linux/usb/otg.h>
 
 #include "xhci.h"
 #include "xhci-plat.h"
@@ -144,6 +145,37 @@ static const struct of_device_id usb_xhci_of_match[] = {
 MODULE_DEVICE_TABLE(of, usb_xhci_of_match);
 #endif
 
+static int usb_otg_set_host(struct device *dev, struct usb_hcd *hcd, bool yes)
+{
+   int ret = 0;
+
+   hcd->usb_phy = usb_get_phy(USB_PHY_TYPE_USB3);
+   if (!IS_ERR_OR_NULL(hcd->usb_phy) && hcd->usb_phy->otg) {
+   dev_dbg(dev, "%s otg support available\n", __func__);
+   if (yes) {
+   if (otg_set_host(hcd->usb_phy->otg, &hcd->self)) {
+   dev_err(dev, "%s otg_set_host failed\n",
+   __func__);
+   usb_put_phy(hcd->usb_phy);
+   goto disable_phy;
+   }
+   } else {
+   ret = otg_set_host(hcd->usb_phy->otg, NULL);
+   usb_put_phy(hcd->usb_phy);
+   goto disable_phy;
+   }
+
+   } else
+   goto disable_phy;
+
+   return 0;
+
+disable_phy:
+   hcd->usb_phy = NULL;
+
+   return ret;
+}
+
 static int xhci_plat_probe(struct platform_device *pdev)
 {
const struct of_device_id *match;
@@ -255,6 +287,11 @@ static int xhci_plat_probe(struct platform_device *pdev)
if (ret)
goto dealloc_usb2_hcd;
 
+   ret = usb_otg_set_host(&pdev->dev, hcd, 1);
+   if (ret)
+   goto dealloc_usb2_hcd;
+
+
return 0;
 
 
@@ -283,6 +320,8 @@ static int xhci_plat_remove(struct platform_device *dev)
struct xhci_hcd *xhci = hcd_to_xhci(hcd);
struct clk *clk = xhci->clk;
 
+   usb_otg_set_host(&dev->dev, hcd, 0);
+
usb_remove_hcd(xhci->shared_hcd);
usb_phy_shutdown(hcd->usb_phy);
 
-- 
2.1.1



Re: [PATCH 2/3] perf/x86/pebs: add workaround for broken OVFL status on HSW

2016-12-14 Thread Jiri Olsa
On Wed, Dec 14, 2016 at 11:26:49PM -0800, Stephane Eranian wrote:
> On Wed, Dec 14, 2016 at 9:55 AM, Peter Zijlstra  wrote:
> >
> > Just spotted this again, ping?
> >
> Ok, on what processor running what command, so I can try and reproduce?

for me it's snb_x (model 45) and peter's ivb-ep model 62

after several hours of fuzzer test, log below.. I'll try again with the change

jirka


---
[14404.947844] perfevents: irq loop stuck!
[14404.952560] [ cut here ]
[14404.957720] WARNING: CPU: 0 PID: 0 at arch/x86/events/intel/core.c:2093 
intel_pmu_handle_irq+0x2f8/0x4c0
[14404.968305] Modules linked in:\x01c intel_rapl\x01c sb_edac\x01c 
edac_core\x01c x86_pkg_temp_thermal\x01c intel_powerclamp\x01c coretemp
\x01c ipmi_devintf\x01c crct10dif_pclmul\x01c crc32_pclmul\x01c iTCO_wdt\x01c 
iTCO_vendor_support\x01c ghash_clmulni_intel\x01c pcspkr\x01c
 ipmi_ssif\x01c tpm_tis\x01c i2c_i801\x01c tpm_tis_core\x01c ipmi_si\x01c 
tpm\x01c i2c_smbus\x01c ipmi_msghandler\x01c cdc_ether\x01c usbne
t\x01c mii\x01c shpchp\x01c ioatdma\x01c wmi\x01c lpc_ich\x01c xfs\x01c 
libcrc32c\x01c mgag200\x01c drm_kms_helper\x01c ttm\x01c drm\x01c i
gb\x01c ptp\x01c crc32c_intel\x01c pps_core\x01c dca\x01c i2c_algo_bit\x01c 
megaraid_sas\x01c fjes\x01c
[14405.019901] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.9.0-rc8+ #51
[14405.026985] Hardware name: IBM System x3650 M4 : -[7915E2G]-/00Y7683, BIOS 
-[VVE124AUS-1.30]- 11/21/2012
[14405.037568]  880277a05b08\x01c 81463243\x01c 
880277a05b58\x01c \x01c
[14405.046601]  880277a05b48\x01c 810b698b\x01c 
082d81133a1d\x01c 0064\x01c
[14405.055634]  880277a0a380\x01c 880276208800\x01c 
0040\x01c 880277a0a580\x01c
[14405.064665] Call Trace:
[14405.067394][] dump_stack+0x86/0xc3
[14405.073807]  [] __warn+0xcb/0xf0
[14405.079156]  [] warn_slowpath_fmt+0x5f/0x80
[14405.085569]  [] ? warn_slowpath_fmt+0x5/0x80
[14405.092081]  [] intel_pmu_handle_irq+0x2f8/0x4c0
[14405.098971]  [] ? perf_event_nmi_handler+0x2c/0x50
[14405.106065]  [] ? intel_pmu_save_and_restart+0x50/0x50
[14405.113547]  [] ? nmi_raise_cpu_backtrace+0x20/0x20
[14405.120737]  [] ? ftrace_ops_test.isra.23+0x65/0xa0
[14405.127917]  [] ? bsearch+0x5e/0x90
[14405.133556]  [] ? __add_hash_entry+0x50/0x50
[14405.140066]  [] ? bsearch+0x5e/0x90
[14405.145704]  [] ? __add_hash_entry+0x50/0x50
[14405.152214]  [] ? nmi_raise_cpu_backtrace+0x20/0x20
[14405.159403]  [] ? nmi_raise_cpu_backtrace+0x20/0x20
[14405.166594]  [] ? debug_lockdep_rcu_enabled+0x1d/0x20
[14405.173979]  [] ? ftrace_ops_list_func+0xce/0x1d0
[14405.180974]  [] ? ftrace_call+0x5/0x34
[14405.186904]  [] ? ftrace_call+0x5/0x34
[14405.192824]  [] ? printk_nmi_enter+0x20/0x20
[14405.199337]  [] ? intel_pmu_handle_irq+0x5/0x4c0
[14405.206235]  [] ? perf_event_nmi_handler+0x5/0x50
[14405.213231]  [] perf_event_nmi_handler+0x2c/0x50
[14405.220121]  [] nmi_handle+0xbd/0x2e0
[14405.225954]  [] ? nmi_handle+0x5/0x2e0
[14405.231875]  [] ? nmi_handle+0x5/0x2e0
[14405.237804]  [] default_do_nmi+0x53/0x100
[14405.244025]  [] do_nmi+0x11f/0x170
[14405.249557]  [] end_repeat_nmi+0x1a/0x1e
[14405.255680]  [] ? native_write_msr+0x6/0x30
[14405.262093]  [] ? native_write_msr+0x6/0x30
[14405.268507]  [] ? native_write_msr+0x6/0x30
[14405.274914]  [] ? 
intel_pmu_pebs_enable_all+0x34/0x40
[14405.283656]  [] 
__intel_pmu_enable_all.constprop.17+0x23/0xa0
[14405.291815]  [] intel_pmu_enable_all+0x10/0x20
[14405.298520]  [] x86_pmu_enable+0x256/0x2e0
[14405.304836]  [] perf_pmu_enable.part.86+0x7/0x10
[14405.311736]  [] perf_mux_hrtimer_handler+0x22e/0x2c0
[14405.319014]  [] __hrtimer_run_queues+0xfb/0x510
[14405.325808]  [] ? ctx_resched+0x90/0x90
[14405.331834]  [] hrtimer_interrupt+0x9d/0x1a0
[14405.338343]  [] local_apic_timer_interrupt+0x38/0x60
[14405.345629]  [] smp_trace_apic_timer_interrupt+0x5b/0x25f
[14405.353402]  [] trace_apic_timer_interrupt+0x96/0xa0
[14405.360689][] ? cpuidle_enter_state+0x124/0x380
[14405.368354]  [] ? cpuidle_enter_state+0x120/0x380
[14405.375349]  [] cpuidle_enter+0x17/0x20
[14405.381375]  [] call_cpuidle+0x23/0x40
[14405.387303]  [] cpu_startup_entry+0x160/0x250
[14405.393910]  [] rest_init+0x135/0x140
[14405.399743]  [] start_kernel+0x45e/0x47f
[14405.405866]  [] ? early_idt_handler_array+0x120/0x120
[14405.413250]  [] x86_64_start_reservations+0x2a/0x2c
[14405.420432]  [] x86_64_start_kernel+0x14c/0x16f
[14405.427224] ---[ end trace 62b08c15aaa2825d ]---
[14405.432378] 
[14405.434043] CPU#0: ctrl:   
[14405.439099] CPU#0: status: 0008
[14405.444157] CPU#0: overflow:   
[14405.449214] CPU#0: fixed:  00b0
[14405.454271] CPU#0: pebs:   
[14405.459326] CPU#0: debugctl:   
[14405.464383] CPU#0: active: 0002000f
[14405.469431] CPU#0:   gen-PMC0 ctrl:  01d301b1
[14405.475069] CPU#0:   gen-PMC0 count: 800090b1c37e

Re: [PATCH 2/3] perf/x86/pebs: add workaround for broken OVFL status on HSW

2016-12-14 Thread Jiri Olsa
On Wed, Dec 14, 2016 at 11:26:49PM -0800, Stephane Eranian wrote:
> On Wed, Dec 14, 2016 at 9:55 AM, Peter Zijlstra  wrote:
> >
> > Just spotted this again, ping?
> >
> Ok, on what processor running what command, so I can try and reproduce?

for me it's snb_x (model 45) and peter's ivb-ep model 62

after several hours of fuzzer test, log below.. I'll try again with the change

jirka


---
[14404.947844] perfevents: irq loop stuck!
[14404.952560] [ cut here ]
[14404.957720] WARNING: CPU: 0 PID: 0 at arch/x86/events/intel/core.c:2093 
intel_pmu_handle_irq+0x2f8/0x4c0
[14404.968305] Modules linked in:\x01c intel_rapl\x01c sb_edac\x01c 
edac_core\x01c x86_pkg_temp_thermal\x01c intel_powerclamp\x01c coretemp
\x01c ipmi_devintf\x01c crct10dif_pclmul\x01c crc32_pclmul\x01c iTCO_wdt\x01c 
iTCO_vendor_support\x01c ghash_clmulni_intel\x01c pcspkr\x01c
 ipmi_ssif\x01c tpm_tis\x01c i2c_i801\x01c tpm_tis_core\x01c ipmi_si\x01c 
tpm\x01c i2c_smbus\x01c ipmi_msghandler\x01c cdc_ether\x01c usbne
t\x01c mii\x01c shpchp\x01c ioatdma\x01c wmi\x01c lpc_ich\x01c xfs\x01c 
libcrc32c\x01c mgag200\x01c drm_kms_helper\x01c ttm\x01c drm\x01c i
gb\x01c ptp\x01c crc32c_intel\x01c pps_core\x01c dca\x01c i2c_algo_bit\x01c 
megaraid_sas\x01c fjes\x01c
[14405.019901] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.9.0-rc8+ #51
[14405.026985] Hardware name: IBM System x3650 M4 : -[7915E2G]-/00Y7683, BIOS 
-[VVE124AUS-1.30]- 11/21/2012
[14405.037568]  880277a05b08\x01c 81463243\x01c 
880277a05b58\x01c \x01c
[14405.046601]  880277a05b48\x01c 810b698b\x01c 
082d81133a1d\x01c 0064\x01c
[14405.055634]  880277a0a380\x01c 880276208800\x01c 
0040\x01c 880277a0a580\x01c
[14405.064665] Call Trace:
[14405.067394][] dump_stack+0x86/0xc3
[14405.073807]  [] __warn+0xcb/0xf0
[14405.079156]  [] warn_slowpath_fmt+0x5f/0x80
[14405.085569]  [] ? warn_slowpath_fmt+0x5/0x80
[14405.092081]  [] intel_pmu_handle_irq+0x2f8/0x4c0
[14405.098971]  [] ? perf_event_nmi_handler+0x2c/0x50
[14405.106065]  [] ? intel_pmu_save_and_restart+0x50/0x50
[14405.113547]  [] ? nmi_raise_cpu_backtrace+0x20/0x20
[14405.120737]  [] ? ftrace_ops_test.isra.23+0x65/0xa0
[14405.127917]  [] ? bsearch+0x5e/0x90
[14405.133556]  [] ? __add_hash_entry+0x50/0x50
[14405.140066]  [] ? bsearch+0x5e/0x90
[14405.145704]  [] ? __add_hash_entry+0x50/0x50
[14405.152214]  [] ? nmi_raise_cpu_backtrace+0x20/0x20
[14405.159403]  [] ? nmi_raise_cpu_backtrace+0x20/0x20
[14405.166594]  [] ? debug_lockdep_rcu_enabled+0x1d/0x20
[14405.173979]  [] ? ftrace_ops_list_func+0xce/0x1d0
[14405.180974]  [] ? ftrace_call+0x5/0x34
[14405.186904]  [] ? ftrace_call+0x5/0x34
[14405.192824]  [] ? printk_nmi_enter+0x20/0x20
[14405.199337]  [] ? intel_pmu_handle_irq+0x5/0x4c0
[14405.206235]  [] ? perf_event_nmi_handler+0x5/0x50
[14405.213231]  [] perf_event_nmi_handler+0x2c/0x50
[14405.220121]  [] nmi_handle+0xbd/0x2e0
[14405.225954]  [] ? nmi_handle+0x5/0x2e0
[14405.231875]  [] ? nmi_handle+0x5/0x2e0
[14405.237804]  [] default_do_nmi+0x53/0x100
[14405.244025]  [] do_nmi+0x11f/0x170
[14405.249557]  [] end_repeat_nmi+0x1a/0x1e
[14405.255680]  [] ? native_write_msr+0x6/0x30
[14405.262093]  [] ? native_write_msr+0x6/0x30
[14405.268507]  [] ? native_write_msr+0x6/0x30
[14405.274914]  [] ? 
intel_pmu_pebs_enable_all+0x34/0x40
[14405.283656]  [] 
__intel_pmu_enable_all.constprop.17+0x23/0xa0
[14405.291815]  [] intel_pmu_enable_all+0x10/0x20
[14405.298520]  [] x86_pmu_enable+0x256/0x2e0
[14405.304836]  [] perf_pmu_enable.part.86+0x7/0x10
[14405.311736]  [] perf_mux_hrtimer_handler+0x22e/0x2c0
[14405.319014]  [] __hrtimer_run_queues+0xfb/0x510
[14405.325808]  [] ? ctx_resched+0x90/0x90
[14405.331834]  [] hrtimer_interrupt+0x9d/0x1a0
[14405.338343]  [] local_apic_timer_interrupt+0x38/0x60
[14405.345629]  [] smp_trace_apic_timer_interrupt+0x5b/0x25f
[14405.353402]  [] trace_apic_timer_interrupt+0x96/0xa0
[14405.360689][] ? cpuidle_enter_state+0x124/0x380
[14405.368354]  [] ? cpuidle_enter_state+0x120/0x380
[14405.375349]  [] cpuidle_enter+0x17/0x20
[14405.381375]  [] call_cpuidle+0x23/0x40
[14405.387303]  [] cpu_startup_entry+0x160/0x250
[14405.393910]  [] rest_init+0x135/0x140
[14405.399743]  [] start_kernel+0x45e/0x47f
[14405.405866]  [] ? early_idt_handler_array+0x120/0x120
[14405.413250]  [] x86_64_start_reservations+0x2a/0x2c
[14405.420432]  [] x86_64_start_kernel+0x14c/0x16f
[14405.427224] ---[ end trace 62b08c15aaa2825d ]---
[14405.432378] 
[14405.434043] CPU#0: ctrl:   
[14405.439099] CPU#0: status: 0008
[14405.444157] CPU#0: overflow:   
[14405.449214] CPU#0: fixed:  00b0
[14405.454271] CPU#0: pebs:   
[14405.459326] CPU#0: debugctl:   
[14405.464383] CPU#0: active: 0002000f
[14405.469431] CPU#0:   gen-PMC0 ctrl:  01d301b1
[14405.475069] CPU#0:   gen-PMC0 count: 800090b1c37e

Re: [PATCH v2 01/46] mtdpart: Propagate _get/put_device()

2016-12-14 Thread Richard Weinberger
On 15.12.2016 08:09, Karl Beldan wrote:
>>> I think this should also go into -stable.
>>
>> Why? Do you have real use cases that are broken by this? I understand
> 
> I do, some code adding partitions on a gluebi master.

What exactly are you doing?

>> this is a problem, but I'm curious on how this satisfies the stable
>> rules.
>>
>> Also, note that this isn't a regression; it's been broken forever and
>> apparently no one noticed. IMO that raises the bar a bit (but not
>> impossibly so) for -stable.
>>
> 
> I just encountered the bug yesterday and yes it is obvious it has been
> broken forever.
> I don't have strong opinion about these things so no worries.

If existing stuff is broken, and you can trigger it. Please let us
know. Then it should go into -stable.

Thanks,
//richard


Re: [PATCH v2 01/46] mtdpart: Propagate _get/put_device()

2016-12-14 Thread Richard Weinberger
On 15.12.2016 08:09, Karl Beldan wrote:
>>> I think this should also go into -stable.
>>
>> Why? Do you have real use cases that are broken by this? I understand
> 
> I do, some code adding partitions on a gluebi master.

What exactly are you doing?

>> this is a problem, but I'm curious on how this satisfies the stable
>> rules.
>>
>> Also, note that this isn't a regression; it's been broken forever and
>> apparently no one noticed. IMO that raises the bar a bit (but not
>> impossibly so) for -stable.
>>
> 
> I just encountered the bug yesterday and yes it is obvious it has been
> broken forever.
> I don't have strong opinion about these things so no worries.

If existing stuff is broken, and you can trigger it. Please let us
know. Then it should go into -stable.

Thanks,
//richard


Re: [PATCH] vfio-mdev: Fix mtty sample driver building

2016-12-14 Thread Kirti Wankhede
On 12/15/2016 1:09 AM, Alex Williamson wrote:
> This sample driver was originally under Documentation/ and was moved
> to samples, but build support was never adjusted for the new location.
> 
> Signed-off-by: Alex Williamson 
> ---
>  samples/Kconfig|7 +++
>  samples/Makefile   |3 ++-
>  samples/vfio-mdev/Makefile |   14 +-
>  3 files changed, 10 insertions(+), 14 deletions(-)
> 

This change would bring this example driver in Kernel build system if
selected. I do have verified build and tested it on x86_64 platform.
Have you verified x86 and PPC/ARM build (which I think primarily could
use VFIO)?

Thanks,
Kirti

> diff --git a/samples/Kconfig b/samples/Kconfig
> index a6d2a43..b124f62 100644
> --- a/samples/Kconfig
> +++ b/samples/Kconfig
> @@ -105,4 +105,11 @@ config SAMPLE_BLACKFIN_GPTIMERS
>   help
> Build samples of blackfin gptimers sample module.
>  
> +config SAMPLE_VFIO_MDEV_MTTY
> + tristate "Build VFIO mtty example mediated device sample code -- 
> loadable modules only"
> + depends on VFIO_MDEV_DEVICE && m
> + help
> +   Build a virtual tty sample driver for use as a VFIO
> +   mediated device
> +
>  endif # SAMPLES
> diff --git a/samples/Makefile b/samples/Makefile
> index e17d66d..86a137e 100644
> --- a/samples/Makefile
> +++ b/samples/Makefile
> @@ -2,4 +2,5 @@
>  
>  obj-$(CONFIG_SAMPLES)+= kobject/ kprobes/ trace_events/ livepatch/ \
>  hw_breakpoint/ kfifo/ kdb/ hidraw/ rpmsg/ seccomp/ \
> -configfs/ connector/ v4l/ trace_printk/ blackfin/
> +configfs/ connector/ v4l/ trace_printk/ blackfin/ \
> +vfio-mdev/
> diff --git a/samples/vfio-mdev/Makefile b/samples/vfio-mdev/Makefile
> index a932edb..cbbd868 100644
> --- a/samples/vfio-mdev/Makefile
> +++ b/samples/vfio-mdev/Makefile
> @@ -1,13 +1 @@
> -#
> -# Makefile for mtty.c file
> -#
> -KERNEL_DIR:=/lib/modules/$(shell uname -r)/build
> -
> -obj-m:=mtty.o
> -
> -modules clean modules_install:
> - $(MAKE) -C $(KERNEL_DIR) SUBDIRS=$(PWD) $@
> -
> -default: modules
> -
> -module: modules
> +obj-$(CONFIG_SAMPLE_VFIO_MDEV_MTTY) += mtty.o
> 


Re: [PATCH] vfio-mdev: Fix mtty sample driver building

2016-12-14 Thread Kirti Wankhede
On 12/15/2016 1:09 AM, Alex Williamson wrote:
> This sample driver was originally under Documentation/ and was moved
> to samples, but build support was never adjusted for the new location.
> 
> Signed-off-by: Alex Williamson 
> ---
>  samples/Kconfig|7 +++
>  samples/Makefile   |3 ++-
>  samples/vfio-mdev/Makefile |   14 +-
>  3 files changed, 10 insertions(+), 14 deletions(-)
> 

This change would bring this example driver in Kernel build system if
selected. I do have verified build and tested it on x86_64 platform.
Have you verified x86 and PPC/ARM build (which I think primarily could
use VFIO)?

Thanks,
Kirti

> diff --git a/samples/Kconfig b/samples/Kconfig
> index a6d2a43..b124f62 100644
> --- a/samples/Kconfig
> +++ b/samples/Kconfig
> @@ -105,4 +105,11 @@ config SAMPLE_BLACKFIN_GPTIMERS
>   help
> Build samples of blackfin gptimers sample module.
>  
> +config SAMPLE_VFIO_MDEV_MTTY
> + tristate "Build VFIO mtty example mediated device sample code -- 
> loadable modules only"
> + depends on VFIO_MDEV_DEVICE && m
> + help
> +   Build a virtual tty sample driver for use as a VFIO
> +   mediated device
> +
>  endif # SAMPLES
> diff --git a/samples/Makefile b/samples/Makefile
> index e17d66d..86a137e 100644
> --- a/samples/Makefile
> +++ b/samples/Makefile
> @@ -2,4 +2,5 @@
>  
>  obj-$(CONFIG_SAMPLES)+= kobject/ kprobes/ trace_events/ livepatch/ \
>  hw_breakpoint/ kfifo/ kdb/ hidraw/ rpmsg/ seccomp/ \
> -configfs/ connector/ v4l/ trace_printk/ blackfin/
> +configfs/ connector/ v4l/ trace_printk/ blackfin/ \
> +vfio-mdev/
> diff --git a/samples/vfio-mdev/Makefile b/samples/vfio-mdev/Makefile
> index a932edb..cbbd868 100644
> --- a/samples/vfio-mdev/Makefile
> +++ b/samples/vfio-mdev/Makefile
> @@ -1,13 +1 @@
> -#
> -# Makefile for mtty.c file
> -#
> -KERNEL_DIR:=/lib/modules/$(shell uname -r)/build
> -
> -obj-m:=mtty.o
> -
> -modules clean modules_install:
> - $(MAKE) -C $(KERNEL_DIR) SUBDIRS=$(PWD) $@
> -
> -default: modules
> -
> -module: modules
> +obj-$(CONFIG_SAMPLE_VFIO_MDEV_MTTY) += mtty.o
> 


Re: [PATCH V2] Coccinelle: check usleep_range() usage

2016-12-14 Thread Julia Lawall


On Thu, 15 Dec 2016, Nicholas Mc Guire wrote:

> On Thu, Dec 15, 2016 at 06:52:28AM +0100, Julia Lawall wrote:
> >
> >
> > On Thu, 15 Dec 2016, Nicholas Mc Guire wrote:
> >
> > > Documentation/timers/timers-howto.txt outlines the intended usage of
> > > usleep_range(), this spatch tries to locate missuse/out-of-spec cases.
> > >
> > > Signed-off-by: Nicholas Mc Guire <hof...@osadl.org>
> > > ---
> > > V2: added context mode as suggested by Julia Lawall <julia.law...@lip6.fr>
> > > added min
> > > added in the range checks as they are resonably reliable based on
> > > a review of all 1648 call sites of usleep_range()
> > >
> > > 1648 calls total
> > > 1488 pass numeric values only (90.29%)
> > >   27 min below 10us (1.81%)
> > >   40 min above 10ms (2.68%)
> > >  min out of spec 4.50%
> > >   76 preprocessor constants (4.61%)
> > >1 min below 10us (1.31%)
> > >8 min above 10ms (10.52%)
> > >  min out of spec 11.84%
> > >   85 expressions (5.15%)
> > > 1(0) min below 10us (1.50%)*
> > > 6(2) min above 10ms (7.50%)*
> > >  min out of spec 9.0%
> > > Errors:
> > >   23 where min==max  (1.39%)
> > >0 where max < min (0.00%)
> > >
> > > Total:
> > >   Bugs: 6.48%-10.70%*
> > >   Crit: 3.09%-3.15%* (min < 10, min==max, max < min)
> > >   Detectable by coccinelle:
> > >   Bugs: 74/103 (71.8%)
> > >   Crit: 50/52 (96.1%)
> > > * numbers estimated based on code review
> > >
> > > Patch is againts 4.9.0 (localversion-next is next-20161214)
> > >
> > >  scripts/coccinelle/api/bad_usleep_range.cocci | 88 
> > > +++
> > >  1 file changed, 88 insertions(+)
> > >  create mode 100644 scripts/coccinelle/api/bad_usleep_range.cocci
> > >
> > > diff --git a/scripts/coccinelle/api/bad_usleep_range.cocci 
> > > b/scripts/coccinelle/api/bad_usleep_range.cocci
> > > new file mode 100644
> > > index 000..003e9ef
> > > --- /dev/null
> > > +++ b/scripts/coccinelle/api/bad_usleep_range.cocci
> > > @@ -0,0 +1,88 @@
> > > +/// report bad/problematic usleep_range usage
> > > +//
> > > +// This is a checker for the documented intended use of usleep_range
> > > +// see: Documentation/timers/timers-howto.txt and
> > > +// Link: http://lkml.org/lkml/2016/11/29/54 for some notes on
> > > +//   when mdelay might not be a suitable replacement
> > > +//
> > > +// Limitations:
> > > +//  * The numeric limits are only checked when numeric constants are in
> > > +//use (as of 4.9.0 thats 90.29% of the calls) no constant folding
> > > +//is done - so this can miss some out-of-range cases - but in 4.9.0
> > > +//it was catching 74 of the 103 bad cases (71.8%) and 50 of 52
> > > +//(96.1%) of the critical cases (min < 10 and min==max - there
> > > +//  * There may be RT use-cases where both min < 10 and min==max)
> > > +//justified (e.g. high-throughput drivers on a shielded core)
> > > +//
> > > +// 1) warn if min == max
> > > +//
> > > +//  The problem is that usleep_range is calculating the delay by
> > > +//  exp = ktime_add_us(ktime_get(), min)
> > > +//  delta = (u64)(max - min) * NSEC_PER_USEC
> > > +//  so delta is set to 0 if min==max
> > > +//  and then calls
> > > +//  schedule_hrtimeout_range(exp, 0,...)
> > > +//  effectively this means that the clock subsystem has no room to
> > > +//  optimize. usleep_range() is in non-atomic context so a 0 range
> > > +//  makes very little sense as the task can be preempted anyway so
> > > +//  there is no guarantee that the 0 range would be adding much
> > > +//  precision - it just removes optimization potential, so it probably
> > > +//  never really makes sense.
> > > +//
> > > +// 2) warn if min < 10 or min > 20ms
> > > +//
> > > +//  it makes little sense to use a non-atomic call for very short
> > > +//  delays because the scheduling jitter will most likely exceed
> > > +//  this limit - udelay() makes more sense in that case. For very
> > > +//  large delays using hrtimers is useless as preemption becomes
> > > +//  quite likely resulting in high inaccuracy anyway - so use
> > > +//  jiffies based msleep and don't burden the hrtimer subsystem.
> &

Re: [PATCH V2] Coccinelle: check usleep_range() usage

2016-12-14 Thread Julia Lawall


On Thu, 15 Dec 2016, Nicholas Mc Guire wrote:

> On Thu, Dec 15, 2016 at 06:52:28AM +0100, Julia Lawall wrote:
> >
> >
> > On Thu, 15 Dec 2016, Nicholas Mc Guire wrote:
> >
> > > Documentation/timers/timers-howto.txt outlines the intended usage of
> > > usleep_range(), this spatch tries to locate missuse/out-of-spec cases.
> > >
> > > Signed-off-by: Nicholas Mc Guire 
> > > ---
> > > V2: added context mode as suggested by Julia Lawall 
> > > added min
> > > added in the range checks as they are resonably reliable based on
> > > a review of all 1648 call sites of usleep_range()
> > >
> > > 1648 calls total
> > > 1488 pass numeric values only (90.29%)
> > >   27 min below 10us (1.81%)
> > >   40 min above 10ms (2.68%)
> > >  min out of spec 4.50%
> > >   76 preprocessor constants (4.61%)
> > >1 min below 10us (1.31%)
> > >8 min above 10ms (10.52%)
> > >  min out of spec 11.84%
> > >   85 expressions (5.15%)
> > > 1(0) min below 10us (1.50%)*
> > > 6(2) min above 10ms (7.50%)*
> > >  min out of spec 9.0%
> > > Errors:
> > >   23 where min==max  (1.39%)
> > >0 where max < min (0.00%)
> > >
> > > Total:
> > >   Bugs: 6.48%-10.70%*
> > >   Crit: 3.09%-3.15%* (min < 10, min==max, max < min)
> > >   Detectable by coccinelle:
> > >   Bugs: 74/103 (71.8%)
> > >   Crit: 50/52 (96.1%)
> > > * numbers estimated based on code review
> > >
> > > Patch is againts 4.9.0 (localversion-next is next-20161214)
> > >
> > >  scripts/coccinelle/api/bad_usleep_range.cocci | 88 
> > > +++
> > >  1 file changed, 88 insertions(+)
> > >  create mode 100644 scripts/coccinelle/api/bad_usleep_range.cocci
> > >
> > > diff --git a/scripts/coccinelle/api/bad_usleep_range.cocci 
> > > b/scripts/coccinelle/api/bad_usleep_range.cocci
> > > new file mode 100644
> > > index 000..003e9ef
> > > --- /dev/null
> > > +++ b/scripts/coccinelle/api/bad_usleep_range.cocci
> > > @@ -0,0 +1,88 @@
> > > +/// report bad/problematic usleep_range usage
> > > +//
> > > +// This is a checker for the documented intended use of usleep_range
> > > +// see: Documentation/timers/timers-howto.txt and
> > > +// Link: http://lkml.org/lkml/2016/11/29/54 for some notes on
> > > +//   when mdelay might not be a suitable replacement
> > > +//
> > > +// Limitations:
> > > +//  * The numeric limits are only checked when numeric constants are in
> > > +//use (as of 4.9.0 thats 90.29% of the calls) no constant folding
> > > +//is done - so this can miss some out-of-range cases - but in 4.9.0
> > > +//it was catching 74 of the 103 bad cases (71.8%) and 50 of 52
> > > +//(96.1%) of the critical cases (min < 10 and min==max - there
> > > +//  * There may be RT use-cases where both min < 10 and min==max)
> > > +//justified (e.g. high-throughput drivers on a shielded core)
> > > +//
> > > +// 1) warn if min == max
> > > +//
> > > +//  The problem is that usleep_range is calculating the delay by
> > > +//  exp = ktime_add_us(ktime_get(), min)
> > > +//  delta = (u64)(max - min) * NSEC_PER_USEC
> > > +//  so delta is set to 0 if min==max
> > > +//  and then calls
> > > +//  schedule_hrtimeout_range(exp, 0,...)
> > > +//  effectively this means that the clock subsystem has no room to
> > > +//  optimize. usleep_range() is in non-atomic context so a 0 range
> > > +//  makes very little sense as the task can be preempted anyway so
> > > +//  there is no guarantee that the 0 range would be adding much
> > > +//  precision - it just removes optimization potential, so it probably
> > > +//  never really makes sense.
> > > +//
> > > +// 2) warn if min < 10 or min > 20ms
> > > +//
> > > +//  it makes little sense to use a non-atomic call for very short
> > > +//  delays because the scheduling jitter will most likely exceed
> > > +//  this limit - udelay() makes more sense in that case. For very
> > > +//  large delays using hrtimers is useless as preemption becomes
> > > +//  quite likely resulting in high inaccuracy anyway - so use
> > > +//  jiffies based msleep and don't burden the hrtimer subsystem.
> > > +//
> > > +// 3) warn if max < mi

Re: [PATCH 03/39] ARM: dts: armada-370: Correct license text

2016-12-14 Thread Uwe Kleine-König
On Wed, Dec 14, 2016 at 11:37:10PM +0100, Alexandre Belloni wrote:
> The license test has been mangled at some point then copy pasted across
> multiple files. Restore it to what it should be.
> Note that this is not intended as a license change.
> 
> Cc: Arnaud Ebalard 
> Cc: Ben Dooks 
> Cc: Boris Brezillon 
> Cc: Ezequiel Garcia 
> Cc: Lior Amsalem 
> Cc: Ryan Press 
> Cc: Simon Guinot 
> Cc: Stefan Roese 
> Cc: Thomas Petazzoni 
> Cc: Uwe Kleine-König 
Acked-by: Uwe Kleine-König 

> Cc: Yehuda Yitschak 
> Signed-off-by: Alexandre Belloni 
> ---
>  arch/arm/boot/dts/armada-370.dtsi | 10 +-
>  1 file changed, 5 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/arm/boot/dts/armada-370.dtsi 
> b/arch/arm/boot/dts/armada-370.dtsi
> index b4258105e91f..c057c54f6b36 100644
> --- a/arch/arm/boot/dts/armada-370.dtsi
> +++ b/arch/arm/boot/dts/armada-370.dtsi
> @@ -17,17 +17,17 @@
>   * published by the Free Software Foundation; either version 2 of the
>   * License, or (at your option) any later version.
>   *
> - * This file is distributed in the hope that it will be useful
> + * This file is distributed in the hope that it will be useful,
>   * but WITHOUT ANY WARRANTY; without even the implied warranty of
>   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>   * GNU General Public License for more details.
>   *
> - * Or, alternatively
> + * Or, alternatively,
>   *
>   *  b) Permission is hereby granted, free of charge, to any person
>   * obtaining a copy of this software and associated documentation
>   * files (the "Software"), to deal in the Software without
> - * restriction, including without limitation the rights to use
> + * restriction, including without limitation the rights to use,
>   * copy, modify, merge, publish, distribute, sublicense, and/or
>   * sell copies of the Software, and to permit persons to whom the
>   * Software is furnished to do so, subject to the following
> @@ -36,11 +36,11 @@
>   * The above copyright notice and this permission notice shall be
>   * included in all copies or substantial portions of the Software.
>   *
> - * THE SOFTWARE IS PROVIDED , WITHOUT WARRANTY OF ANY KIND
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
>   * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
>   * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
>   * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
> - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY
> + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
>   * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
>   * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
>   * OTHER DEALINGS IN THE SOFTWARE.
> -- 
> 2.10.2
> 
> 

-- 
Pengutronix e.K.   | Uwe Kleine-König|
Industrial Linux Solutions | http://www.pengutronix.de/  |


Re: [PATCH 03/39] ARM: dts: armada-370: Correct license text

2016-12-14 Thread Uwe Kleine-König
On Wed, Dec 14, 2016 at 11:37:10PM +0100, Alexandre Belloni wrote:
> The license test has been mangled at some point then copy pasted across
> multiple files. Restore it to what it should be.
> Note that this is not intended as a license change.
> 
> Cc: Arnaud Ebalard 
> Cc: Ben Dooks 
> Cc: Boris Brezillon 
> Cc: Ezequiel Garcia 
> Cc: Lior Amsalem 
> Cc: Ryan Press 
> Cc: Simon Guinot 
> Cc: Stefan Roese 
> Cc: Thomas Petazzoni 
> Cc: Uwe Kleine-König 
Acked-by: Uwe Kleine-König 

> Cc: Yehuda Yitschak 
> Signed-off-by: Alexandre Belloni 
> ---
>  arch/arm/boot/dts/armada-370.dtsi | 10 +-
>  1 file changed, 5 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/arm/boot/dts/armada-370.dtsi 
> b/arch/arm/boot/dts/armada-370.dtsi
> index b4258105e91f..c057c54f6b36 100644
> --- a/arch/arm/boot/dts/armada-370.dtsi
> +++ b/arch/arm/boot/dts/armada-370.dtsi
> @@ -17,17 +17,17 @@
>   * published by the Free Software Foundation; either version 2 of the
>   * License, or (at your option) any later version.
>   *
> - * This file is distributed in the hope that it will be useful
> + * This file is distributed in the hope that it will be useful,
>   * but WITHOUT ANY WARRANTY; without even the implied warranty of
>   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>   * GNU General Public License for more details.
>   *
> - * Or, alternatively
> + * Or, alternatively,
>   *
>   *  b) Permission is hereby granted, free of charge, to any person
>   * obtaining a copy of this software and associated documentation
>   * files (the "Software"), to deal in the Software without
> - * restriction, including without limitation the rights to use
> + * restriction, including without limitation the rights to use,
>   * copy, modify, merge, publish, distribute, sublicense, and/or
>   * sell copies of the Software, and to permit persons to whom the
>   * Software is furnished to do so, subject to the following
> @@ -36,11 +36,11 @@
>   * The above copyright notice and this permission notice shall be
>   * included in all copies or substantial portions of the Software.
>   *
> - * THE SOFTWARE IS PROVIDED , WITHOUT WARRANTY OF ANY KIND
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
>   * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
>   * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
>   * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
> - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY
> + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
>   * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
>   * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
>   * OTHER DEALINGS IN THE SOFTWARE.
> -- 
> 2.10.2
> 
> 

-- 
Pengutronix e.K.   | Uwe Kleine-König|
Industrial Linux Solutions | http://www.pengutronix.de/  |


Re: [PATCH 2/3] perf/x86/pebs: add workaround for broken OVFL status on HSW

2016-12-14 Thread Stephane Eranian
On Wed, Dec 14, 2016 at 9:55 AM, Peter Zijlstra  wrote:
>
> Just spotted this again, ping?
>
Ok, on what processor running what command, so I can try and reproduce?

> On Thu, Mar 10, 2016 at 11:42:36AM +0100, Peter Zijlstra wrote:
>> On Wed, Mar 09, 2016 at 09:40:07AM -0800, Stephane Eranian wrote:
>> > With your queue.tip perf/core branch, I run into another problem.
>> > I am monitoring with 2 PEBS events and I have the NMI watchdog enabled.
>> >
>> > I see non-EXACT PEBS records again, despite my change (which is in).
>> > I tracked it down to the following issue after the testing of bit 62:
>> >
>> > [31137.273061] CPU71 status=0x20001 orig_status=0x20001 bit62=0
>> >
>> > The IRQ handler is called because the fixed counter for the NMI has 
>> > overflowed
>> > and it sees this in bit 33, but it also sees that one of the PEBS
>> > events has also
>> > overflowed, yet bit 62 is not set. Therefore both overflows are
>> > treated as regular
>> > and the drain_pebs() is not called generating a non-EXACT record for the 
>> > PEBS
>> > counter (counter 0). So something is wrong still and this is on Broadwell.
>> >
>> > First, I don't understand why the OVF bit for counter 0 is set. It
>> > should not according
>> > to specs because the counter is in PEBS mode. There must be a race there. 
>> > So we
>> > have to handle it by relying on cpuc->pebs_enabled. I will try that.
>> > We likely also
>> > need to force OVF bit 62 to 1 so we can ack it in the end (and in case
>> > it gets set).
>>
>> How about we make the clear of pebs_enabled unconditional?
>>
>> ---
>>  arch/x86/events/intel/core.c | 20 ++--
>>  1 file changed, 10 insertions(+), 10 deletions(-)
>>
>> diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
>> index 68fa55b4d42e..dc9579665425 100644
>> --- a/arch/x86/events/intel/core.c
>> +++ b/arch/x86/events/intel/core.c
>> @@ -1883,6 +1883,16 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
>>   status &= ~(GLOBAL_STATUS_COND_CHG |
>>   GLOBAL_STATUS_ASIF |
>>   GLOBAL_STATUS_LBRS_FROZEN);
>> + /*
>> +  * There are cases where, even though, the PEBS ovfl bit is set
>> +  * in GLOBAL_OVF_STATUS, the PEBS events may also have their
>> +  * overflow bits set for their counters. We must clear them
>> +  * here because they have been processed as exact samples in
>> +  * the drain_pebs() routine. They must not be processed again
>> +  * in the for_each_bit_set() loop for regular samples below.
>> +  */
>> + status &= ~cpuc->pebs_enabled;
>> +
>>   if (!status)
>>   goto done;
>>
>> @@ -1892,16 +1902,6 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
>>   if (__test_and_clear_bit(62, (unsigned long *))) {
>>   handled++;
>>   x86_pmu.drain_pebs(regs);
>> - /*
>> -  * There are cases where, even though, the PEBS ovfl bit is set
>> -  * in GLOBAL_OVF_STATUS, the PEBS events may also have their
>> -  * overflow bits set for their counters. We must clear them
>> -  * here because they have been processed as exact samples in
>> -  * the drain_pebs() routine. They must not be processed again
>> -  * in the for_each_bit_set() loop for regular samples below.
>> -  */
>> - status &= ~cpuc->pebs_enabled;
>> - status &= x86_pmu.intel_ctrl | GLOBAL_STATUS_TRACE_TOPAPMI;
>>   }
>>
>>   /*


Re: [PATCH 2/3] perf/x86/pebs: add workaround for broken OVFL status on HSW

2016-12-14 Thread Stephane Eranian
On Wed, Dec 14, 2016 at 9:55 AM, Peter Zijlstra  wrote:
>
> Just spotted this again, ping?
>
Ok, on what processor running what command, so I can try and reproduce?

> On Thu, Mar 10, 2016 at 11:42:36AM +0100, Peter Zijlstra wrote:
>> On Wed, Mar 09, 2016 at 09:40:07AM -0800, Stephane Eranian wrote:
>> > With your queue.tip perf/core branch, I run into another problem.
>> > I am monitoring with 2 PEBS events and I have the NMI watchdog enabled.
>> >
>> > I see non-EXACT PEBS records again, despite my change (which is in).
>> > I tracked it down to the following issue after the testing of bit 62:
>> >
>> > [31137.273061] CPU71 status=0x20001 orig_status=0x20001 bit62=0
>> >
>> > The IRQ handler is called because the fixed counter for the NMI has 
>> > overflowed
>> > and it sees this in bit 33, but it also sees that one of the PEBS
>> > events has also
>> > overflowed, yet bit 62 is not set. Therefore both overflows are
>> > treated as regular
>> > and the drain_pebs() is not called generating a non-EXACT record for the 
>> > PEBS
>> > counter (counter 0). So something is wrong still and this is on Broadwell.
>> >
>> > First, I don't understand why the OVF bit for counter 0 is set. It
>> > should not according
>> > to specs because the counter is in PEBS mode. There must be a race there. 
>> > So we
>> > have to handle it by relying on cpuc->pebs_enabled. I will try that.
>> > We likely also
>> > need to force OVF bit 62 to 1 so we can ack it in the end (and in case
>> > it gets set).
>>
>> How about we make the clear of pebs_enabled unconditional?
>>
>> ---
>>  arch/x86/events/intel/core.c | 20 ++--
>>  1 file changed, 10 insertions(+), 10 deletions(-)
>>
>> diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
>> index 68fa55b4d42e..dc9579665425 100644
>> --- a/arch/x86/events/intel/core.c
>> +++ b/arch/x86/events/intel/core.c
>> @@ -1883,6 +1883,16 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
>>   status &= ~(GLOBAL_STATUS_COND_CHG |
>>   GLOBAL_STATUS_ASIF |
>>   GLOBAL_STATUS_LBRS_FROZEN);
>> + /*
>> +  * There are cases where, even though, the PEBS ovfl bit is set
>> +  * in GLOBAL_OVF_STATUS, the PEBS events may also have their
>> +  * overflow bits set for their counters. We must clear them
>> +  * here because they have been processed as exact samples in
>> +  * the drain_pebs() routine. They must not be processed again
>> +  * in the for_each_bit_set() loop for regular samples below.
>> +  */
>> + status &= ~cpuc->pebs_enabled;
>> +
>>   if (!status)
>>   goto done;
>>
>> @@ -1892,16 +1902,6 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
>>   if (__test_and_clear_bit(62, (unsigned long *))) {
>>   handled++;
>>   x86_pmu.drain_pebs(regs);
>> - /*
>> -  * There are cases where, even though, the PEBS ovfl bit is set
>> -  * in GLOBAL_OVF_STATUS, the PEBS events may also have their
>> -  * overflow bits set for their counters. We must clear them
>> -  * here because they have been processed as exact samples in
>> -  * the drain_pebs() routine. They must not be processed again
>> -  * in the for_each_bit_set() loop for regular samples below.
>> -  */
>> - status &= ~cpuc->pebs_enabled;
>> - status &= x86_pmu.intel_ctrl | GLOBAL_STATUS_TRACE_TOPAPMI;
>>   }
>>
>>   /*


[PATCH v5 0/2] change the proc handler for nsm_use_hostnames

2016-12-14 Thread Jia He
nsm_use_hostnames is a module parameter and it will be exported to sysctl
procfs. This is to let users change it from userspace when needed. But the
minimal unit for sysctl procfs read/write is sizeof(int).
On big endian systems, the conversion between bool and int will cause
errors for proc items.

This patch introduces a new proc handler proc_dobool for nsm_use_hostnames.

Changes:
v5: Fix compilation error when CONFIG_PROC_SYSCTL is not set 
v4: Change (u8 *) to (bool *)
v3: Introduce a new proc handler proc_dou8vec(suggested by Xinhui Pan)
v2: Change extern type in lockd.h

The test case I used:
/***/
#include 
#include 
#include 

bool __read_mostly nsm_use_hostnames;
module_param(nsm_use_hostnames, bool, 0644);

static struct ctl_table my_sysctl[] = {
{
.procname   = "nsm_use_hostnames",
.data   = _use_hostnames,
.maxlen = sizeof(int),
.mode   = 0644,
.proc_handler   = _dointvec,
},
{}
};

static struct ctl_table my_root[] = {
{
.procname   = "mysysctl",
.mode   = 0555,
.child  = my_sysctl,
},
{}
};

static struct ctl_table_header * my_ctl_header;

static int __init sysctl_exam_init(void)
{
my_ctl_header = register_sysctl_table(_root);
if (my_ctl_header == NULL)
printk("error regiester sysctl");

return 0;
}

static void __exit sysctl_exam_exit(void)
{
unregister_sysctl_table(my_ctl_header);
}

module_init(sysctl_exam_init);
module_exit(sysctl_exam_exit);
MODULE_LICENSE("GPL");
//

[root@bigendian my]# insmod -f /root/my/hello.ko nsm_use_hostnames=1   
[root@bigendian my]# cat /proc/sys/mysysctl/nsm_use_hostnames 
16777216

After I change the proc_dointvec to new handler proc_dou8vec with the
patch:
[root@bigendian my]# insmod -f /root/my/hello.ko nsm_use_hostnames=1
[root@bigendian my]# cat /proc/sys/mysysctl/nsm_use_hostnames
1

In little endian system, there is no such issue.
Already tested in both of big and little endian(ppc64 and ppc64le)

Jia He (2):
  sysctl: introduce new proc handler proc_dobool
  lockd: change the proc_handler for nsm_use_hostnames

 fs/lockd/svc.c |  2 +-
 include/linux/sysctl.h |  2 ++
 kernel/sysctl.c| 41 +
 3 files changed, 44 insertions(+), 1 deletion(-)
-- 
2.5.5



[PATCH v5 2/2] lockd: change the proc_handler for nsm_use_hostnames

2016-12-14 Thread Jia He
nsm_use_hostnames is a module parameter and it will be exported to sysctl
procfs. This is to let users change it from userspace when needed. But the
minimal unit for sysctl procfs read/write is sizeof(int).
On big endian systems, the conversion between bool and int will cause
errors for proc items.

This patch uses a new proc_handler, proc_dobool, to fix it.

Signed-off-by: Jia He 
---
 fs/lockd/svc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index fc4084e..bd6fcf9 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -561,7 +561,7 @@ static struct ctl_table nlm_sysctls[] = {
.data   = _use_hostnames,
.maxlen = sizeof(int),
.mode   = 0644,
-   .proc_handler   = proc_dointvec,
+   .proc_handler   = proc_dobool,
},
{
.procname   = "nsm_local_state",
-- 
2.5.5



[PATCH v5 1/2] sysctl: introduce new proc handler proc_dobool

2016-12-14 Thread Jia He
This is to let a bool variable be correctly displayed in
big/little endian sysctl procfs. sizeof(bool) is arch dependent;
proc_dobool should work on all arches.

Suggested-by: Pan Xinhui 
Signed-off-by: Jia He 
---
 include/linux/sysctl.h |  2 ++
 kernel/sysctl.c| 41 +
 2 files changed, 43 insertions(+)

diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index adf4e51..255a9c7 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -41,6 +41,8 @@ typedef int proc_handler (struct ctl_table *ctl, int write,
 
 extern int proc_dostring(struct ctl_table *, int,
 void __user *, size_t *, loff_t *);
+extern int proc_dobool(struct ctl_table *, int,
+   void __user *, size_t *, loff_t *);
 extern int proc_dointvec(struct ctl_table *, int,
 void __user *, size_t *, loff_t *);
 extern int proc_douintvec(struct ctl_table *, int,
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 706309f..c4bec65 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2112,6 +2112,20 @@ static int proc_put_char(void __user **buf, size_t 
*size, char c)
return 0;
 }
 
+static int do_proc_dobool_conv(bool *negp, unsigned long *lvalp,
+   int *valp,
+   int write, void *data)
+{
+   if (write)
+   *(bool *)valp = *lvalp;
+   else {
+   int val = *(bool *)valp;
+
+   *lvalp = (unsigned long)val;
+   }
+   return 0;
+}
+
 static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
 int *valp,
 int write, void *data)
@@ -2258,6 +2272,26 @@ static int do_proc_dointvec(struct ctl_table *table, int 
write,
 }
 
 /**
+ * proc_dobool - read/write a bool
+ * @table: the sysctl table
+ * @write: %TRUE if this is a write to the sysctl file
+ * @buffer: the user buffer
+ * @lenp: the size of the user buffer
+ * @ppos: file position
+ *
+ * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
+ * values from/to the user buffer, treated as an ASCII string.
+ *
+ * Returns 0 on success.
+ */
+int proc_dobool(struct ctl_table *table, int write,
+   void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+   return do_proc_dointvec(table, write, buffer, lenp, ppos,
+   do_proc_dobool_conv, NULL);
+}
+
+/**
  * proc_dointvec - read a vector of integers
  * @table: the sysctl table
  * @write: %TRUE if this is a write to the sysctl file
@@ -2885,6 +2919,12 @@ int proc_dostring(struct ctl_table *table, int write,
return -ENOSYS;
 }
 
+int proc_dobool(struct ctl_table *table, int write,
+  void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+return -ENOSYS;
+}
+
 int proc_dointvec(struct ctl_table *table, int write,
  void __user *buffer, size_t *lenp, loff_t *ppos)
 {
@@ -2941,6 +2981,7 @@ int proc_doulongvec_ms_jiffies_minmax(struct ctl_table 
*table, int write,
  * No sense putting this after each symbol definition, twice,
  * exception granted :-)
  */
+EXPORT_SYMBOL(proc_dobool);
 EXPORT_SYMBOL(proc_dointvec);
 EXPORT_SYMBOL(proc_douintvec);
 EXPORT_SYMBOL(proc_dointvec_jiffies);
-- 
2.5.5



[PATCH v5 0/2] change the proc handler for nsm_use_hostnames

2016-12-14 Thread Jia He
nsm_use_hostnames is a module parameter and it will be exported to sysctl
procfs. This is to let users change it from userspace when needed. But the
minimal unit for sysctl procfs read/write is sizeof(int).
On big endian systems, the conversion between bool and int will cause
errors for proc items.

This patch introduces a new proc handler proc_dobool for nsm_use_hostnames.

Changes:
v5: Fix compilation error when CONFIG_PROC_SYSCTL is not set 
v4: Change (u8 *) to (bool *)
v3: Introduce a new proc handler proc_dou8vec(suggested by Xinhui Pan)
v2: Change extern type in lockd.h

The test case I used:
/***/
#include 
#include 
#include 

bool __read_mostly nsm_use_hostnames;
module_param(nsm_use_hostnames, bool, 0644);

static struct ctl_table my_sysctl[] = {
{
.procname   = "nsm_use_hostnames",
.data   = _use_hostnames,
.maxlen = sizeof(int),
.mode   = 0644,
.proc_handler   = _dointvec,
},
{}
};

static struct ctl_table my_root[] = {
{
.procname   = "mysysctl",
.mode   = 0555,
.child  = my_sysctl,
},
{}
};

static struct ctl_table_header * my_ctl_header;

static int __init sysctl_exam_init(void)
{
my_ctl_header = register_sysctl_table(_root);
if (my_ctl_header == NULL)
printk("error regiester sysctl");

return 0;
}

static void __exit sysctl_exam_exit(void)
{
unregister_sysctl_table(my_ctl_header);
}

module_init(sysctl_exam_init);
module_exit(sysctl_exam_exit);
MODULE_LICENSE("GPL");
//

[root@bigendian my]# insmod -f /root/my/hello.ko nsm_use_hostnames=1   
[root@bigendian my]# cat /proc/sys/mysysctl/nsm_use_hostnames 
16777216

After I change the proc_dointvec to new handler proc_dou8vec with the
patch:
[root@bigendian my]# insmod -f /root/my/hello.ko nsm_use_hostnames=1
[root@bigendian my]# cat /proc/sys/mysysctl/nsm_use_hostnames
1

In little endian system, there is no such issue.
Already tested in both of big and little endian(ppc64 and ppc64le)

Jia He (2):
  sysctl: introduce new proc handler proc_dobool
  lockd: change the proc_handler for nsm_use_hostnames

 fs/lockd/svc.c |  2 +-
 include/linux/sysctl.h |  2 ++
 kernel/sysctl.c| 41 +
 3 files changed, 44 insertions(+), 1 deletion(-)
-- 
2.5.5



[PATCH v5 2/2] lockd: change the proc_handler for nsm_use_hostnames

2016-12-14 Thread Jia He
nsm_use_hostnames is a module parameter and it will be exported to sysctl
procfs. This is to let users change it from userspace when needed. But the
minimal unit for sysctl procfs read/write is sizeof(int).
On big endian systems, the conversion between bool and int will cause
errors for proc items.

This patch uses a new proc_handler, proc_dobool, to fix it.

Signed-off-by: Jia He 
---
 fs/lockd/svc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index fc4084e..bd6fcf9 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -561,7 +561,7 @@ static struct ctl_table nlm_sysctls[] = {
.data   = _use_hostnames,
.maxlen = sizeof(int),
.mode   = 0644,
-   .proc_handler   = proc_dointvec,
+   .proc_handler   = proc_dobool,
},
{
.procname   = "nsm_local_state",
-- 
2.5.5



[PATCH v5 1/2] sysctl: introduce new proc handler proc_dobool

2016-12-14 Thread Jia He
This is to let a bool variable be correctly displayed in
big/little endian sysctl procfs. sizeof(bool) is arch dependent;
proc_dobool should work on all arches.

Suggested-by: Pan Xinhui 
Signed-off-by: Jia He 
---
 include/linux/sysctl.h |  2 ++
 kernel/sysctl.c| 41 +
 2 files changed, 43 insertions(+)

diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index adf4e51..255a9c7 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -41,6 +41,8 @@ typedef int proc_handler (struct ctl_table *ctl, int write,
 
 extern int proc_dostring(struct ctl_table *, int,
 void __user *, size_t *, loff_t *);
+extern int proc_dobool(struct ctl_table *, int,
+   void __user *, size_t *, loff_t *);
 extern int proc_dointvec(struct ctl_table *, int,
 void __user *, size_t *, loff_t *);
 extern int proc_douintvec(struct ctl_table *, int,
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 706309f..c4bec65 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2112,6 +2112,20 @@ static int proc_put_char(void __user **buf, size_t 
*size, char c)
return 0;
 }
 
+static int do_proc_dobool_conv(bool *negp, unsigned long *lvalp,
+   int *valp,
+   int write, void *data)
+{
+   if (write)
+   *(bool *)valp = *lvalp;
+   else {
+   int val = *(bool *)valp;
+
+   *lvalp = (unsigned long)val;
+   }
+   return 0;
+}
+
 static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
 int *valp,
 int write, void *data)
@@ -2258,6 +2272,26 @@ static int do_proc_dointvec(struct ctl_table *table, int 
write,
 }
 
 /**
+ * proc_dobool - read/write a bool
+ * @table: the sysctl table
+ * @write: %TRUE if this is a write to the sysctl file
+ * @buffer: the user buffer
+ * @lenp: the size of the user buffer
+ * @ppos: file position
+ *
+ * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
+ * values from/to the user buffer, treated as an ASCII string.
+ *
+ * Returns 0 on success.
+ */
+int proc_dobool(struct ctl_table *table, int write,
+   void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+   return do_proc_dointvec(table, write, buffer, lenp, ppos,
+   do_proc_dobool_conv, NULL);
+}
+
+/**
  * proc_dointvec - read a vector of integers
  * @table: the sysctl table
  * @write: %TRUE if this is a write to the sysctl file
@@ -2885,6 +2919,12 @@ int proc_dostring(struct ctl_table *table, int write,
return -ENOSYS;
 }
 
+int proc_dobool(struct ctl_table *table, int write,
+  void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+return -ENOSYS;
+}
+
 int proc_dointvec(struct ctl_table *table, int write,
  void __user *buffer, size_t *lenp, loff_t *ppos)
 {
@@ -2941,6 +2981,7 @@ int proc_doulongvec_ms_jiffies_minmax(struct ctl_table 
*table, int write,
  * No sense putting this after each symbol definition, twice,
  * exception granted :-)
  */
+EXPORT_SYMBOL(proc_dobool);
 EXPORT_SYMBOL(proc_dointvec);
 EXPORT_SYMBOL(proc_douintvec);
 EXPORT_SYMBOL(proc_dointvec_jiffies);
-- 
2.5.5



[PATCH] libnvdimm, dax: replace mutex_is_locked() warnings with lockdep_assert_held

2016-12-14 Thread Dan Williams
For warnings that should only ever trigger during development and
testing, replace WARN statements with lockdep_assert_held. The lockdep
pattern is prevalent, and these paths are well covered by libnvdimm
+ dax unit tests.

Reported-by: Johannes Thumshirn 
Signed-off-by: Dan Williams 
---
 drivers/dax/dax.c   |4 ++--
 drivers/nvdimm/claim.c  |   10 --
 drivers/nvdimm/namespace_devs.c |2 +-
 drivers/nvdimm/region_devs.c|2 +-
 4 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/drivers/dax/dax.c b/drivers/dax/dax.c
index c7b23d52f945..cae68465f430 100644
--- a/drivers/dax/dax.c
+++ b/drivers/dax/dax.c
@@ -94,7 +94,7 @@ static unsigned long long dax_region_avail_size(
unsigned long long size;
struct resource *res;
 
-   WARN_ON_ONCE(!mutex_is_locked(_region->lock));
+   lockdep_assert_held(_region->lock);
 
size = resource_size(_region->res);
for_each_dax_region_resource(dax_region, res) {
@@ -419,7 +419,7 @@ static unsigned long long dax_dev_size(struct dax_dev 
*dax_dev)
unsigned long long size = 0;
int i;
 
-   WARN_ON_ONCE(!mutex_is_locked(_region->lock));
+   lockdep_assert_held(_region->lock);
 
if (!dax_dev->alive)
return 0;
diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c
index d5dc80c48b4c..b910d171824a 100644
--- a/drivers/nvdimm/claim.c
+++ b/drivers/nvdimm/claim.c
@@ -22,9 +22,8 @@ void __nd_detach_ndns(struct device *dev, struct 
nd_namespace_common **_ndns)
 {
struct nd_namespace_common *ndns = *_ndns;
 
-   dev_WARN_ONCE(dev, !mutex_is_locked(>dev.mutex)
-   || ndns->claim != dev,
-   "%s: invalid claim\n", __func__);
+   lockdep_assert_held(>dev.mutex);
+   dev_WARN_ONCE(dev, ndns->claim != dev, "%s: invalid claim\n", __func__);
ndns->claim = NULL;
*_ndns = NULL;
put_device(>dev);
@@ -49,9 +48,8 @@ bool __nd_attach_ndns(struct device *dev, struct 
nd_namespace_common *attach,
 {
if (attach->claim)
return false;
-   dev_WARN_ONCE(dev, !mutex_is_locked(>dev.mutex)
-   || *_ndns,
-   "%s: invalid claim\n", __func__);
+   lockdep_assert_held(>dev.mutex);
+   dev_WARN_ONCE(dev, *_ndns, "%s: invalid claim\n", __func__);
attach->claim = dev;
*_ndns = attach;
get_device(>dev);
diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
index abe5c6bc756c..874471a98751 100644
--- a/drivers/nvdimm/namespace_devs.c
+++ b/drivers/nvdimm/namespace_devs.c
@@ -1653,7 +1653,7 @@ static int select_pmem_id(struct nd_region *nd_region, u8 
*pmem_id)
u64 hw_start, hw_end, pmem_start, pmem_end;
struct nd_label_ent *label_ent;
 
-   WARN_ON(!mutex_is_locked(_mapping->lock));
+   lockdep_assert_held(_mapping->lock);
list_for_each_entry(label_ent, _mapping->labels, list) {
nd_label = label_ent->label;
if (!nd_label)
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index 6af5e629140c..7cd705f3247c 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -509,7 +509,7 @@ void nd_mapping_free_labels(struct nd_mapping *nd_mapping)
 {
struct nd_label_ent *label_ent, *e;
 
-   WARN_ON(!mutex_is_locked(_mapping->lock));
+   lockdep_assert_held(_mapping->lock);
list_for_each_entry_safe(label_ent, e, _mapping->labels, list) {
list_del(_ent->list);
kfree(label_ent);



[PATCH] libnvdimm, dax: replace mutex_is_locked() warnings with lockdep_assert_held

2016-12-14 Thread Dan Williams
For warnings that should only ever trigger during development and
testing, replace WARN statements with lockdep_assert_held. The lockdep
pattern is prevalent, and these paths are well covered by libnvdimm
+ dax unit tests.

Reported-by: Johannes Thumshirn 
Signed-off-by: Dan Williams 
---
 drivers/dax/dax.c   |4 ++--
 drivers/nvdimm/claim.c  |   10 --
 drivers/nvdimm/namespace_devs.c |2 +-
 drivers/nvdimm/region_devs.c|2 +-
 4 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/drivers/dax/dax.c b/drivers/dax/dax.c
index c7b23d52f945..cae68465f430 100644
--- a/drivers/dax/dax.c
+++ b/drivers/dax/dax.c
@@ -94,7 +94,7 @@ static unsigned long long dax_region_avail_size(
unsigned long long size;
struct resource *res;
 
-   WARN_ON_ONCE(!mutex_is_locked(_region->lock));
+   lockdep_assert_held(_region->lock);
 
size = resource_size(_region->res);
for_each_dax_region_resource(dax_region, res) {
@@ -419,7 +419,7 @@ static unsigned long long dax_dev_size(struct dax_dev 
*dax_dev)
unsigned long long size = 0;
int i;
 
-   WARN_ON_ONCE(!mutex_is_locked(_region->lock));
+   lockdep_assert_held(_region->lock);
 
if (!dax_dev->alive)
return 0;
diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c
index d5dc80c48b4c..b910d171824a 100644
--- a/drivers/nvdimm/claim.c
+++ b/drivers/nvdimm/claim.c
@@ -22,9 +22,8 @@ void __nd_detach_ndns(struct device *dev, struct 
nd_namespace_common **_ndns)
 {
struct nd_namespace_common *ndns = *_ndns;
 
-   dev_WARN_ONCE(dev, !mutex_is_locked(>dev.mutex)
-   || ndns->claim != dev,
-   "%s: invalid claim\n", __func__);
+   lockdep_assert_held(>dev.mutex);
+   dev_WARN_ONCE(dev, ndns->claim != dev, "%s: invalid claim\n", __func__);
ndns->claim = NULL;
*_ndns = NULL;
put_device(>dev);
@@ -49,9 +48,8 @@ bool __nd_attach_ndns(struct device *dev, struct 
nd_namespace_common *attach,
 {
if (attach->claim)
return false;
-   dev_WARN_ONCE(dev, !mutex_is_locked(>dev.mutex)
-   || *_ndns,
-   "%s: invalid claim\n", __func__);
+   lockdep_assert_held(>dev.mutex);
+   dev_WARN_ONCE(dev, *_ndns, "%s: invalid claim\n", __func__);
attach->claim = dev;
*_ndns = attach;
get_device(>dev);
diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
index abe5c6bc756c..874471a98751 100644
--- a/drivers/nvdimm/namespace_devs.c
+++ b/drivers/nvdimm/namespace_devs.c
@@ -1653,7 +1653,7 @@ static int select_pmem_id(struct nd_region *nd_region, u8 
*pmem_id)
u64 hw_start, hw_end, pmem_start, pmem_end;
struct nd_label_ent *label_ent;
 
-   WARN_ON(!mutex_is_locked(_mapping->lock));
+   lockdep_assert_held(_mapping->lock);
list_for_each_entry(label_ent, _mapping->labels, list) {
nd_label = label_ent->label;
if (!nd_label)
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index 6af5e629140c..7cd705f3247c 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -509,7 +509,7 @@ void nd_mapping_free_labels(struct nd_mapping *nd_mapping)
 {
struct nd_label_ent *label_ent, *e;
 
-   WARN_ON(!mutex_is_locked(_mapping->lock));
+   lockdep_assert_held(_mapping->lock);
list_for_each_entry_safe(label_ent, e, _mapping->labels, list) {
list_del(_ent->list);
kfree(label_ent);



[PATCH] dax: clarify driver_data ownership

2016-12-14 Thread Dan Williams
Johannes points out that the warning in alloc_dax_region() seems to
imply that multiple devices might claim a dax region. In fact it's a
warning to driver developers that the dax core wants to own the
driver_data pointer for the hosting device of a dax_region. Add a
clarification.

Reported-by: Johannes Thumshirn 
Signed-off-by: Dan Williams 
---
 drivers/dax/dax.c |7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/dax/dax.c b/drivers/dax/dax.c
index d1641e69a088..c7b23d52f945 100644
--- a/drivers/dax/dax.c
+++ b/drivers/dax/dax.c
@@ -359,8 +359,13 @@ struct dax_region *alloc_dax_region(struct device *parent, 
int region_id,
 {
struct dax_region *dax_region;
 
+   /*
+* The DAX core assumes that it can store its private data in
+* parent->driver_data. This WARN is a reminder / safeguard for
+* developers of device-dax drivers.
+*/
if (dev_get_drvdata(parent)) {
-   dev_WARN(parent, "dax core found drvdata already in use\n");
+   dev_WARN(parent, "dax core failed to setup private data\n");
return NULL;
}
 



[PATCH] dax: clarify driver_data ownership

2016-12-14 Thread Dan Williams
Johannes points out that the warning in alloc_dax_region() seems to
imply that multiple devices might claim a dax region. In fact it's a
warning to driver developers that the dax core wants to own the
driver_data pointer for the hosting device of a dax_region. Add a
clarification.

Reported-by: Johannes Thumshirn 
Signed-off-by: Dan Williams 
---
 drivers/dax/dax.c |7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/dax/dax.c b/drivers/dax/dax.c
index d1641e69a088..c7b23d52f945 100644
--- a/drivers/dax/dax.c
+++ b/drivers/dax/dax.c
@@ -359,8 +359,13 @@ struct dax_region *alloc_dax_region(struct device *parent, 
int region_id,
 {
struct dax_region *dax_region;
 
+   /*
+* The DAX core assumes that it can store its private data in
+* parent->driver_data. This WARN is a reminder / safeguard for
+* developers of device-dax drivers.
+*/
if (dev_get_drvdata(parent)) {
-   dev_WARN(parent, "dax core found drvdata already in use\n");
+   dev_WARN(parent, "dax core failed to setup private data\n");
return NULL;
}
 



Re: [RFC] perf/x86/intel: Account interrupts for PEBS errors

2016-12-14 Thread Jiri Olsa
On Wed, Dec 14, 2016 at 08:32:39PM +0100, Peter Zijlstra wrote:
> On Wed, Dec 14, 2016 at 07:16:36PM +0100, Jiri Olsa wrote:
> 
> > > > +++ b/arch/x86/events/intel/ds.c
> > > > @@ -1389,9 +1389,13 @@ static void intel_pmu_drain_pebs_nhm(struct 
> > > > pt_regs *iregs)
> > > > continue;
> > > >  
> > > > /* log dropped samples number */
> > > > -   if (error[bit])
> > > > +   if (error[bit]) {
> > > > perf_log_lost_samples(event, error[bit]);
> > > >  
> > > > +   if (perf_event_account_interrupt(event, 1))
> > > 
> > > Seems a bit daft to expose the .throttle argument, since that would be
> > > the only point of calling this.
> > 
> > there's also the other caller from __perf_event_overflow
> 
> See the below patchlet ;-)

ok, np ;-)

> 
> > > > +static int __perf_event_overflow(struct perf_event *event,
> > > > +  int throttle, struct 
> > > > perf_sample_data *data,
> > > > +  struct pt_regs *regs)
> > > > +{
> > > > +   int events = atomic_read(>event_limit);
> > > > +   struct hw_perf_event *hwc = >hw;
> > > > +   int ret = 0;
> > > > +
> > > > +   /*
> > > > +* Non-sampling counters might still use the PMI to fold short
> > > > +* hardware counters, ignore those.
> > > > +*/
> > > > +   if (unlikely(!is_sampling_event(event)))
> > > > +   return 0;
> > > > +
> > > > +   ret = perf_event_account_interrupt(event, throttle);
> > > > +
> > > > if (event->attr.freq) {
> > > > u64 now = perf_clock();
> > > > s64 delta = now - hwc->freq_time_stamp;
> > > 
> > > Arguably, everything in __perf_event_overflow() except for calling of
> > > ->overflow_handler() should be done I think.
> > 
> > well, I was wondering about that period adjustment bit
> > 
> > but I wasn't sure about those pending_kill/pending_wakeup bits,
> > they make sense to me only if we have some data to deliver
> 
> Hmm, maybe. Please add a comment, that way we can at least rediscover we
> thought about this.

ook

jirka


Re: [RFC] perf/x86/intel: Account interrupts for PEBS errors

2016-12-14 Thread Jiri Olsa
On Wed, Dec 14, 2016 at 08:32:39PM +0100, Peter Zijlstra wrote:
> On Wed, Dec 14, 2016 at 07:16:36PM +0100, Jiri Olsa wrote:
> 
> > > > +++ b/arch/x86/events/intel/ds.c
> > > > @@ -1389,9 +1389,13 @@ static void intel_pmu_drain_pebs_nhm(struct 
> > > > pt_regs *iregs)
> > > > continue;
> > > >  
> > > > /* log dropped samples number */
> > > > -   if (error[bit])
> > > > +   if (error[bit]) {
> > > > perf_log_lost_samples(event, error[bit]);
> > > >  
> > > > +   if (perf_event_account_interrupt(event, 1))
> > > 
> > > Seems a bit daft to expose the .throttle argument, since that would be
> > > the only point of calling this.
> > 
> > there's also the other caller from __perf_event_overflow
> 
> See the below patchlet ;-)

ok, np ;-)

> 
> > > > +static int __perf_event_overflow(struct perf_event *event,
> > > > +  int throttle, struct 
> > > > perf_sample_data *data,
> > > > +  struct pt_regs *regs)
> > > > +{
> > > > +   int events = atomic_read(>event_limit);
> > > > +   struct hw_perf_event *hwc = >hw;
> > > > +   int ret = 0;
> > > > +
> > > > +   /*
> > > > +* Non-sampling counters might still use the PMI to fold short
> > > > +* hardware counters, ignore those.
> > > > +*/
> > > > +   if (unlikely(!is_sampling_event(event)))
> > > > +   return 0;
> > > > +
> > > > +   ret = perf_event_account_interrupt(event, throttle);
> > > > +
> > > > if (event->attr.freq) {
> > > > u64 now = perf_clock();
> > > > s64 delta = now - hwc->freq_time_stamp;
> > > 
> > > Arguably, everything in __perf_event_overflow() except for calling of
> > > ->overflow_handler() should be done I think.
> > 
> > well, I was wondering about that period adjustment bit
> > 
> > but I wasn't sure about those pending_kill/pending_wakeup bits,
> > they make sense to me only if we have some data to deliver
> 
> Hmm, maybe. Please add a comment, that way we can at least rediscover we
> thought about this.

ook

jirka


Re: [PATCH v2 01/46] mtdpart: Propagate _get/put_device()

2016-12-14 Thread Karl Beldan
On Wed, Dec 14, 2016 at 9:09 PM, Brian Norris
 wrote:
> On Wed, Dec 14, 2016 at 07:24:46PM +, Karl Beldan wrote:
>> On Wed, Sep 28, 2016 at 8:16 PM, Brian Norris
>>  wrote:
>> > On Wed, Sep 21, 2016 at 12:15:31PM +0200, Boris Brezillon wrote:
>> >> On Wed, 21 Sep 2016 11:43:56 +0200
>> >> Daniel Walter  wrote:
>> >>
>> >> > From: Richard Weinberger 
>> >> >
>> >> > If the master device has callbacks for _get/put_device()
>> >> > and this MTD has slaves a get_mtd_device() call on partitions
>> >> > will never issue the registered callbacks.
>> >> > Fix this by propagating _get/put_device() down.
>> >>
>> >> Brian, can we have this one queued for 4.9? I can't take it in my tree
>> >> if you want, but it's probably better if it's in the mtd tree.
>> >
>> > Applied this patch to l2-mtd.git
>> >
>>
>> I think this should also go into -stable.
>
> Why? Do you have real use cases that are broken by this? I understand

I do, some code adding partitions on a gluebi master.

> this is a problem, but I'm curious on how this satisfies the stable
> rules.
>
> Also, note that this isn't a regression; it's been broken forever and
> apparently no one noticed. IMO that raises the bar a bit (but not
> impossibly so) for -stable.
>

I just encountered the bug yesterday and yes it is obvious it has been
broken forever.
I don't have strong opinion about these things so no worries.

Karl

> Anyway, if we decide to do this, you'll also want to include the git
> hash and applicable kernel versions, per Option 2 [1].
>
> Brian
>
> [1] Documentation/stable_kernel_rules.txt.


Re: [PATCH v2 01/46] mtdpart: Propagate _get/put_device()

2016-12-14 Thread Karl Beldan
On Wed, Dec 14, 2016 at 9:09 PM, Brian Norris
 wrote:
> On Wed, Dec 14, 2016 at 07:24:46PM +, Karl Beldan wrote:
>> On Wed, Sep 28, 2016 at 8:16 PM, Brian Norris
>>  wrote:
>> > On Wed, Sep 21, 2016 at 12:15:31PM +0200, Boris Brezillon wrote:
>> >> On Wed, 21 Sep 2016 11:43:56 +0200
>> >> Daniel Walter  wrote:
>> >>
>> >> > From: Richard Weinberger 
>> >> >
>> >> > If the master device has callbacks for _get/put_device()
>> >> > and this MTD has slaves a get_mtd_device() call on partitions
>> >> > will never issue the registered callbacks.
>> >> > Fix this by propagating _get/put_device() down.
>> >>
>> >> Brian, can we have this one queued for 4.9? I can't take it in my tree
>> >> if you want, but it's probably better if it's in the mtd tree.
>> >
>> > Applied this patch to l2-mtd.git
>> >
>>
>> I think this should also go into -stable.
>
> Why? Do you have real use cases that are broken by this? I understand

I do, some code adding partitions on a gluebi master.

> this is a problem, but I'm curious on how this satisfies the stable
> rules.
>
> Also, note that this isn't a regression; it's been broken forever and
> apparently no one noticed. IMO that raises the bar a bit (but not
> impossibly so) for -stable.
>

I just encountered the bug yesterday and yes it is obvious it has been
broken forever.
I don't have strong opinion about these things so no worries.

Karl

> Anyway, if we decide to do this, you'll also want to include the git
> hash and applicable kernel versions, per Option 2 [1].
>
> Brian
>
> [1] Documentation/stable_kernel_rules.txt.


Re: [PATCH 8/8] Makefile: drop -D__CHECK_ENDIAN__ from cflags

2016-12-14 Thread Marcel Holtmann
Hi Michael,

> That's the default now, no need for makefiles to set it.
> 
> Signed-off-by: Michael S. Tsirkin 
> ---
> drivers/bluetooth/Makefile| 2 --
> drivers/net/can/Makefile  | 1 -
> drivers/net/ethernet/altera/Makefile  | 1 -
> drivers/net/ethernet/atheros/alx/Makefile | 1 -
> drivers/net/ethernet/freescale/Makefile   | 2 --
> drivers/net/wireless/ath/Makefile | 2 --
> drivers/net/wireless/ath/wil6210/Makefile | 2 --
> drivers/net/wireless/broadcom/brcm80211/brcmfmac/Makefile | 2 --
> drivers/net/wireless/broadcom/brcm80211/brcmsmac/Makefile | 1 -
> drivers/net/wireless/intel/iwlegacy/Makefile  | 2 --
> drivers/net/wireless/intel/iwlwifi/Makefile   | 2 +-
> drivers/net/wireless/intel/iwlwifi/dvm/Makefile   | 2 +-
> drivers/net/wireless/intel/iwlwifi/mvm/Makefile   | 2 +-
> drivers/net/wireless/intersil/orinoco/Makefile| 3 ---
> drivers/net/wireless/mediatek/mt7601u/Makefile| 2 --
> drivers/net/wireless/realtek/rtlwifi/Makefile | 2 --
> drivers/net/wireless/realtek/rtlwifi/btcoexist/Makefile   | 2 --
> drivers/net/wireless/realtek/rtlwifi/rtl8188ee/Makefile   | 2 --
> drivers/net/wireless/realtek/rtlwifi/rtl8192c/Makefile| 2 --
> drivers/net/wireless/realtek/rtlwifi/rtl8192ce/Makefile   | 2 --
> drivers/net/wireless/realtek/rtlwifi/rtl8192cu/Makefile   | 2 --
> drivers/net/wireless/realtek/rtlwifi/rtl8192de/Makefile   | 2 --
> drivers/net/wireless/realtek/rtlwifi/rtl8192ee/Makefile   | 2 --
> drivers/net/wireless/realtek/rtlwifi/rtl8192se/Makefile   | 2 --
> drivers/net/wireless/realtek/rtlwifi/rtl8723ae/Makefile   | 2 --
> drivers/net/wireless/realtek/rtlwifi/rtl8723be/Makefile   | 2 --
> drivers/net/wireless/realtek/rtlwifi/rtl8723com/Makefile  | 2 --
> drivers/net/wireless/realtek/rtlwifi/rtl8821ae/Makefile   | 2 --
> drivers/net/wireless/ti/wl1251/Makefile   | 2 --
> drivers/net/wireless/ti/wlcore/Makefile   | 2 --
> drivers/staging/rtl8188eu/Makefile| 2 +-
> drivers/staging/rtl8192e/Makefile | 2 --
> drivers/staging/rtl8192e/rtl8192e/Makefile| 2 --
> net/bluetooth/Makefile| 2 --
> net/ieee802154/Makefile   | 2 --
> net/mac80211/Makefile | 2 +-
> net/mac802154/Makefile| 2 --
> net/wireless/Makefile | 2 --
> 38 files changed, 5 insertions(+), 68 deletions(-)

for drivers/bluetooth, net/bluetooth, net/ieee802154 and net/mac802154

Acked-by: Marcel Holtmann 

Regards

Marcel



Re: [PATCH 8/8] Makefile: drop -D__CHECK_ENDIAN__ from cflags

2016-12-14 Thread Marcel Holtmann
Hi Michael,

> That's the default now, no need for makefiles to set it.
> 
> Signed-off-by: Michael S. Tsirkin 
> ---
> drivers/bluetooth/Makefile| 2 --
> drivers/net/can/Makefile  | 1 -
> drivers/net/ethernet/altera/Makefile  | 1 -
> drivers/net/ethernet/atheros/alx/Makefile | 1 -
> drivers/net/ethernet/freescale/Makefile   | 2 --
> drivers/net/wireless/ath/Makefile | 2 --
> drivers/net/wireless/ath/wil6210/Makefile | 2 --
> drivers/net/wireless/broadcom/brcm80211/brcmfmac/Makefile | 2 --
> drivers/net/wireless/broadcom/brcm80211/brcmsmac/Makefile | 1 -
> drivers/net/wireless/intel/iwlegacy/Makefile  | 2 --
> drivers/net/wireless/intel/iwlwifi/Makefile   | 2 +-
> drivers/net/wireless/intel/iwlwifi/dvm/Makefile   | 2 +-
> drivers/net/wireless/intel/iwlwifi/mvm/Makefile   | 2 +-
> drivers/net/wireless/intersil/orinoco/Makefile| 3 ---
> drivers/net/wireless/mediatek/mt7601u/Makefile| 2 --
> drivers/net/wireless/realtek/rtlwifi/Makefile | 2 --
> drivers/net/wireless/realtek/rtlwifi/btcoexist/Makefile   | 2 --
> drivers/net/wireless/realtek/rtlwifi/rtl8188ee/Makefile   | 2 --
> drivers/net/wireless/realtek/rtlwifi/rtl8192c/Makefile| 2 --
> drivers/net/wireless/realtek/rtlwifi/rtl8192ce/Makefile   | 2 --
> drivers/net/wireless/realtek/rtlwifi/rtl8192cu/Makefile   | 2 --
> drivers/net/wireless/realtek/rtlwifi/rtl8192de/Makefile   | 2 --
> drivers/net/wireless/realtek/rtlwifi/rtl8192ee/Makefile   | 2 --
> drivers/net/wireless/realtek/rtlwifi/rtl8192se/Makefile   | 2 --
> drivers/net/wireless/realtek/rtlwifi/rtl8723ae/Makefile   | 2 --
> drivers/net/wireless/realtek/rtlwifi/rtl8723be/Makefile   | 2 --
> drivers/net/wireless/realtek/rtlwifi/rtl8723com/Makefile  | 2 --
> drivers/net/wireless/realtek/rtlwifi/rtl8821ae/Makefile   | 2 --
> drivers/net/wireless/ti/wl1251/Makefile   | 2 --
> drivers/net/wireless/ti/wlcore/Makefile   | 2 --
> drivers/staging/rtl8188eu/Makefile| 2 +-
> drivers/staging/rtl8192e/Makefile | 2 --
> drivers/staging/rtl8192e/rtl8192e/Makefile| 2 --
> net/bluetooth/Makefile| 2 --
> net/ieee802154/Makefile   | 2 --
> net/mac80211/Makefile | 2 +-
> net/mac802154/Makefile| 2 --
> net/wireless/Makefile | 2 --
> 38 files changed, 5 insertions(+), 68 deletions(-)

for drivers/bluetooth, net/bluetooth, net/ieee802154 and net/mac802154

Acked-by: Marcel Holtmann 

Regards

Marcel



[PATCHSET v3] blk-mq scheduling framework

2016-12-14 Thread Jens Axboe
This is version 3 of the blk-mq scheduling framework. Version 2
was posted here:

https://marc.info/?l=linux-block=148122805026762=2

It's fully stable. In fact I'm running it on my laptop [1]. That may
or may not have been part of a dare. In any case, it's been stable
on that too, and has survived lengthy testing on dedicated test
boxes.

[1] $ cat /sys/block/nvme0n1/queue/scheduler
[mq-deadline] none

I'm still mentally debating whether to shift this over to have
duplicate request tags, one for the scheduler and one for the issue
side. We run into various issues if we do that, but we also get
rid of the shadow request field copying. I think both approaches
have their downsides. I originally considered both, and thought that
the shadow request would potentially be the cleanest.

I've rebased this against Linus master branch, since a bunch of
the prep patches are now in, and the general block changes are in
as well.

The patches can be pulled here:

git://git.kernel.dk/linux-block blk-mq-sched.3

Changes since v2:

- Fix the Kconfig single/multi queue sched entry. Suggested by Bart.

- Move the queue ref put into the failure path of the request getting,
  so the caller doesn't have to know about it. Suggested by Bart.

- Add support for IO context management. Needed for the BFQ port.

- Change the anonymous elevator ops union to a named one, since
  old (looking at you, gcc 4.4) compilers don't support named
  initialization of anon unions.

- Constify the blk_mq_ops structure pointers.

- Add generic merging code, so mq-deadline (and others) don't have to
  handle/duplicate that.

- Switched the dispatch hook to list based, so we can move more entries
  at the time, if we want/need to. From Omar.

- Add support for schedulers to continue using the software queues.
  From Omar.

- Ensure that it works with blk-wbt.

- Fix a failure case if we fail registering the MQ elevator. We'd
  fall back to trying noop, which we'd find, but that would not
  work for MQ devices. Fall back to 'none' instead.

- Verified queue ref management.

- Fixed a bunch of bugs, and added a bunch of cleanups.

 block/Kconfig.iosched|   37 ++
 block/Makefile   |3 
 block/blk-core.c |   23 -
 block/blk-exec.c |3 
 block/blk-flush.c|7 
 block/blk-ioc.c  |8 
 block/blk-merge.c|4 
 block/blk-mq-sched.c |  394 +
 block/blk-mq-sched.h |  192 ++
 block/blk-mq-tag.c   |1 
 block/blk-mq.c   |  226 +++-
 block/blk-mq.h   |   28 ++
 block/blk.h  |   26 +
 block/cfq-iosched.c  |2 
 block/deadline-iosched.c |2 
 block/elevator.c |  229 
 block/mq-deadline.c  |  638 +++
 block/noop-iosched.c |2 
 drivers/nvme/host/pci.c  |1 
 include/linux/blk-mq.h   |6 
 include/linux/blkdev.h   |2 
 include/linux/elevator.h |   33 ++
 22 files changed, 1635 insertions(+), 232 deletions(-)

-- 
Jens Axboe



[PATCHSET v3] blk-mq scheduling framework

2016-12-14 Thread Jens Axboe
This is version 3 of the blk-mq scheduling framework. Version 2
was posted here:

https://marc.info/?l=linux-block=148122805026762=2

It's fully stable. In fact I'm running it on my laptop [1]. That may
or may not have been part of a dare. In any case, it's been stable
on that too, and has survived lengthy testing on dedicated test
boxes.

[1] $ cat /sys/block/nvme0n1/queue/scheduler
[mq-deadline] none

I'm still mentally debating whether to shift this over to have
duplicate request tags, one for the scheduler and one for the issue
side. We run into various issues if we do that, but we also get
rid of the shadow request field copying. I think both approaches
have their downsides. I originally considered both, and thought that
the shadow request would potentially be the cleanest.

I've rebased this against Linus master branch, since a bunch of
the prep patches are now in, and the general block changes are in
as well.

The patches can be pulled here:

git://git.kernel.dk/linux-block blk-mq-sched.3

Changes since v2:

- Fix the Kconfig single/multi queue sched entry. Suggested by Bart.

- Move the queue ref put into the failure path of the request getting,
  so the caller doesn't have to know about it. Suggested by Bart.

- Add support for IO context management. Needed for the BFQ port.

- Change the anonymous elevator ops union to a named one, since
  old (looking at you, gcc 4.4) compilers don't support named
  initialization of anon unions.

- Constify the blk_mq_ops structure pointers.

- Add generic merging code, so mq-deadline (and others) don't have to
  handle/duplicate that.

- Switched the dispatch hook to list based, so we can move more entries
  at the time, if we want/need to. From Omar.

- Add support for schedulers to continue using the software queues.
  From Omar.

- Ensure that it works with blk-wbt.

- Fix a failure case if we fail registering the MQ elevator. We'd
  fall back to trying noop, which we'd find, but that would not
  work for MQ devices. Fall back to 'none' instead.

- Verified queue ref management.

- Fixed a bunch of bugs, and added a bunch of cleanups.

 block/Kconfig.iosched|   37 ++
 block/Makefile   |3 
 block/blk-core.c |   23 -
 block/blk-exec.c |3 
 block/blk-flush.c|7 
 block/blk-ioc.c  |8 
 block/blk-merge.c|4 
 block/blk-mq-sched.c |  394 +
 block/blk-mq-sched.h |  192 ++
 block/blk-mq-tag.c   |1 
 block/blk-mq.c   |  226 +++-
 block/blk-mq.h   |   28 ++
 block/blk.h  |   26 +
 block/cfq-iosched.c  |2 
 block/deadline-iosched.c |2 
 block/elevator.c |  229 
 block/mq-deadline.c  |  638 +++
 block/noop-iosched.c |2 
 drivers/nvme/host/pci.c  |1 
 include/linux/blk-mq.h   |6 
 include/linux/blkdev.h   |2 
 include/linux/elevator.h |   33 ++
 22 files changed, 1635 insertions(+), 232 deletions(-)

-- 
Jens Axboe



Re: [PATCHv6] support for AD5820 camera auto-focus coil

2016-12-14 Thread Sakari Ailus
Hi Pali and Tony,

On Wed, Dec 14, 2016 at 07:08:19AM -0800, Tony Lindgren wrote:
> * Pali Rohár  [161214 05:38]:
> > On Monday 08 August 2016 23:41:32 Pavel Machek wrote:
> > > On Mon 2016-08-08 11:09:56, Sakari Ailus wrote:
> > > > On Fri, Aug 05, 2016 at 12:26:11PM +0200, Pavel Machek wrote:
> > > > > This adds support for AD5820 autofocus coil, found for example in
> > > > > Nokia N900 smartphone.
> > > > 
> > > > Thanks, Pavel!
> > > > 
> > > > Let's use V4L2_CID_FOCUS_ABSOLUTE, as is in the patch. If we get
> > > > something better in the future, we'll switch to that then.
> > > > 
> > > > I've applied this to ad5820 branch in my tree.
> > > 
> > > Thanks. If I understand things correctly, both DTS patch and this
> > > patch are waiting in your tree, so we should be good to go for 4.9
> > > (unless some unexpected problems surface)?
> > > 
> > > Best regards,
> > >   Pavel
> > 
> > Was DTS patch merged into 4.9? At least I do not see updated that dts 
> > file omap3-n900.dts in linus tree...
> 
> If it's not in current mainline or next, it's off my radar so sounds
> like I've somehow missed it and needs resending..

Where's this patch? I remember seeing the driver patch and the DT
documentation but no actual DT source patch for the N900.

-- 
Kind regards,

Sakari Ailus
e-mail: sakari.ai...@iki.fi XMPP: sai...@retiisi.org.uk


Re: [PATCHv6] support for AD5820 camera auto-focus coil

2016-12-14 Thread Sakari Ailus
Hi Pali and Tony,

On Wed, Dec 14, 2016 at 07:08:19AM -0800, Tony Lindgren wrote:
> * Pali Rohár  [161214 05:38]:
> > On Monday 08 August 2016 23:41:32 Pavel Machek wrote:
> > > On Mon 2016-08-08 11:09:56, Sakari Ailus wrote:
> > > > On Fri, Aug 05, 2016 at 12:26:11PM +0200, Pavel Machek wrote:
> > > > > This adds support for AD5820 autofocus coil, found for example in
> > > > > Nokia N900 smartphone.
> > > > 
> > > > Thanks, Pavel!
> > > > 
> > > > Let's use V4L2_CID_FOCUS_ABSOLUTE, as is in the patch. If we get
> > > > something better in the future, we'll switch to that then.
> > > > 
> > > > I've applied this to ad5820 branch in my tree.
> > > 
> > > Thanks. If I understand things correctly, both DTS patch and this
> > > patch are waiting in your tree, so we should be good to go for 4.9
> > > (unless some unexpected problems surface)?
> > > 
> > > Best regards,
> > >   Pavel
> > 
> > Was DTS patch merged into 4.9? At least I do not see updated that dts 
> > file omap3-n900.dts in linus tree...
> 
> If it's not in current mainline or next, it's off my radar so sounds
> like I've somehow missed it and needs resending..

Where's this patch? I remember seeing the driver patch and the DT
documentation but no actual DT source patch for the N900.

-- 
Kind regards,

Sakari Ailus
e-mail: sakari.ai...@iki.fi XMPP: sai...@retiisi.org.uk


Re: [PATCH 1/8] dax: add region-available-size attribute

2016-12-14 Thread Dan Williams
On Wed, Dec 14, 2016 at 7:53 AM, Dan Williams  wrote:
> On Wed, Dec 14, 2016 at 6:38 AM, Johannes Thumshirn  
> wrote:
>> Hi Dan,
>>
>> On Sat, Dec 10, 2016 at 10:28:30PM -0800, Dan Williams wrote:
>>> In preparation for a facility that enables dax regions to be
>>> sub-divided, introduce a 'dax/available_size' attribute.  This attribute
>>> appears under the parent device that registered the device-dax region,
>>> and it assumes that the device-dax-core owns the driver-data for that
>>> device.
>>>
>>> 'dax/available_size' adjusts dynamically as dax-device instances are
>>> registered and unregistered.
>>>
>>> As a side effect of using __request_region() to reserve capacity from
>>> the dax_region we now track pointers to those returned resources rather
>>> than duplicating the passed in resource array.
>>>
>>> Signed-off-by: Dan Williams 
>>> ---
>>
>> [...]
>>
>>> +static const struct attribute_group *dax_region_attribute_groups[] = {
>>> + _region_attribute_group,
>>> + NULL,
>>>  };
>>>
>>>  static struct inode *dax_alloc_inode(struct super_block *sb)
>>> @@ -200,12 +251,27 @@ void dax_region_put(struct dax_region *dax_region)
>>>  }
>>>  EXPORT_SYMBOL_GPL(dax_region_put);
>>>
>>> +
>>
>> Stray extra newline?
>>
>> [...]
>>
>>>  struct dax_region *alloc_dax_region(struct device *parent, int region_id,
>>>   struct resource *res, unsigned int align, void *addr,
>>>   unsigned long pfn_flags)
>>>  {
>>>   struct dax_region *dax_region;
>>>
>>> + if (dev_get_drvdata(parent)) {
>>> + dev_WARN(parent, "dax core found drvdata already in use\n");
>>> + return NULL;
>>> + }
>>> +
>>
>> My first thought was, it might be interesting to see who already claimed
>> the drvdata. Then I figured, how are multiple sub-regions of a dax-device
>> supposed to work? What am I missing here?
>
> This is a check similar to the -EBUSY return you would get from
> request_mem_region(). In fact if all dax drivers are correctly calling
> request_mem_region() before alloc_dax_region() then it would be
> impossible for this check to ever fire. It's already impossible
> because there's only one dax driver upstream (dax_pmem). It's not
> really benefiting the kernel at all until we have multiple dax
> drivers, I'll remove it.

No, I went to go delete this and remembered the real reason this was
added. A device driver that calls alloc_dax_region() commits to
letting the dax core own dev->driver_data. Since this wasn't even
clear to me, I'll go fix up the comment.


Re: [PATCH 1/8] dax: add region-available-size attribute

2016-12-14 Thread Dan Williams
On Wed, Dec 14, 2016 at 7:53 AM, Dan Williams  wrote:
> On Wed, Dec 14, 2016 at 6:38 AM, Johannes Thumshirn  
> wrote:
>> Hi Dan,
>>
>> On Sat, Dec 10, 2016 at 10:28:30PM -0800, Dan Williams wrote:
>>> In preparation for a facility that enables dax regions to be
>>> sub-divided, introduce a 'dax/available_size' attribute.  This attribute
>>> appears under the parent device that registered the device-dax region,
>>> and it assumes that the device-dax-core owns the driver-data for that
>>> device.
>>>
>>> 'dax/available_size' adjusts dynamically as dax-device instances are
>>> registered and unregistered.
>>>
>>> As a side effect of using __request_region() to reserve capacity from
>>> the dax_region we now track pointers to those returned resources rather
>>> than duplicating the passed in resource array.
>>>
>>> Signed-off-by: Dan Williams 
>>> ---
>>
>> [...]
>>
>>> +static const struct attribute_group *dax_region_attribute_groups[] = {
>>> + _region_attribute_group,
>>> + NULL,
>>>  };
>>>
>>>  static struct inode *dax_alloc_inode(struct super_block *sb)
>>> @@ -200,12 +251,27 @@ void dax_region_put(struct dax_region *dax_region)
>>>  }
>>>  EXPORT_SYMBOL_GPL(dax_region_put);
>>>
>>> +
>>
>> Stray extra newline?
>>
>> [...]
>>
>>>  struct dax_region *alloc_dax_region(struct device *parent, int region_id,
>>>   struct resource *res, unsigned int align, void *addr,
>>>   unsigned long pfn_flags)
>>>  {
>>>   struct dax_region *dax_region;
>>>
>>> + if (dev_get_drvdata(parent)) {
>>> + dev_WARN(parent, "dax core found drvdata already in use\n");
>>> + return NULL;
>>> + }
>>> +
>>
>> My first thought was, it might be interesting to see who already claimed
>> the drvdata. Then I figured, how are multiple sub-regions of a dax-device
>> supposed to work? What am I missing here?
>
> This is a check similar to the -EBUSY return you would get from
> request_mem_region(). In fact if all dax drivers are correctly calling
> request_mem_region() before alloc_dax_region() then it would be
> impossible for this check to ever fire. It's already impossible
> because there's only one dax driver upstream (dax_pmem). It's not
> really benefiting the kernel at all until we have multiple dax
> drivers, I'll remove it.

No, I went to go delete this and remembered the real reason this was
added. A device driver that calls alloc_dax_region() commits to
letting the dax core own dev->driver_data. Since this wasn't even
clear to me, I'll go fix up the comment.


Re: [PATCH 3/3] arm64: dts: rockchip: add clk-480m for ehci and ohci of rk3399

2016-12-14 Thread Frank Wang

Hi Brian, Doug and Heiko,

I would like to summarize why this story was constructed.

The ehci/ohci-platform suspend process is blocked due to the UTMI clock 
which directly output from usb-phy has been disabled, and why the UTMI 
clock was disabled?


UTMI clock and 480m clock all output from the same internal PLL of 
usb-phy, and there is only one bit can use to control this PLL on or 
off, which we named "otg_commononn"(GRF, offset 0x0e450/0x0e460 bit4 ) 
in RK3399 TRM.


When system boot up, ehci/ohci-platform probe function invoke 
phy_power_on(), further invoke rockchip_usb2phy_power_on() to enable 
480m clock, actually, it sets the otg_commononn bit on, and then usb-phy 
will go to (auto)suspend if there is no devices plug-in after 1 minute, 
the rockchip_usb2phy_power_off() will be invoked and the 480m clock may 
be disabled in the (auto)suspend process. As a result, the otg_commononn 
bit may be turned off, and all output clock of usb-phy will be disabled. 
However, ehci/ohci-platform PM suspend operation (read/write controller 
register) are based on the UTMI clock.


So we introduced "clk_usbphy0_480m_src"/"clk_usbphy1_480m_src" as one 
input clock for ehci/ohci-platform, in this way, the otg_commononn bit 
is not turned off until ehci/ohci-platform go to PM suspend.



BR.
Frank

On 2016/12/15 10:41, Xing Zheng wrote:

// Frank

Hi Doug, Brian,
Thanks for the reply.
Sorry I forgot these patches have been sent earlier, and Frank 
have some explained and discussed with Heiko.

Please see https://patchwork.kernel.org/patch/9255245/
Perhaps we can move to that patch tree to continue the discussion.

I think Frank and William will help us to continue checking these.

Thanks

在 2016年12月15日 08:10, Doug Anderson 写道:

Hi,

On Wed, Dec 14, 2016 at 2:11 AM, Xing Zheng 
 wrote:

From: William wu 

We found that the suspend process was blocked when it ran into
ehci/ohci module due to clk-480m of usb2-phy was disabled.

The root cause is that usb2-phy suspended earlier than ehci/ohci
(usb2-phy will be auto suspended if no devices plug-in).

This is really weird, but I can confirm it is true on my system too
(kernel-4.4 based).  At least I see:

[  208.012065] calling  usb1+ @ 4984, parent: fe38.usb, cb: 
usb_dev_suspend

[  208.569112] calling  ff77.syscon:usb2-phy@e450+ @ 4983, parent:
ff77.syscon, cb: platform_pm_suspend
[  208.569113] call ff77.syscon:usb2-phy@e450+ returned 0 after 0 
usecs

[  208.569439] calling  fe38.usb+ @ 4983, parent: platform, cb:
platform_pm_suspend
[  208.569444] call fe38.usb+ returned 0 after 4 usecs


In general I thought that suspend order was supposed to be related to
probe order.  So if your probe order is A, B, C then your suspend
order would be C, B, A.  ...and we know for sure that the USB PHY
needs to probe _before_ the main USB controller.  If it didn't then
you'd get an EPROBE_DEFER in the USB controller, right?  So that means
that the USB controller should be suspending before its PHY.

Any chance this is somehow related to async probe?  I'm not a huge
expert on async probe but I guess I could imagine things getting
confused if you had a sequence like this:

1. Start USB probe (async)
2. Start PHY probe
3. Finish PHY probe
4. In USB probe, ask for PHY--no problems since PHY probe finished
5. Finish USB probe

The probe order would be USB before PHY even though the USB probe
_depended_ on the PHY probe being finished...  :-/  Anyway, probably
I'm just misunderstanding something and someone can tell me how dumb I
am...

I also notice that the ehci_platform_power_off() function we're
actually making PHY commands right before the same commands that turn
off our clocks.  Presumably those commands aren't really so good to do
if the PHY has already been suspended?

Actually, does the PHY suspend from platform_pm_suspend() actually
even do anything?  It doesn't look like it.  It looks as if all the
PHY cares about is init/exit and on/off...  ...and it looks as if the
PHY should be turned off by the EHCI controller at about the same time
it turns off its clocks...

I haven't fully dug, but is there any chance that things are getting
confused between the OTG PHY and the Host PHY?  Maybe when we turn off
the OTG PHY it turns off something that the host PHY needs?



and the
clk-480m provided by it was disabled if no module used. However,
some suspend process related ehci/ohci are base on this clock,
so we should refer it into ehci/ohci driver to prevent this case.

Though I don't actually have details about the internals of the chip,
it does seem highly likely that the USB block actually uses this clock
for some things, so it doesn't seem insane (to me) to have the USB
controller request that the clock be on.  So, in general, I don't have
lots of objections to including the USB PHY Clock here.

...but I think you have the wrong clock (please correct me if I'm
wrong).  I think you really 

Re: [RFC PATCH] Memory hotplug support for arm64 platform

2016-12-14 Thread Xishi Qiu
On 2016/12/15 14:18, Xishi Qiu wrote:

> On 2016/12/14 20:16, Maciej Bielski wrote:
> 
>>
>>  
>> -#ifdef CONFIG_MEMORY_HOTREMOVE
>> -int arch_remove_memory(u64 start, u64 size)
>> -{
>> -unsigned long start_pfn = start >> PAGE_SHIFT;
>> -unsigned long nr_pages = size >> PAGE_SHIFT;
>> -struct zone *zone;
>> -int ret;
>> +SetPageReserved(pfn_to_page(pfn));
>> +}
> 
> Hi Maciej,
> 
> Why we need to set reserved here?
> I think the new pages are already reserved in __add_zone() -> 
> memmap_init_zone(), right?
> 

Hi Maciej,

The reason is as follows, right?

It's because that in memmap_init_zone() -> early_pfn_valid(), the new page is 
still
invalid, so we need to init it after memblock_clear_nomap()

So why not use __init_single_page() and set_pageblock_migratetype()?

> Thanks,
> Xishi Qiu
> 





Re: [PATCH 3/3] arm64: dts: rockchip: add clk-480m for ehci and ohci of rk3399

2016-12-14 Thread Frank Wang

Hi Brian, Doug and Heiko,

I would like to summarize why this story was constructed.

The ehci/ohci-platform suspend process is blocked due to the UTMI clock 
which directly output from usb-phy has been disabled, and why the UTMI 
clock was disabled?


UTMI clock and 480m clock all output from the same internal PLL of 
usb-phy, and there is only one bit can use to control this PLL on or 
off, which we named "otg_commononn"(GRF, offset 0x0e450/0x0e460 bit4 ) 
in RK3399 TRM.


When system boot up, ehci/ohci-platform probe function invoke 
phy_power_on(), further invoke rockchip_usb2phy_power_on() to enable 
480m clock, actually, it sets the otg_commononn bit on, and then usb-phy 
will go to (auto)suspend if there is no devices plug-in after 1 minute, 
the rockchip_usb2phy_power_off() will be invoked and the 480m clock may 
be disabled in the (auto)suspend process. As a result, the otg_commononn 
bit may be turned off, and all output clock of usb-phy will be disabled. 
However, ehci/ohci-platform PM suspend operation (read/write controller 
register) are based on the UTMI clock.


So we introduced "clk_usbphy0_480m_src"/"clk_usbphy1_480m_src" as one 
input clock for ehci/ohci-platform, in this way, the otg_commononn bit 
is not turned off until ehci/ohci-platform go to PM suspend.



BR.
Frank

On 2016/12/15 10:41, Xing Zheng wrote:

// Frank

Hi Doug,  Brian,
Thanks for the reply.
Sorry I forgot these patches have been sent earlier, and Frank 
have some explained and discussed with Heiko.

Please see https://patchwork.kernel.org/patch/9255245/
Perhaps we can move to that patch tree to continue the discussion.

I think Frank and William will help us to continue checking these.

Thanks

在 2016年12月15日 08:10, Doug Anderson 写道:

Hi,

On Wed, Dec 14, 2016 at 2:11 AM, Xing Zheng 
 wrote:

From: William wu 

We found that the suspend process was blocked when it run into
ehci/ohci module due to clk-480m of usb2-phy was disabled.

The root cause is that usb2-phy suspended earlier than ehci/ohci
(usb2-phy will be auto suspended if no devices plug-in).

This is really weird, but I can confirm it is true on my system too
(kernel-4.4 based).  At least I see:

[  208.012065] calling  usb1+ @ 4984, parent: fe38.usb, cb: 
usb_dev_suspend

[  208.569112] calling  ff77.syscon:usb2-phy@e450+ @ 4983, parent:
ff77.syscon, cb: platform_pm_suspend
[  208.569113] call ff77.syscon:usb2-phy@e450+ returned 0 after 0 
usecs

[  208.569439] calling  fe38.usb+ @ 4983, parent: platform, cb:
platform_pm_suspend
[  208.569444] call fe38.usb+ returned 0 after 4 usecs


In general I thought that suspend order was supposed to be related to
probe order.  So if your probe order is A, B, C then your suspend
order would be C, B, A.  ...and we know for sure that the USB PHY
needs to probe _before_ the main USB controller.  If it didn't then
you'd get an EPROBE_DEFER in the USB controller, right?  So that means
that the USB controller should be suspending before its PHY.

Any chance this is somehow related to async probe?  I'm not a huge
expert on async probe but I guess I could imagine things getting
confused if you had a sequence like this:

1. Start USB probe (async)
2. Start PHY probe
3. Finish PHY probe
4. In USB probe, ask for PHY--no problems since PHY probe finished
5. Finish USB probe

The probe order would be USB before PHY even though the USB probe
_depended_ on the PHY probe being finished...  :-/  Anyway, probably
I'm just misunderstanding something and someone can tell me how dumb I
am...

I also notice that the ehci_platform_power_off() function we're
actually making PHY commands right before the same commands that turn
off our clocks.  Presumably those commands aren't really so good to do
if the PHY has already been suspended?

Actually, does the PHY suspend from platform_pm_suspend() actually
even do anything?  It doesn't look like it.  It looks as if all the
PHY cares about is init/exit and on/off...  ...and it looks as if the
PHY should be turned off by the EHCI controller at about the same time
it turns off its clocks...

I haven't fully dug, but is there any chance that things are getting
confused between the OTG PHY and the Host PHY?  Maybe when we turn off
the OTG PHY it turns off something that the host PHY needs?



and the
clk-480m provided by it was disabled if no module used. However,
some suspend process related ehci/ohci are base on this clock,
so we should refer it into ehci/ohci driver to prevent this case.

Though I don't actually have details about the internals of the chip,
it does seem highly likely that the USB block actually uses this clock
for some things, so it doesn't seem insane (to me) to have the USB
controller request that the clock be on.  So, in general, I don't have
lots of objections to including the USB PHY Clock here.

...but I think you have the wrong clock (please correct me if I'm
wrong).  I think you really wanted your input clock to be
"clk_usbphy0_480m", 

Re: [RFC PATCH] Memory hotplug support for arm64 platform

2016-12-14 Thread Xishi Qiu
On 2016/12/15 14:18, Xishi Qiu wrote:

> On 2016/12/14 20:16, Maciej Bielski wrote:
> 
>>
>>  
>> -#ifdef CONFIG_MEMORY_HOTREMOVE
>> -int arch_remove_memory(u64 start, u64 size)
>> -{
>> -unsigned long start_pfn = start >> PAGE_SHIFT;
>> -unsigned long nr_pages = size >> PAGE_SHIFT;
>> -struct zone *zone;
>> -int ret;
>> +SetPageReserved(pfn_to_page(pfn));
>> +}
> 
> Hi Maciej,
> 
> Why we need to set reserved here?
> I think the new pages are already reserved in __add_zone() -> 
> memmap_init_zone(), right?
> 

Hi Maciej,

The reason is as follows, right?

It's because that in memmap_init_zone() -> early_pfn_valid(), the new page is 
still
invalid, so we need to init it after memblock_clear_nomap()

So why not use __init_single_page() and set_pageblock_migratetype()?

> Thanks,
> Xishi Qiu
> 





Re: [PATCH 1/1] platform/x86: surface3-wmi: Balance locking on error path

2016-12-14 Thread Darren Hart
On Thu, Dec 15, 2016 at 03:24:58AM +0200, Andy Shevchenko wrote:
> There is a possibility that lock will be left acquired.
> Consolidate error path under out_free_unlock label.
> 
> Reported-by: kbuild test robot 
> Cc: Benjamin Tissoires 
> Signed-off-by: Andy Shevchenko 
> ---
>  drivers/platform/x86/surface3-wmi.c | 11 ++-
>  1 file changed, 6 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/platform/x86/surface3-wmi.c 
> b/drivers/platform/x86/surface3-wmi.c
> index 5553b2b85e0a..ed7a3b77a0cc 100644
> --- a/drivers/platform/x86/surface3-wmi.c
> +++ b/drivers/platform/x86/surface3-wmi.c
> @@ -60,10 +60,10 @@ static DEFINE_MUTEX(s3_wmi_lock);
>  
>  static int s3_wmi_query_block(const char *guid, int instance, int *ret)
>  {
> + struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL };
>   acpi_status status;
>   union acpi_object *obj;
> -
> - struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL };
> + int error = 0;

I'd prefer ret throughout for consistency with the kernel in general, but as
error is used already, it's better to be self-consistent.

>  
>   mutex_lock(&s3_wmi_lock);
>   status = wmi_query_block(guid, instance, &output);
> @@ -77,13 +77,14 @@ static int s3_wmi_query_block(const char *guid, int 
> instance, int *ret)
>  obj->type == ACPI_TYPE_BUFFER ?
>   obj->buffer.length : 0);
>   }
> - kfree(obj);
> - return -EINVAL;
> + error = -EINVAL;
> + goto out_free_unlock;
>   }
>   *ret = obj->integer.value;
> +out_free_unlock:

Please lead labels with a space:

 out_free_unlock:

This makes diffs a bit nicer as the label isn't used in lieu of the function
name. This is also consistent with the rest of the file.

Thanks,

-- 
Darren Hart
Intel Open Source Technology Center


Re: [PATCH] cifs: use %16phN for formatting md5 sum

2016-12-14 Thread Steve French
merged into cifs-2.6.git for-next

On Wed, Nov 30, 2016 at 4:40 PM, Rasmus Villemoes
 wrote:
> Passing a gazillion arguments takes a lot of code:
>
> add/remove: 0/0 grow/shrink: 0/2 up/down: 0/-253 (-253)
>
> Signed-off-by: Rasmus Villemoes 
> ---
>  fs/cifs/link.c | 9 ++---
>  1 file changed, 2 insertions(+), 7 deletions(-)
>
> diff --git a/fs/cifs/link.c b/fs/cifs/link.c
> index d031af8d3d4d..c4d996f78e1c 100644
> --- a/fs/cifs/link.c
> +++ b/fs/cifs/link.c
> @@ -45,13 +45,8 @@
> (CIFS_MF_SYMLINK_LINK_OFFSET + CIFS_MF_SYMLINK_LINK_MAXLEN)
>
>  #define CIFS_MF_SYMLINK_LEN_FORMAT "XSym\n%04u\n"
> -#define CIFS_MF_SYMLINK_MD5_FORMAT \
> -   "%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n"
> -#define CIFS_MF_SYMLINK_MD5_ARGS(md5_hash) \
> -   md5_hash[0],  md5_hash[1],  md5_hash[2],  md5_hash[3], \
> -   md5_hash[4],  md5_hash[5],  md5_hash[6],  md5_hash[7], \
> -   md5_hash[8],  md5_hash[9],  md5_hash[10], md5_hash[11],\
> -   md5_hash[12], md5_hash[13], md5_hash[14], md5_hash[15]
> +#define CIFS_MF_SYMLINK_MD5_FORMAT "%16phN\n"
> +#define CIFS_MF_SYMLINK_MD5_ARGS(md5_hash) md5_hash
>
>  static int
>  symlink_hash(unsigned int link_len, const char *link_str, u8 *md5_hash)
> --
> 2.1.4
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-cifs" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html



-- 
Thanks,

Steve


Re: [PATCH 1/1] platform/x86: surface3-wmi: Balance locking on error path

2016-12-14 Thread Darren Hart
On Thu, Dec 15, 2016 at 03:24:58AM +0200, Andy Shevchenko wrote:
> There is a possibility that lock will be left acquired.
> Consolidate error path under out_free_unlock label.
> 
> Reported-by: kbuild test robot 
> Cc: Benjamin Tissoires 
> Signed-off-by: Andy Shevchenko 
> ---
>  drivers/platform/x86/surface3-wmi.c | 11 ++-
>  1 file changed, 6 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/platform/x86/surface3-wmi.c 
> b/drivers/platform/x86/surface3-wmi.c
> index 5553b2b85e0a..ed7a3b77a0cc 100644
> --- a/drivers/platform/x86/surface3-wmi.c
> +++ b/drivers/platform/x86/surface3-wmi.c
> @@ -60,10 +60,10 @@ static DEFINE_MUTEX(s3_wmi_lock);
>  
>  static int s3_wmi_query_block(const char *guid, int instance, int *ret)
>  {
> + struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL };
>   acpi_status status;
>   union acpi_object *obj;
> -
> - struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL };
> + int error = 0;

I'd prefer ret throughout for consistency with the kernel in general, but as
error is used already, it's better to be self-consistent.

>  
>   mutex_lock(&s3_wmi_lock);
>   status = wmi_query_block(guid, instance, &output);
> @@ -77,13 +77,14 @@ static int s3_wmi_query_block(const char *guid, int 
> instance, int *ret)
>  obj->type == ACPI_TYPE_BUFFER ?
>   obj->buffer.length : 0);
>   }
> - kfree(obj);
> - return -EINVAL;
> + error = -EINVAL;
> + goto out_free_unlock;
>   }
>   *ret = obj->integer.value;
> +out_free_unlock:

Please lead labels with a space:

 out_free_unlock:

This makes diffs a bit nicer as the label isn't used in lieu of the function
name. This is also consistent with the rest of the file.

Thanks,

-- 
Darren Hart
Intel Open Source Technology Center


Re: [PATCH] cifs: use %16phN for formatting md5 sum

2016-12-14 Thread Steve French
merged into cifs-2.6.git for-next

On Wed, Nov 30, 2016 at 4:40 PM, Rasmus Villemoes
 wrote:
> Passing a gazillion arguments takes a lot of code:
>
> add/remove: 0/0 grow/shrink: 0/2 up/down: 0/-253 (-253)
>
> Signed-off-by: Rasmus Villemoes 
> ---
>  fs/cifs/link.c | 9 ++---
>  1 file changed, 2 insertions(+), 7 deletions(-)
>
> diff --git a/fs/cifs/link.c b/fs/cifs/link.c
> index d031af8d3d4d..c4d996f78e1c 100644
> --- a/fs/cifs/link.c
> +++ b/fs/cifs/link.c
> @@ -45,13 +45,8 @@
> (CIFS_MF_SYMLINK_LINK_OFFSET + CIFS_MF_SYMLINK_LINK_MAXLEN)
>
>  #define CIFS_MF_SYMLINK_LEN_FORMAT "XSym\n%04u\n"
> -#define CIFS_MF_SYMLINK_MD5_FORMAT \
> -   "%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n"
> -#define CIFS_MF_SYMLINK_MD5_ARGS(md5_hash) \
> -   md5_hash[0],  md5_hash[1],  md5_hash[2],  md5_hash[3], \
> -   md5_hash[4],  md5_hash[5],  md5_hash[6],  md5_hash[7], \
> -   md5_hash[8],  md5_hash[9],  md5_hash[10], md5_hash[11],\
> -   md5_hash[12], md5_hash[13], md5_hash[14], md5_hash[15]
> +#define CIFS_MF_SYMLINK_MD5_FORMAT "%16phN\n"
> +#define CIFS_MF_SYMLINK_MD5_ARGS(md5_hash) md5_hash
>
>  static int
>  symlink_hash(unsigned int link_len, const char *link_str, u8 *md5_hash)
> --
> 2.1.4
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-cifs" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html



-- 
Thanks,

Steve


Re: [PATCH] vfio/type1: Restore mapping performance with mdev support

2016-12-14 Thread Kirti Wankhede


On 12/14/2016 2:28 AM, Alex Williamson wrote:
> As part of the mdev support, type1 now gets a task reference per
> vfio_dma and uses that to get an mm reference for the task while
> working on accounting.  That's the correct thing to do for paths
> where we can't rely on using current, but there are still hot paths
> where we can optimize because we know we're invoked by the user.
> 
> Specifically, vfio_pin_pages_remote() is only called when the user
> does DMA mapping (vfio_dma_do_map) or if an IOMMU group is added to
> a container with existing mappings (vfio_iommu_replay).  We can
> therefore use current->mm as well as rlimit() and capable() directly
> rather than going through the high overhead path via the stored
> task_struct.  We also know that vfio_dma_do_unmap() is only called
> via user ioctl, so we can also tune that path to be more lightweight.
> 
> In a synthetic guest mapping test emulating a 1TB VM backed by a
> single 4GB range remapped multiple times across the address space,
> the mdev changes to the type1 backend introduced a roughly 25% hit
> in runtime of this test.  These changes restore it to nearly the
> previous performance for the interfaces exercised here,
> VFIO_IOMMU_MAP_DMA and release on close.
> 
> Signed-off-by: Alex Williamson 
> ---
>  drivers/vfio/vfio_iommu_type1.c |  145 
> +--
>  1 file changed, 79 insertions(+), 66 deletions(-)
> 
> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> index 9815e45..8dfeafb 100644
> --- a/drivers/vfio/vfio_iommu_type1.c
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -103,6 +103,10 @@ struct vfio_pfn {
>  #define IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu)  \
>   (!list_empty(&iommu->domain_list))
>  
> +/* Make function bool options readable */
> +#define IS_CURRENT   (true)
> +#define DO_ACCOUNTING(true)
> +
>  static int put_pfn(unsigned long pfn, int prot);
>  
>  /*
> @@ -264,7 +268,8 @@ static void vfio_lock_acct_bg(struct work_struct *work)
>   kfree(vwork);
>  }
>  
> -static void vfio_lock_acct(struct task_struct *task, long npage)
> +static void vfio_lock_acct(struct task_struct *task,
> +long npage, bool is_current)
>  {
>   struct vwork *vwork;
>   struct mm_struct *mm;
> @@ -272,24 +277,31 @@ static void vfio_lock_acct(struct task_struct *task, 
> long npage)
>   if (!npage)
>   return;
>  
> - mm = get_task_mm(task);
> + mm = is_current ? task->mm : get_task_mm(task);
>   if (!mm)
> - return; /* process exited or nothing to do */
> + return; /* process exited */
>  
>   if (down_write_trylock(&mm->mmap_sem)) {
>   mm->locked_vm += npage;
>   up_write(&mm->mmap_sem);
> - mmput(mm);
> + if (!is_current)
> + mmput(mm);
>   return;
>   }
>  
> + if (is_current) {
> + mm = get_task_mm(task);
> + if (!mm)
> + return;
> + }
> +
>   /*
>* Couldn't get mmap_sem lock, so must setup to update
>* mm->locked_vm later. If locked_vm were atomic, we
>* wouldn't need this silliness
>*/
>   vwork = kmalloc(sizeof(struct vwork), GFP_KERNEL);
> - if (!vwork) {
> + if (WARN_ON(!vwork)) {
>   mmput(mm);
>   return;
>   }
> @@ -345,13 +357,13 @@ static int put_pfn(unsigned long pfn, int prot)
>  }
>  
>  static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr,
> -  int prot, unsigned long *pfn)
> +  int prot, unsigned long *pfn, bool is_current)
>  {
>   struct page *page[1];
>   struct vm_area_struct *vma;
>   int ret;
>  
> - if (mm == current->mm) {
> + if (is_current) {

With this change, if vfio_pin_page_external() gets called from QEMU
process context, for example in response to some BAR0 register access,
it will still fallback to slow path, get_user_pages_remote(). We don't
have to change this function. This path already takes care of taking
best possible path.

That also makes me think, vfio_pin_page_external() uses task structure
to get mlock limit and capability. Expectation is mdev vendor driver
shouldn't pin all system memory, but if any mdev driver does that, then
that driver might see such performance impact. Should we optimize this
path if (dma->task == current)?

Thanks,
Kirti

>   ret = get_user_pages_fast(vaddr, 1, !!(prot & IOMMU_WRITE),
> page);
>   } else {
> @@ -393,96 +405,92 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned 
> long vaddr,
>  static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
> long npage, unsigned long *pfn_base)
>  {
> - unsigned long limit;
> - bool lock_cap = 

Re: [PATCH] vfio/type1: Restore mapping performance with mdev support

2016-12-14 Thread Kirti Wankhede


On 12/14/2016 2:28 AM, Alex Williamson wrote:
> As part of the mdev support, type1 now gets a task reference per
> vfio_dma and uses that to get an mm reference for the task while
> working on accounting.  That's the correct thing to do for paths
> where we can't rely on using current, but there are still hot paths
> where we can optimize because we know we're invoked by the user.
> 
> Specifically, vfio_pin_pages_remote() is only called when the user
> does DMA mapping (vfio_dma_do_map) or if an IOMMU group is added to
> a container with existing mappings (vfio_iommu_replay).  We can
> therefore use current->mm as well as rlimit() and capable() directly
> rather than going through the high overhead path via the stored
> task_struct.  We also know that vfio_dma_do_unmap() is only called
> via user ioctl, so we can also tune that path to be more lightweight.
> 
> In a synthetic guest mapping test emulating a 1TB VM backed by a
> single 4GB range remapped multiple times across the address space,
> the mdev changes to the type1 backend introduced a roughly 25% hit
> in runtime of this test.  These changes restore it to nearly the
> previous performance for the interfaces exercised here,
> VFIO_IOMMU_MAP_DMA and release on close.
> 
> Signed-off-by: Alex Williamson 
> ---
>  drivers/vfio/vfio_iommu_type1.c |  145 
> +--
>  1 file changed, 79 insertions(+), 66 deletions(-)
> 
> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> index 9815e45..8dfeafb 100644
> --- a/drivers/vfio/vfio_iommu_type1.c
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -103,6 +103,10 @@ struct vfio_pfn {
>  #define IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu)  \
>   (!list_empty(&iommu->domain_list))
>  
> +/* Make function bool options readable */
> +#define IS_CURRENT   (true)
> +#define DO_ACCOUNTING(true)
> +
>  static int put_pfn(unsigned long pfn, int prot);
>  
>  /*
> @@ -264,7 +268,8 @@ static void vfio_lock_acct_bg(struct work_struct *work)
>   kfree(vwork);
>  }
>  
> -static void vfio_lock_acct(struct task_struct *task, long npage)
> +static void vfio_lock_acct(struct task_struct *task,
> +long npage, bool is_current)
>  {
>   struct vwork *vwork;
>   struct mm_struct *mm;
> @@ -272,24 +277,31 @@ static void vfio_lock_acct(struct task_struct *task, 
> long npage)
>   if (!npage)
>   return;
>  
> - mm = get_task_mm(task);
> + mm = is_current ? task->mm : get_task_mm(task);
>   if (!mm)
> - return; /* process exited or nothing to do */
> + return; /* process exited */
>  
>   if (down_write_trylock(&mm->mmap_sem)) {
>   mm->locked_vm += npage;
>   up_write(&mm->mmap_sem);
> - mmput(mm);
> + if (!is_current)
> + mmput(mm);
>   return;
>   }
>  
> + if (is_current) {
> + mm = get_task_mm(task);
> + if (!mm)
> + return;
> + }
> +
>   /*
>* Couldn't get mmap_sem lock, so must setup to update
>* mm->locked_vm later. If locked_vm were atomic, we
>* wouldn't need this silliness
>*/
>   vwork = kmalloc(sizeof(struct vwork), GFP_KERNEL);
> - if (!vwork) {
> + if (WARN_ON(!vwork)) {
>   mmput(mm);
>   return;
>   }
> @@ -345,13 +357,13 @@ static int put_pfn(unsigned long pfn, int prot)
>  }
>  
>  static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr,
> -  int prot, unsigned long *pfn)
> +  int prot, unsigned long *pfn, bool is_current)
>  {
>   struct page *page[1];
>   struct vm_area_struct *vma;
>   int ret;
>  
> - if (mm == current->mm) {
> + if (is_current) {

With this change, if vfio_pin_page_external() gets called from QEMU
process context, for example in response to some BAR0 register access,
it will still fallback to slow path, get_user_pages_remote(). We don't
have to change this function. This path already takes care of taking
best possible path.

That also makes me think, vfio_pin_page_external() uses task structure
to get mlock limit and capability. Expectation is mdev vendor driver
shouldn't pin all system memory, but if any mdev driver does that, then
that driver might see such performance impact. Should we optimize this
path if (dma->task == current)?

Thanks,
Kirti

>   ret = get_user_pages_fast(vaddr, 1, !!(prot & IOMMU_WRITE),
> page);
>   } else {
> @@ -393,96 +405,92 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned 
> long vaddr,
>  static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
> long npage, unsigned long *pfn_base)
>  {
> - unsigned long limit;
> - bool lock_cap = ns_capable(task_active_pid_ns(dma->task)->user_ns,
> 

[PATCH 1/7] block: move existing elevator ops to union

2016-12-14 Thread Jens Axboe
Prep patch for adding MQ ops as well, since doing anon unions with
named initializers doesn't work on older compilers.

Signed-off-by: Jens Axboe 
---
 block/blk-ioc.c  |  8 +++
 block/blk-merge.c|  4 ++--
 block/blk.h  | 10 
 block/cfq-iosched.c  |  2 +-
 block/deadline-iosched.c |  2 +-
 block/elevator.c | 60 
 block/noop-iosched.c |  2 +-
 include/linux/elevator.h |  4 +++-
 8 files changed, 47 insertions(+), 45 deletions(-)

diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index 381cb50a673c..ab372092a57d 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -43,8 +43,8 @@ static void ioc_exit_icq(struct io_cq *icq)
if (icq->flags & ICQ_EXITED)
return;
 
-   if (et->ops.elevator_exit_icq_fn)
-   et->ops.elevator_exit_icq_fn(icq);
+   if (et->ops.sq.elevator_exit_icq_fn)
+   et->ops.sq.elevator_exit_icq_fn(icq);
 
icq->flags |= ICQ_EXITED;
 }
@@ -383,8 +383,8 @@ struct io_cq *ioc_create_icq(struct io_context *ioc, struct 
request_queue *q,
if (likely(!radix_tree_insert(&ioc->icq_tree, q->id, icq))) {
hlist_add_head(&icq->ioc_node, &ioc->icq_list);
list_add(&icq->q_node, &q->icq_list);
-   if (et->ops.elevator_init_icq_fn)
-   et->ops.elevator_init_icq_fn(icq);
+   if (et->ops.sq.elevator_init_icq_fn)
+   et->ops.sq.elevator_init_icq_fn(icq);
} else {
kmem_cache_free(et->icq_cache, icq);
icq = ioc_lookup_icq(ioc, q);
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 182398cb1524..480570b691dc 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -763,8 +763,8 @@ int blk_attempt_req_merge(struct request_queue *q, struct 
request *rq,
 {
struct elevator_queue *e = q->elevator;
 
-   if (e->type->ops.elevator_allow_rq_merge_fn)
-   if (!e->type->ops.elevator_allow_rq_merge_fn(q, rq, next))
+   if (e->type->ops.sq.elevator_allow_rq_merge_fn)
+   if (!e->type->ops.sq.elevator_allow_rq_merge_fn(q, rq, next))
return 0;
 
return attempt_merge(q, rq, next);
diff --git a/block/blk.h b/block/blk.h
index 041185e5f129..f46c0ac8ae3d 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -167,7 +167,7 @@ static inline struct request *__elv_next_request(struct 
request_queue *q)
return NULL;
}
if (unlikely(blk_queue_bypass(q)) ||
-   !q->elevator->type->ops.elevator_dispatch_fn(q, 0))
+   !q->elevator->type->ops.sq.elevator_dispatch_fn(q, 0))
return NULL;
}
 }
@@ -176,16 +176,16 @@ static inline void elv_activate_rq(struct request_queue 
*q, struct request *rq)
 {
struct elevator_queue *e = q->elevator;
 
-   if (e->type->ops.elevator_activate_req_fn)
-   e->type->ops.elevator_activate_req_fn(q, rq);
+   if (e->type->ops.sq.elevator_activate_req_fn)
+   e->type->ops.sq.elevator_activate_req_fn(q, rq);
 }
 
 static inline void elv_deactivate_rq(struct request_queue *q, struct request 
*rq)
 {
struct elevator_queue *e = q->elevator;
 
-   if (e->type->ops.elevator_deactivate_req_fn)
-   e->type->ops.elevator_deactivate_req_fn(q, rq);
+   if (e->type->ops.sq.elevator_deactivate_req_fn)
+   e->type->ops.sq.elevator_deactivate_req_fn(q, rq);
 }
 
 #ifdef CONFIG_FAIL_IO_TIMEOUT
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index c73a6fcaeb9d..37aeb20fa454 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -4837,7 +4837,7 @@ static struct elv_fs_entry cfq_attrs[] = {
 };
 
 static struct elevator_type iosched_cfq = {
-   .ops = {
+   .ops.sq = {
.elevator_merge_fn =cfq_merge,
.elevator_merged_fn =   cfq_merged_request,
.elevator_merge_req_fn =cfq_merged_requests,
diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
index 55e0bb6d7da7..05fc0ea25a98 100644
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c
@@ -439,7 +439,7 @@ static struct elv_fs_entry deadline_attrs[] = {
 };
 
 static struct elevator_type iosched_deadline = {
-   .ops = {
+   .ops.sq = {
.elevator_merge_fn =deadline_merge,
.elevator_merged_fn =   deadline_merged_request,
.elevator_merge_req_fn =deadline_merged_requests,
diff --git a/block/elevator.c b/block/elevator.c
index 40f0c04e5ad3..022a26830297 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -58,8 +58,8 @@ static int elv_iosched_allow_bio_merge(struct request *rq, 
struct bio *bio)
struct request_queue *q = rq->q;
struct elevator_queue *e = q->elevator;
 
-   if (e->type->ops.elevator_allow_bio_merge_fn)

[PATCH 1/7] block: move existing elevator ops to union

2016-12-14 Thread Jens Axboe
Prep patch for adding MQ ops as well, since doing anon unions with
named initializers doesn't work on older compilers.

Signed-off-by: Jens Axboe 
---
 block/blk-ioc.c  |  8 +++
 block/blk-merge.c|  4 ++--
 block/blk.h  | 10 
 block/cfq-iosched.c  |  2 +-
 block/deadline-iosched.c |  2 +-
 block/elevator.c | 60 
 block/noop-iosched.c |  2 +-
 include/linux/elevator.h |  4 +++-
 8 files changed, 47 insertions(+), 45 deletions(-)

diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index 381cb50a673c..ab372092a57d 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -43,8 +43,8 @@ static void ioc_exit_icq(struct io_cq *icq)
if (icq->flags & ICQ_EXITED)
return;
 
-   if (et->ops.elevator_exit_icq_fn)
-   et->ops.elevator_exit_icq_fn(icq);
+   if (et->ops.sq.elevator_exit_icq_fn)
+   et->ops.sq.elevator_exit_icq_fn(icq);
 
icq->flags |= ICQ_EXITED;
 }
@@ -383,8 +383,8 @@ struct io_cq *ioc_create_icq(struct io_context *ioc, struct 
request_queue *q,
if (likely(!radix_tree_insert(&ioc->icq_tree, q->id, icq))) {
hlist_add_head(&icq->ioc_node, &ioc->icq_list);
list_add(&icq->q_node, &q->icq_list);
-   if (et->ops.elevator_init_icq_fn)
-   et->ops.elevator_init_icq_fn(icq);
+   if (et->ops.sq.elevator_init_icq_fn)
+   et->ops.sq.elevator_init_icq_fn(icq);
} else {
kmem_cache_free(et->icq_cache, icq);
icq = ioc_lookup_icq(ioc, q);
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 182398cb1524..480570b691dc 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -763,8 +763,8 @@ int blk_attempt_req_merge(struct request_queue *q, struct 
request *rq,
 {
struct elevator_queue *e = q->elevator;
 
-   if (e->type->ops.elevator_allow_rq_merge_fn)
-   if (!e->type->ops.elevator_allow_rq_merge_fn(q, rq, next))
+   if (e->type->ops.sq.elevator_allow_rq_merge_fn)
+   if (!e->type->ops.sq.elevator_allow_rq_merge_fn(q, rq, next))
return 0;
 
return attempt_merge(q, rq, next);
diff --git a/block/blk.h b/block/blk.h
index 041185e5f129..f46c0ac8ae3d 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -167,7 +167,7 @@ static inline struct request *__elv_next_request(struct 
request_queue *q)
return NULL;
}
if (unlikely(blk_queue_bypass(q)) ||
-   !q->elevator->type->ops.elevator_dispatch_fn(q, 0))
+   !q->elevator->type->ops.sq.elevator_dispatch_fn(q, 0))
return NULL;
}
 }
@@ -176,16 +176,16 @@ static inline void elv_activate_rq(struct request_queue 
*q, struct request *rq)
 {
struct elevator_queue *e = q->elevator;
 
-   if (e->type->ops.elevator_activate_req_fn)
-   e->type->ops.elevator_activate_req_fn(q, rq);
+   if (e->type->ops.sq.elevator_activate_req_fn)
+   e->type->ops.sq.elevator_activate_req_fn(q, rq);
 }
 
 static inline void elv_deactivate_rq(struct request_queue *q, struct request 
*rq)
 {
struct elevator_queue *e = q->elevator;
 
-   if (e->type->ops.elevator_deactivate_req_fn)
-   e->type->ops.elevator_deactivate_req_fn(q, rq);
+   if (e->type->ops.sq.elevator_deactivate_req_fn)
+   e->type->ops.sq.elevator_deactivate_req_fn(q, rq);
 }
 
 #ifdef CONFIG_FAIL_IO_TIMEOUT
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index c73a6fcaeb9d..37aeb20fa454 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -4837,7 +4837,7 @@ static struct elv_fs_entry cfq_attrs[] = {
 };
 
 static struct elevator_type iosched_cfq = {
-   .ops = {
+   .ops.sq = {
.elevator_merge_fn =cfq_merge,
.elevator_merged_fn =   cfq_merged_request,
.elevator_merge_req_fn =cfq_merged_requests,
diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
index 55e0bb6d7da7..05fc0ea25a98 100644
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c
@@ -439,7 +439,7 @@ static struct elv_fs_entry deadline_attrs[] = {
 };
 
 static struct elevator_type iosched_deadline = {
-   .ops = {
+   .ops.sq = {
.elevator_merge_fn =deadline_merge,
.elevator_merged_fn =   deadline_merged_request,
.elevator_merge_req_fn =deadline_merged_requests,
diff --git a/block/elevator.c b/block/elevator.c
index 40f0c04e5ad3..022a26830297 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -58,8 +58,8 @@ static int elv_iosched_allow_bio_merge(struct request *rq, 
struct bio *bio)
struct request_queue *q = rq->q;
struct elevator_queue *e = q->elevator;
 
-   if (e->type->ops.elevator_allow_bio_merge_fn)
-  

Re: [RFC PATCH] Memory hotplug support for arm64 platform

2016-12-14 Thread Xishi Qiu
On 2016/12/14 20:16, Maciej Bielski wrote:

> 
>  
> -#ifdef CONFIG_MEMORY_HOTREMOVE
> -int arch_remove_memory(u64 start, u64 size)
> -{
> - unsigned long start_pfn = start >> PAGE_SHIFT;
> - unsigned long nr_pages = size >> PAGE_SHIFT;
> - struct zone *zone;
> - int ret;
> + SetPageReserved(pfn_to_page(pfn));
> + }

Hi Maciej,

Why do we need to set reserved here?
I think the new pages are already reserved in __add_zone() -> 
memmap_init_zone(), right?

Thanks,
Xishi Qiu

>  
> - zone = page_zone(pfn_to_page(start_pfn));
> - ret = __remove_pages(zone, start_pfn, nr_pages);
>   if (ret)
> - pr_warn("%s: Problem encountered in __remove_pages() ret=%d\n",
> + pr_warn("%s: Problem encountered in __add_pages() ret=%d\n",
>   __func__, ret);
>  
>   return ret;
>  }
>  #endif
> -#endif
>  
> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
> index 05615a3..9efa7d1 100644
> --- a/arch/arm64/mm/mmu.c
> +++ b/arch/arm64/mm/mmu.c
> @@ -493,6 +493,30 @@ void __init paging_init(void)
> SWAPPER_DIR_SIZE - PAGE_SIZE);
>  }
>  
> +#ifdef CONFIG_MEMORY_HOTPLUG
> +/*
> + * hotplug_paging() is used by memory hotplug to build new page tables
> + * for hot added memory.
> + */
> +void hotplug_paging(phys_addr_t start, phys_addr_t size)
> +{
> + phys_addr_t pgd_phys = pgd_pgtable_alloc();
> + pgd_t *pgd = pgd_set_fixmap(pgd_phys);
> +
> + memcpy(pgd, swapper_pg_dir, PAGE_SIZE);
> +
> + __create_pgd_mapping(pgd, start, __phys_to_virt(start), size,
> + PAGE_KERNEL, pgd_pgtable_alloc, false);
> +
> + cpu_replace_ttbr1(__va(pgd_phys));
> + memcpy(swapper_pg_dir, pgd, PAGE_SIZE);
> + cpu_replace_ttbr1(swapper_pg_dir);
> +
> + pgd_clear_fixmap();
> + memblock_free(pgd_phys, PAGE_SIZE);
> +}
> +#endif
> +
>  /*
>   * Check whether a kernel address is valid (derived from arch/x86/).
>   */
> diff --git a/include/linux/memblock.h b/include/linux/memblock.h
> index 5b759c9..5f78257 100644
> --- a/include/linux/memblock.h
> +++ b/include/linux/memblock.h
> @@ -92,6 +92,7 @@ int memblock_mark_hotplug(phys_addr_t base, phys_addr_t 
> size);
>  int memblock_clear_hotplug(phys_addr_t base, phys_addr_t size);
>  int memblock_mark_mirror(phys_addr_t base, phys_addr_t size);
>  int memblock_mark_nomap(phys_addr_t base, phys_addr_t size);
> +int memblock_clear_nomap(phys_addr_t base, phys_addr_t size);
>  ulong choose_memblock_flags(void);
>  
>  /* Low level functions */
> diff --git a/mm/memblock.c b/mm/memblock.c
> index 7608bc3..05e7676 100644
> --- a/mm/memblock.c
> +++ b/mm/memblock.c
> @@ -814,6 +814,16 @@ int __init_memblock memblock_mark_nomap(phys_addr_t 
> base, phys_addr_t size)
>  }
>  
>  /**
> + * memblock_clear_nomap - Clear a flag of MEMBLOCK_NOMAP memory region
> + * @base: the base phys addr of the region
> + * @size: the size of the region
> + */
> +int __init_memblock memblock_clear_nomap(phys_addr_t base, phys_addr_t size)
> +{
> + return memblock_setclr_flag(base, size, 0, MEMBLOCK_NOMAP);
> +}
> +
> +/**
>   * __next_reserved_mem_region - next function for for_each_reserved_region()
>   * @idx: pointer to u64 loop variable
>   * @out_start: ptr to phys_addr_t for start address of the region, can be 
> %NULL





Re: [RFC PATCH] Memory hotplug support for arm64 platform

2016-12-14 Thread Xishi Qiu
On 2016/12/14 20:16, Maciej Bielski wrote:

> 
>  
> -#ifdef CONFIG_MEMORY_HOTREMOVE
> -int arch_remove_memory(u64 start, u64 size)
> -{
> - unsigned long start_pfn = start >> PAGE_SHIFT;
> - unsigned long nr_pages = size >> PAGE_SHIFT;
> - struct zone *zone;
> - int ret;
> + SetPageReserved(pfn_to_page(pfn));
> + }

Hi Maciej,

Why do we need to set reserved here?
I think the new pages are already reserved in __add_zone() -> 
memmap_init_zone(), right?

Thanks,
Xishi Qiu

>  
> - zone = page_zone(pfn_to_page(start_pfn));
> - ret = __remove_pages(zone, start_pfn, nr_pages);
>   if (ret)
> - pr_warn("%s: Problem encountered in __remove_pages() ret=%d\n",
> + pr_warn("%s: Problem encountered in __add_pages() ret=%d\n",
>   __func__, ret);
>  
>   return ret;
>  }
>  #endif
> -#endif
>  
> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
> index 05615a3..9efa7d1 100644
> --- a/arch/arm64/mm/mmu.c
> +++ b/arch/arm64/mm/mmu.c
> @@ -493,6 +493,30 @@ void __init paging_init(void)
> SWAPPER_DIR_SIZE - PAGE_SIZE);
>  }
>  
> +#ifdef CONFIG_MEMORY_HOTPLUG
> +/*
> + * hotplug_paging() is used by memory hotplug to build new page tables
> + * for hot added memory.
> + */
> +void hotplug_paging(phys_addr_t start, phys_addr_t size)
> +{
> + phys_addr_t pgd_phys = pgd_pgtable_alloc();
> + pgd_t *pgd = pgd_set_fixmap(pgd_phys);
> +
> + memcpy(pgd, swapper_pg_dir, PAGE_SIZE);
> +
> + __create_pgd_mapping(pgd, start, __phys_to_virt(start), size,
> + PAGE_KERNEL, pgd_pgtable_alloc, false);
> +
> + cpu_replace_ttbr1(__va(pgd_phys));
> + memcpy(swapper_pg_dir, pgd, PAGE_SIZE);
> + cpu_replace_ttbr1(swapper_pg_dir);
> +
> + pgd_clear_fixmap();
> + memblock_free(pgd_phys, PAGE_SIZE);
> +}
> +#endif
> +
>  /*
>   * Check whether a kernel address is valid (derived from arch/x86/).
>   */
> diff --git a/include/linux/memblock.h b/include/linux/memblock.h
> index 5b759c9..5f78257 100644
> --- a/include/linux/memblock.h
> +++ b/include/linux/memblock.h
> @@ -92,6 +92,7 @@ int memblock_mark_hotplug(phys_addr_t base, phys_addr_t 
> size);
>  int memblock_clear_hotplug(phys_addr_t base, phys_addr_t size);
>  int memblock_mark_mirror(phys_addr_t base, phys_addr_t size);
>  int memblock_mark_nomap(phys_addr_t base, phys_addr_t size);
> +int memblock_clear_nomap(phys_addr_t base, phys_addr_t size);
>  ulong choose_memblock_flags(void);
>  
>  /* Low level functions */
> diff --git a/mm/memblock.c b/mm/memblock.c
> index 7608bc3..05e7676 100644
> --- a/mm/memblock.c
> +++ b/mm/memblock.c
> @@ -814,6 +814,16 @@ int __init_memblock memblock_mark_nomap(phys_addr_t 
> base, phys_addr_t size)
>  }
>  
>  /**
> + * memblock_clear_nomap - Clear a flag of MEMBLOCK_NOMAP memory region
> + * @base: the base phys addr of the region
> + * @size: the size of the region
> + */
> +int __init_memblock memblock_clear_nomap(phys_addr_t base, phys_addr_t size)
> +{
> + return memblock_setclr_flag(base, size, 0, MEMBLOCK_NOMAP);
> +}
> +
> +/**
>   * __next_reserved_mem_region - next function for for_each_reserved_region()
>   * @idx: pointer to u64 loop variable
>   * @out_start: ptr to phys_addr_t for start address of the region, can be 
> %NULL





[PATCH 3/7] block: move rq_ioc() to blk.h

2016-12-14 Thread Jens Axboe
We want to use it outside of blk-core.c.

Signed-off-by: Jens Axboe 
---
 block/blk-core.c | 16 
 block/blk.h  | 16 
 2 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 61ba08c58b64..92baea07acbc 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1040,22 +1040,6 @@ static bool blk_rq_should_init_elevator(struct bio *bio)
 }
 
 /**
- * rq_ioc - determine io_context for request allocation
- * @bio: request being allocated is for this bio (can be %NULL)
- *
- * Determine io_context to use for request allocation for @bio.  May return
- * %NULL if %current->io_context doesn't exist.
- */
-static struct io_context *rq_ioc(struct bio *bio)
-{
-#ifdef CONFIG_BLK_CGROUP
-   if (bio && bio->bi_ioc)
-   return bio->bi_ioc;
-#endif
-   return current->io_context;
-}
-
-/**
  * __get_request - get a free request
  * @rl: request list to allocate from
  * @op: operation and flags
diff --git a/block/blk.h b/block/blk.h
index f46c0ac8ae3d..9a716b5925a4 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -264,6 +264,22 @@ void ioc_clear_queue(struct request_queue *q);
 int create_task_io_context(struct task_struct *task, gfp_t gfp_mask, int node);
 
 /**
+ * rq_ioc - determine io_context for request allocation
+ * @bio: request being allocated is for this bio (can be %NULL)
+ *
+ * Determine io_context to use for request allocation for @bio.  May return
+ * %NULL if %current->io_context doesn't exist.
+ */
+static inline struct io_context *rq_ioc(struct bio *bio)
+{
+#ifdef CONFIG_BLK_CGROUP
+   if (bio && bio->bi_ioc)
+   return bio->bi_ioc;
+#endif
+   return current->io_context;
+}
+
+/**
  * create_io_context - try to create task->io_context
  * @gfp_mask: allocation mask
  * @node: allocation node
-- 
2.7.4



[PATCH 3/7] block: move rq_ioc() to blk.h

2016-12-14 Thread Jens Axboe
We want to use it outside of blk-core.c.

Signed-off-by: Jens Axboe 
---
 block/blk-core.c | 16 
 block/blk.h  | 16 
 2 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 61ba08c58b64..92baea07acbc 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1040,22 +1040,6 @@ static bool blk_rq_should_init_elevator(struct bio *bio)
 }
 
 /**
- * rq_ioc - determine io_context for request allocation
- * @bio: request being allocated is for this bio (can be %NULL)
- *
- * Determine io_context to use for request allocation for @bio.  May return
- * %NULL if %current->io_context doesn't exist.
- */
-static struct io_context *rq_ioc(struct bio *bio)
-{
-#ifdef CONFIG_BLK_CGROUP
-   if (bio && bio->bi_ioc)
-   return bio->bi_ioc;
-#endif
-   return current->io_context;
-}
-
-/**
  * __get_request - get a free request
  * @rl: request list to allocate from
  * @op: operation and flags
diff --git a/block/blk.h b/block/blk.h
index f46c0ac8ae3d..9a716b5925a4 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -264,6 +264,22 @@ void ioc_clear_queue(struct request_queue *q);
 int create_task_io_context(struct task_struct *task, gfp_t gfp_mask, int node);
 
 /**
+ * rq_ioc - determine io_context for request allocation
+ * @bio: request being allocated is for this bio (can be %NULL)
+ *
+ * Determine io_context to use for request allocation for @bio.  May return
+ * %NULL if %current->io_context doesn't exist.
+ */
+static inline struct io_context *rq_ioc(struct bio *bio)
+{
+#ifdef CONFIG_BLK_CGROUP
+   if (bio && bio->bi_ioc)
+   return bio->bi_ioc;
+#endif
+   return current->io_context;
+}
+
+/**
  * create_io_context - try to create task->io_context
  * @gfp_mask: allocation mask
  * @node: allocation node
-- 
2.7.4



[PATCH 5/7] blk-mq-sched: add framework for MQ capable IO schedulers

2016-12-14 Thread Jens Axboe
Signed-off-by: Jens Axboe 
---
 block/Makefile   |   2 +-
 block/blk-core.c |   7 +-
 block/blk-exec.c |   3 +-
 block/blk-flush.c|   7 +-
 block/blk-mq-sched.c | 375 +++
 block/blk-mq-sched.h | 190 
 block/blk-mq-tag.c   |   1 +
 block/blk-mq.c   | 192 ++--
 block/blk-mq.h   |   3 +
 block/elevator.c | 186 +--
 include/linux/blk-mq.h   |   3 +-
 include/linux/elevator.h |  29 
 12 files changed, 833 insertions(+), 165 deletions(-)
 create mode 100644 block/blk-mq-sched.c
 create mode 100644 block/blk-mq-sched.h

diff --git a/block/Makefile b/block/Makefile
index a827f988c4e6..2eee9e1bb6db 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -6,7 +6,7 @@ obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o 
blk-sysfs.o \
blk-flush.o blk-settings.o blk-ioc.o blk-map.o \
blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \
-   blk-mq-sysfs.o blk-mq-cpumap.o ioctl.o \
+   blk-mq-sysfs.o blk-mq-cpumap.o blk-mq-sched.o ioctl.o \
genhd.o scsi_ioctl.o partition-generic.o ioprio.o \
badblocks.o partitions/
 
diff --git a/block/blk-core.c b/block/blk-core.c
index 92baea07acbc..cb1e864cb23d 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -39,6 +39,7 @@
 
 #include "blk.h"
 #include "blk-mq.h"
+#include "blk-mq-sched.h"
 #include "blk-wbt.h"
 
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
@@ -1413,7 +1414,7 @@ void __blk_put_request(struct request_queue *q, struct 
request *req)
return;
 
if (q->mq_ops) {
-   blk_mq_free_request(req);
+   blk_mq_sched_put_request(req);
return;
}
 
@@ -1449,7 +1450,7 @@ void blk_put_request(struct request *req)
struct request_queue *q = req->q;
 
if (q->mq_ops)
-   blk_mq_free_request(req);
+   blk_mq_sched_put_request(req);
else {
unsigned long flags;
 
@@ -2127,7 +2128,7 @@ int blk_insert_cloned_request(struct request_queue *q, 
struct request *rq)
if (q->mq_ops) {
if (blk_queue_io_stat(q))
blk_account_io_start(rq, true);
-   blk_mq_insert_request(rq, false, true, false);
+   blk_mq_sched_insert_request(rq, false, true, false);
return 0;
}
 
diff --git a/block/blk-exec.c b/block/blk-exec.c
index 3ecb00a6cf45..86656fdfa637 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -9,6 +9,7 @@
 #include 
 
 #include "blk.h"
+#include "blk-mq-sched.h"
 
 /*
  * for max sense size
@@ -65,7 +66,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct 
gendisk *bd_disk,
 * be reused after dying flag is set
 */
if (q->mq_ops) {
-   blk_mq_insert_request(rq, at_head, true, false);
+   blk_mq_sched_insert_request(rq, at_head, true, false);
return;
}
 
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 20b7c7a02f1c..6a7c29d2eb3c 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -74,6 +74,7 @@
 #include "blk.h"
 #include "blk-mq.h"
 #include "blk-mq-tag.h"
+#include "blk-mq-sched.h"
 
 /* FLUSH/FUA sequences */
 enum {
@@ -453,9 +454,9 @@ void blk_insert_flush(struct request *rq)
 */
if ((policy & REQ_FSEQ_DATA) &&
!(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
-   if (q->mq_ops) {
-   blk_mq_insert_request(rq, false, true, false);
-   } else
+   if (q->mq_ops)
+   blk_mq_sched_insert_request(rq, false, true, false);
+   else
list_add_tail(>queuelist, >queue_head);
return;
}
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
new file mode 100644
index ..02ad17258666
--- /dev/null
+++ b/block/blk-mq-sched.c
@@ -0,0 +1,375 @@
+#include 
+#include 
+#include 
+
+#include 
+
+#include "blk.h"
+#include "blk-mq.h"
+#include "blk-mq-sched.h"
+#include "blk-mq-tag.h"
+#include "blk-wbt.h"
+
+/*
+ * Empty set
+ */
+static const struct blk_mq_ops mq_sched_tag_ops = {
+};
+
+void blk_mq_sched_free_requests(struct blk_mq_tags *tags)
+{
+   blk_mq_free_rq_map(NULL, tags, 0);
+}
+EXPORT_SYMBOL_GPL(blk_mq_sched_free_requests);
+
+struct blk_mq_tags *blk_mq_sched_alloc_requests(unsigned int depth,
+   unsigned int numa_node)
+{
+   struct blk_mq_tag_set set = {
+   .ops= _sched_tag_ops,
+   .nr_hw_queues   = 1,
+   .queue_depth= depth,
+   .numa_node  = numa_node,
+   };
+

[PATCH 5/7] blk-mq-sched: add framework for MQ capable IO schedulers

2016-12-14 Thread Jens Axboe
Signed-off-by: Jens Axboe 
---
 block/Makefile   |   2 +-
 block/blk-core.c |   7 +-
 block/blk-exec.c |   3 +-
 block/blk-flush.c|   7 +-
 block/blk-mq-sched.c | 375 +++
 block/blk-mq-sched.h | 190 
 block/blk-mq-tag.c   |   1 +
 block/blk-mq.c   | 192 ++--
 block/blk-mq.h   |   3 +
 block/elevator.c | 186 +--
 include/linux/blk-mq.h   |   3 +-
 include/linux/elevator.h |  29 
 12 files changed, 833 insertions(+), 165 deletions(-)
 create mode 100644 block/blk-mq-sched.c
 create mode 100644 block/blk-mq-sched.h

diff --git a/block/Makefile b/block/Makefile
index a827f988c4e6..2eee9e1bb6db 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -6,7 +6,7 @@ obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o 
blk-sysfs.o \
blk-flush.o blk-settings.o blk-ioc.o blk-map.o \
blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \
-   blk-mq-sysfs.o blk-mq-cpumap.o ioctl.o \
+   blk-mq-sysfs.o blk-mq-cpumap.o blk-mq-sched.o ioctl.o \
genhd.o scsi_ioctl.o partition-generic.o ioprio.o \
badblocks.o partitions/
 
diff --git a/block/blk-core.c b/block/blk-core.c
index 92baea07acbc..cb1e864cb23d 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -39,6 +39,7 @@
 
 #include "blk.h"
 #include "blk-mq.h"
+#include "blk-mq-sched.h"
 #include "blk-wbt.h"
 
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
@@ -1413,7 +1414,7 @@ void __blk_put_request(struct request_queue *q, struct 
request *req)
return;
 
if (q->mq_ops) {
-   blk_mq_free_request(req);
+   blk_mq_sched_put_request(req);
return;
}
 
@@ -1449,7 +1450,7 @@ void blk_put_request(struct request *req)
struct request_queue *q = req->q;
 
if (q->mq_ops)
-   blk_mq_free_request(req);
+   blk_mq_sched_put_request(req);
else {
unsigned long flags;
 
@@ -2127,7 +2128,7 @@ int blk_insert_cloned_request(struct request_queue *q, 
struct request *rq)
if (q->mq_ops) {
if (blk_queue_io_stat(q))
blk_account_io_start(rq, true);
-   blk_mq_insert_request(rq, false, true, false);
+   blk_mq_sched_insert_request(rq, false, true, false);
return 0;
}
 
diff --git a/block/blk-exec.c b/block/blk-exec.c
index 3ecb00a6cf45..86656fdfa637 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -9,6 +9,7 @@
 #include 
 
 #include "blk.h"
+#include "blk-mq-sched.h"
 
 /*
  * for max sense size
@@ -65,7 +66,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct 
gendisk *bd_disk,
 * be reused after dying flag is set
 */
if (q->mq_ops) {
-   blk_mq_insert_request(rq, at_head, true, false);
+   blk_mq_sched_insert_request(rq, at_head, true, false);
return;
}
 
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 20b7c7a02f1c..6a7c29d2eb3c 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -74,6 +74,7 @@
 #include "blk.h"
 #include "blk-mq.h"
 #include "blk-mq-tag.h"
+#include "blk-mq-sched.h"
 
 /* FLUSH/FUA sequences */
 enum {
@@ -453,9 +454,9 @@ void blk_insert_flush(struct request *rq)
 */
if ((policy & REQ_FSEQ_DATA) &&
!(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
-   if (q->mq_ops) {
-   blk_mq_insert_request(rq, false, true, false);
-   } else
+   if (q->mq_ops)
+   blk_mq_sched_insert_request(rq, false, true, false);
+   else
list_add_tail(>queuelist, >queue_head);
return;
}
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
new file mode 100644
index ..02ad17258666
--- /dev/null
+++ b/block/blk-mq-sched.c
@@ -0,0 +1,375 @@
+#include 
+#include 
+#include 
+
+#include 
+
+#include "blk.h"
+#include "blk-mq.h"
+#include "blk-mq-sched.h"
+#include "blk-mq-tag.h"
+#include "blk-wbt.h"
+
+/*
+ * Empty set
+ */
+static const struct blk_mq_ops mq_sched_tag_ops = {
+};
+
+void blk_mq_sched_free_requests(struct blk_mq_tags *tags)
+{
+   blk_mq_free_rq_map(NULL, tags, 0);
+}
+EXPORT_SYMBOL_GPL(blk_mq_sched_free_requests);
+
+struct blk_mq_tags *blk_mq_sched_alloc_requests(unsigned int depth,
+   unsigned int numa_node)
+{
+   struct blk_mq_tag_set set = {
+   .ops= _sched_tag_ops,
+   .nr_hw_queues   = 1,
+   .queue_depth= depth,
+   .numa_node  = numa_node,
+   };
+
+   return 

Re: [PATCH V2] Coccinelle: check usleep_range() usage

2016-12-14 Thread Nicholas Mc Guire
On Thu, Dec 15, 2016 at 06:52:28AM +0100, Julia Lawall wrote:
> 
> 
> On Thu, 15 Dec 2016, Nicholas Mc Guire wrote:
> 
> > Documentation/timers/timers-howto.txt outlines the intended usage of
> > usleep_range(), this spatch tries to locate misuse/out-of-spec cases.
> >
> > Signed-off-by: Nicholas Mc Guire <hof...@osadl.org>
> > ---
> > V2: added context mode as suggested by Julia Lawall <julia.law...@lip6.fr>
> > added min
> > added in the range checks as they are reasonably reliable based on
> > a review of all 1648 call sites of usleep_range()
> >
> > 1648 calls total
> > 1488 pass numeric values only (90.29%)
> >   27 min below 10us (1.81%)
> >   40 min above 10ms (2.68%)
> >  min out of spec 4.50%
> >   76 preprocessor constants (4.61%)
> >1 min below 10us (1.31%)
> >8 min above 10ms (10.52%)
> >  min out of spec 11.84%
> >   85 expressions (5.15%)
> > 1(0) min below 10us (1.50%)*
> > 6(2) min above 10ms (7.50%)*
> >  min out of spec 9.0%
> > Errors:
> >   23 where min==max  (1.39%)
> >0 where max < min (0.00%)
> >
> > Total:
> >   Bugs: 6.48%-10.70%*
> >   Crit: 3.09%-3.15%* (min < 10, min==max, max < min)
> >   Detectable by coccinelle:
> >   Bugs: 74/103 (71.8%)
> >   Crit: 50/52 (96.1%)
> > * numbers estimated based on code review
> >
> > Patch is against 4.9.0 (localversion-next is next-20161214)
> >
> >  scripts/coccinelle/api/bad_usleep_range.cocci | 88 
> > +++
> >  1 file changed, 88 insertions(+)
> >  create mode 100644 scripts/coccinelle/api/bad_usleep_range.cocci
> >
> > diff --git a/scripts/coccinelle/api/bad_usleep_range.cocci 
> > b/scripts/coccinelle/api/bad_usleep_range.cocci
> > new file mode 100644
> > index 000..003e9ef
> > --- /dev/null
> > +++ b/scripts/coccinelle/api/bad_usleep_range.cocci
> > @@ -0,0 +1,88 @@
> > +/// report bad/problematic usleep_range usage
> > +//
> > +// This is a checker for the documented intended use of usleep_range
> > +// see: Documentation/timers/timers-howto.txt and
> > +// Link: http://lkml.org/lkml/2016/11/29/54 for some notes on
> > +//   when mdelay might not be a suitable replacement
> > +//
> > +// Limitations:
> > +//  * The numeric limits are only checked when numeric constants are in
> > +//use (as of 4.9.0 that's 90.29% of the calls) no constant folding
> > +//is done - so this can miss some out-of-range cases - but in 4.9.0
> > +//it was catching 74 of the 103 bad cases (71.8%) and 50 of 52
> > +//(96.1%) of the critical cases (min < 10 and min==max) - there
> > +//  * There may be RT use-cases where both min < 10 and min==max
> > +//justified (e.g. high-throughput drivers on a shielded core)
> > +//
> > +// 1) warn if min == max
> > +//
> > +//  The problem is that usleep_range is calculating the delay by
> > +//  exp = ktime_add_us(ktime_get(), min)
> > +//  delta = (u64)(max - min) * NSEC_PER_USEC
> > +//  so delta is set to 0 if min==max
> > +//  and then calls
> > +//  schedule_hrtimeout_range(exp, 0,...)
> > +//  effectively this means that the clock subsystem has no room to
> > +//  optimize. usleep_range() is in non-atomic context so a 0 range
> > +//  makes very little sense as the task can be preempted anyway so
> > +//  there is no guarantee that the 0 range would be adding much
> > +//  precision - it just removes optimization potential, so it probably
> > +//  never really makes sense.
> > +//
> > +// 2) warn if min < 10 or min > 20ms
> > +//
> > +//  it makes little sense to use a non-atomic call for very short
> > +//  delays because the scheduling jitter will most likely exceed
> > +//  this limit - udelay() makes more sense in that case. For very
> > +//  large delays using hrtimers is useless as preemption becomes
> > +//  quite likely resulting in high inaccuracy anyway - so use
> > +//  jiffies based msleep and don't burden the hrtimer subsystem.
> > +//
> > +// 3) warn if max < min
> > +//
> > +//  Joe Perches <j...@perches.com> added a check for this case
> > +//  that is definitely wrong.
> > +//
> > +// Confidence: Moderate
> > +// Copyright: (C) 2016 Nicholas Mc Guire, OSADL.  GPLv2.
> > +// Comments:
> > +// Options: --no-includes --include-headers
> > +
> > +virtual org
> > +virtual report
> > +virtual context
> > +
> > +@nullrangectx dep

Re: [PATCH V2] Coccinelle: check usleep_range() usage

2016-12-14 Thread Nicholas Mc Guire
On Thu, Dec 15, 2016 at 06:52:28AM +0100, Julia Lawall wrote:
> 
> 
> On Thu, 15 Dec 2016, Nicholas Mc Guire wrote:
> 
> > Documentation/timers/timers-howto.txt outlines the intended usage of
> > usleep_range(), this spatch tries to locate misuse/out-of-spec cases.
> >
> > Signed-off-by: Nicholas Mc Guire 
> > ---
> > V2: added context mode as suggested by Julia Lawall 
> > added min
> > added in the range checks as they are reasonably reliable based on
> > a review of all 1648 call sites of usleep_range()
> >
> > 1648 calls total
> > 1488 pass numeric values only (90.29%)
> >   27 min below 10us (1.81%)
> >   40 min above 10ms (2.68%)
> >  min out of spec 4.50%
> >   76 preprocessor constants (4.61%)
> >1 min below 10us (1.31%)
> >8 min above 10ms (10.52%)
> >  min out of spec 11.84%
> >   85 expressions (5.15%)
> > 1(0) min below 10us (1.50%)*
> > 6(2) min above 10ms (7.50%)*
> >  min out of spec 9.0%
> > Errors:
> >   23 where min==max  (1.39%)
> >0 where max < min (0.00%)
> >
> > Total:
> >   Bugs: 6.48%-10.70%*
> >   Crit: 3.09%-3.15%* (min < 10, min==max, max < min)
> >   Detectable by coccinelle:
> >   Bugs: 74/103 (71.8%)
> >   Crit: 50/52 (96.1%)
> > * numbers estimated based on code review
> >
> > Patch is against 4.9.0 (localversion-next is next-20161214)
> >
> >  scripts/coccinelle/api/bad_usleep_range.cocci | 88 
> > +++
> >  1 file changed, 88 insertions(+)
> >  create mode 100644 scripts/coccinelle/api/bad_usleep_range.cocci
> >
> > diff --git a/scripts/coccinelle/api/bad_usleep_range.cocci 
> > b/scripts/coccinelle/api/bad_usleep_range.cocci
> > new file mode 100644
> > index 000..003e9ef
> > --- /dev/null
> > +++ b/scripts/coccinelle/api/bad_usleep_range.cocci
> > @@ -0,0 +1,88 @@
> > +/// report bad/problematic usleep_range usage
> > +//
> > +// This is a checker for the documented intended use of usleep_range
> > +// see: Documentation/timers/timers-howto.txt and
> > +// Link: http://lkml.org/lkml/2016/11/29/54 for some notes on
> > +//   when mdelay might not be a suitable replacement
> > +//
> > +// Limitations:
> > +//  * The numeric limits are only checked when numeric constants are in
> > +//use (as of 4.9.0 that's 90.29% of the calls) no constant folding
> > +//is done - so this can miss some out-of-range cases - but in 4.9.0
> > +//it was catching 74 of the 103 bad cases (71.8%) and 50 of 52
> > +//(96.1%) of the critical cases (min < 10 and min==max) - there
> > +//  * There may be RT use-cases where both min < 10 and min==max
> > +//justified (e.g. high-throughput drivers on a shielded core)
> > +//
> > +// 1) warn if min == max
> > +//
> > +//  The problem is that usleep_range is calculating the delay by
> > +//  exp = ktime_add_us(ktime_get(), min)
> > +//  delta = (u64)(max - min) * NSEC_PER_USEC
> > +//  so delta is set to 0 if min==max
> > +//  and then calls
> > +//  schedule_hrtimeout_range(exp, 0,...)
> > +//  effectively this means that the clock subsystem has no room to
> > +//  optimize. usleep_range() is in non-atomic context so a 0 range
> > +//  makes very little sense as the task can be preempted anyway so
> > +//  there is no guarantee that the 0 range would be adding much
> > +//  precision - it just removes optimization potential, so it probably
> > +//  never really makes sense.
> > +//
> > +// 2) warn if min < 10 or min > 20ms
> > +//
> > +//  it makes little sense to use a non-atomic call for very short
> > +//  delays because the scheduling jitter will most likely exceed
> > +//  this limit - udelay() makes more sense in that case. For very
> > +//  large delays using hrtimers is useless as preemption becomes
> > +//  quite likely resulting in high inaccuracy anyway - so use
> > +//  jiffies based msleep and don't burden the hrtimer subsystem.
> > +//
> > +// 3) warn if max < min
> > +//
> > +//  Joe Perches  added a check for this case
> > +//  that is definitely wrong.
> > +//
> > +// Confidence: Moderate
> > +// Copyright: (C) 2016 Nicholas Mc Guire, OSADL.  GPLv2.
> > +// Comments:
> > +// Options: --no-includes --include-headers
> > +
> > +virtual org
> > +virtual report
> > +virtual context
> > +
> > +@nullrangectx depends on context@
> > +expression E1,E2;
> > +position p;
> > 

make[1]: *** No rule to make target 'include/config/auto.conf', needed by 'include/config/kernel.release'.

2016-12-14 Thread kbuild test robot
tree:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 
master
head:   5cc60aeedf315a7513f92e98314e86d515b986d1
commit: eed0eabd12ef061821cbfa20d903476e07645320 MIPS: generic: Introduce 
generic DT-based board support
date:   2 months ago
config: mips-allyesconfig (attached as .config)
compiler: mips-linux-gnu-gcc (Debian 6.1.1-9) 6.1.1 20160705
reproduce:
wget 
https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross
 -O ~/bin/make.cross
chmod +x ~/bin/make.cross
git checkout eed0eabd12ef061821cbfa20d903476e07645320
# save the attached .config to linux build tree
make.cross ARCH=mips 

All errors (new ones prefixed by >>):

   .config:8824:warning: symbol value '' invalid for MIPS_CPS_NS16550_BASE
   .config:8825:warning: symbol value '' invalid for MIPS_CPS_NS16550_SHIFT
   make[3]: *** [silentoldconfig] Error 1
   make[2]: *** [silentoldconfig] Error 2
>> make[1]: *** No rule to make target 'include/config/auto.conf', needed by 
>> 'include/config/kernel.release'.
   make[1]: Target 'prepare' not remade because of errors.
   make: *** [sub-make] Error 2

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: application/gzip


make[1]: *** No rule to make target 'include/config/auto.conf', needed by 'include/config/kernel.release'.

2016-12-14 Thread kbuild test robot
tree:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 
master
head:   5cc60aeedf315a7513f92e98314e86d515b986d1
commit: eed0eabd12ef061821cbfa20d903476e07645320 MIPS: generic: Introduce 
generic DT-based board support
date:   2 months ago
config: mips-allyesconfig (attached as .config)
compiler: mips-linux-gnu-gcc (Debian 6.1.1-9) 6.1.1 20160705
reproduce:
wget 
https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross
 -O ~/bin/make.cross
chmod +x ~/bin/make.cross
git checkout eed0eabd12ef061821cbfa20d903476e07645320
# save the attached .config to linux build tree
make.cross ARCH=mips 

All errors (new ones prefixed by >>):

   .config:8824:warning: symbol value '' invalid for MIPS_CPS_NS16550_BASE
   .config:8825:warning: symbol value '' invalid for MIPS_CPS_NS16550_SHIFT
   make[3]: *** [silentoldconfig] Error 1
   make[2]: *** [silentoldconfig] Error 2
>> make[1]: *** No rule to make target 'include/config/auto.conf', needed by 
>> 'include/config/kernel.release'.
   make[1]: Target 'prepare' not remade because of errors.
   make: *** [sub-make] Error 2

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: application/gzip


[PATCH] sched/deadline:fix coding style issue

2016-12-14 Thread Gaosheng Wu
Line 45 of cpudeadline.c violates chapter 3 of CodingStyle - space 
required before the open parenthesis '('.

Signed-off-by: Gaosheng Wu 
---


checkpatch.pl complained as the following:
ERROR: space required before the open parenthesis '('
#45: FILE: kernel/sched/cpudeadline.c:45:
+   while(1) {

Patch was compile checked with: x86_64_defconfig

Patch is against 4.9.0 (localversion-next is next-20161215)

 kernel/sched/cpudeadline.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c
index e731190..3b7dc3c 100644
--- a/kernel/sched/cpudeadline.c
+++ b/kernel/sched/cpudeadline.c
@@ -42,7 +42,7 @@ static void cpudl_heapify_down(struct cpudl *cp, int idx)
return;
 
/* adapted from lib/prio_heap.c */
-   while(1) {
+   while (1) {
u64 largest_dl;
l = left_child(idx);
r = right_child(idx);
-- 
2.1.4




[PATCH] sched/deadline:fix coding style issue

2016-12-14 Thread Gaosheng Wu
Line 45 of cpudeadline.c violates chapter 3 of CodingStyle - space 
required before the open parenthesis '('.

Signed-off-by: Gaosheng Wu 
---


checkpatch.pl complained as the following:
ERROR: space required before the open parenthesis '('
#45: FILE: kernel/sched/cpudeadline.c:45:
+   while(1) {

Patch was compile checked with: x86_64_defconfig

Patch is against 4.9.0 (localversion-next is next-20161215)

 kernel/sched/cpudeadline.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c
index e731190..3b7dc3c 100644
--- a/kernel/sched/cpudeadline.c
+++ b/kernel/sched/cpudeadline.c
@@ -42,7 +42,7 @@ static void cpudl_heapify_down(struct cpudl *cp, int idx)
return;
 
/* adapted from lib/prio_heap.c */
-   while(1) {
+   while (1) {
u64 largest_dl;
l = left_child(idx);
r = right_child(idx);
-- 
2.1.4




[PATCH 4/7] blk-mq: export some helpers we need to the scheduling framework

2016-12-14 Thread Jens Axboe
Signed-off-by: Jens Axboe 
---
 block/blk-mq.c | 31 +--
 block/blk-mq.h | 25 +
 2 files changed, 42 insertions(+), 14 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 87b7eaa1cb74..8d1cec8e25d1 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -167,8 +167,8 @@ bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx)
 }
 EXPORT_SYMBOL(blk_mq_can_queue);
 
-static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
-  struct request *rq, unsigned int op)
+void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
+   struct request *rq, unsigned int op)
 {
INIT_LIST_HEAD(>queuelist);
/* csd/requeue_work/fifo_time is initialized before use */
@@ -213,9 +213,10 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, 
struct blk_mq_ctx *ctx,
 
ctx->rq_dispatched[op_is_sync(op)]++;
 }
+EXPORT_SYMBOL_GPL(blk_mq_rq_ctx_init);
 
-static struct request *
-__blk_mq_alloc_request(struct blk_mq_alloc_data *data, unsigned int op)
+struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data,
+  unsigned int op)
 {
struct request *rq;
unsigned int tag;
@@ -236,6 +237,7 @@ __blk_mq_alloc_request(struct blk_mq_alloc_data *data, 
unsigned int op)
 
return NULL;
 }
+EXPORT_SYMBOL_GPL(__blk_mq_alloc_request);
 
 struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
unsigned int flags)
@@ -319,8 +321,8 @@ struct request *blk_mq_alloc_request_hctx(struct 
request_queue *q, int rw,
 }
 EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx);
 
-static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx,
- struct blk_mq_ctx *ctx, struct request *rq)
+void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
+  struct request *rq)
 {
const int tag = rq->tag;
struct request_queue *q = rq->q;
@@ -803,7 +805,7 @@ static bool flush_busy_ctx(struct sbitmap *sb, unsigned int 
bitnr, void *data)
  * Process software queues that have been marked busy, splicing them
  * to the for-dispatch
  */
-static void flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list)
+void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list)
 {
struct flush_busy_ctx_data data = {
.hctx = hctx,
@@ -812,6 +814,7 @@ static void flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, 
struct list_head *list)
 
sbitmap_for_each_set(>ctx_map, flush_busy_ctx, );
 }
+EXPORT_SYMBOL_GPL(blk_mq_flush_busy_ctxs);
 
 static inline unsigned int queued_to_index(unsigned int queued)
 {
@@ -922,7 +925,7 @@ static void blk_mq_process_rq_list(struct blk_mq_hw_ctx 
*hctx)
/*
 * Touch any software queue that has pending entries.
 */
-   flush_busy_ctxs(hctx, _list);
+   blk_mq_flush_busy_ctxs(hctx, _list);
 
/*
 * If we have previous entries on our dispatch list, grab them
@@ -1136,8 +1139,8 @@ static inline void __blk_mq_insert_req_list(struct 
blk_mq_hw_ctx *hctx,
list_add_tail(>queuelist, >rq_list);
 }
 
-static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx,
-   struct request *rq, bool at_head)
+void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
+bool at_head)
 {
struct blk_mq_ctx *ctx = rq->mq_ctx;
 
@@ -1551,8 +1554,8 @@ static blk_qc_t blk_sq_make_request(struct request_queue 
*q, struct bio *bio)
return cookie;
 }
 
-static void blk_mq_free_rq_map(struct blk_mq_tag_set *set,
-   struct blk_mq_tags *tags, unsigned int hctx_idx)
+void blk_mq_free_rq_map(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
+   unsigned int hctx_idx)
 {
struct page *page;
 
@@ -1589,8 +1592,8 @@ static size_t order_to_size(unsigned int order)
return (size_t)PAGE_SIZE << order;
 }
 
-static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
-   unsigned int hctx_idx)
+struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
+  unsigned int hctx_idx)
 {
struct blk_mq_tags *tags;
unsigned int i, j, entries_per_page, max_order = 4;
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 63e9116cddbd..e59f5ca520a2 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -32,6 +32,21 @@ void blk_mq_free_queue(struct request_queue *q);
 int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
 void blk_mq_wake_waiters(struct request_queue *q);
 bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *, struct list_head *);
+void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head 
*list);
+
+/*
+ * Internal helpers for allocating/freeing the request map
+ */

[PATCH 4/7] blk-mq: export some helpers we need to the scheduling framework

2016-12-14 Thread Jens Axboe
Signed-off-by: Jens Axboe 
---
 block/blk-mq.c | 31 +--
 block/blk-mq.h | 25 +
 2 files changed, 42 insertions(+), 14 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 87b7eaa1cb74..8d1cec8e25d1 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -167,8 +167,8 @@ bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx)
 }
 EXPORT_SYMBOL(blk_mq_can_queue);
 
-static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
-  struct request *rq, unsigned int op)
+void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
+   struct request *rq, unsigned int op)
 {
INIT_LIST_HEAD(>queuelist);
/* csd/requeue_work/fifo_time is initialized before use */
@@ -213,9 +213,10 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, 
struct blk_mq_ctx *ctx,
 
ctx->rq_dispatched[op_is_sync(op)]++;
 }
+EXPORT_SYMBOL_GPL(blk_mq_rq_ctx_init);
 
-static struct request *
-__blk_mq_alloc_request(struct blk_mq_alloc_data *data, unsigned int op)
+struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data,
+  unsigned int op)
 {
struct request *rq;
unsigned int tag;
@@ -236,6 +237,7 @@ __blk_mq_alloc_request(struct blk_mq_alloc_data *data, 
unsigned int op)
 
return NULL;
 }
+EXPORT_SYMBOL_GPL(__blk_mq_alloc_request);
 
 struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
unsigned int flags)
@@ -319,8 +321,8 @@ struct request *blk_mq_alloc_request_hctx(struct 
request_queue *q, int rw,
 }
 EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx);
 
-static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx,
- struct blk_mq_ctx *ctx, struct request *rq)
+void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
+  struct request *rq)
 {
const int tag = rq->tag;
struct request_queue *q = rq->q;
@@ -803,7 +805,7 @@ static bool flush_busy_ctx(struct sbitmap *sb, unsigned int 
bitnr, void *data)
  * Process software queues that have been marked busy, splicing them
  * to the for-dispatch
  */
-static void flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list)
+void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list)
 {
struct flush_busy_ctx_data data = {
.hctx = hctx,
@@ -812,6 +814,7 @@ static void flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, 
struct list_head *list)
 
sbitmap_for_each_set(>ctx_map, flush_busy_ctx, );
 }
+EXPORT_SYMBOL_GPL(blk_mq_flush_busy_ctxs);
 
 static inline unsigned int queued_to_index(unsigned int queued)
 {
@@ -922,7 +925,7 @@ static void blk_mq_process_rq_list(struct blk_mq_hw_ctx 
*hctx)
/*
 * Touch any software queue that has pending entries.
 */
-   flush_busy_ctxs(hctx, _list);
+   blk_mq_flush_busy_ctxs(hctx, _list);
 
/*
 * If we have previous entries on our dispatch list, grab them
@@ -1136,8 +1139,8 @@ static inline void __blk_mq_insert_req_list(struct 
blk_mq_hw_ctx *hctx,
list_add_tail(>queuelist, >rq_list);
 }
 
-static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx,
-   struct request *rq, bool at_head)
+void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
+bool at_head)
 {
struct blk_mq_ctx *ctx = rq->mq_ctx;
 
@@ -1551,8 +1554,8 @@ static blk_qc_t blk_sq_make_request(struct request_queue 
*q, struct bio *bio)
return cookie;
 }
 
-static void blk_mq_free_rq_map(struct blk_mq_tag_set *set,
-   struct blk_mq_tags *tags, unsigned int hctx_idx)
+void blk_mq_free_rq_map(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
+   unsigned int hctx_idx)
 {
struct page *page;
 
@@ -1589,8 +1592,8 @@ static size_t order_to_size(unsigned int order)
return (size_t)PAGE_SIZE << order;
 }
 
-static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
-   unsigned int hctx_idx)
+struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
+  unsigned int hctx_idx)
 {
struct blk_mq_tags *tags;
unsigned int i, j, entries_per_page, max_order = 4;
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 63e9116cddbd..e59f5ca520a2 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -32,6 +32,21 @@ void blk_mq_free_queue(struct request_queue *q);
 int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
 void blk_mq_wake_waiters(struct request_queue *q);
 bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *, struct list_head *);
+void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head 
*list);
+
+/*
+ * Internal helpers for allocating/freeing the request map
+ */
+void 

Re: [PATCH v2] crypto: sun4i-ss: support the Security System PRNG

2016-12-14 Thread Herbert Xu
On Thu, Dec 15, 2016 at 12:47:16AM +0530, PrasannaKumar Muralidharan wrote:
> Should there be a mandate that driver will be accepted only when it
> passes 'rngtest'. This will make sure that prng drivers won't get
> added in future.

You cannot use software to distinguish between a PRNG and an HRNG.
We can only rely on the veracity of the documentation.

Cheers,
-- 
Email: Herbert Xu 
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt


Re: [PATCH v2] crypto: sun4i-ss: support the Security System PRNG

2016-12-14 Thread Herbert Xu
On Thu, Dec 15, 2016 at 12:47:16AM +0530, PrasannaKumar Muralidharan wrote:
> Should there be a mandate that driver will be accepted only when it
> passes 'rngtest'. This will make sure that prng drivers won't get
> added in future.

You cannot use software to distinguish between a PRNG and an HRNG.
We can only rely on the veracity of the documentation.

Cheers,
-- 
Email: Herbert Xu 
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt


[i915] WARN_ON_ONCE(!intel_dp->lane_count)

2016-12-14 Thread Mike Galbraith
There's a FIXME there, but seems you may still want to hear about it,
so here ya go.


[4.481803] [drm] Initialized
[4.600103] [drm] Memory usable by graphics device = 4096M
[4.600108] checking generic (c000 1d5000) vs hw (c000 1000)
[4.600109] fb: switching to inteldrmfb from EFI VGA
[4.600200] Console: switching to colour dummy device 80x25
[4.601073] [drm] Replacing VGA console driver
[4.607781] [drm] Supports vblank timestamp caching Rev 2 (21.10.2013).
[4.607783] [drm] Driver supports precise vblank timestamp query.
[4.611373] i915 :00:02.0: vgaarb: changed VGA decodes: 
olddecodes=io+mem,decodes=io+mem:owns=io+mem
[4.613393] [drm] Finished loading i915/skl_dmc_ver1_26.bin (v1.26)
[4.645242] [drm] GuC firmware load skipped
[4.652382] ACPI: Video Device [GFX0] (multi-head: yes  rom: no  post: no)
[4.653127] random: crng init done
[4.653637] input: Video Bus as 
/devices/LNXSYSTM:00/LNXSYBUS:00/PNP0A08:00/LNXVIDEO:00/input/input6
[4.654286] [drm] Initialized i915 1.6.0 20161121 for :00:02.0 on minor 0
[4.57] [ cut here ]
[4.98] WARNING: CPU: 1 PID: 487 at drivers/gpu/drm/i915/intel_dp.c:4018 
intel_dp_check_link_status+0x1d3/0x1e0 [i915]
[4.99] WARN_ON_ONCE(!intel_dp->lane_count)
[4.666700] Modules linked in:
[4.666701]  rtsx_pci_sdmmc(E) mmc_core(E) ahci(E) libahci(E) rtsx_pci(E) 
mfd_core(E) libata(E) xhci_pci(E) i915(E) xhci_hcd(E) usbcore(E) 
i2c_algo_bit(E) drm_kms_helper(E) syscopyarea(E) sysfillrect(E) sysimgblt(E) 
fb_sys_fops(E) drm(E) video(E) button(E) fjes(E) af_packet(E) sd_mod(E) vfat(E) 
fat(E) virtio_blk(E) virtio_mmio(E) virtio_pci(E) virtio_ring(E) virtio(E) 
ext4(E) crc16(E) jbd2(E) mbcache(E) dm_mod(E) loop(E) sg(E) scsi_mod(E) 
autofs4(E)
[4.666727] CPU: 1 PID: 487 Comm: kworker/u8:3 Tainted: GE   
4.10.0-preempt #1
[4.666728] Hardware name: HP HP Spectre x360 Convertible/804F, BIOS F.31 
01/15/2016
[4.666731] Workqueue: events_unbound async_run_entry_fn
[4.666732] Call Trace:
[4.666736]  dump_stack+0x85/0xc9
[4.666738]  __warn+0xd1/0xf0
[4.666740]  warn_slowpath_fmt+0x4f/0x60
[4.666746]  ? drm_dp_dpcd_read+0x57/0x70 [drm_kms_helper]
[4.666774]  intel_dp_check_link_status+0x1d3/0x1e0 [i915]
[4.666799]  intel_dp_detect+0x5a8/0x950 [i915]
[4.666803]  drm_helper_probe_single_connector_modes+0x296/0x4d0 
[drm_kms_helper]
[4.666807]  drm_setup_crtcs+0x8a/0xc70 [drm_kms_helper]
[4.666815]  ? mutex_lock_nested+0xad/0x6a0
[4.666819]  ? drm_fb_helper_initial_config+0x47/0x3f0 [drm_kms_helper]
[4.666823]  drm_fb_helper_initial_config+0x5b/0x3f0 [drm_kms_helper]
[4.666847]  intel_fbdev_initial_config+0x18/0x30 [i915]
[4.666849]  async_run_entry_fn+0x39/0x170
[4.666850]  process_one_work+0x1e1/0x6b0
[4.666851]  ? process_one_work+0x162/0x6b0
[4.666853]  worker_thread+0x137/0x4b0
[4.666855]  kthread+0x10c/0x140
[4.666856]  ? process_one_work+0x6b0/0x6b0
[4.666857]  ? kthread_parkme+0x40/0x40
[4.666859]  ret_from_fork+0x2a/0x40
[4.666861] ---[ end trace b9510d991aa51e41 ]---
[4.686370] fbcon: inteldrmfb (fb0) is primary device


[i915] WARN_ON_ONCE(!intel_dp->lane_count)

2016-12-14 Thread Mike Galbraith
There's a FIXME there, but seems you may still want to hear about it,
so here ya go.


[4.481803] [drm] Initialized
[4.600103] [drm] Memory usable by graphics device = 4096M
[4.600108] checking generic (c000 1d5000) vs hw (c000 1000)
[4.600109] fb: switching to inteldrmfb from EFI VGA
[4.600200] Console: switching to colour dummy device 80x25
[4.601073] [drm] Replacing VGA console driver
[4.607781] [drm] Supports vblank timestamp caching Rev 2 (21.10.2013).
[4.607783] [drm] Driver supports precise vblank timestamp query.
[4.611373] i915 :00:02.0: vgaarb: changed VGA decodes: 
olddecodes=io+mem,decodes=io+mem:owns=io+mem
[4.613393] [drm] Finished loading i915/skl_dmc_ver1_26.bin (v1.26)
[4.645242] [drm] GuC firmware load skipped
[4.652382] ACPI: Video Device [GFX0] (multi-head: yes  rom: no  post: no)
[4.653127] random: crng init done
[4.653637] input: Video Bus as 
/devices/LNXSYSTM:00/LNXSYBUS:00/PNP0A08:00/LNXVIDEO:00/input/input6
[4.654286] [drm] Initialized i915 1.6.0 20161121 for :00:02.0 on minor 0
[4.57] [ cut here ]
[4.98] WARNING: CPU: 1 PID: 487 at drivers/gpu/drm/i915/intel_dp.c:4018 
intel_dp_check_link_status+0x1d3/0x1e0 [i915]
[4.99] WARN_ON_ONCE(!intel_dp->lane_count)
[4.666700] Modules linked in:
[4.666701]  rtsx_pci_sdmmc(E) mmc_core(E) ahci(E) libahci(E) rtsx_pci(E) 
mfd_core(E) libata(E) xhci_pci(E) i915(E) xhci_hcd(E) usbcore(E) 
i2c_algo_bit(E) drm_kms_helper(E) syscopyarea(E) sysfillrect(E) sysimgblt(E) 
fb_sys_fops(E) drm(E) video(E) button(E) fjes(E) af_packet(E) sd_mod(E) vfat(E) 
fat(E) virtio_blk(E) virtio_mmio(E) virtio_pci(E) virtio_ring(E) virtio(E) 
ext4(E) crc16(E) jbd2(E) mbcache(E) dm_mod(E) loop(E) sg(E) scsi_mod(E) 
autofs4(E)
[4.666727] CPU: 1 PID: 487 Comm: kworker/u8:3 Tainted: GE   
4.10.0-preempt #1
[4.666728] Hardware name: HP HP Spectre x360 Convertible/804F, BIOS F.31 
01/15/2016
[4.666731] Workqueue: events_unbound async_run_entry_fn
[4.666732] Call Trace:
[4.666736]  dump_stack+0x85/0xc9
[4.666738]  __warn+0xd1/0xf0
[4.666740]  warn_slowpath_fmt+0x4f/0x60
[4.666746]  ? drm_dp_dpcd_read+0x57/0x70 [drm_kms_helper]
[4.666774]  intel_dp_check_link_status+0x1d3/0x1e0 [i915]
[4.666799]  intel_dp_detect+0x5a8/0x950 [i915]
[4.666803]  drm_helper_probe_single_connector_modes+0x296/0x4d0 
[drm_kms_helper]
[4.666807]  drm_setup_crtcs+0x8a/0xc70 [drm_kms_helper]
[4.666815]  ? mutex_lock_nested+0xad/0x6a0
[4.666819]  ? drm_fb_helper_initial_config+0x47/0x3f0 [drm_kms_helper]
[4.666823]  drm_fb_helper_initial_config+0x5b/0x3f0 [drm_kms_helper]
[4.666847]  intel_fbdev_initial_config+0x18/0x30 [i915]
[4.666849]  async_run_entry_fn+0x39/0x170
[4.666850]  process_one_work+0x1e1/0x6b0
[4.666851]  ? process_one_work+0x162/0x6b0
[4.666853]  worker_thread+0x137/0x4b0
[4.666855]  kthread+0x10c/0x140
[4.666856]  ? process_one_work+0x6b0/0x6b0
[4.666857]  ? kthread_parkme+0x40/0x40
[4.666859]  ret_from_fork+0x2a/0x40
[4.666861] ---[ end trace b9510d991aa51e41 ]---
[4.686370] fbcon: inteldrmfb (fb0) is primary device


Re: [PATCH 00/39] ARM: dts: mvebu: Fix license text

2016-12-14 Thread Stefan Roese

On 14.12.2016 23:37, Alexandre Belloni wrote:

When the license was switched to dual GPLv2/X11, the text that was used
was missing a few characters. Fix that now.

I'll let the maintainers decide whether this change requires an ack of
every contributors. It has been separated with that in mind if
necessary.

Cc: Andrew Andrianov 
Cc: Arnaud Ebalard 
Cc: Arnd Bergmann 
Cc: Ben Dooks 
Cc: Benjamin Cama 
Cc: Benoit Masson 
Cc: Ben Peddell 
Cc: Boris Brezillon 
Cc: Chris Packham 
Cc: Ezequiel Garcia 
Cc: Florian Fainelli 
Cc: Geert Uytterhoeven 
Cc: Greg Ungerer 
Cc: Grzegorz Jaszczyk 
Cc: Heikki Krogerus 
Cc: Imre Kaloz 
Cc: Kevin Hilman 
Cc: Lior Amsalem 
Cc: Lorenzo Pieralisi 
Cc: Marcin Wojtas 
Cc: Mario Lange 
Cc: Maxime Ripard 
Cc: Nadav Haklai 
Cc: Nobuhiro Iwamatsu 
Cc: Paul Bolle 
Cc: Philipp Zabel 
Cc: Rafał Miłecki 
Cc: Roger Shimizu 
Cc: Russell King 
Cc: Ryan Press 
Cc: Sebastian Hesselbarth 
Cc: Simon Baatz 
Cc: Simon Guinot 
Cc: Stefan Roese 


For the complete patch series:

Acked-by: Stefan Roese 

Thanks,
Stefan


Re: [PATCH 00/39] ARM: dts: mvebu: Fix license text

2016-12-14 Thread Stefan Roese

On 14.12.2016 23:37, Alexandre Belloni wrote:

When the license was switched to dual GPLv2/X11, the text that was used
was missing a few characters. Fix that now.

I'll let the maintainers decide whether this change requires an ack of
every contributors. It has been separated with that in mind if
necessary.

Cc: Andrew Andrianov 
Cc: Arnaud Ebalard 
Cc: Arnd Bergmann 
Cc: Ben Dooks 
Cc: Benjamin Cama 
Cc: Benoit Masson 
Cc: Ben Peddell 
Cc: Boris Brezillon 
Cc: Chris Packham 
Cc: Ezequiel Garcia 
Cc: Florian Fainelli 
Cc: Geert Uytterhoeven 
Cc: Greg Ungerer 
Cc: Grzegorz Jaszczyk 
Cc: Heikki Krogerus 
Cc: Imre Kaloz 
Cc: Kevin Hilman 
Cc: Lior Amsalem 
Cc: Lorenzo Pieralisi 
Cc: Marcin Wojtas 
Cc: Mario Lange 
Cc: Maxime Ripard 
Cc: Nadav Haklai 
Cc: Nobuhiro Iwamatsu 
Cc: Paul Bolle 
Cc: Philipp Zabel 
Cc: Rafał Miłecki 
Cc: Roger Shimizu 
Cc: Russell King 
Cc: Ryan Press 
Cc: Sebastian Hesselbarth 
Cc: Simon Baatz 
Cc: Simon Guinot 
Cc: Stefan Roese 


For the complete patch series:

Acked-by: Stefan Roese 

Thanks,
Stefan


[PATCH 04/11] powerpc/kvm: Don't store values derivable from HPT order

2016-12-14 Thread David Gibson
Currently the kvm_hpt_info structure stores the hashed page table's order,
and also the number of HPTEs it contains and a mask for its size.  The
last two can be easily derived from the order, so remove them and just
calculate them as necessary with a couple of helper inlines.

Signed-off-by: David Gibson 
---
 arch/powerpc/include/asm/kvm_book3s_64.h | 12 
 arch/powerpc/include/asm/kvm_host.h  |  2 --
 arch/powerpc/kvm/book3s_64_mmu_hv.c  | 28 +---
 arch/powerpc/kvm/book3s_hv_rm_mmu.c  | 18 +-
 4 files changed, 34 insertions(+), 26 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h 
b/arch/powerpc/include/asm/kvm_book3s_64.h
index 8482921..8810327 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -350,6 +350,18 @@ extern void kvmppc_mmu_debugfs_init(struct kvm *kvm);
 
 extern void kvmhv_rm_send_ipi(int cpu);
 
+static inline unsigned long kvmppc_hpt_npte(struct kvm_hpt_info *hpt)
+{
+   /* HPTEs are 2**4 bytes long */
+   return 1UL << (hpt->order - 4);
+}
+
+static inline unsigned long kvmppc_hpt_mask(struct kvm_hpt_info *hpt)
+{
+   /* 128 (2**7) bytes in each HPTEG */
+   return (1UL << (hpt->order - 7)) - 1;
+}
+
 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
 
 #endif /* __ASM_KVM_BOOK3S_64_H__ */
diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index 2673271..3900f63 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -244,8 +244,6 @@ struct kvm_arch_memory_slot {
 struct kvm_hpt_info {
unsigned long virt;
struct revmap_entry *rev;
-   unsigned long npte;
-   unsigned long mask;
u32 order;
int cma;
 };
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c 
b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index b5799d1..fe88132 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -83,15 +83,11 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
 
kvm->arch.hpt.virt = hpt;
kvm->arch.hpt.order = order;
-   /* HPTEs are 2**4 bytes long */
-   kvm->arch.hpt.npte = 1ul << (order - 4);
-   /* 128 (2**7) bytes in each HPTEG */
-   kvm->arch.hpt.mask = (1ul << (order - 7)) - 1;
 
atomic64_set(>arch.mmio_update, 0);
 
/* Allocate reverse map array */
-   rev = vmalloc(sizeof(struct revmap_entry) * kvm->arch.hpt.npte);
+   rev = vmalloc(sizeof(struct revmap_entry) * 
kvmppc_hpt_npte(>arch.hpt));
if (!rev) {
pr_err("kvmppc_alloc_hpt: Couldn't alloc reverse map array\n");
goto out_freehpt;
@@ -194,8 +190,8 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct 
kvm_memory_slot *memslot,
if (npages > 1ul << (40 - porder))
npages = 1ul << (40 - porder);
/* Can't use more than 1 HPTE per HPTEG */
-   if (npages > kvm->arch.hpt.mask + 1)
-   npages = kvm->arch.hpt.mask + 1;
+   if (npages > kvmppc_hpt_mask(>arch.hpt) + 1)
+   npages = kvmppc_hpt_mask(>arch.hpt) + 1;
 
hp0 = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) |
HPTE_V_BOLTED | hpte0_pgsize_encoding(psize);
@@ -205,7 +201,8 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct 
kvm_memory_slot *memslot,
for (i = 0; i < npages; ++i) {
addr = i << porder;
/* can't use hpt_hash since va > 64 bits */
-   hash = (i ^ (VRMA_VSID ^ (VRMA_VSID << 25))) & 
kvm->arch.hpt.mask;
+   hash = (i ^ (VRMA_VSID ^ (VRMA_VSID << 25)))
+   & kvmppc_hpt_mask(>arch.hpt);
/*
 * We assume that the hash table is empty and no
 * vcpus are using it at this stage.  Since we create
@@ -1306,7 +1303,7 @@ static ssize_t kvm_htab_read(struct file *file, char 
__user *buf,
 
/* Skip uninteresting entries, i.e. clean on not-first pass */
if (!first_pass) {
-   while (i < kvm->arch.hpt.npte &&
+   while (i < kvmppc_hpt_npte(>arch.hpt) &&
   !hpte_dirty(revp, hptp)) {
++i;
hptp += 2;
@@ -1316,7 +1313,7 @@ static ssize_t kvm_htab_read(struct file *file, char 
__user *buf,
hdr.index = i;
 
/* Grab a series of valid entries */
-   while (i < kvm->arch.hpt.npte &&
+   while (i < kvmppc_hpt_npte(>arch.hpt) &&
   hdr.n_valid < 0x &&
   nb + HPTE_SIZE < count &&
   record_hpte(flags, hptp, hpte, revp, 1, first_pass)) {
@@ -1332,7 +1329,7 @@ static ssize_t kvm_htab_read(struct file *file, char 
__user *buf,
++revp;
}
/* Now skip invalid 

[PATCH 03/11] powerpc/kvm: Gather HPT related variables into sub-structure

2016-12-14 Thread David Gibson
Currently, the powerpc kvm_arch structure contains a number of variables
tracking the state of the guest's hashed page table (HPT) in KVM HV.  This
patch gathers them all together into a single kvm_hpt_info substructure.
This makes life more convenient for the upcoming HPT resizing
implementation.

Signed-off-by: David Gibson 
---
 arch/powerpc/include/asm/kvm_host.h | 16 ---
 arch/powerpc/kvm/book3s_64_mmu_hv.c | 90 ++---
 arch/powerpc/kvm/book3s_hv.c|  2 +-
 arch/powerpc/kvm/book3s_hv_rm_mmu.c | 62 -
 4 files changed, 87 insertions(+), 83 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index e59b172..2673271 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -241,12 +241,20 @@ struct kvm_arch_memory_slot {
 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
 };
 
+struct kvm_hpt_info {
+   unsigned long virt;
+   struct revmap_entry *rev;
+   unsigned long npte;
+   unsigned long mask;
+   u32 order;
+   int cma;
+};
+
 struct kvm_arch {
unsigned int lpid;
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
unsigned int tlb_sets;
-   unsigned long hpt_virt;
-   struct revmap_entry *revmap;
+   struct kvm_hpt_info hpt;
atomic64_t mmio_update;
unsigned int host_lpid;
unsigned long host_lpcr;
@@ -256,14 +264,10 @@ struct kvm_arch {
unsigned long lpcr;
unsigned long vrma_slb_v;
int hpte_setup_done;
-   u32 hpt_order;
atomic_t vcpus_running;
u32 online_vcores;
-   unsigned long hpt_npte;
-   unsigned long hpt_mask;
atomic_t hpte_mod_interest;
cpumask_t need_tlb_flush;
-   int hpt_cma_alloc;
struct dentry *debugfs_dir;
struct dentry *htab_dentry;
 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c 
b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index ae17cdd..b5799d1 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -61,12 +61,12 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
order = PPC_MIN_HPT_ORDER;
}
 
-   kvm->arch.hpt_cma_alloc = 0;
+   kvm->arch.hpt.cma = 0;
page = kvm_alloc_hpt_cma(1ul << (order - PAGE_SHIFT));
if (page) {
hpt = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
memset((void *)hpt, 0, (1ul << order));
-   kvm->arch.hpt_cma_alloc = 1;
+   kvm->arch.hpt.cma = 1;
}
 
/* Lastly try successively smaller sizes from the page allocator */
@@ -81,22 +81,22 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
if (!hpt)
return -ENOMEM;
 
-   kvm->arch.hpt_virt = hpt;
-   kvm->arch.hpt_order = order;
+   kvm->arch.hpt.virt = hpt;
+   kvm->arch.hpt.order = order;
/* HPTEs are 2**4 bytes long */
-   kvm->arch.hpt_npte = 1ul << (order - 4);
+   kvm->arch.hpt.npte = 1ul << (order - 4);
/* 128 (2**7) bytes in each HPTEG */
-   kvm->arch.hpt_mask = (1ul << (order - 7)) - 1;
+   kvm->arch.hpt.mask = (1ul << (order - 7)) - 1;
 
atomic64_set(>arch.mmio_update, 0);
 
/* Allocate reverse map array */
-   rev = vmalloc(sizeof(struct revmap_entry) * kvm->arch.hpt_npte);
+   rev = vmalloc(sizeof(struct revmap_entry) * kvm->arch.hpt.npte);
if (!rev) {
pr_err("kvmppc_alloc_hpt: Couldn't alloc reverse map array\n");
goto out_freehpt;
}
-   kvm->arch.revmap = rev;
+   kvm->arch.hpt.rev = rev;
kvm->arch.sdr1 = __pa(hpt) | (order - 18);
 
pr_info("KVM guest htab at %lx (order %ld), LPID %x\n",
@@ -107,7 +107,7 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
return 0;
 
  out_freehpt:
-   if (kvm->arch.hpt_cma_alloc)
+   if (kvm->arch.hpt.cma)
kvm_free_hpt_cma(page, 1 << (order - PAGE_SHIFT));
else
free_pages(hpt, order - PAGE_SHIFT);
@@ -129,10 +129,10 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 
*htab_orderp)
goto out;
}
}
-   if (kvm->arch.hpt_virt) {
-   order = kvm->arch.hpt_order;
+   if (kvm->arch.hpt.virt) {
+   order = kvm->arch.hpt.order;
/* Set the entire HPT to 0, i.e. invalid HPTEs */
-   memset((void *)kvm->arch.hpt_virt, 0, 1ul << order);
+   memset((void *)kvm->arch.hpt.virt, 0, 1ul << order);
/*
 * Reset all the reverse-mapping chains for all memslots
 */
@@ -153,13 +153,13 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 
*htab_orderp)
 void kvmppc_free_hpt(struct kvm *kvm)
 {
kvmppc_free_lpid(kvm->arch.lpid);
-   

[PATCH 02/11] powerpc/kvm: Rename kvm_alloc_hpt() for clarity

2016-12-14 Thread David Gibson
The difference between kvm_alloc_hpt() and kvmppc_alloc_hpt() is not at
all obvious from the name.  In practice kvmppc_alloc_hpt() allocates an HPT
by whatever means, and calls kvm_alloc_hpt() which will attempt to allocate
it with CMA only.

To make this less confusing, rename kvm_alloc_hpt() to kvm_alloc_hpt_cma().
Similarly, kvm_release_hpt() is renamed kvm_free_hpt_cma().

Signed-off-by: David Gibson 
---
 arch/powerpc/include/asm/kvm_ppc.h   | 4 ++--
 arch/powerpc/kvm/book3s_64_mmu_hv.c  | 8 
 arch/powerpc/kvm/book3s_hv_builtin.c | 8 
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_ppc.h 
b/arch/powerpc/include/asm/kvm_ppc.h
index 2da67bf..3db6be9 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -186,8 +186,8 @@ extern long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu,
unsigned long tce_value, unsigned long npages);
 extern long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
 unsigned long ioba);
-extern struct page *kvm_alloc_hpt(unsigned long nr_pages);
-extern void kvm_release_hpt(struct page *page, unsigned long nr_pages);
+extern struct page *kvm_alloc_hpt_cma(unsigned long nr_pages);
+extern void kvm_free_hpt_cma(struct page *page, unsigned long nr_pages);
 extern int kvmppc_core_init_vm(struct kvm *kvm);
 extern void kvmppc_core_destroy_vm(struct kvm *kvm);
 extern void kvmppc_core_free_memslot(struct kvm *kvm,
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c 
b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index b795dd1..ae17cdd 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -62,7 +62,7 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
}
 
kvm->arch.hpt_cma_alloc = 0;
-   page = kvm_alloc_hpt(1ul << (order - PAGE_SHIFT));
+   page = kvm_alloc_hpt_cma(1ul << (order - PAGE_SHIFT));
if (page) {
hpt = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
memset((void *)hpt, 0, (1ul << order));
@@ -108,7 +108,7 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
 
  out_freehpt:
if (kvm->arch.hpt_cma_alloc)
-   kvm_release_hpt(page, 1 << (order - PAGE_SHIFT));
+   kvm_free_hpt_cma(page, 1 << (order - PAGE_SHIFT));
else
free_pages(hpt, order - PAGE_SHIFT);
return -ENOMEM;
@@ -155,8 +155,8 @@ void kvmppc_free_hpt(struct kvm *kvm)
kvmppc_free_lpid(kvm->arch.lpid);
vfree(kvm->arch.revmap);
if (kvm->arch.hpt_cma_alloc)
-   kvm_release_hpt(virt_to_page(kvm->arch.hpt_virt),
-   1 << (kvm->arch.hpt_order - PAGE_SHIFT));
+   kvm_free_hpt_cma(virt_to_page(kvm->arch.hpt_virt),
+1 << (kvm->arch.hpt_order - PAGE_SHIFT));
else
free_pages(kvm->arch.hpt_virt,
   kvm->arch.hpt_order - PAGE_SHIFT);
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c 
b/arch/powerpc/kvm/book3s_hv_builtin.c
index 5bb24be..4c4aa47 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -52,19 +52,19 @@ static int __init early_parse_kvm_cma_resv(char *p)
 }
 early_param("kvm_cma_resv_ratio", early_parse_kvm_cma_resv);
 
-struct page *kvm_alloc_hpt(unsigned long nr_pages)
+struct page *kvm_alloc_hpt_cma(unsigned long nr_pages)
 {
VM_BUG_ON(order_base_2(nr_pages) < KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
 
return cma_alloc(kvm_cma, nr_pages, order_base_2(HPT_ALIGN_PAGES));
 }
-EXPORT_SYMBOL_GPL(kvm_alloc_hpt);
+EXPORT_SYMBOL_GPL(kvm_alloc_hpt_cma);
 
-void kvm_release_hpt(struct page *page, unsigned long nr_pages)
+void kvm_free_hpt_cma(struct page *page, unsigned long nr_pages)
 {
cma_release(kvm_cma, page, nr_pages);
 }
-EXPORT_SYMBOL_GPL(kvm_release_hpt);
+EXPORT_SYMBOL_GPL(kvm_free_hpt_cma);
 
 /**
  * kvm_cma_reserve() - reserve area for kvm hash pagetable
-- 
2.9.3



[PATCH 04/11] powerpc/kvm: Don't store values derivable from HPT order

2016-12-14 Thread David Gibson
Currently the kvm_hpt_info structure stores the hashed page table's order,
and also the number of HPTEs it contains and a mask for its size.  The
last two can be easily derived from the order, so remove them and just
calculate them as necessary with a couple of helper inlines.

Signed-off-by: David Gibson 
---
 arch/powerpc/include/asm/kvm_book3s_64.h | 12 
 arch/powerpc/include/asm/kvm_host.h  |  2 --
 arch/powerpc/kvm/book3s_64_mmu_hv.c  | 28 +---
 arch/powerpc/kvm/book3s_hv_rm_mmu.c  | 18 +-
 4 files changed, 34 insertions(+), 26 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h 
b/arch/powerpc/include/asm/kvm_book3s_64.h
index 8482921..8810327 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -350,6 +350,18 @@ extern void kvmppc_mmu_debugfs_init(struct kvm *kvm);
 
 extern void kvmhv_rm_send_ipi(int cpu);
 
+static inline unsigned long kvmppc_hpt_npte(struct kvm_hpt_info *hpt)
+{
+   /* HPTEs are 2**4 bytes long */
+   return 1UL << (hpt->order - 4);
+}
+
+static inline unsigned long kvmppc_hpt_mask(struct kvm_hpt_info *hpt)
+{
+   /* 128 (2**7) bytes in each HPTEG */
+   return (1UL << (hpt->order - 7)) - 1;
+}
+
 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
 
 #endif /* __ASM_KVM_BOOK3S_64_H__ */
diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index 2673271..3900f63 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -244,8 +244,6 @@ struct kvm_arch_memory_slot {
 struct kvm_hpt_info {
unsigned long virt;
struct revmap_entry *rev;
-   unsigned long npte;
-   unsigned long mask;
u32 order;
int cma;
 };
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c 
b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index b5799d1..fe88132 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -83,15 +83,11 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
 
kvm->arch.hpt.virt = hpt;
kvm->arch.hpt.order = order;
-   /* HPTEs are 2**4 bytes long */
-   kvm->arch.hpt.npte = 1ul << (order - 4);
-   /* 128 (2**7) bytes in each HPTEG */
-   kvm->arch.hpt.mask = (1ul << (order - 7)) - 1;
 
	atomic64_set(&kvm->arch.mmio_update, 0);
 
/* Allocate reverse map array */
-   rev = vmalloc(sizeof(struct revmap_entry) * kvm->arch.hpt.npte);
+   rev = vmalloc(sizeof(struct revmap_entry) * kvmppc_hpt_npte(&kvm->arch.hpt));
if (!rev) {
pr_err("kvmppc_alloc_hpt: Couldn't alloc reverse map array\n");
goto out_freehpt;
@@ -194,8 +190,8 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct 
kvm_memory_slot *memslot,
if (npages > 1ul << (40 - porder))
npages = 1ul << (40 - porder);
/* Can't use more than 1 HPTE per HPTEG */
-   if (npages > kvm->arch.hpt.mask + 1)
-   npages = kvm->arch.hpt.mask + 1;
+   if (npages > kvmppc_hpt_mask(&kvm->arch.hpt) + 1)
+   npages = kvmppc_hpt_mask(&kvm->arch.hpt) + 1;
 
hp0 = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) |
HPTE_V_BOLTED | hpte0_pgsize_encoding(psize);
@@ -205,7 +201,8 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct 
kvm_memory_slot *memslot,
for (i = 0; i < npages; ++i) {
addr = i << porder;
/* can't use hpt_hash since va > 64 bits */
-   hash = (i ^ (VRMA_VSID ^ (VRMA_VSID << 25))) & 
kvm->arch.hpt.mask;
+   hash = (i ^ (VRMA_VSID ^ (VRMA_VSID << 25)))
+   & kvmppc_hpt_mask(&kvm->arch.hpt);
/*
 * We assume that the hash table is empty and no
 * vcpus are using it at this stage.  Since we create
@@ -1306,7 +1303,7 @@ static ssize_t kvm_htab_read(struct file *file, char 
__user *buf,
 
/* Skip uninteresting entries, i.e. clean on not-first pass */
if (!first_pass) {
-   while (i < kvm->arch.hpt.npte &&
+   while (i < kvmppc_hpt_npte(&kvm->arch.hpt) &&
   !hpte_dirty(revp, hptp)) {
++i;
hptp += 2;
@@ -1316,7 +1313,7 @@ static ssize_t kvm_htab_read(struct file *file, char 
__user *buf,
hdr.index = i;
 
/* Grab a series of valid entries */
-   while (i < kvm->arch.hpt.npte &&
+   while (i < kvmppc_hpt_npte(&kvm->arch.hpt) &&
   hdr.n_valid < 0xffff &&
   nb + HPTE_SIZE < count &&
   record_hpte(flags, hptp, hpte, revp, 1, first_pass)) {
@@ -1332,7 +1329,7 @@ static ssize_t kvm_htab_read(struct file *file, char 
__user *buf,
++revp;
}
/* Now skip invalid entries while we can */
-

[PATCH 03/11] powerpc/kvm: Gather HPT related variables into sub-structure

2016-12-14 Thread David Gibson
Currently, the powerpc kvm_arch structure contains a number of variables
tracking the state of the guest's hashed page table (HPT) in KVM HV.  This
patch gathers them all together into a single kvm_hpt_info substructure.
This makes life more convenient for the upcoming HPT resizing
implementation.

Signed-off-by: David Gibson 
---
 arch/powerpc/include/asm/kvm_host.h | 16 ---
 arch/powerpc/kvm/book3s_64_mmu_hv.c | 90 ++---
 arch/powerpc/kvm/book3s_hv.c|  2 +-
 arch/powerpc/kvm/book3s_hv_rm_mmu.c | 62 -
 4 files changed, 87 insertions(+), 83 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index e59b172..2673271 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -241,12 +241,20 @@ struct kvm_arch_memory_slot {
 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
 };
 
+struct kvm_hpt_info {
+   unsigned long virt;
+   struct revmap_entry *rev;
+   unsigned long npte;
+   unsigned long mask;
+   u32 order;
+   int cma;
+};
+
 struct kvm_arch {
unsigned int lpid;
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
unsigned int tlb_sets;
-   unsigned long hpt_virt;
-   struct revmap_entry *revmap;
+   struct kvm_hpt_info hpt;
atomic64_t mmio_update;
unsigned int host_lpid;
unsigned long host_lpcr;
@@ -256,14 +264,10 @@ struct kvm_arch {
unsigned long lpcr;
unsigned long vrma_slb_v;
int hpte_setup_done;
-   u32 hpt_order;
atomic_t vcpus_running;
u32 online_vcores;
-   unsigned long hpt_npte;
-   unsigned long hpt_mask;
atomic_t hpte_mod_interest;
cpumask_t need_tlb_flush;
-   int hpt_cma_alloc;
struct dentry *debugfs_dir;
struct dentry *htab_dentry;
 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c 
b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index ae17cdd..b5799d1 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -61,12 +61,12 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
order = PPC_MIN_HPT_ORDER;
}
 
-   kvm->arch.hpt_cma_alloc = 0;
+   kvm->arch.hpt.cma = 0;
page = kvm_alloc_hpt_cma(1ul << (order - PAGE_SHIFT));
if (page) {
hpt = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
memset((void *)hpt, 0, (1ul << order));
-   kvm->arch.hpt_cma_alloc = 1;
+   kvm->arch.hpt.cma = 1;
}
 
/* Lastly try successively smaller sizes from the page allocator */
@@ -81,22 +81,22 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
if (!hpt)
return -ENOMEM;
 
-   kvm->arch.hpt_virt = hpt;
-   kvm->arch.hpt_order = order;
+   kvm->arch.hpt.virt = hpt;
+   kvm->arch.hpt.order = order;
/* HPTEs are 2**4 bytes long */
-   kvm->arch.hpt_npte = 1ul << (order - 4);
+   kvm->arch.hpt.npte = 1ul << (order - 4);
/* 128 (2**7) bytes in each HPTEG */
-   kvm->arch.hpt_mask = (1ul << (order - 7)) - 1;
+   kvm->arch.hpt.mask = (1ul << (order - 7)) - 1;
 
	atomic64_set(&kvm->arch.mmio_update, 0);
 
/* Allocate reverse map array */
-   rev = vmalloc(sizeof(struct revmap_entry) * kvm->arch.hpt_npte);
+   rev = vmalloc(sizeof(struct revmap_entry) * kvm->arch.hpt.npte);
if (!rev) {
pr_err("kvmppc_alloc_hpt: Couldn't alloc reverse map array\n");
goto out_freehpt;
}
-   kvm->arch.revmap = rev;
+   kvm->arch.hpt.rev = rev;
kvm->arch.sdr1 = __pa(hpt) | (order - 18);
 
pr_info("KVM guest htab at %lx (order %ld), LPID %x\n",
@@ -107,7 +107,7 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
return 0;
 
  out_freehpt:
-   if (kvm->arch.hpt_cma_alloc)
+   if (kvm->arch.hpt.cma)
kvm_free_hpt_cma(page, 1 << (order - PAGE_SHIFT));
else
free_pages(hpt, order - PAGE_SHIFT);
@@ -129,10 +129,10 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 
*htab_orderp)
goto out;
}
}
-   if (kvm->arch.hpt_virt) {
-   order = kvm->arch.hpt_order;
+   if (kvm->arch.hpt.virt) {
+   order = kvm->arch.hpt.order;
/* Set the entire HPT to 0, i.e. invalid HPTEs */
-   memset((void *)kvm->arch.hpt_virt, 0, 1ul << order);
+   memset((void *)kvm->arch.hpt.virt, 0, 1ul << order);
/*
 * Reset all the reverse-mapping chains for all memslots
 */
@@ -153,13 +153,13 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 
*htab_orderp)
 void kvmppc_free_hpt(struct kvm *kvm)
 {
kvmppc_free_lpid(kvm->arch.lpid);
-   vfree(kvm->arch.revmap);
-   if 

[PATCH 02/11] powerpc/kvm: Rename kvm_alloc_hpt() for clarity

2016-12-14 Thread David Gibson
The difference between kvm_alloc_hpt() and kvmppc_alloc_hpt() is not at
all obvious from the name.  In practice kvmppc_alloc_hpt() allocates an HPT
by whatever means, and calls kvm_alloc_hpt() which will attempt to allocate
it with CMA only.

To make this less confusing, rename kvm_alloc_hpt() to kvm_alloc_hpt_cma().
Similarly, kvm_release_hpt() is renamed kvm_free_hpt_cma().

Signed-off-by: David Gibson 
---
 arch/powerpc/include/asm/kvm_ppc.h   | 4 ++--
 arch/powerpc/kvm/book3s_64_mmu_hv.c  | 8 
 arch/powerpc/kvm/book3s_hv_builtin.c | 8 
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_ppc.h 
b/arch/powerpc/include/asm/kvm_ppc.h
index 2da67bf..3db6be9 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -186,8 +186,8 @@ extern long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu,
unsigned long tce_value, unsigned long npages);
 extern long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
 unsigned long ioba);
-extern struct page *kvm_alloc_hpt(unsigned long nr_pages);
-extern void kvm_release_hpt(struct page *page, unsigned long nr_pages);
+extern struct page *kvm_alloc_hpt_cma(unsigned long nr_pages);
+extern void kvm_free_hpt_cma(struct page *page, unsigned long nr_pages);
 extern int kvmppc_core_init_vm(struct kvm *kvm);
 extern void kvmppc_core_destroy_vm(struct kvm *kvm);
 extern void kvmppc_core_free_memslot(struct kvm *kvm,
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c 
b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index b795dd1..ae17cdd 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -62,7 +62,7 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
}
 
kvm->arch.hpt_cma_alloc = 0;
-   page = kvm_alloc_hpt(1ul << (order - PAGE_SHIFT));
+   page = kvm_alloc_hpt_cma(1ul << (order - PAGE_SHIFT));
if (page) {
hpt = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
memset((void *)hpt, 0, (1ul << order));
@@ -108,7 +108,7 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
 
  out_freehpt:
if (kvm->arch.hpt_cma_alloc)
-   kvm_release_hpt(page, 1 << (order - PAGE_SHIFT));
+   kvm_free_hpt_cma(page, 1 << (order - PAGE_SHIFT));
else
free_pages(hpt, order - PAGE_SHIFT);
return -ENOMEM;
@@ -155,8 +155,8 @@ void kvmppc_free_hpt(struct kvm *kvm)
kvmppc_free_lpid(kvm->arch.lpid);
vfree(kvm->arch.revmap);
if (kvm->arch.hpt_cma_alloc)
-   kvm_release_hpt(virt_to_page(kvm->arch.hpt_virt),
-   1 << (kvm->arch.hpt_order - PAGE_SHIFT));
+   kvm_free_hpt_cma(virt_to_page(kvm->arch.hpt_virt),
+1 << (kvm->arch.hpt_order - PAGE_SHIFT));
else
free_pages(kvm->arch.hpt_virt,
   kvm->arch.hpt_order - PAGE_SHIFT);
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c 
b/arch/powerpc/kvm/book3s_hv_builtin.c
index 5bb24be..4c4aa47 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -52,19 +52,19 @@ static int __init early_parse_kvm_cma_resv(char *p)
 }
 early_param("kvm_cma_resv_ratio", early_parse_kvm_cma_resv);
 
-struct page *kvm_alloc_hpt(unsigned long nr_pages)
+struct page *kvm_alloc_hpt_cma(unsigned long nr_pages)
 {
VM_BUG_ON(order_base_2(nr_pages) < KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
 
return cma_alloc(kvm_cma, nr_pages, order_base_2(HPT_ALIGN_PAGES));
 }
-EXPORT_SYMBOL_GPL(kvm_alloc_hpt);
+EXPORT_SYMBOL_GPL(kvm_alloc_hpt_cma);
 
-void kvm_release_hpt(struct page *page, unsigned long nr_pages)
+void kvm_free_hpt_cma(struct page *page, unsigned long nr_pages)
 {
cma_release(kvm_cma, page, nr_pages);
 }
-EXPORT_SYMBOL_GPL(kvm_release_hpt);
+EXPORT_SYMBOL_GPL(kvm_free_hpt_cma);
 
 /**
  * kvm_cma_reserve() - reserve area for kvm hash pagetable
-- 
2.9.3



[PATCH 06/11] powerpc/kvm: Allow KVM_PPC_ALLOCATE_HTAB ioctl() to change HPT size

2016-12-14 Thread David Gibson
The KVM_PPC_ALLOCATE_HTAB ioctl() is used to set the size of hashed page
table (HPT) that userspace expects a guest VM to have, and is also used to
clear that HPT when necessary (e.g. guest reboot).

At present, once the ioctl() is called for the first time, the HPT size can
never be changed thereafter - it will be cleared but always sized as from
the first call.

With upcoming HPT resize implementation, we're going to need to allow
userspace to resize the HPT at reset (to change it back to the default size
if the guest changed it).

So, we need to allow this ioctl() to change the HPT size.

Signed-off-by: David Gibson 
---
 arch/powerpc/include/asm/kvm_ppc.h  |  2 +-
 arch/powerpc/kvm/book3s_64_mmu_hv.c | 53 -
 arch/powerpc/kvm/book3s_hv.c|  5 +---
 3 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_ppc.h 
b/arch/powerpc/include/asm/kvm_ppc.h
index 41575b8..3b837bc 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -157,7 +157,7 @@ extern void kvmppc_map_magic(struct kvm_vcpu *vcpu);
 
 extern int kvmppc_allocate_hpt(struct kvm_hpt_info *info, u32 order);
 extern void kvmppc_set_hpt(struct kvm *kvm, struct kvm_hpt_info *info);
-extern long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp);
+extern long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order);
 extern void kvmppc_free_hpt(struct kvm_hpt_info *info);
 extern long kvmppc_prepare_vrma(struct kvm *kvm,
struct kvm_userspace_memory_region *mem);
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c 
b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 68bb228..8e5ac2f 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -104,10 +104,22 @@ void kvmppc_set_hpt(struct kvm *kvm, struct kvm_hpt_info 
*info)
info->virt, (long)info->order, kvm->arch.lpid);
 }
 
-long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp)
+void kvmppc_free_hpt(struct kvm_hpt_info *info)
+{
+   vfree(info->rev);
+   if (info->cma)
+   kvm_free_hpt_cma(virt_to_page(info->virt),
+1 << (info->order - PAGE_SHIFT));
+   else
+   free_pages(info->virt, info->order - PAGE_SHIFT);
+   info->virt = 0;
+   info->order = 0;
+}
+
+long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order)
 {
long err = -EBUSY;
-   long order;
+   struct kvm_hpt_info info;
 
	mutex_lock(&kvm->lock);
if (kvm->arch.hpte_setup_done) {
@@ -119,8 +131,9 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 
*htab_orderp)
goto out;
}
}
-   if (kvm->arch.hpt.virt) {
-   order = kvm->arch.hpt.order;
+   if (kvm->arch.hpt.order == order) {
+   /* We already have a suitable HPT */
+
/* Set the entire HPT to 0, i.e. invalid HPTEs */
memset((void *)kvm->arch.hpt.virt, 0, 1ul << order);
/*
@@ -129,33 +142,23 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 
*htab_orderp)
kvmppc_rmap_reset(kvm);
/* Ensure that each vcpu will flush its TLB on next entry. */
	cpumask_setall(&kvm->arch.need_tlb_flush);
-   *htab_orderp = order;
err = 0;
-   } else {
-   struct kvm_hpt_info info;
-
-   err = kvmppc_allocate_hpt(&info, *htab_orderp);
-   if (err < 0)
-   goto out;
-   kvmppc_set_hpt(kvm, &info);
+   goto out;
}
- out:
+
+   if (kvm->arch.hpt.virt)
+   kvmppc_free_hpt(&kvm->arch.hpt);
+
+   err = kvmppc_allocate_hpt(&info, order);
+   if (err < 0)
+   goto out;
+   kvmppc_set_hpt(kvm, &info);
+
+out:
	mutex_unlock(&kvm->lock);
return err;
 }
 
-void kvmppc_free_hpt(struct kvm_hpt_info *info)
-{
-   vfree(info->rev);
-   if (info->cma)
-   kvm_free_hpt_cma(virt_to_page(info->virt),
-1 << (info->order - PAGE_SHIFT));
-   else
-   free_pages(info->virt, info->order - PAGE_SHIFT);
-   info->virt = 0;
-   info->order = 0;
-}
-
 /* Bits in first HPTE dword for pagesize 4k, 64k or 16M */
 static inline unsigned long hpte0_pgsize_encoding(unsigned long pgsize)
 {
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 71c5adb..957e473 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3600,12 +3600,9 @@ static long kvm_arch_vm_ioctl_hv(struct file *filp,
r = -EFAULT;
if (get_user(htab_order, (u32 __user *)argp))
break;
-   r = kvmppc_alloc_reset_hpt(kvm, &htab_order);
+   r = kvmppc_alloc_reset_hpt(kvm, htab_order);
if (r)
break;
-   r 

[PATCH 06/11] powerpc/kvm: Allow KVM_PPC_ALLOCATE_HTAB ioctl() to change HPT size

2016-12-14 Thread David Gibson
The KVM_PPC_ALLOCATE_HTAB ioctl() is used to set the size of hashed page
table (HPT) that userspace expects a guest VM to have, and is also used to
clear that HPT when necessary (e.g. guest reboot).

At present, once the ioctl() is called for the first time, the HPT size can
never be changed thereafter - it will be cleared but always sized as from
the first call.

With upcoming HPT resize implementation, we're going to need to allow
userspace to resize the HPT at reset (to change it back to the default size
if the guest changed it).

So, we need to allow this ioctl() to change the HPT size.

Signed-off-by: David Gibson 
---
 arch/powerpc/include/asm/kvm_ppc.h  |  2 +-
 arch/powerpc/kvm/book3s_64_mmu_hv.c | 53 -
 arch/powerpc/kvm/book3s_hv.c|  5 +---
 3 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_ppc.h 
b/arch/powerpc/include/asm/kvm_ppc.h
index 41575b8..3b837bc 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -157,7 +157,7 @@ extern void kvmppc_map_magic(struct kvm_vcpu *vcpu);
 
 extern int kvmppc_allocate_hpt(struct kvm_hpt_info *info, u32 order);
 extern void kvmppc_set_hpt(struct kvm *kvm, struct kvm_hpt_info *info);
-extern long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp);
+extern long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order);
 extern void kvmppc_free_hpt(struct kvm_hpt_info *info);
 extern long kvmppc_prepare_vrma(struct kvm *kvm,
struct kvm_userspace_memory_region *mem);
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c 
b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 68bb228..8e5ac2f 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -104,10 +104,22 @@ void kvmppc_set_hpt(struct kvm *kvm, struct kvm_hpt_info 
*info)
info->virt, (long)info->order, kvm->arch.lpid);
 }
 
-long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp)
+void kvmppc_free_hpt(struct kvm_hpt_info *info)
+{
+   vfree(info->rev);
+   if (info->cma)
+   kvm_free_hpt_cma(virt_to_page(info->virt),
+1 << (info->order - PAGE_SHIFT));
+   else
+   free_pages(info->virt, info->order - PAGE_SHIFT);
+   info->virt = 0;
+   info->order = 0;
+}
+
+long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order)
 {
long err = -EBUSY;
-   long order;
+   struct kvm_hpt_info info;
 
	mutex_lock(&kvm->lock);
if (kvm->arch.hpte_setup_done) {
@@ -119,8 +131,9 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 
*htab_orderp)
goto out;
}
}
-   if (kvm->arch.hpt.virt) {
-   order = kvm->arch.hpt.order;
+   if (kvm->arch.hpt.order == order) {
+   /* We already have a suitable HPT */
+
/* Set the entire HPT to 0, i.e. invalid HPTEs */
memset((void *)kvm->arch.hpt.virt, 0, 1ul << order);
/*
@@ -129,33 +142,23 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 
*htab_orderp)
kvmppc_rmap_reset(kvm);
/* Ensure that each vcpu will flush its TLB on next entry. */
	cpumask_setall(&kvm->arch.need_tlb_flush);
-   *htab_orderp = order;
err = 0;
-   } else {
-   struct kvm_hpt_info info;
-
-   err = kvmppc_allocate_hpt(&info, *htab_orderp);
-   if (err < 0)
-   goto out;
-   kvmppc_set_hpt(kvm, &info);
+   goto out;
}
- out:
+
+   if (kvm->arch.hpt.virt)
+   kvmppc_free_hpt(&kvm->arch.hpt);
+
+   err = kvmppc_allocate_hpt(&info, order);
+   if (err < 0)
+   goto out;
+   kvmppc_set_hpt(kvm, &info);
+
+out:
	mutex_unlock(&kvm->lock);
return err;
 }
 
-void kvmppc_free_hpt(struct kvm_hpt_info *info)
-{
-   vfree(info->rev);
-   if (info->cma)
-   kvm_free_hpt_cma(virt_to_page(info->virt),
-1 << (info->order - PAGE_SHIFT));
-   else
-   free_pages(info->virt, info->order - PAGE_SHIFT);
-   info->virt = 0;
-   info->order = 0;
-}
-
 /* Bits in first HPTE dword for pagesize 4k, 64k or 16M */
 static inline unsigned long hpte0_pgsize_encoding(unsigned long pgsize)
 {
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 71c5adb..957e473 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3600,12 +3600,9 @@ static long kvm_arch_vm_ioctl_hv(struct file *filp,
r = -EFAULT;
if (get_user(htab_order, (u32 __user *)argp))
break;
-   r = kvmppc_alloc_reset_hpt(kvm, &htab_order);
+   r = kvmppc_alloc_reset_hpt(kvm, htab_order);
if (r)
break;
-   r = -EFAULT;
-   if 

[PATCH 05/11] powerpc/kvm: Split HPT allocation from activation

2016-12-14 Thread David Gibson
Currently, kvmppc_alloc_hpt() both allocates a new hashed page table (HPT)
and sets it up as the active page table for a VM.  For the upcoming HPT
resize implementation we're going to want to allocate HPTs separately from
activating them.

So, split the allocation itself out into kvmppc_allocate_hpt() and perform
the activation with a new kvmppc_set_hpt() function.  Likewise we split
kvmppc_free_hpt(), which just frees the HPT, from kvmppc_release_hpt()
which unsets it as an active HPT, then frees it.

We also move the logic to fall back to smaller HPT sizes if the first try
fails into the single caller which used that behaviour,
kvmppc_hv_setup_htab_rma().  This introduces a slight semantic change, in
that previously if the initial attempt at CMA allocation failed, we would
fall back to attempting smaller sizes with the page allocator.  Now, we
try first CMA, then the page allocator at each size.  As far as I can tell
this change should be harmless.

To match, we make kvmppc_free_hpt() just free the actual HPT itself.  The
call to kvmppc_free_lpid() that was there, we move to the single caller.

Signed-off-by: David Gibson 
---
 arch/powerpc/include/asm/kvm_book3s_64.h |  3 ++
 arch/powerpc/include/asm/kvm_ppc.h   |  5 +-
 arch/powerpc/kvm/book3s_64_mmu_hv.c  | 90 
 arch/powerpc/kvm/book3s_hv.c | 18 +--
 4 files changed, 65 insertions(+), 51 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h 
b/arch/powerpc/include/asm/kvm_book3s_64.h
index 8810327..6dc4004 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -22,6 +22,9 @@
 
 #include 
 
+/* Power architecture requires HPT is at least 256kB */
+#define PPC_MIN_HPT_ORDER  18
+
 #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
 static inline struct kvmppc_book3s_shadow_vcpu *svcpu_get(struct kvm_vcpu 
*vcpu)
 {
diff --git a/arch/powerpc/include/asm/kvm_ppc.h 
b/arch/powerpc/include/asm/kvm_ppc.h
index 3db6be9..41575b8 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -155,9 +155,10 @@ extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu);
 extern int kvmppc_kvm_pv(struct kvm_vcpu *vcpu);
 extern void kvmppc_map_magic(struct kvm_vcpu *vcpu);
 
-extern long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp);
+extern int kvmppc_allocate_hpt(struct kvm_hpt_info *info, u32 order);
+extern void kvmppc_set_hpt(struct kvm *kvm, struct kvm_hpt_info *info);
 extern long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp);
-extern void kvmppc_free_hpt(struct kvm *kvm);
+extern void kvmppc_free_hpt(struct kvm_hpt_info *info);
 extern long kvmppc_prepare_vrma(struct kvm *kvm,
struct kvm_userspace_memory_region *mem);
 extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu,
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c 
b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index fe88132..68bb228 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -40,76 +40,70 @@
 
 #include "trace_hv.h"
 
-/* Power architecture requires HPT is at least 256kB */
-#define PPC_MIN_HPT_ORDER  18
-
 static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
long pte_index, unsigned long pteh,
unsigned long ptel, unsigned long *pte_idx_ret);
 static void kvmppc_rmap_reset(struct kvm *kvm);
 
-long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
+int kvmppc_allocate_hpt(struct kvm_hpt_info *info, u32 order)
 {
unsigned long hpt = 0;
-   struct revmap_entry *rev;
+   int cma = 0;
struct page *page = NULL;
-   long order = KVM_DEFAULT_HPT_ORDER;
-
-   if (htab_orderp) {
-   order = *htab_orderp;
-   if (order < PPC_MIN_HPT_ORDER)
-   order = PPC_MIN_HPT_ORDER;
-   }
+   struct revmap_entry *rev;
+   unsigned long npte;
 
-   kvm->arch.hpt.cma = 0;
+   hpt = 0;
+   cma = 0;
page = kvm_alloc_hpt_cma(1ul << (order - PAGE_SHIFT));
if (page) {
hpt = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
memset((void *)hpt, 0, (1ul << order));
-   kvm->arch.hpt.cma = 1;
+   cma = 1;
}
 
-   /* Lastly try successively smaller sizes from the page allocator */
-   /* Only do this if userspace didn't specify a size via ioctl */
-   while (!hpt && order > PPC_MIN_HPT_ORDER && !htab_orderp) {
-   hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT|
-  __GFP_NOWARN, order - PAGE_SHIFT);
-   if (!hpt)
-   --order;
-   }
+   if (!hpt)
+   hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT
+  |__GFP_NOWARN, order - PAGE_SHIFT);
 
if 

[PATCH 07/11] powerpc/kvm: Create kvmppc_unmap_hpte_helper()

2016-12-14 Thread David Gibson
The kvm_unmap_rmapp() function, called from certain MMU notifiers, is used
to force all guest mappings of a particular host page to be set ABSENT, and
removed from the reverse mappings.

For HPT resizing, we will have some cases where we want to set just a
single guest HPTE ABSENT and remove its reverse mappings.  To prepare with
this, we split out the logic from kvm_unmap_rmapp() to evict a single HPTE,
moving it to a new helper function.

Signed-off-by: David Gibson 
---
 arch/powerpc/kvm/book3s_64_mmu_hv.c | 77 +
 1 file changed, 44 insertions(+), 33 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c 
b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 8e5ac2f..cd145eb 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -740,13 +740,53 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long 
hva,
return kvm_handle_hva_range(kvm, hva, hva + 1, handler);
 }
 
+/* Must be called with both HPTE and rmap locked */
+static void kvmppc_unmap_hpte(struct kvm *kvm, unsigned long i,
+ unsigned long *rmapp, unsigned long gfn)
+{
+   __be64 *hptep = (__be64 *) (kvm->arch.hpt.virt + (i << 4));
+   struct revmap_entry *rev = kvm->arch.hpt.rev;
+   unsigned long j, h;
+   unsigned long ptel, psize, rcbits;
+
+   j = rev[i].forw;
+   if (j == i) {
+   /* chain is now empty */
+   *rmapp &= ~(KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_INDEX);
+   } else {
+   /* remove i from chain */
+   h = rev[i].back;
+   rev[h].forw = j;
+   rev[j].back = h;
+   rev[i].forw = rev[i].back = i;
+   *rmapp = (*rmapp & ~KVMPPC_RMAP_INDEX) | j;
+   }
+
+   /* Now check and modify the HPTE */
+   ptel = rev[i].guest_rpte;
+   psize = hpte_page_size(be64_to_cpu(hptep[0]), ptel);
+   if ((be64_to_cpu(hptep[0]) & HPTE_V_VALID) &&
+   hpte_rpn(ptel, psize) == gfn) {
+   hptep[0] |= cpu_to_be64(HPTE_V_ABSENT);
+   kvmppc_invalidate_hpte(kvm, hptep, i);
+   hptep[1] &= ~cpu_to_be64(HPTE_R_KEY_HI | HPTE_R_KEY_LO);
+   /* Harvest R and C */
+   rcbits = be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C);
+   *rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT;
+   if (rcbits & HPTE_R_C)
+   kvmppc_update_rmap_change(rmapp, psize);
+   if (rcbits & ~rev[i].guest_rpte) {
+   rev[i].guest_rpte = ptel | rcbits;
+   note_hpte_modification(kvm, &rev[i]);
+   }
+   }   
+}
+
 static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
   unsigned long gfn)
 {
-   struct revmap_entry *rev = kvm->arch.hpt.rev;
-   unsigned long h, i, j;
+   unsigned long i;
__be64 *hptep;
-   unsigned long ptel, psize, rcbits;
 
for (;;) {
lock_rmap(rmapp);
@@ -769,37 +809,8 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long 
*rmapp,
cpu_relax();
continue;
}
-   j = rev[i].forw;
-   if (j == i) {
-   /* chain is now empty */
-   *rmapp &= ~(KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_INDEX);
-   } else {
-   /* remove i from chain */
-   h = rev[i].back;
-   rev[h].forw = j;
-   rev[j].back = h;
-   rev[i].forw = rev[i].back = i;
-   *rmapp = (*rmapp & ~KVMPPC_RMAP_INDEX) | j;
-   }
 
-   /* Now check and modify the HPTE */
-   ptel = rev[i].guest_rpte;
-   psize = hpte_page_size(be64_to_cpu(hptep[0]), ptel);
-   if ((be64_to_cpu(hptep[0]) & HPTE_V_VALID) &&
-   hpte_rpn(ptel, psize) == gfn) {
-   hptep[0] |= cpu_to_be64(HPTE_V_ABSENT);
-   kvmppc_invalidate_hpte(kvm, hptep, i);
-   hptep[1] &= ~cpu_to_be64(HPTE_R_KEY_HI | HPTE_R_KEY_LO);
-   /* Harvest R and C */
-   rcbits = be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C);
-   *rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT;
-   if (rcbits & HPTE_R_C)
-   kvmppc_update_rmap_change(rmapp, psize);
-   if (rcbits & ~rev[i].guest_rpte) {
-   rev[i].guest_rpte = ptel | rcbits;
-   note_hpte_modification(kvm, &rev[i]);
-   }
-   }
+   kvmppc_unmap_hpte(kvm, i, rmapp, gfn);
unlock_rmap(rmapp);
__unlock_hpte(hptep, be64_to_cpu(hptep[0]));
}
-- 
2.9.3



[PATCH 08/11] powerpc/kvm: KVM-HV HPT resizing stub implementation

2016-12-14 Thread David Gibson
This patch adds a stub (always failing) implementation of the ioctl()s
for the HPT resizing PAPR extension.

Signed-off-by: David Gibson 
---
 arch/powerpc/include/asm/kvm_ppc.h  |  4 
 arch/powerpc/kvm/book3s_64_mmu_hv.c | 16 
 arch/powerpc/kvm/book3s_hv.c| 22 ++
 3 files changed, 42 insertions(+)

diff --git a/arch/powerpc/include/asm/kvm_ppc.h 
b/arch/powerpc/include/asm/kvm_ppc.h
index 3b837bc..f8eaed0 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -215,6 +215,10 @@ extern void kvmppc_bookehv_exit(void);
 extern int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu);
 
 extern int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *);
+extern long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,
+   struct kvm_ppc_resize_hpt *rhpt);
+extern long kvm_vm_ioctl_resize_hpt_commit(struct kvm *kvm,
+  struct kvm_ppc_resize_hpt *rhpt);
 
 int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq);
 
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c 
b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index cd145eb..ac0f18b 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -1160,6 +1160,22 @@ void kvmppc_unpin_guest_page(struct kvm *kvm, void *va, 
unsigned long gpa,
 }
 
 /*
+ * HPT resizing
+ */
+
+long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,
+struct kvm_ppc_resize_hpt *rhpt)
+{
+   return -EIO;
+}
+
+long kvm_vm_ioctl_resize_hpt_commit(struct kvm *kvm,
+   struct kvm_ppc_resize_hpt *rhpt)
+{
+   return -EIO;
+}
+
+/*
  * Functions for reading and writing the hash table via reads and
  * writes on a file descriptor.
  *
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 957e473..d022322 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3617,6 +3617,28 @@ static long kvm_arch_vm_ioctl_hv(struct file *filp,
break;
}
 
+   case KVM_PPC_RESIZE_HPT_PREPARE: {
+   struct kvm_ppc_resize_hpt rhpt;
+
+   r = -EFAULT;
+   if (copy_from_user(&rhpt, argp, sizeof(rhpt)))
+   break;
+
+   r = kvm_vm_ioctl_resize_hpt_prepare(kvm, &rhpt);
+   break;
+   }
+
+   case KVM_PPC_RESIZE_HPT_COMMIT: {
+   struct kvm_ppc_resize_hpt rhpt;
+
+   r = -EFAULT;
+   if (copy_from_user(&rhpt, argp, sizeof(rhpt)))
+   break;
+
+   r = kvm_vm_ioctl_resize_hpt_commit(kvm, &rhpt);
+   break;
+   }
+
default:
r = -ENOTTY;
}
-- 
2.9.3



[PATCH 05/11] powerpc/kvm: Split HPT allocation from activation

2016-12-14 Thread David Gibson
Currently, kvmppc_alloc_hpt() both allocates a new hashed page table (HPT)
and sets it up as the active page table for a VM.  For the upcoming HPT
resize implementation we're going to want to allocate HPTs separately from
activating them.

So, split the allocation itself out into kvmppc_allocate_hpt() and perform
the activation with a new kvmppc_set_hpt() function.  Likewise we split
kvmppc_free_hpt(), which just frees the HPT, from kvmppc_release_hpt()
which unsets it as an active HPT, then frees it.

We also move the logic to fall back to smaller HPT sizes if the first try
fails into the single caller which used that behaviour,
kvmppc_hv_setup_htab_rma().  This introduces a slight semantic change, in
that previously if the initial attempt at CMA allocation failed, we would
fall back to attempting smaller sizes with the page allocator.  Now, we
try first CMA, then the page allocator at each size.  As far as I can tell
this change should be harmless.

To match, we make kvmppc_free_hpt() just free the actual HPT itself.  The
call to kvmppc_free_lpid() that was there, we move to the single caller.

Signed-off-by: David Gibson 
---
 arch/powerpc/include/asm/kvm_book3s_64.h |  3 ++
 arch/powerpc/include/asm/kvm_ppc.h   |  5 +-
 arch/powerpc/kvm/book3s_64_mmu_hv.c  | 90 
 arch/powerpc/kvm/book3s_hv.c | 18 +--
 4 files changed, 65 insertions(+), 51 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h 
b/arch/powerpc/include/asm/kvm_book3s_64.h
index 8810327..6dc4004 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -22,6 +22,9 @@
 
 #include 
 
+/* Power architecture requires HPT is at least 256kB */
+#define PPC_MIN_HPT_ORDER  18
+
 #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
 static inline struct kvmppc_book3s_shadow_vcpu *svcpu_get(struct kvm_vcpu 
*vcpu)
 {
diff --git a/arch/powerpc/include/asm/kvm_ppc.h 
b/arch/powerpc/include/asm/kvm_ppc.h
index 3db6be9..41575b8 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -155,9 +155,10 @@ extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu);
 extern int kvmppc_kvm_pv(struct kvm_vcpu *vcpu);
 extern void kvmppc_map_magic(struct kvm_vcpu *vcpu);
 
-extern long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp);
+extern int kvmppc_allocate_hpt(struct kvm_hpt_info *info, u32 order);
+extern void kvmppc_set_hpt(struct kvm *kvm, struct kvm_hpt_info *info);
 extern long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp);
-extern void kvmppc_free_hpt(struct kvm *kvm);
+extern void kvmppc_free_hpt(struct kvm_hpt_info *info);
 extern long kvmppc_prepare_vrma(struct kvm *kvm,
struct kvm_userspace_memory_region *mem);
 extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu,
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c 
b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index fe88132..68bb228 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -40,76 +40,70 @@
 
 #include "trace_hv.h"
 
-/* Power architecture requires HPT is at least 256kB */
-#define PPC_MIN_HPT_ORDER  18
-
 static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
long pte_index, unsigned long pteh,
unsigned long ptel, unsigned long *pte_idx_ret);
 static void kvmppc_rmap_reset(struct kvm *kvm);
 
-long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
+int kvmppc_allocate_hpt(struct kvm_hpt_info *info, u32 order)
 {
unsigned long hpt = 0;
-   struct revmap_entry *rev;
+   int cma = 0;
struct page *page = NULL;
-   long order = KVM_DEFAULT_HPT_ORDER;
-
-   if (htab_orderp) {
-   order = *htab_orderp;
-   if (order < PPC_MIN_HPT_ORDER)
-   order = PPC_MIN_HPT_ORDER;
-   }
+   struct revmap_entry *rev;
+   unsigned long npte;
 
-   kvm->arch.hpt.cma = 0;
+   hpt = 0;
+   cma = 0;
page = kvm_alloc_hpt_cma(1ul << (order - PAGE_SHIFT));
if (page) {
hpt = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
memset((void *)hpt, 0, (1ul << order));
-   kvm->arch.hpt.cma = 1;
+   cma = 1;
}
 
-   /* Lastly try successively smaller sizes from the page allocator */
-   /* Only do this if userspace didn't specify a size via ioctl */
-   while (!hpt && order > PPC_MIN_HPT_ORDER && !htab_orderp) {
-   hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT|
-  __GFP_NOWARN, order - PAGE_SHIFT);
-   if (!hpt)
-   --order;
-   }
+   if (!hpt)
+   hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT
+  |__GFP_NOWARN, order - PAGE_SHIFT);
 
if (!hpt)
return 

[PATCH 07/11] powerpc/kvm: Create kvmppc_unmap_hpte_helper()

2016-12-14 Thread David Gibson
The kvm_unmap_rmapp() function, called from certain MMU notifiers, is used
to force all guest mappings of a particular host page to be set ABSENT, and
removed from the reverse mappings.

For HPT resizing, we will have some cases where we want to set just a
single guest HPTE ABSENT and remove its reverse mappings.  To prepare for
this, we split out the logic from kvm_unmap_rmapp() to evict a single HPTE,
moving it to a new helper function.

Signed-off-by: David Gibson 
---
 arch/powerpc/kvm/book3s_64_mmu_hv.c | 77 +
 1 file changed, 44 insertions(+), 33 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c 
b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 8e5ac2f..cd145eb 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -740,13 +740,53 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long 
hva,
return kvm_handle_hva_range(kvm, hva, hva + 1, handler);
 }
 
+/* Must be called with both HPTE and rmap locked */
+static void kvmppc_unmap_hpte(struct kvm *kvm, unsigned long i,
+ unsigned long *rmapp, unsigned long gfn)
+{
+   __be64 *hptep = (__be64 *) (kvm->arch.hpt.virt + (i << 4));
+   struct revmap_entry *rev = kvm->arch.hpt.rev;
+   unsigned long j, h;
+   unsigned long ptel, psize, rcbits;
+
+   j = rev[i].forw;
+   if (j == i) {
+   /* chain is now empty */
+   *rmapp &= ~(KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_INDEX);
+   } else {
+   /* remove i from chain */
+   h = rev[i].back;
+   rev[h].forw = j;
+   rev[j].back = h;
+   rev[i].forw = rev[i].back = i;
+   *rmapp = (*rmapp & ~KVMPPC_RMAP_INDEX) | j;
+   }
+
+   /* Now check and modify the HPTE */
+   ptel = rev[i].guest_rpte;
+   psize = hpte_page_size(be64_to_cpu(hptep[0]), ptel);
+   if ((be64_to_cpu(hptep[0]) & HPTE_V_VALID) &&
+   hpte_rpn(ptel, psize) == gfn) {
+   hptep[0] |= cpu_to_be64(HPTE_V_ABSENT);
+   kvmppc_invalidate_hpte(kvm, hptep, i);
+   hptep[1] &= ~cpu_to_be64(HPTE_R_KEY_HI | HPTE_R_KEY_LO);
+   /* Harvest R and C */
+   rcbits = be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C);
+   *rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT;
+   if (rcbits & HPTE_R_C)
+   kvmppc_update_rmap_change(rmapp, psize);
+   if (rcbits & ~rev[i].guest_rpte) {
+   rev[i].guest_rpte = ptel | rcbits;
+   note_hpte_modification(kvm, [i]);
+   }
+   }   
+}
+
 static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
   unsigned long gfn)
 {
-   struct revmap_entry *rev = kvm->arch.hpt.rev;
-   unsigned long h, i, j;
+   unsigned long i;
__be64 *hptep;
-   unsigned long ptel, psize, rcbits;
 
for (;;) {
lock_rmap(rmapp);
@@ -769,37 +809,8 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long 
*rmapp,
cpu_relax();
continue;
}
-   j = rev[i].forw;
-   if (j == i) {
-   /* chain is now empty */
-   *rmapp &= ~(KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_INDEX);
-   } else {
-   /* remove i from chain */
-   h = rev[i].back;
-   rev[h].forw = j;
-   rev[j].back = h;
-   rev[i].forw = rev[i].back = i;
-   *rmapp = (*rmapp & ~KVMPPC_RMAP_INDEX) | j;
-   }
 
-   /* Now check and modify the HPTE */
-   ptel = rev[i].guest_rpte;
-   psize = hpte_page_size(be64_to_cpu(hptep[0]), ptel);
-   if ((be64_to_cpu(hptep[0]) & HPTE_V_VALID) &&
-   hpte_rpn(ptel, psize) == gfn) {
-   hptep[0] |= cpu_to_be64(HPTE_V_ABSENT);
-   kvmppc_invalidate_hpte(kvm, hptep, i);
-   hptep[1] &= ~cpu_to_be64(HPTE_R_KEY_HI | HPTE_R_KEY_LO);
-   /* Harvest R and C */
-   rcbits = be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C);
-   *rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT;
-   if (rcbits & HPTE_R_C)
-   kvmppc_update_rmap_change(rmapp, psize);
-   if (rcbits & ~rev[i].guest_rpte) {
-   rev[i].guest_rpte = ptel | rcbits;
-   note_hpte_modification(kvm, [i]);
-   }
-   }
+   kvmppc_unmap_hpte(kvm, i, rmapp, gfn);
unlock_rmap(rmapp);
__unlock_hpte(hptep, be64_to_cpu(hptep[0]));
}
-- 
2.9.3



[PATCH 08/11] powerpc/kvm: KVM-HV HPT resizing stub implementation

2016-12-14 Thread David Gibson
This patch adds a stub (always failing) implementation of the ioctl()s
for the HPT resizing PAPR extension.

Signed-off-by: David Gibson 
---
 arch/powerpc/include/asm/kvm_ppc.h  |  4 
 arch/powerpc/kvm/book3s_64_mmu_hv.c | 16 
 arch/powerpc/kvm/book3s_hv.c| 22 ++
 3 files changed, 42 insertions(+)

diff --git a/arch/powerpc/include/asm/kvm_ppc.h 
b/arch/powerpc/include/asm/kvm_ppc.h
index 3b837bc..f8eaed0 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -215,6 +215,10 @@ extern void kvmppc_bookehv_exit(void);
 extern int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu);
 
 extern int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *);
+extern long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,
+   struct kvm_ppc_resize_hpt *rhpt);
+extern long kvm_vm_ioctl_resize_hpt_commit(struct kvm *kvm,
+  struct kvm_ppc_resize_hpt *rhpt);
 
 int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq);
 
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c 
b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index cd145eb..ac0f18b 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -1160,6 +1160,22 @@ void kvmppc_unpin_guest_page(struct kvm *kvm, void *va, 
unsigned long gpa,
 }
 
 /*
+ * HPT resizing
+ */
+
+long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,
+struct kvm_ppc_resize_hpt *rhpt)
+{
+   return -EIO;
+}
+
+long kvm_vm_ioctl_resize_hpt_commit(struct kvm *kvm,
+   struct kvm_ppc_resize_hpt *rhpt)
+{
+   return -EIO;
+}
+
+/*
  * Functions for reading and writing the hash table via reads and
  * writes on a file descriptor.
  *
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 957e473..d022322 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3617,6 +3617,28 @@ static long kvm_arch_vm_ioctl_hv(struct file *filp,
break;
}
 
+   case KVM_PPC_RESIZE_HPT_PREPARE: {
+   struct kvm_ppc_resize_hpt rhpt;
+
+   r = -EFAULT;
+   if (copy_from_user(, argp, sizeof(rhpt)))
+   break;
+
+   r = kvm_vm_ioctl_resize_hpt_prepare(kvm, );
+   break;
+   }
+
+   case KVM_PPC_RESIZE_HPT_COMMIT: {
+   struct kvm_ppc_resize_hpt rhpt;
+
+   r = -EFAULT;
+   if (copy_from_user(, argp, sizeof(rhpt)))
+   break;
+
+   r = kvm_vm_ioctl_resize_hpt_commit(kvm, );
+   break;
+   }
+
default:
r = -ENOTTY;
}
-- 
2.9.3



[PATCH 10/11] powerpc/kvm: KVM-HV HPT resizing implementation

2016-12-14 Thread David Gibson
This adds the "guts" of the implementation for the HPT resizing PAPR
extension.  It has the code to allocate and clear a new HPT, rehash an
existing HPT's entries into it, and accomplish the switchover for a
KVM guest from the old HPT to the new one.

Signed-off-by: David Gibson 
---
 arch/powerpc/kvm/book3s_64_mmu_hv.c | 188 +++-
 1 file changed, 187 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c 
b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index a2ac749..2a86c07 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -66,6 +66,10 @@ struct kvm_resize_hpt {
/* These fields protected by kvm->lock */
int error;
bool prepare_done;
+
+   /* Private to the work thread, until prepare_done is true,
+* then protected by kvm->resize_hpt_sem */
+   struct kvm_hpt_info hpt;
 };
 
 static void kvmppc_rmap_reset(struct kvm *kvm);
@@ -1189,21 +1193,203 @@ void kvmppc_unpin_guest_page(struct kvm *kvm, void 
*va, unsigned long gpa,
  */
 static int resize_hpt_allocate(struct kvm_resize_hpt *resize)
 {
+   int rc;
+
+   rc = kvmppc_allocate_hpt(>hpt, resize->order);
+   if (rc < 0)
+   return rc;
+
+   resize_hpt_debug(resize, "resize_hpt_allocate(): HPT @ 0x%lx\n",
+resize->hpt.virt);
+
return 0;
 }
 
+static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
+   unsigned long idx)
+{
+   struct kvm *kvm = resize->kvm;
+   struct kvm_hpt_info *old = >arch.hpt;
+   struct kvm_hpt_info *new = >hpt;
+   unsigned long old_hash_mask = (1ULL << (old->order - 7)) - 1;
+   unsigned long new_hash_mask = (1ULL << (new->order - 7)) - 1;
+   __be64 *hptep, *new_hptep;
+   unsigned long vpte, rpte, guest_rpte;
+   int ret;
+   struct revmap_entry *rev;
+   unsigned long apsize, psize, avpn, pteg, hash;
+   unsigned long new_idx, new_pteg, replace_vpte;
+
+   hptep = (__be64 *)(old->virt + (idx << 4));
+
+   /* Guest is stopped, so new HPTEs can't be added or faulted
+* in, only unmapped or altered by host actions.  So, it's
+* safe to check this before we take the HPTE lock */
+   vpte = be64_to_cpu(hptep[0]);
+   if (!(vpte & HPTE_V_VALID) && !(vpte & HPTE_V_ABSENT))
+   return 0; /* nothing to do */
+
+   while (!try_lock_hpte(hptep, HPTE_V_HVLOCK))
+   cpu_relax();
+
+   vpte = be64_to_cpu(hptep[0]);
+
+   ret = 0;
+   if (!(vpte & HPTE_V_VALID) && !(vpte & HPTE_V_ABSENT))
+   /* Nothing to do */
+   goto out;
+
+   /* Unmap */
+   rev = >rev[idx];
+   guest_rpte = rev->guest_rpte;
+
+   ret = -EIO;
+   apsize = hpte_page_size(vpte, guest_rpte);
+   if (!apsize)
+   goto out;
+
+   if (vpte & HPTE_V_VALID) {
+   unsigned long gfn = hpte_rpn(guest_rpte, apsize);
+   int srcu_idx = srcu_read_lock(>srcu);
+   struct kvm_memory_slot *memslot =
+   __gfn_to_memslot(kvm_memslots(kvm), gfn);
+
+   if (memslot) {
+   unsigned long *rmapp;
+   rmapp = >arch.rmap[gfn - memslot->base_gfn];
+
+   lock_rmap(rmapp);
+   kvmppc_unmap_hpte(kvm, idx, rmapp, gfn);
+   unlock_rmap(rmapp);
+   }
+
+   srcu_read_unlock(>srcu, srcu_idx);
+   }
+
+   /* Reload PTE after unmap */
+   vpte = be64_to_cpu(hptep[0]);
+
+   BUG_ON(vpte & HPTE_V_VALID);
+   BUG_ON(!(vpte & HPTE_V_ABSENT));
+
+   ret = 0;
+   if (!(vpte & HPTE_V_BOLTED))
+   goto out;
+
+   rpte = be64_to_cpu(hptep[1]);
+   psize = hpte_base_page_size(vpte, rpte);
+   avpn = HPTE_V_AVPN_VAL(vpte) & ~((psize - 1) >> 23);
+   pteg = idx / HPTES_PER_GROUP;
+   if (vpte & HPTE_V_SECONDARY)
+   pteg = ~pteg;
+
+   if (!(vpte & HPTE_V_1TB_SEG)) {
+   unsigned long offset, vsid;
+
+   /* We only have 28 - 23 bits of offset in avpn */
+   offset = (avpn & 0x1f) << 23;
+   vsid = avpn >> 5;
+   /* We can find more bits from the pteg value */
+   if (psize < (1ULL << 23))
+   offset |= ((vsid ^ pteg) & old_hash_mask) * psize;
+
+   hash = vsid ^ (offset / psize);
+   } else {
+   unsigned long offset, vsid;
+
+   /* We only have 40 - 23 bits of seg_off in avpn */
+   offset = (avpn & 0x1) << 23;
+   vsid = avpn >> 17;
+   if (psize < (1ULL << 23))
+   offset |= ((vsid ^ (vsid << 25) ^ pteg) & 
old_hash_mask) * psize;
+
+   hash = vsid ^ (vsid << 25) ^ (offset / psize);
+   }
+
+  

[PATCH 00/11] KVM implementation of PAPR HPT resizing extension

2016-12-14 Thread David Gibson
Here is the KVM implementation for the proposed PAPR extension which
allows the runtime resizing of a PAPR guest's Hashed Page Table (HPT).

Using this requires a guest kernel with support for the extension.
Patches for guest side support in Linux were posted earlier:
  https://lists.ozlabs.org/pipermail/linuxppc-dev/2016-December/152164.html

It also requires userspace (i.e. qemu) to intercept the HPT resizing
hypercalls and invoke the KVM ioctl()s to implement them.  This is
done instead of having KVM directly intercept the hypercalls, so that
userspace can, if useful, impose additional restrictions on resizes:
for example it could refuse them entirely if policy for the VM
precludes resizing, or it could limit the size of HPT the guest can
request to meet resource limits.

Patches to implement the userspace part of HPT resizing are proposed
for qemu-2.9, and can be found at:
  https://github.com/dgibson/qemu/tree/upstream/hpt-resize

I'm posting these now, in the hopes that both these and the
corresponding guest side patches can be staged and merged for the 4.11
window.

David Gibson (11):
  powerpc/kvm: Reserve capabilities and ioctls for HPT resizing
  powerpc/kvm: Rename kvm_alloc_hpt() for clarity
  powerpc/kvm: Gather HPT related variables into sub-structure
  powerpc/kvm: Don't store values derivable from HPT order
  powerpc/kvm: Split HPT allocation from activation
  powerpc/kvm: Allow KVM_PPC_ALLOCATE_HTAB ioctl() to change HPT size
  powerpc/kvm: Create kvmppc_unmap_hpte_helper()
  powerpc/kvm: KVM-HV HPT resizing stub implementation
  powerpc/kvm: Outline of KVM-HV HPT resizing implementation
  powerpc/kvm: KVM-HV HPT resizing implementation
  powerpc/kvm: Advertise availability of HPT resizing on KVM HV

 arch/powerpc/include/asm/kvm_book3s_64.h |  15 +
 arch/powerpc/include/asm/kvm_host.h  |  17 +-
 arch/powerpc/include/asm/kvm_ppc.h   |  15 +-
 arch/powerpc/kvm/book3s_64_mmu_hv.c  | 629 +--
 arch/powerpc/kvm/book3s_hv.c |  50 ++-
 arch/powerpc/kvm/book3s_hv_builtin.c |   8 +-
 arch/powerpc/kvm/book3s_hv_rm_mmu.c  |  62 +--
 arch/powerpc/kvm/powerpc.c   |   6 +
 include/uapi/linux/kvm.h |  10 +
 9 files changed, 637 insertions(+), 175 deletions(-)

-- 
2.9.3



[PATCH 10/11] powerpc/kvm: KVM-HV HPT resizing implementation

2016-12-14 Thread David Gibson
This adds the "guts" of the implementation for the HPT resizing PAPR
extension.  It has the code to allocate and clear a new HPT, rehash an
existing HPT's entries into it, and accomplish the switchover for a
KVM guest from the old HPT to the new one.

Signed-off-by: David Gibson 
---
 arch/powerpc/kvm/book3s_64_mmu_hv.c | 188 +++-
 1 file changed, 187 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c 
b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index a2ac749..2a86c07 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -66,6 +66,10 @@ struct kvm_resize_hpt {
/* These fields protected by kvm->lock */
int error;
bool prepare_done;
+
+   /* Private to the work thread, until prepare_done is true,
+* then protected by kvm->resize_hpt_sem */
+   struct kvm_hpt_info hpt;
 };
 
 static void kvmppc_rmap_reset(struct kvm *kvm);
@@ -1189,21 +1193,203 @@ void kvmppc_unpin_guest_page(struct kvm *kvm, void 
*va, unsigned long gpa,
  */
 static int resize_hpt_allocate(struct kvm_resize_hpt *resize)
 {
+   int rc;
+
+   rc = kvmppc_allocate_hpt(>hpt, resize->order);
+   if (rc < 0)
+   return rc;
+
+   resize_hpt_debug(resize, "resize_hpt_allocate(): HPT @ 0x%lx\n",
+resize->hpt.virt);
+
return 0;
 }
 
+static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
+   unsigned long idx)
+{
+   struct kvm *kvm = resize->kvm;
+   struct kvm_hpt_info *old = >arch.hpt;
+   struct kvm_hpt_info *new = >hpt;
+   unsigned long old_hash_mask = (1ULL << (old->order - 7)) - 1;
+   unsigned long new_hash_mask = (1ULL << (new->order - 7)) - 1;
+   __be64 *hptep, *new_hptep;
+   unsigned long vpte, rpte, guest_rpte;
+   int ret;
+   struct revmap_entry *rev;
+   unsigned long apsize, psize, avpn, pteg, hash;
+   unsigned long new_idx, new_pteg, replace_vpte;
+
+   hptep = (__be64 *)(old->virt + (idx << 4));
+
+   /* Guest is stopped, so new HPTEs can't be added or faulted
+* in, only unmapped or altered by host actions.  So, it's
+* safe to check this before we take the HPTE lock */
+   vpte = be64_to_cpu(hptep[0]);
+   if (!(vpte & HPTE_V_VALID) && !(vpte & HPTE_V_ABSENT))
+   return 0; /* nothing to do */
+
+   while (!try_lock_hpte(hptep, HPTE_V_HVLOCK))
+   cpu_relax();
+
+   vpte = be64_to_cpu(hptep[0]);
+
+   ret = 0;
+   if (!(vpte & HPTE_V_VALID) && !(vpte & HPTE_V_ABSENT))
+   /* Nothing to do */
+   goto out;
+
+   /* Unmap */
+   rev = >rev[idx];
+   guest_rpte = rev->guest_rpte;
+
+   ret = -EIO;
+   apsize = hpte_page_size(vpte, guest_rpte);
+   if (!apsize)
+   goto out;
+
+   if (vpte & HPTE_V_VALID) {
+   unsigned long gfn = hpte_rpn(guest_rpte, apsize);
+   int srcu_idx = srcu_read_lock(>srcu);
+   struct kvm_memory_slot *memslot =
+   __gfn_to_memslot(kvm_memslots(kvm), gfn);
+
+   if (memslot) {
+   unsigned long *rmapp;
+   rmapp = >arch.rmap[gfn - memslot->base_gfn];
+
+   lock_rmap(rmapp);
+   kvmppc_unmap_hpte(kvm, idx, rmapp, gfn);
+   unlock_rmap(rmapp);
+   }
+
+   srcu_read_unlock(>srcu, srcu_idx);
+   }
+
+   /* Reload PTE after unmap */
+   vpte = be64_to_cpu(hptep[0]);
+
+   BUG_ON(vpte & HPTE_V_VALID);
+   BUG_ON(!(vpte & HPTE_V_ABSENT));
+
+   ret = 0;
+   if (!(vpte & HPTE_V_BOLTED))
+   goto out;
+
+   rpte = be64_to_cpu(hptep[1]);
+   psize = hpte_base_page_size(vpte, rpte);
+   avpn = HPTE_V_AVPN_VAL(vpte) & ~((psize - 1) >> 23);
+   pteg = idx / HPTES_PER_GROUP;
+   if (vpte & HPTE_V_SECONDARY)
+   pteg = ~pteg;
+
+   if (!(vpte & HPTE_V_1TB_SEG)) {
+   unsigned long offset, vsid;
+
+   /* We only have 28 - 23 bits of offset in avpn */
+   offset = (avpn & 0x1f) << 23;
+   vsid = avpn >> 5;
+   /* We can find more bits from the pteg value */
+   if (psize < (1ULL << 23))
+   offset |= ((vsid ^ pteg) & old_hash_mask) * psize;
+
+   hash = vsid ^ (offset / psize);
+   } else {
+   unsigned long offset, vsid;
+
+   /* We only have 40 - 23 bits of seg_off in avpn */
+   offset = (avpn & 0x1) << 23;
+   vsid = avpn >> 17;
+   if (psize < (1ULL << 23))
+   offset |= ((vsid ^ (vsid << 25) ^ pteg) & 
old_hash_mask) * psize;
+
+   hash = vsid ^ (vsid << 25) ^ (offset / psize);
+   }
+
+   new_pteg = hash & 

[PATCH 00/11] KVM implementation of PAPR HPT resizing extension

2016-12-14 Thread David Gibson
Here is the KVM implementation for the proposed PAPR extension which
allows the runtime resizing of a PAPR guest's Hashed Page Table (HPT).

Using this requires a guest kernel with support for the extension.
Patches for guest side support in Linux were posted earlier:
  https://lists.ozlabs.org/pipermail/linuxppc-dev/2016-December/152164.html

It also requires userspace (i.e. qemu) to intercept the HPT resizing
hypercalls and invoke the KVM ioctl()s to implement them.  This is
done instead of having KVM directly intercept the hypercalls, so that
userspace can, if useful, impose additional restrictions on resizes:
for example it could refuse them entirely if policy for the VM
precludes resizing, or it could limit the size of HPT the guest can
request to meet resource limits.

Patches to implement the userspace part of HPT resizing are proposed
for qemu-2.9, and can be found at:
  https://github.com/dgibson/qemu/tree/upstream/hpt-resize

I'm posting these now, in the hopes that both these and the
corresponding guest side patches can be staged and merged for the 4.11
window.

David Gibson (11):
  powerpc/kvm: Reserve capabilities and ioctls for HPT resizing
  powerpc/kvm: Rename kvm_alloc_hpt() for clarity
  powerpc/kvm: Gather HPT related variables into sub-structure
  powerpc/kvm: Don't store values derivable from HPT order
  powerpc/kvm: Split HPT allocation from activation
  powerpc/kvm: Allow KVM_PPC_ALLOCATE_HTAB ioctl() to change HPT size
  powerpc/kvm: Create kvmppc_unmap_hpte_helper()
  powerpc/kvm: KVM-HV HPT resizing stub implementation
  powerpc/kvm: Outline of KVM-HV HPT resizing implementation
  powerpc/kvm: KVM-HV HPT resizing implementation
  powerpc/kvm: Advertise availability of HPT resizing on KVM HV

 arch/powerpc/include/asm/kvm_book3s_64.h |  15 +
 arch/powerpc/include/asm/kvm_host.h  |  17 +-
 arch/powerpc/include/asm/kvm_ppc.h   |  15 +-
 arch/powerpc/kvm/book3s_64_mmu_hv.c  | 629 +--
 arch/powerpc/kvm/book3s_hv.c |  50 ++-
 arch/powerpc/kvm/book3s_hv_builtin.c |   8 +-
 arch/powerpc/kvm/book3s_hv_rm_mmu.c  |  62 +--
 arch/powerpc/kvm/powerpc.c   |   6 +
 include/uapi/linux/kvm.h |  10 +
 9 files changed, 637 insertions(+), 175 deletions(-)

-- 
2.9.3



[PATCH 01/11] powerpc/kvm: Reserve capabilities and ioctls for HPT resizing

2016-12-14 Thread David Gibson
This adds a new powerpc-specific KVM_CAP_SPAPR_RESIZE_HPT capability to
advertise whether KVM is capable of handling the PAPR extensions for
resizing the hashed page table during guest runtime.

At present, HPT resizing is possible with KVM PR without kernel
modification, since the HPT is managed within qemu.  It's not possible yet
with KVM HV, because the HPT is managed by KVM.  At present, qemu has to
use other capabilities which (by accident) reveal whether PR or HV is in
use to know if it can advertise HPT resizing capability to the guest.

To avoid ambiguity with existing kernels, the encoding is a bit odd.
0 means "unknown" since that's what previous kernels will return
1 means "HPT resize possible if and only if the HPT is allocated in
  userspace, rather than in the kernel".  Userspace can check
  KVM_CAP_PPC_ALLOC_HTAB to determine if that's the case.  In practice
  this will give the same results as userspace's fallback check.
2 will mean "HPT resize available and implemented via ioctl()s
  KVM_PPC_RESIZE_HPT_PREPARE and KVM_PPC_RESIZE_HPT_COMMIT"

For now we always return 1, but the intention is to return 2 once HPT
resize is implemented for KVM HV.

Signed-off-by: David Gibson 
---
 arch/powerpc/kvm/powerpc.c |  3 +++
 include/uapi/linux/kvm.h   | 10 ++
 2 files changed, 13 insertions(+)

diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index efd1183..bb23923 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -605,6 +605,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_SPAPR_MULTITCE:
r = 1;
break;
+   case KVM_CAP_SPAPR_RESIZE_HPT:
+   r = 1; /* resize allowed only if HPT is outside kernel */
+   break;
 #endif
case KVM_CAP_PPC_HTM:
r = cpu_has_feature(CPU_FTR_TM_COMP) &&
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index cac48ed..904afe0 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -685,6 +685,12 @@ struct kvm_ppc_smmu_info {
struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ];
 };
 
+/* for KVM_PPC_RESIZE_HPT_{PREPARE,COMMIT} */
+struct kvm_ppc_resize_hpt {
+   __u64 flags;
+   __u32 shift;
+};
+
 #define KVMIO 0xAE
 
 /* machine type bits, to be used as argument to KVM_CREATE_VM */
@@ -871,6 +877,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_S390_USER_INSTR0 130
 #define KVM_CAP_MSI_DEVID 131
 #define KVM_CAP_PPC_HTM 132
+#define KVM_CAP_SPAPR_RESIZE_HPT 133
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -1187,6 +1194,9 @@ struct kvm_s390_ucas_mapping {
 #define KVM_ARM_SET_DEVICE_ADDR  _IOW(KVMIO,  0xab, struct 
kvm_arm_device_addr)
 /* Available with KVM_CAP_PPC_RTAS */
 #define KVM_PPC_RTAS_DEFINE_TOKEN _IOW(KVMIO,  0xac, struct 
kvm_rtas_token_args)
+/* Available with KVM_CAP_SPAPR_RESIZE_HPT */
+#define KVM_PPC_RESIZE_HPT_PREPARE _IOR(KVMIO, 0xad, struct kvm_ppc_resize_hpt)
+#define KVM_PPC_RESIZE_HPT_COMMIT _IOR(KVMIO, 0xae, struct kvm_ppc_resize_hpt)
 
 /* ioctl for vm fd */
 #define KVM_CREATE_DEVICE_IOWR(KVMIO,  0xe0, struct kvm_create_device)
-- 
2.9.3



[PATCH 11/11] powerpc/kvm: Advertise availability of HPT resizing on KVM HV

2016-12-14 Thread David Gibson
This updates the KVM_CAP_SPAPR_RESIZE_HPT capability to advertise the
presence of in-kernel HPT resizing on KVM HV.  In fact the HPT resizing
isn't fully implemented, but this allows us to experiment with what's
there.

Signed-off-by: David Gibson 
---
 arch/powerpc/kvm/powerpc.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index bb23923..965d26b 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -606,7 +606,10 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = 1;
break;
case KVM_CAP_SPAPR_RESIZE_HPT:
-   r = 1; /* resize allowed only if HPT is outside kernel */
+   if (hv_enabled)
+   r = 2; /* In-kernel resize implementation */
+   else
+   r = 1; /* outside kernel resize allowed */
break;
 #endif
case KVM_CAP_PPC_HTM:
-- 
2.9.3



[PATCH 09/11] powerpc/kvm: Outline of KVM-HV HPT resizing implementation

2016-12-14 Thread David Gibson
This adds an outline (not yet working) of an implementation for the HPT
resizing PAPR extension.  Specifically it adds the work function which will
handle preparation for the resize, and synchronization between this, the
the HPT resizing hypercalls, the guest page fault path and guest HPT update
paths.

Signed-off-by: David Gibson 
---
 arch/powerpc/include/asm/kvm_host.h |   3 +
 arch/powerpc/kvm/book3s_64_mmu_hv.c | 179 +++-
 arch/powerpc/kvm/book3s_hv.c|   3 +
 3 files changed, 183 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index 3900f63..23559c3 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -248,6 +248,8 @@ struct kvm_hpt_info {
int cma;
 };
 
+struct kvm_resize_hpt;
+
 struct kvm_arch {
unsigned int lpid;
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
@@ -268,6 +270,7 @@ struct kvm_arch {
cpumask_t need_tlb_flush;
struct dentry *debugfs_dir;
struct dentry *htab_dentry;
+   struct kvm_resize_hpt *resize_hpt; /* protected by kvm->lock */
 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
 #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
struct mutex hpt_mutex;
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c 
b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index ac0f18b..a2ac749 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -40,9 +40,34 @@
 
 #include "trace_hv.h"
 
+//#define DEBUG_RESIZE_HPT 1
+
+#ifdef DEBUG_RESIZE_HPT
+#define resize_hpt_debug(resize, ...)  \
+   do {\
+   printk(KERN_DEBUG "RESIZE HPT %p: ", resize);   \
+   printk(__VA_ARGS__);\
+   } while (0)
+#else
+#define resize_hpt_debug(resize, ...)  \
+   do { } while (0)
+#endif
+
 static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
long pte_index, unsigned long pteh,
unsigned long ptel, unsigned long *pte_idx_ret);
+
+struct kvm_resize_hpt {
+   /* These fields read-only after init */
+   struct kvm *kvm;
+   struct work_struct work;
+   u32 order;
+
+   /* These fields protected by kvm->lock */
+   int error;
+   bool prepare_done;
+};
+
 static void kvmppc_rmap_reset(struct kvm *kvm);
 
 int kvmppc_allocate_hpt(struct kvm_hpt_info *info, u32 order)
@@ -1162,17 +1187,167 @@ void kvmppc_unpin_guest_page(struct kvm *kvm, void 
*va, unsigned long gpa,
 /*
  * HPT resizing
  */
+static int resize_hpt_allocate(struct kvm_resize_hpt *resize)
+{
+   return 0;
+}
+
+static int resize_hpt_rehash(struct kvm_resize_hpt *resize)
+{
+   return -EIO;
+}
+
+static void resize_hpt_pivot(struct kvm_resize_hpt *resize)
+{
+}
+
+static void resize_hpt_release(struct kvm *kvm, struct kvm_resize_hpt *resize)
+{
+   BUG_ON(kvm->arch.resize_hpt != resize);
+   kvm->arch.resize_hpt = NULL;
+   kfree(resize);
+}
+
+static void resize_hpt_prepare_work(struct work_struct *work)
+{
+   struct kvm_resize_hpt *resize = container_of(work,
+struct kvm_resize_hpt,
+work);
+   struct kvm *kvm = resize->kvm;
+   int err;
+
+   resize_hpt_debug(resize, "resize_hpt_prepare_work(): order = %d\n",
+resize->order);
+
+   err = resize_hpt_allocate(resize);
+
+   mutex_lock(>lock);
+
+   resize->error = err;
+   resize->prepare_done = true;
+
+   mutex_unlock(>lock);
+}
 
 long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,
 struct kvm_ppc_resize_hpt *rhpt)
 {
-   return -EIO;
+   unsigned long flags = rhpt->flags;
+   unsigned long shift = rhpt->shift;
+   struct kvm_resize_hpt *resize;
+   int ret;
+
+   if (flags != 0)
+   return -EINVAL;
+
+   if (shift && ((shift < 18) || (shift > 46)))
+   return -EINVAL;
+
+   mutex_lock(>lock);
+
+   resize = kvm->arch.resize_hpt;
+
+   if (resize) {
+   if (resize->order == shift) {
+   /* Suitable resize in progress */
+   if (resize->prepare_done) {
+   ret = resize->error;
+   if (ret != 0)
+   resize_hpt_release(kvm, resize);
+   } else {
+   ret = 100; /* estimated time in ms */
+   }
+
+   goto out;
+   }
+
+   /* not suitable, cancel it */
+   resize_hpt_release(kvm, resize);
+   }
+
+   ret = 0;
+   if (!shift)
+   goto out; /* 

[PATCH 01/11] powerpc/kvm: Reserve capabilities and ioctls for HPT resizing

2016-12-14 Thread David Gibson
This adds a new powerpc-specific KVM_CAP_SPAPR_RESIZE_HPT capability to
advertise whether KVM is capable of handling the PAPR extensions for
resizing the hashed page table during guest runtime.

At present, HPT resizing is possible with KVM PR without kernel
modification, since the HPT is managed within qemu.  It's not possible yet
with KVM HV, because the HPT is managed by KVM.  At present, qemu has to
use other capabilities which (by accident) reveal whether PR or HV is in
use to know if it can advertise HPT resizing capability to the guest.

To avoid ambiguity with existing kernels, the encoding is a bit odd.
0 means "unknown" since that's what previous kernels will return
1 means "HPT resize possible if and only if the HPT is allocated in
  userspace, rather than in the kernel".  Userspace can check
  KVM_CAP_PPC_ALLOC_HTAB to determine if that's the case.  In practice
  this will give the same results as userspace's fallback check.
2 will mean "HPT resize available and implemented via ioctl()s
  KVM_PPC_RESIZE_HPT_PREPARE and KVM_PPC_RESIZE_HPT_COMMIT"

For now we always return 1, but the intention is to return 2 once HPT
resize is implemented for KVM HV.

Signed-off-by: David Gibson 
---
 arch/powerpc/kvm/powerpc.c |  3 +++
 include/uapi/linux/kvm.h   | 10 ++
 2 files changed, 13 insertions(+)

diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index efd1183..bb23923 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -605,6 +605,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_SPAPR_MULTITCE:
r = 1;
break;
+   case KVM_CAP_SPAPR_RESIZE_HPT:
+   r = 1; /* resize allowed only if HPT is outside kernel */
+   break;
 #endif
case KVM_CAP_PPC_HTM:
r = cpu_has_feature(CPU_FTR_TM_COMP) &&
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index cac48ed..904afe0 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -685,6 +685,12 @@ struct kvm_ppc_smmu_info {
struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ];
 };
 
+/* for KVM_PPC_RESIZE_HPT_{PREPARE,COMMIT} */
+struct kvm_ppc_resize_hpt {
+   __u64 flags;
+   __u32 shift;
+};
+
 #define KVMIO 0xAE
 
 /* machine type bits, to be used as argument to KVM_CREATE_VM */
@@ -871,6 +877,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_S390_USER_INSTR0 130
 #define KVM_CAP_MSI_DEVID 131
 #define KVM_CAP_PPC_HTM 132
+#define KVM_CAP_SPAPR_RESIZE_HPT 133
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -1187,6 +1194,9 @@ struct kvm_s390_ucas_mapping {
 #define KVM_ARM_SET_DEVICE_ADDR  _IOW(KVMIO,  0xab, struct 
kvm_arm_device_addr)
 /* Available with KVM_CAP_PPC_RTAS */
 #define KVM_PPC_RTAS_DEFINE_TOKEN _IOW(KVMIO,  0xac, struct 
kvm_rtas_token_args)
+/* Available with KVM_CAP_SPAPR_RESIZE_HPT */
+#define KVM_PPC_RESIZE_HPT_PREPARE _IOR(KVMIO, 0xad, struct kvm_ppc_resize_hpt)
+#define KVM_PPC_RESIZE_HPT_COMMIT _IOR(KVMIO, 0xae, struct kvm_ppc_resize_hpt)
 
 /* ioctl for vm fd */
 #define KVM_CREATE_DEVICE_IOWR(KVMIO,  0xe0, struct kvm_create_device)
-- 
2.9.3



[PATCH 11/11] powerpc/kvm: Advertise availability of HPT resizing on KVM HV

2016-12-14 Thread David Gibson
This updates the KVM_CAP_SPAPR_RESIZE_HPT capability to advertise the
presence of in-kernel HPT resizing on KVM HV.  In fact the HPT resizing
isn't fully implemented, but this allows us to experiment with what's
there.

Signed-off-by: David Gibson 
---
 arch/powerpc/kvm/powerpc.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index bb23923..965d26b 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -606,7 +606,10 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = 1;
break;
case KVM_CAP_SPAPR_RESIZE_HPT:
-   r = 1; /* resize allowed only if HPT is outside kernel */
+   if (hv_enabled)
+   r = 2; /* In-kernel resize implementation */
+   else
+   r = 1; /* outside kernel resize allowed */
break;
 #endif
case KVM_CAP_PPC_HTM:
-- 
2.9.3



[PATCH 09/11] powerpc/kvm: Outline of KVM-HV HPT resizing implementation

2016-12-14 Thread David Gibson
This adds an outline (not yet working) of an implementation for the HPT
resizing PAPR extension.  Specifically it adds the work function which will
handle preparation for the resize, and synchronization between this, the
the HPT resizing hypercalls, the guest page fault path and guest HPT update
paths.

Signed-off-by: David Gibson 
---
 arch/powerpc/include/asm/kvm_host.h |   3 +
 arch/powerpc/kvm/book3s_64_mmu_hv.c | 179 +++-
 arch/powerpc/kvm/book3s_hv.c|   3 +
 3 files changed, 183 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index 3900f63..23559c3 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -248,6 +248,8 @@ struct kvm_hpt_info {
int cma;
 };
 
+struct kvm_resize_hpt;
+
 struct kvm_arch {
unsigned int lpid;
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
@@ -268,6 +270,7 @@ struct kvm_arch {
cpumask_t need_tlb_flush;
struct dentry *debugfs_dir;
struct dentry *htab_dentry;
+   struct kvm_resize_hpt *resize_hpt; /* protected by kvm->lock */
 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
 #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
struct mutex hpt_mutex;
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c 
b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index ac0f18b..a2ac749 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -40,9 +40,34 @@
 
 #include "trace_hv.h"
 
+//#define DEBUG_RESIZE_HPT 1
+
+#ifdef DEBUG_RESIZE_HPT
+#define resize_hpt_debug(resize, ...)  \
+   do {\
+   printk(KERN_DEBUG "RESIZE HPT %p: ", resize);   \
+   printk(__VA_ARGS__);\
+   } while (0)
+#else
+#define resize_hpt_debug(resize, ...)  \
+   do { } while (0)
+#endif
+
 static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
long pte_index, unsigned long pteh,
unsigned long ptel, unsigned long *pte_idx_ret);
+
+struct kvm_resize_hpt {
+   /* These fields read-only after init */
+   struct kvm *kvm;
+   struct work_struct work;
+   u32 order;
+
+   /* These fields protected by kvm->lock */
+   int error;
+   bool prepare_done;
+};
+
 static void kvmppc_rmap_reset(struct kvm *kvm);
 
 int kvmppc_allocate_hpt(struct kvm_hpt_info *info, u32 order)
@@ -1162,17 +1187,167 @@ void kvmppc_unpin_guest_page(struct kvm *kvm, void 
*va, unsigned long gpa,
 /*
  * HPT resizing
  */
+static int resize_hpt_allocate(struct kvm_resize_hpt *resize)
+{
+   return 0;
+}
+
+static int resize_hpt_rehash(struct kvm_resize_hpt *resize)
+{
+   return -EIO;
+}
+
+static void resize_hpt_pivot(struct kvm_resize_hpt *resize)
+{
+}
+
+static void resize_hpt_release(struct kvm *kvm, struct kvm_resize_hpt *resize)
+{
+   BUG_ON(kvm->arch.resize_hpt != resize);
+   kvm->arch.resize_hpt = NULL;
+   kfree(resize);
+}
+
+static void resize_hpt_prepare_work(struct work_struct *work)
+{
+   struct kvm_resize_hpt *resize = container_of(work,
+struct kvm_resize_hpt,
+work);
+   struct kvm *kvm = resize->kvm;
+   int err;
+
+   resize_hpt_debug(resize, "resize_hpt_prepare_work(): order = %d\n",
+resize->order);
+
+   err = resize_hpt_allocate(resize);
+
+   mutex_lock(>lock);
+
+   resize->error = err;
+   resize->prepare_done = true;
+
+   mutex_unlock(>lock);
+}
 
 long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,
 struct kvm_ppc_resize_hpt *rhpt)
 {
-   return -EIO;
+   unsigned long flags = rhpt->flags;
+   unsigned long shift = rhpt->shift;
+   struct kvm_resize_hpt *resize;
+   int ret;
+
+   if (flags != 0)
+   return -EINVAL;
+
+   if (shift && ((shift < 18) || (shift > 46)))
+   return -EINVAL;
+
+   mutex_lock(>lock);
+
+   resize = kvm->arch.resize_hpt;
+
+   if (resize) {
+   if (resize->order == shift) {
+   /* Suitable resize in progress */
+   if (resize->prepare_done) {
+   ret = resize->error;
+   if (ret != 0)
+   resize_hpt_release(kvm, resize);
+   } else {
+   ret = 100; /* estimated time in ms */
+   }
+
+   goto out;
+   }
+
+   /* not suitable, cancel it */
+   resize_hpt_release(kvm, resize);
+   }
+
+   ret = 0;
+   if (!shift)
+   goto out; /* nothing to do */
+
+   /* 

[PATCH 2/7] blk-mq: make mq_ops a const pointer

2016-12-14 Thread Jens Axboe
We never change it, make that clear.

Signed-off-by: Jens Axboe 
---
 block/blk-mq.c | 2 +-
 include/linux/blk-mq.h | 2 +-
 include/linux/blkdev.h | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index d79fdc11b1ee..87b7eaa1cb74 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -639,7 +639,7 @@ struct blk_mq_timeout_data {
 
 void blk_mq_rq_timed_out(struct request *req, bool reserved)
 {
-   struct blk_mq_ops *ops = req->q->mq_ops;
+   const struct blk_mq_ops *ops = req->q->mq_ops;
enum blk_eh_timer_return ret = BLK_EH_RESET_TIMER;
 
/*
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 4a2ab5d99ff7..afc81d77e471 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -60,7 +60,7 @@ struct blk_mq_hw_ctx {
 
 struct blk_mq_tag_set {
unsigned int*mq_map;
-   struct blk_mq_ops   *ops;
+   const struct blk_mq_ops *ops;
unsigned intnr_hw_queues;
unsigned intqueue_depth;/* max hw supported */
unsigned intreserved_tags;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 286b2a264383..7c40fb838b44 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -408,7 +408,7 @@ struct request_queue {
dma_drain_needed_fn *dma_drain_needed;
lld_busy_fn *lld_busy_fn;
 
-   struct blk_mq_ops   *mq_ops;
+   const struct blk_mq_ops *mq_ops;
 
unsigned int*mq_map;
 
-- 
2.7.4



[PATCH 2/7] blk-mq: make mq_ops a const pointer

2016-12-14 Thread Jens Axboe
We never change it, make that clear.

Signed-off-by: Jens Axboe 
---
 block/blk-mq.c | 2 +-
 include/linux/blk-mq.h | 2 +-
 include/linux/blkdev.h | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index d79fdc11b1ee..87b7eaa1cb74 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -639,7 +639,7 @@ struct blk_mq_timeout_data {
 
 void blk_mq_rq_timed_out(struct request *req, bool reserved)
 {
-   struct blk_mq_ops *ops = req->q->mq_ops;
+   const struct blk_mq_ops *ops = req->q->mq_ops;
enum blk_eh_timer_return ret = BLK_EH_RESET_TIMER;
 
/*
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 4a2ab5d99ff7..afc81d77e471 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -60,7 +60,7 @@ struct blk_mq_hw_ctx {
 
 struct blk_mq_tag_set {
unsigned int*mq_map;
-   struct blk_mq_ops   *ops;
+   const struct blk_mq_ops *ops;
unsigned intnr_hw_queues;
unsigned intqueue_depth;/* max hw supported */
unsigned intreserved_tags;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 286b2a264383..7c40fb838b44 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -408,7 +408,7 @@ struct request_queue {
dma_drain_needed_fn *dma_drain_needed;
lld_busy_fn *lld_busy_fn;
 
-   struct blk_mq_ops   *mq_ops;
+   const struct blk_mq_ops *mq_ops;
 
unsigned int*mq_map;
 
-- 
2.7.4



Re: [PATCH V2] Coccinelle: check usleep_range() usage

2016-12-14 Thread Julia Lawall


On Thu, 15 Dec 2016, Nicholas Mc Guire wrote:

> Documentation/timers/timers-howto.txt outlines the intended usage of
> usleep_range(), this spatch tries to locate missuse/out-of-spec cases.
>
> Signed-off-by: Nicholas Mc Guire <hof...@osadl.org>
> ---
> V2: added context mode as suggested by Julia Lawall <julia.law...@lip6.fr>
> added min
> added in the range checks as they are reasonably reliable based on
> a review of all 1648 call sites of usleep_range()
>
> 1648 calls total
> 1488 pass numeric values only (90.29%)
>   27 min below 10us (1.81%)
>   40 min above 10ms (2.68%)
>  min out of spec 4.50%
>   76 preprocessor constants (4.61%)
>1 min below 10us (1.31%)
>8 min above 10ms (10.52%)
>  min out of spec 11.84%
>   85 expressions (5.15%)
> 1(0) min below 10us (1.50%)*
> 6(2) min above 10ms (7.50%)*
>  min out of spec 9.0%
> Errors:
>   23 where min==max  (1.39%)
>0 where max < min (0.00%)
>
> Total:
>   Bugs: 6.48%-10.70%*
>   Crit: 3.09%-3.15%* (min < 10, min==max, max < min)
>   Detectable by coccinelle:
>   Bugs: 74/103 (71.8%)
>   Crit: 50/52 (96.1%)
> * numbers estimated based on code review
>
> Patch is against 4.9.0 (localversion-next is next-20161214)
>
>  scripts/coccinelle/api/bad_usleep_range.cocci | 88 
> +++
>  1 file changed, 88 insertions(+)
>  create mode 100644 scripts/coccinelle/api/bad_usleep_range.cocci
>
> diff --git a/scripts/coccinelle/api/bad_usleep_range.cocci 
> b/scripts/coccinelle/api/bad_usleep_range.cocci
> new file mode 100644
> index 000..003e9ef
> --- /dev/null
> +++ b/scripts/coccinelle/api/bad_usleep_range.cocci
> @@ -0,0 +1,88 @@
> +/// report bad/problematic usleep_range usage
> +//
> +// This is a checker for the documented intended use of usleep_range
> +// see: Documentation/timers/timers-howto.txt and
> +// Link: http://lkml.org/lkml/2016/11/29/54 for some notes on
> +//   when mdelay might not be a suitable replacement
> +//
> +// Limitations:
> +//  * The numeric limits are only checked when numeric constants are in
> +//use (as of 4.9.0 that's 90.29% of the calls) no constant folding
> +//is done - so this can miss some out-of-range cases - but in 4.9.0
> +//it was catching 74 of the 103 bad cases (71.8%) and 50 of 52
> +//(96.1%) of the critical cases (min < 10 and min==max - there
> +//  * There may be RT use-cases where both min < 10 and min==max)
> +//justified (e.g. high-throughput drivers on a shielded core)
> +//
> +// 1) warn if min == max
> +//
> +//  The problem is that usleep_range is calculating the delay by
> +//  exp = ktime_add_us(ktime_get(), min)
> +//  delta = (u64)(max - min) * NSEC_PER_USEC
> +//  so delta is set to 0 if min==max
> +//  and then calls
> +//  schedule_hrtimeout_range(exp, 0,...)
> +//  effectively this means that the clock subsystem has no room to
> +//  optimize. usleep_range() is in non-atomic context so a 0 range
> +//  makes very little sense as the task can be preempted anyway so
> +//  there is no guarantee that the 0 range would be adding much
> +//  precision - it just removes optimization potential, so it probably
> +//  never really makes sense.
> +//
> +// 2) warn if min < 10 or min > 20ms
> +//
> +//  it makes little sense to use a non-atomic call for very short
> +//  delays because the scheduling jitter will most likely exceed
> +//  this limit - udelay() makes more sense in that case. For very
> +//  large delays using hrtimers is useless as preemption becomes
> +//  quite likely resulting in high inaccuracy anyway - so use
> +//  jiffies based msleep and don't burden the hrtimer subsystem.
> +//
> +// 3) warn if max < min
> +//
> +//  Joe Perches <j...@perches.com> added a check for this case
> +//  that is definitely wrong.
> +//
> +// Confidence: Moderate
> +// Copyright: (C) 2016 Nicholas Mc Guire, OSADL.  GPLv2.
> +// Comments:
> +// Options: --no-includes --include-headers
> +
> +virtual org
> +virtual report
> +virtual context
> +
> +@nullrangectx depends on context@
> +expression E1,E2;
> +position p;
> +@@
> +
> +* usleep_range@p(E1,E2)

This is going to give a context warning on every call to usleep_range.
Why not E1,E1?

> +
> +
> +@nullrange@
> +expression E1,E2;
> +position p;
> +@@
> +
> +  usleep_range@p(E1,E2)
> +
> +@script:python depends on !context@
> +p << nullrange.p;
> +min << nullrange.E1;
> +max << nullrange.E2;
> +@@
> +
> +if(min == max):
> +   msg = "WARNING: usleep_range min == max (%s) - consider delta " % (min)
> +   coccilib.repo

Re: [PATCH V2] Coccinelle: check usleep_range() usage

2016-12-14 Thread Julia Lawall


On Thu, 15 Dec 2016, Nicholas Mc Guire wrote:

> Documentation/timers/timers-howto.txt outlines the intended usage of
> usleep_range(), this spatch tries to locate misuse/out-of-spec cases.
>
> Signed-off-by: Nicholas Mc Guire 
> ---
> V2: added context mode as suggested by Julia Lawall 
> added min
> added in the range checks as they are reasonably reliable based on
> a review of all 1648 call sites of usleep_range()
>
> 1648 calls total
> 1488 pass numeric values only (90.29%)
>   27 min below 10us (1.81%)
>   40 min above 10ms (2.68%)
>  min out of spec 4.50%
>   76 preprocessor constants (4.61%)
>1 min below 10us (1.31%)
>8 min above 10ms (10.52%)
>  min out of spec 11.84%
>   85 expressions (5.15%)
> 1(0) min below 10us (1.50%)*
> 6(2) min above 10ms (7.50%)*
>  min out of spec 9.0%
> Errors:
>   23 where min==max  (1.39%)
>0 where max < min (0.00%)
>
> Total:
>   Bugs: 6.48%-10.70%*
>   Crit: 3.09%-3.15%* (min < 10, min==max, max < min)
>   Detectable by coccinelle:
>   Bugs: 74/103 (71.8%)
>   Crit: 50/52 (96.1%)
> * numbers estimated based on code review
>
> Patch is against 4.9.0 (localversion-next is next-20161214)
>
>  scripts/coccinelle/api/bad_usleep_range.cocci | 88 
> +++
>  1 file changed, 88 insertions(+)
>  create mode 100644 scripts/coccinelle/api/bad_usleep_range.cocci
>
> diff --git a/scripts/coccinelle/api/bad_usleep_range.cocci 
> b/scripts/coccinelle/api/bad_usleep_range.cocci
> new file mode 100644
> index 000..003e9ef
> --- /dev/null
> +++ b/scripts/coccinelle/api/bad_usleep_range.cocci
> @@ -0,0 +1,88 @@
> +/// report bad/problematic usleep_range usage
> +//
> +// This is a checker for the documented intended use of usleep_range
> +// see: Documentation/timers/timers-howto.txt and
> +// Link: http://lkml.org/lkml/2016/11/29/54 for some notes on
> +//   when mdelay might not be a suitable replacement
> +//
> +// Limitations:
> +//  * The numeric limits are only checked when numeric constants are in
> +//use (as of 4.9.0 that's 90.29% of the calls) no constant folding
> +//is done - so this can miss some out-of-range cases - but in 4.9.0
> +//it was catching 74 of the 103 bad cases (71.8%) and 50 of 52
> +//(96.1%) of the critical cases (min < 10 and min==max - there
> +//  * There may be RT use-cases where both min < 10 and min==max)
> +//justified (e.g. high-throughput drivers on a shielded core)
> +//
> +// 1) warn if min == max
> +//
> +//  The problem is that usleep_range is calculating the delay by
> +//  exp = ktime_add_us(ktime_get(), min)
> +//  delta = (u64)(max - min) * NSEC_PER_USEC
> +//  so delta is set to 0 if min==max
> +//  and then calls
> +//  schedule_hrtimeout_range(exp, 0,...)
> +//  effectively this means that the clock subsystem has no room to
> +//  optimize. usleep_range() is in non-atomic context so a 0 range
> +//  makes very little sense as the task can be preempted anyway so
> +//  there is no guarantee that the 0 range would be adding much
> +//  precision - it just removes optimization potential, so it probably
> +//  never really makes sense.
> +//
> +// 2) warn if min < 10 or min > 20ms
> +//
> +//  it makes little sense to use a non-atomic call for very short
> +//  delays because the scheduling jitter will most likely exceed
> +//  this limit - udelay() makes more sense in that case. For very
> +//  large delays using hrtimers is useless as preemption becomes
> +//  quite likely resulting in high inaccuracy anyway - so use
> +//  jiffies based msleep and don't burden the hrtimer subsystem.
> +//
> +// 3) warn if max < min
> +//
> +//  Joe Perches  added a check for this case
> +//  that is definitely wrong.
> +//
> +// Confidence: Moderate
> +// Copyright: (C) 2016 Nicholas Mc Guire, OSADL.  GPLv2.
> +// Comments:
> +// Options: --no-includes --include-headers
> +
> +virtual org
> +virtual report
> +virtual context
> +
> +@nullrangectx depends on context@
> +expression E1,E2;
> +position p;
> +@@
> +
> +* usleep_range@p(E1,E2)

This is going to give a context warning on every call to usleep_range.
Why not E1,E1?

> +
> +
> +@nullrange@
> +expression E1,E2;
> +position p;
> +@@
> +
> +  usleep_range@p(E1,E2)
> +
> +@script:python depends on !context@
> +p << nullrange.p;
> +min << nullrange.E1;
> +max << nullrange.E2;
> +@@
> +
> +if(min == max):
> +   msg = "WARNING: usleep_range min == max (%s) - consider delta " % (min)
> +   coccilib.report.print_report(p[0], msg)
> +if str.isdigit(min):

I guess this checks if

Re: [GIT PULL] xfs: updates for 4.10-rc1

2016-12-14 Thread Linus Torvalds
On Wed, Dec 14, 2016 at 8:16 PM, Dave Chinner  wrote:
>
> Can you please pull the XFS update from the tag below?

Pulled, going through my usual build stuff now.

> In other news you need to know about, this is likely the last pull
> request you'll see from me for some time. I'm about to go on
> extended leave at xmas time and won't be back until July next year.
> While I'm gone, Darrick is going to be taking over the job  [...]

.. and thanks for the heads-up.

I will try to act like I remembered this email when Darrick sends me a
pull request.

[ More likely, I'll instead be momentarily confused, search my email
archives, find this and go "Ahh.. " ]

Linus


Re: [GIT PULL] xfs: updates for 4.10-rc1

2016-12-14 Thread Linus Torvalds
On Wed, Dec 14, 2016 at 8:16 PM, Dave Chinner  wrote:
>
> Can you please pull the XFS update from the tag below?

Pulled, going through my usual build stuff now.

> In other news you need to know about, this is likely the last pull
> request you'll see from me for some time. I'm about to go on
> extended leave at xmas time and won't be back until July next year.
> While I'm gone, Darrick is going to be taking over the job  [...]

.. and thanks for the heads-up.

I will try to act like I remembered this email when Darrick sends me a
pull request.

[ More likely, I'll instead be momentarily confused, search my email
archives, find this and go "Ahh.. " ]

Linus


Re: [PATCH 01/37] ARM: dts: imx6dl-aristainetos2: Correct license text

2016-12-14 Thread Heiko Schocher

Hello Alexandre,

Am 15.12.2016 um 00:57 schrieb Alexandre Belloni:

The license text has been mangled at some point then copy pasted across
multiple files. Restore it to what it should be.
Note that this is not intended as a license change.

Cc: Dong Aisheng 
Cc: Geert Uytterhoeven 
Cc: Heiko Schocher 
Cc: Rafał Miłecki 
Cc: Ulf Hansson 
Signed-off-by: Alexandre Belloni 
---
  arch/arm/boot/dts/imx6dl-aristainetos2_4.dts | 10 +-
  arch/arm/boot/dts/imx6dl-aristainetos2_7.dts | 10 +-
  arch/arm/boot/dts/imx6qdl-aristainetos2.dtsi | 10 +-
  3 files changed, 15 insertions(+), 15 deletions(-)


Thanks!

Reviewed-by: Heiko Schocher 

Hmm.. BTW ... is this copy and paste not boring? Should we not
use "SPDX-License-Identifier" [1] like for example in U-Boot ?

bye,
Heiko
[1] https://spdx.org/licenses/



diff --git a/arch/arm/boot/dts/imx6dl-aristainetos2_4.dts 
b/arch/arm/boot/dts/imx6dl-aristainetos2_4.dts
index bb92f309c191..0677625463d6 100644
--- a/arch/arm/boot/dts/imx6dl-aristainetos2_4.dts
+++ b/arch/arm/boot/dts/imx6dl-aristainetos2_4.dts
@@ -12,17 +12,17 @@
   * modify it under the terms of the GNU General Public License
   * version 2 as published by the Free Software Foundation.
   *
- * This file is distributed in the hope that it will be useful
+ * This file is distributed in the hope that it will be useful,
   * but WITHOUT ANY WARRANTY; without even the implied warranty of
   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   * GNU General Public License for more details.
   *
- * Or, alternatively
+ * Or, alternatively,
   *
   *  b) Permission is hereby granted, free of charge, to any person
   * obtaining a copy of this software and associated documentation
   * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use
+ * restriction, including without limitation the rights to use,
   * copy, modify, merge, publish, distribute, sublicense, and/or
   * sell copies of the Software, and to permit persons to whom the
   * Software is furnished to do so, subject to the following
@@ -31,11 +31,11 @@
   * The above copyright notice and this permission notice shall be
   * included in all copies or substantial portions of the Software.
   *
- * THE SOFTWARE IS PROVIDED , WITHOUT WARRANTY OF ANY KIND
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
   * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
   * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
   * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
   * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
   * OTHER DEALINGS IN THE SOFTWARE.
diff --git a/arch/arm/boot/dts/imx6dl-aristainetos2_7.dts 
b/arch/arm/boot/dts/imx6dl-aristainetos2_7.dts
index 3d5ad2cc7e22..805b1318b7f7 100644
--- a/arch/arm/boot/dts/imx6dl-aristainetos2_7.dts
+++ b/arch/arm/boot/dts/imx6dl-aristainetos2_7.dts
@@ -12,17 +12,17 @@
   * modify it under the terms of the GNU General Public License
   * version 2 as published by the Free Software Foundation.
   *
- * This file is distributed in the hope that it will be useful
+ * This file is distributed in the hope that it will be useful,
   * but WITHOUT ANY WARRANTY; without even the implied warranty of
   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   * GNU General Public License for more details.
   *
- * Or, alternatively
+ * Or, alternatively,
   *
   *  b) Permission is hereby granted, free of charge, to any person
   * obtaining a copy of this software and associated documentation
   * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use
+ * restriction, including without limitation the rights to use,
   * copy, modify, merge, publish, distribute, sublicense, and/or
   * sell copies of the Software, and to permit persons to whom the
   * Software is furnished to do so, subject to the following
@@ -31,11 +31,11 @@
   * The above copyright notice and this permission notice shall be
   * included in all copies or substantial portions of the Software.
   *
- * THE SOFTWARE IS PROVIDED , WITHOUT WARRANTY OF ANY KIND
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
   * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
   * NONINFRINGEMENT. IN 

Re: [PATCH 01/37] ARM: dts: imx6dl-aristainetos2: Correct license text

2016-12-14 Thread Heiko Schocher

Hello Alexandre,

Am 15.12.2016 um 00:57 schrieb Alexandre Belloni:

The license text has been mangled at some point then copy pasted across
multiple files. Restore it to what it should be.
Note that this is not intended as a license change.

Cc: Dong Aisheng 
Cc: Geert Uytterhoeven 
Cc: Heiko Schocher 
Cc: Rafał Miłecki 
Cc: Ulf Hansson 
Signed-off-by: Alexandre Belloni 
---
  arch/arm/boot/dts/imx6dl-aristainetos2_4.dts | 10 +-
  arch/arm/boot/dts/imx6dl-aristainetos2_7.dts | 10 +-
  arch/arm/boot/dts/imx6qdl-aristainetos2.dtsi | 10 +-
  3 files changed, 15 insertions(+), 15 deletions(-)


Thanks!

Reviewed-by: Heiko Schocher 

Hmm.. BTW ... is this copy and paste not boring? Should we not
use "SPDX-License-Identifier" [1] like for example in U-Boot ?

bye,
Heiko
[1] https://spdx.org/licenses/



diff --git a/arch/arm/boot/dts/imx6dl-aristainetos2_4.dts 
b/arch/arm/boot/dts/imx6dl-aristainetos2_4.dts
index bb92f309c191..0677625463d6 100644
--- a/arch/arm/boot/dts/imx6dl-aristainetos2_4.dts
+++ b/arch/arm/boot/dts/imx6dl-aristainetos2_4.dts
@@ -12,17 +12,17 @@
   * modify it under the terms of the GNU General Public License
   * version 2 as published by the Free Software Foundation.
   *
- * This file is distributed in the hope that it will be useful
+ * This file is distributed in the hope that it will be useful,
   * but WITHOUT ANY WARRANTY; without even the implied warranty of
   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   * GNU General Public License for more details.
   *
- * Or, alternatively
+ * Or, alternatively,
   *
   *  b) Permission is hereby granted, free of charge, to any person
   * obtaining a copy of this software and associated documentation
   * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use
+ * restriction, including without limitation the rights to use,
   * copy, modify, merge, publish, distribute, sublicense, and/or
   * sell copies of the Software, and to permit persons to whom the
   * Software is furnished to do so, subject to the following
@@ -31,11 +31,11 @@
   * The above copyright notice and this permission notice shall be
   * included in all copies or substantial portions of the Software.
   *
- * THE SOFTWARE IS PROVIDED , WITHOUT WARRANTY OF ANY KIND
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
   * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
   * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
   * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
   * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
   * OTHER DEALINGS IN THE SOFTWARE.
diff --git a/arch/arm/boot/dts/imx6dl-aristainetos2_7.dts 
b/arch/arm/boot/dts/imx6dl-aristainetos2_7.dts
index 3d5ad2cc7e22..805b1318b7f7 100644
--- a/arch/arm/boot/dts/imx6dl-aristainetos2_7.dts
+++ b/arch/arm/boot/dts/imx6dl-aristainetos2_7.dts
@@ -12,17 +12,17 @@
   * modify it under the terms of the GNU General Public License
   * version 2 as published by the Free Software Foundation.
   *
- * This file is distributed in the hope that it will be useful
+ * This file is distributed in the hope that it will be useful,
   * but WITHOUT ANY WARRANTY; without even the implied warranty of
   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   * GNU General Public License for more details.
   *
- * Or, alternatively
+ * Or, alternatively,
   *
   *  b) Permission is hereby granted, free of charge, to any person
   * obtaining a copy of this software and associated documentation
   * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use
+ * restriction, including without limitation the rights to use,
   * copy, modify, merge, publish, distribute, sublicense, and/or
   * sell copies of the Software, and to permit persons to whom the
   * Software is furnished to do so, subject to the following
@@ -31,11 +31,11 @@
   * The above copyright notice and this permission notice shall be
   * included in all copies or substantial portions of the Software.
   *
- * THE SOFTWARE IS PROVIDED , WITHOUT WARRANTY OF ANY KIND
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
   * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
   * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 

  1   2   3   4   5   6   7   8   9   10   >