[PATCH v3] kvm/x86: Remove redundant function implementations

2020-05-20 Thread Richard
pic_in_kernel(), ioapic_in_kernel() and irqchip_kernel() have the
same implementation.

Signed-off-by: Peng Hao 
---
 arch/x86/kvm/ioapic.h  |  8 ++--
 arch/x86/kvm/irq.h | 14 --
 arch/x86/kvm/lapic.c   |  1 +
 arch/x86/kvm/mmu/mmu.c |  1 +
 arch/x86/kvm/x86.c |  1 +
 5 files changed, 9 insertions(+), 16 deletions(-)

diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
index 2fb2e3c..7a3c53b 100644
--- a/arch/x86/kvm/ioapic.h
+++ b/arch/x86/kvm/ioapic.h
@@ -5,7 +5,7 @@
 #include 

 #include 
-
+#include "irq.h"
 struct kvm;
 struct kvm_vcpu;

@@ -108,11 +108,7 @@ do {\

 static inline int ioapic_in_kernel(struct kvm *kvm)
 {
-int mode = kvm->arch.irqchip_mode;
-
-/* Matches smp_wmb() when setting irqchip_mode */
-smp_rmb();
-return mode == KVM_IRQCHIP_KERNEL;
+return irqchip_kernel(kvm);
 }

 void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index f173ab6..e133c1a 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -16,7 +16,6 @@
 #include 

 #include 
-#include "ioapic.h"
 #include "lapic.h"

 #define PIC_NUM_PINS 16
@@ -66,15 +65,6 @@ void kvm_pic_destroy(struct kvm *kvm);
 int kvm_pic_read_irq(struct kvm *kvm);
 void kvm_pic_update_irq(struct kvm_pic *s);

-static inline int pic_in_kernel(struct kvm *kvm)
-{
-int mode = kvm->arch.irqchip_mode;
-
-/* Matches smp_wmb() when setting irqchip_mode */
-smp_rmb();
-return mode == KVM_IRQCHIP_KERNEL;
-}
-
 static inline int irqchip_split(struct kvm *kvm)
 {
 int mode = kvm->arch.irqchip_mode;
@@ -93,6 +83,10 @@ static inline int irqchip_kernel(struct kvm *kvm)
 return mode == KVM_IRQCHIP_KERNEL;
 }

+static inline int pic_in_kernel(struct kvm *kvm)
+{
+return irqchip_kernel(kvm);
+}
 static inline int irqchip_in_kernel(struct kvm *kvm)
 {
 int mode = kvm->arch.irqchip_mode;
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 9af25c9..de4d046 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -36,6 +36,7 @@
 #include 
 #include "kvm_cache_regs.h"
 #include "irq.h"
+#include "ioapic.h"
 #include "trace.h"
 #include "x86.h"
 #include "cpuid.h"
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 8071952..6133f69 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -16,6 +16,7 @@
  */

 #include "irq.h"
+#include "ioapic.h"
 #include "mmu.h"
 #include "x86.h"
 #include "kvm_cache_regs.h"
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index d786c7d..c8b62ac 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -18,6 +18,7 @@

 #include 
 #include "irq.h"
+#include "ioapic.h"
 #include "mmu.h"
 #include "i8254.h"
 #include "tss.h"
--
2.7.4


OPPO

本电子邮件及其附件含有OPPO公司的保密信息,仅限于邮件指明的收件人使用(包含个人及群组)。禁止任何人在未经授权的情况下以任何形式使用。如果您错收了本邮件,请立即以电子邮件通知发件人并删除本邮件及其附件。

This e-mail and its attachments contain confidential information from OPPO, 
which is intended only for the person or entity whose address is listed above. 
Any use of the information contained herein in any way (including, but not 
limited to, total or partial disclosure, reproduction, or dissemination) by 
persons other than the intended recipient(s) is prohibited. If you receive this 
e-mail in error, please notify the sender by phone or email immediately and 
delete it!


Re: [PATCH] kthread: Use TASK_IDLE state for newly created kernel threads

2020-05-20 Thread Greg Kroah-Hartman
On Thu, May 21, 2020 at 07:05:44AM +0530, Pavan Kondeti wrote:
> On Wed, May 20, 2020 at 08:18:58PM +0200, Greg Kroah-Hartman wrote:
> > On Wed, May 20, 2020 at 05:25:09PM +0530, Pavankumar Kondeti wrote:
> > > When kernel threads are created for later use, they will be in
> > > TASK_UNINTERRUPTIBLE state until they are woken up. This results
> > > in increased loadavg and false hung task reports. To fix this,
> > > use TASK_IDLE state instead of TASK_UNINTERRUPTIBLE when
> > > a kernel thread schedules out for the first time.
> > > 
> > > Signed-off-by: Pavankumar Kondeti 
> > > ---
> > >  kernel/kthread.c | 6 +++---
> > >  1 file changed, 3 insertions(+), 3 deletions(-)
> > > 
> > > diff --git a/kernel/kthread.c b/kernel/kthread.c
> > > index bfbfa48..b74ed8e 100644
> > > --- a/kernel/kthread.c
> > > +++ b/kernel/kthread.c
> > > @@ -250,7 +250,7 @@ static int kthread(void *_create)
> > >   current->vfork_done = >exited;
> > >  
> > >   /* OK, tell user we're spawned, wait for stop or wakeup */
> > > - __set_current_state(TASK_UNINTERRUPTIBLE);
> > > + __set_current_state(TASK_IDLE);
> > >   create->result = current;
> > >   /*
> > >* Thread is going to call schedule(), do not preempt it,
> > > @@ -428,7 +428,7 @@ static void __kthread_bind(struct task_struct *p, 
> > > unsigned int cpu, long state)
> > >  
> > >  void kthread_bind_mask(struct task_struct *p, const struct cpumask *mask)
> > >  {
> > > - __kthread_bind_mask(p, mask, TASK_UNINTERRUPTIBLE);
> > > + __kthread_bind_mask(p, mask, TASK_IDLE);
> > >  }
> > >  
> > >  /**
> > > @@ -442,7 +442,7 @@ void kthread_bind_mask(struct task_struct *p, const 
> > > struct cpumask *mask)
> > >   */
> > >  void kthread_bind(struct task_struct *p, unsigned int cpu)
> > >  {
> > > - __kthread_bind(p, cpu, TASK_UNINTERRUPTIBLE);
> > > + __kthread_bind(p, cpu, TASK_IDLE);
> > >  }
> > >  EXPORT_SYMBOL(kthread_bind);
> > 
> > It's as if people never read mailing lists:
> > 
> > https://lore.kernel.org/r/dm6pr11mb3531d3b164357b2dc476102ddf...@dm6pr11mb3531.namprd11.prod.outlook.com
> > 
> > Given that this is an identical resend of the previous patch, why are
> > you doing so, and what has changed since that original rejection?
> > 
> I did not know that it is attempted before. Thanks for pointing to the
> previous discussion. 
> 
> We have seen hung task reports from customers and it is due to a downstream
> change which create bunch of kernel threads for later use.

Do you have a pointer to that specific change?

> From Peter's reply, I understood that one must wake up the kthread
> after creation and put it in INTERRUPTIBLE sleep. I will pass on the
> message.

Just go fix that code, it sounds like it's in your tree already :)

thanks,

greg k-h


Re: [PATCHv3 4/5] Input: EXC3000: Add support to query model and fw_version

2020-05-20 Thread Dmitry Torokhov
On Wed, May 20, 2020 at 11:25:40PM +0200, Sebastian Reichel wrote:
> Hi,
> 
> On Wed, May 20, 2020 at 10:49:52AM -0700, Dmitry Torokhov wrote:
> > Hi Sebastian,
> > 
> > On Wed, May 20, 2020 at 05:39:35PM +0200, Sebastian Reichel wrote:
> > > Expose model and fw_version via sysfs. Also query the model
> > > in probe to make sure, that the I2C communication with the
> > > device works before successfully probing the driver.
> > > 
> > > This is a bit complicated, since EETI devices do not have
> > > a sync interface. Sending the commands and directly reading
> > > does not work. Sending the command and waiting for some time
> > > is also not an option, since there might be touch events in
> > > the mean time.
> > > 
> > > Last but not least we do not cache the results, since this
> > > interface can be used to check the I2C communication is still
> > > working as expected.
> > > 
> > > Signed-off-by: Sebastian Reichel 
> > > ---
> > >  .../ABI/testing/sysfs-driver-input-exc3000|  15 ++
> > >  drivers/input/touchscreen/exc3000.c   | 145 +-
> > >  2 files changed, 159 insertions(+), 1 deletion(-)
> > >  create mode 100644 Documentation/ABI/testing/sysfs-driver-input-exc3000
> > > 
> > > diff --git a/Documentation/ABI/testing/sysfs-driver-input-exc3000 
> > > b/Documentation/ABI/testing/sysfs-driver-input-exc3000
> > > new file mode 100644
> > > index ..d79da4f869af
> > > --- /dev/null
> > > +++ b/Documentation/ABI/testing/sysfs-driver-input-exc3000
> > > @@ -0,0 +1,15 @@
> > > +What:/sys/class/input/inputX/fw_version
> > > +Date:May 2020
> > > +Contact: linux-in...@vger.kernel.org
> > > +Description: Reports the firmware version provided by the 
> > > touchscreen, for example "00_T6" on a EXC80H60
> > > +
> > > + Access: Read
> > > + Valid values: Represented as string
> > > +
> > > +What:/sys/class/input/inputX/model
> > > +Date:May 2020
> > > +Contact: linux-in...@vger.kernel.org
> > > +Description: Reports the model identification provided by the 
> > > touchscreen, for example "Orion_1320" on a EXC80H60
> > > +
> > > + Access: Read
> > > + Valid values: Represented as string
> > 
> > These are properties of the controller (i2c device), not input
> > abstraction class on top of it, so the attributes should be attached to
> > i2c_client instance.
> > 
> > Please use devm_device_add_group() in probe to instantiate them at the
> > proper level.
> 
> As written in the cover letter using devm_device_add_group() in
> probe routine results in a udev race condition:
> 
> http://kroah.com/log/blog/2013/06/26/how-to-create-a-sysfs-file-correctly/

This race has been solved with the addition of KOBJ_BIND/KOBJ_UNBIND
uevents that signal when driver is bound or unbound from the device.
Granted, current systemd/udev drops them as it does not know how to
"add" to the device state, but this is on systemd to solve.

Thanks.

-- 
Dmitry


[RFC PATCH] optee: __optee_enumerate_devices() can be static

2020-05-20 Thread kbuild test robot


Signed-off-by: kbuild test robot 
---
 device.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/tee/optee/device.c b/drivers/tee/optee/device.c
index 8263b308efd56..d4931dad07aaa 100644
--- a/drivers/tee/optee/device.c
+++ b/drivers/tee/optee/device.c
@@ -87,7 +87,7 @@ static int optee_register_device(const uuid_t *device_uuid, 
u32 device_id)
return rc;
 }
 
-int __optee_enumerate_devices(u32 func)
+static int __optee_enumerate_devices(u32 func)
 {
const uuid_t pta_uuid =
UUID_INIT(0x7011a688, 0xddde, 0x4053,


Re: [PATCH 2/6] soc: ti: omap-prm: Add basic power domain support

2020-05-20 Thread kbuild test robot
Hi Tony,

I love your patch! Perhaps something to improve:

[auto build test WARNING on omap/for-next]
[also build test WARNING on robh/for-next keystone/next v5.7-rc6 next-20200519]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]

url:
https://github.com/0day-ci/linux/commits/Tony-Lindgren/Add-initial-genpd-support-for-omap-PRM-driver/20200521-063328
base:   https://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap.git 
for-next
config: arm-defconfig (attached as .config)
compiler: arm-linux-gnueabi-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=arm 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kbuild test robot 

All warnings (new ones prefixed by >>, old ones prefixed by <<):

drivers/soc/ti/omap_prm.c: In function 'omap_prm_domain_detach_dev':
>> drivers/soc/ti/omap_prm.c:347:26: warning: variable 'prmd' set but not used 
>> [-Wunused-but-set-variable]
347 |  struct omap_prm_domain *prmd;
|  ^~~~
At top level:
drivers/soc/ti/omap_prm.c:123:21: warning: 'omap_prm_onoff_noauto' defined but 
not used [-Wunused-const-variable=]
123 | omap_prm_domain_map omap_prm_onoff_noauto = {
| ^
drivers/soc/ti/omap_prm.c:115:21: warning: 'omap_prm_nooff' defined but not 
used [-Wunused-const-variable=]
115 | omap_prm_domain_map omap_prm_nooff = {
| ^~
drivers/soc/ti/omap_prm.c:107:21: warning: 'omap_prm_noinact' defined but not 
used [-Wunused-const-variable=]
107 | omap_prm_domain_map omap_prm_noinact = {
| ^~~~
drivers/soc/ti/omap_prm.c:99:21: warning: 'omap_prm_all' defined but not used 
[-Wunused-const-variable=]
99 | omap_prm_domain_map omap_prm_all = {
| ^~~~

vim +/prmd +347 drivers/soc/ti/omap_prm.c

   342  
   343  static void omap_prm_domain_detach_dev(struct generic_pm_domain *domain,
   344 struct device *dev)
   345  {
   346  struct generic_pm_domain_data *genpd_data;
 > 347  struct omap_prm_domain *prmd;
   348  
   349  prmd = genpd_to_prm_domain(domain);
   350  
   351  genpd_data = dev_gpd_data(dev);
   352  genpd_data->data = NULL;
   353  }
   354  

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org


.config.gz
Description: application/gzip


Re: [PATCH] clk: qcom: gcc: Fix parent for gpll0_out_even

2020-05-20 Thread Bjorn Andersson
On Wed 20 May 22:27 PDT 2020, Vinod Koul wrote:

> Documentation says that gpll0 is parent of gpll0_out_even, somehow
> driver coded that as bi_tcxo, so fix it
> 
> Fixes: 2a1d7eb854bb ("clk: qcom: gcc: Add global clock controller driver for 
> SM8150")
> Reported-by: Jonathan Marek 
> Signed-off-by: Vinod Koul 

Reviewed-by: Bjorn Andersson 

Regards,
Bjorn

> ---
>  drivers/clk/qcom/gcc-sm8150.c | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
> 
> diff --git a/drivers/clk/qcom/gcc-sm8150.c b/drivers/clk/qcom/gcc-sm8150.c
> index 2bc08e7125bf..72524cf11048 100644
> --- a/drivers/clk/qcom/gcc-sm8150.c
> +++ b/drivers/clk/qcom/gcc-sm8150.c
> @@ -76,8 +76,7 @@ static struct clk_alpha_pll_postdiv gpll0_out_even = {
>   .clkr.hw.init = &(struct clk_init_data){
>   .name = "gpll0_out_even",
>   .parent_data = &(const struct clk_parent_data){
> - .fw_name = "bi_tcxo",
> - .name = "bi_tcxo",
> + .hw = ,
>   },
>   .num_parents = 1,
>   .ops = _trion_pll_postdiv_ops,
> -- 
> 2.25.4
> 


Re: [PATCH 2/2] kvm/x86: don't expose MSR_IA32_UMWAIT_CONTROL unconditionally

2020-05-20 Thread Tao Xu




On 5/21/2020 12:33 PM, Xiaoyao Li wrote:

On 5/21/2020 5:05 AM, Paolo Bonzini wrote:

On 20/05/20 18:07, Maxim Levitsky wrote:

This msr is only available when the host supports WAITPKG feature.

This breaks a nested guest, if the L1 hypervisor is set to ignore
unknown msrs, because the only other safety check that the
kernel does is that it attempts to read the msr and
rejects it if it gets an exception.

Fixes: 6e3ba4abce ("KVM: vmx: Emulate MSR IA32_UMWAIT_CONTROL")

Signed-off-by: Maxim Levitsky 
---
  arch/x86/kvm/x86.c | 4 
  1 file changed, 4 insertions(+)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index fe3a24fd6b263..9c507b32b1b77 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5314,6 +5314,10 @@ static void kvm_init_msr_list(void)
  if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_EVENTSEL0 >=
  min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp))
  continue;
+    break;
+    case MSR_IA32_UMWAIT_CONTROL:
+    if (!kvm_cpu_cap_has(X86_FEATURE_WAITPKG))
+    continue;
  default:
  break;
  }


The patch is correct, and matches what is done for the other entries of
msrs_to_save_all.  However, while looking at it I noticed that
X86_FEATURE_WAITPKG is actually never added, and that is because it was
also not added to the supported CPUID in commit e69e72faa3a0 ("KVM: x86:
Add support for user wait instructions", 2019-09-24), which was before
the kvm_cpu_cap mechanism was added.

So while at it you should also fix that.  The right way to do that is to
add a

 if (vmx_waitpkg_supported())
 kvm_cpu_cap_check_and_set(X86_FEATURE_WAITPKG);


+ Tao

I remember there is certainly some reason why we don't expose WAITPKG to 
guest by default.


Tao, please help clarify it.

Thanks,
-Xiaoyao



Because in a VM, umwait and tpause can put a (physical) CPU into a power 
saving state. So from the host's view, this CPU will appear 100% used by the VM. 
Although umwait and tpause only cause a short wait (maybe 100 
microseconds), we still don't want to unconditionally expose WAITPKG in a VM.


[PATCH] platform: cros_ec_debugfs: control uptime information request

2020-05-20 Thread Gwendal Grignou
When the EC does not support the uptime command (EC_CMD_GET_UPTIME_INFO),
return -EPROTO on reads of /sys/kernel/debug/cros_ec/uptime without
calling the EC again after the first failed try.

The EC console log will not contain EC_CMD_GET_UPTIME_INFO anymore.

Signed-off-by: Gwendal Grignou 
---
 drivers/platform/chrome/cros_ec_debugfs.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/drivers/platform/chrome/cros_ec_debugfs.c 
b/drivers/platform/chrome/cros_ec_debugfs.c
index 6ae484989d1f5..70a29afb6d9e7 100644
--- a/drivers/platform/chrome/cros_ec_debugfs.c
+++ b/drivers/platform/chrome/cros_ec_debugfs.c
@@ -49,6 +49,8 @@ struct cros_ec_debugfs {
struct delayed_work log_poll_work;
/* EC panicinfo */
struct debugfs_blob_wrapper panicinfo_blob;
+   /* EC uptime */
+   bool uptime_supported;
 };
 
 /*
@@ -256,12 +258,19 @@ static ssize_t cros_ec_uptime_read(struct file *file, 
char __user *user_buf,
char read_buf[32];
int ret;
 
+   if (!debug_info->uptime_supported)
+   return -EPROTO;
+
resp = (struct ec_response_uptime_info *)
 
msg.cmd.command = EC_CMD_GET_UPTIME_INFO;
msg.cmd.insize = sizeof(*resp);
 
ret = cros_ec_cmd_xfer_status(ec_dev, );
+   if (ret == -EPROTO && msg.cmd.result == EC_RES_INVALID_COMMAND) {
+   debug_info->uptime_supported = false;
+   return ret;
+   }
if (ret < 0)
return ret;
 
@@ -434,6 +443,9 @@ static int cros_ec_debugfs_probe(struct platform_device *pd)
debug_info->ec = ec;
debug_info->dir = debugfs_create_dir(name, NULL);
 
+   /* Give uptime a chance to run. */
+   debug_info->uptime_supported = true;
+
ret = cros_ec_create_panicinfo(debug_info);
if (ret)
goto remove_debugfs;
-- 
2.26.2.761.g0e0b3e54be-goog



[PATCH] clk: qcom: gcc: Fix parent for gpll0_out_even

2020-05-20 Thread Vinod Koul
Documentation says that gpll0 is the parent of gpll0_out_even; somehow the
driver coded that as bi_tcxo, so fix it.

Fixes: 2a1d7eb854bb ("clk: qcom: gcc: Add global clock controller driver for 
SM8150")
Reported-by: Jonathan Marek 
Signed-off-by: Vinod Koul 
---
 drivers/clk/qcom/gcc-sm8150.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/clk/qcom/gcc-sm8150.c b/drivers/clk/qcom/gcc-sm8150.c
index 2bc08e7125bf..72524cf11048 100644
--- a/drivers/clk/qcom/gcc-sm8150.c
+++ b/drivers/clk/qcom/gcc-sm8150.c
@@ -76,8 +76,7 @@ static struct clk_alpha_pll_postdiv gpll0_out_even = {
.clkr.hw.init = &(struct clk_init_data){
.name = "gpll0_out_even",
.parent_data = &(const struct clk_parent_data){
-   .fw_name = "bi_tcxo",
-   .name = "bi_tcxo",
+   .hw = ,
},
.num_parents = 1,
.ops = _trion_pll_postdiv_ops,
-- 
2.25.4



Re: Re: [PATCH] Input: omap-keypad - fix runtime pm imbalance on error

2020-05-20 Thread dinghao . liu
Fixing this in the PM core will influence all callers of pm_runtime_get_sync().
Therefore I think the better solution is to fix its misused callers.

Regards,
Dinghao

Dmitry Torokhov dmitry.torok...@gmail.com写道:
> Hi Dinghao,
> 
> On Wed, May 20, 2020 at 6:35 AM Dinghao Liu  wrote:
> >
> > pm_runtime_get_sync() increments the runtime PM usage counter even
> > the call returns an error code. Thus a pairing decrement is needed
> > on the error handling path to keep the counter balanced.
> 
> This is a very surprising behavior and I wonder if this should be
> fixed in the PM core (or the required cleanup steps need to be called
> out in the function description). I also see that a few drivers that
> handle this situation correctly (?) call pm_runtime_put_noidle()
> instead of pm_runtime_put_sync() in the error path.
> 
> Rafael, do you have any guidance here?
> 
> Thanks.
> 
> -- 
> Dmitry


[PATCH v1 1/1] drivers: mtd: spi-nor: update read capabilities for w25q64 and s25fl064k

2020-05-20 Thread Rayagonda Kokatanur
Both w25q64 and s25fl064k nor flash support QUAD and DUAL read
command, hence update the same in flash_info table.

Signed-off-by: Rayagonda Kokatanur 
---
 drivers/mtd/spi-nor/spansion.c | 3 ++-
 drivers/mtd/spi-nor/winbond.c  | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/mtd/spi-nor/spansion.c b/drivers/mtd/spi-nor/spansion.c
index 6756202ace4b..c91bbb8d9cd6 100644
--- a/drivers/mtd/spi-nor/spansion.c
+++ b/drivers/mtd/spi-nor/spansion.c
@@ -52,7 +52,8 @@ static const struct flash_info spansion_parts[] = {
 SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) },
{ "s25fl016k",  INFO(0xef4015,  0,  64 * 1024,  32,
 SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) },
-   { "s25fl064k",  INFO(0xef4017,  0,  64 * 1024, 128, SECT_4K) },
+   { "s25fl064k",  INFO(0xef4017,  0,  64 * 1024, 128,
+SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) },
{ "s25fl116k",  INFO(0x014015,  0,  64 * 1024,  32,
 SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) },
{ "s25fl132k",  INFO(0x014016,  0,  64 * 1024,  64, SECT_4K) },
diff --git a/drivers/mtd/spi-nor/winbond.c b/drivers/mtd/spi-nor/winbond.c
index 17deabad57e1..2028cab3eff9 100644
--- a/drivers/mtd/spi-nor/winbond.c
+++ b/drivers/mtd/spi-nor/winbond.c
@@ -39,7 +39,8 @@ static const struct flash_info winbond_parts[] = {
SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ |
SPI_NOR_HAS_LOCK | SPI_NOR_HAS_TB) },
{ "w25x64", INFO(0xef3017, 0, 64 * 1024, 128, SECT_4K) },
-   { "w25q64", INFO(0xef4017, 0, 64 * 1024, 128, SECT_4K) },
+   { "w25q64", INFO(0xef4017, 0, 64 * 1024, 128,
+SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) },
{ "w25q64dw", INFO(0xef6017, 0, 64 * 1024, 128,
   SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ |
   SPI_NOR_HAS_LOCK | SPI_NOR_HAS_TB) },
-- 
2.17.1



Re: [PATCH v3 03/14] remoteproc: Add new operation and flags for synchronistation

2020-05-20 Thread Bjorn Andersson
On Wed 20 May 15:06 PDT 2020, Mathieu Poirier wrote:

> On Mon, May 18, 2020 at 05:55:00PM -0700, Bjorn Andersson wrote:
> > On Fri 15 May 12:24 PDT 2020, Mathieu Poirier wrote:
> > 
> > > Good day Bjorn,
> > > 
> > > On Wed, May 13, 2020 at 06:32:24PM -0700, Bjorn Andersson wrote:
> > > > On Fri 08 May 14:01 PDT 2020, Mathieu Poirier wrote:
> > > > 
> > > > > On Tue, May 05, 2020 at 05:22:53PM -0700, Bjorn Andersson wrote:
> > > > > > On Fri 24 Apr 13:01 PDT 2020, Mathieu Poirier wrote:
[..]
> > > > > > > + bool after_crash;
> > > > > > 
> > > > > > Similarly what is the expected steps to be taken by the core when 
> > > > > > this
> > > > > > is true? Should rproc_report_crash() simply stop/start the 
> > > > > > subdevices
> > > > > > and upon one of the ops somehow tell the remote controller that it 
> > > > > > can
> > > > > > proceed with the recovery?
> > > > > 
> > > > > The exact same sequence of steps will be carried out as they are 
> > > > > today, except
> > > > > that if after_crash == true, the remoteproc core won't be switching 
> > > > > the remote
> > > > > processor on, exactly as it would do when on_init == true.
> > > > > 
> > > > 
> > > > Just to make sure we're on the same page:
> > > > 
> > > > after_crash = false is what we have today, and would mean:
> > > > 
> > > > 1) stop subdevices
> > > > 2) power off
> > > > 3) unprepare subdevices
> > > > 4) generate coredump
> > > > 5) request firmware
> > > > 6) load segments
> > > > 7) find resource table
> > > > 8) prepare subdevices
> > > > 9) "boot"
> > > > 10) start subdevices
> > > 
> > > Exactly
> > > 
> > > > 
> > > > after_crash = true would mean:
> > > > 
> > > > 1) stop subdevices
> > > > 2) "detach"
> > > > 3) unprepare subdevices
> > > > 4) prepare subdevices
> > > > 5) "attach"
> > > > 6) start subdevices
> > > >
> > > 
> > > Yes
> > >  
> > > > State diagram wise both of these would represent the transition RUNNING
> > > > -> CRASHED -> RUNNING, but somehow the platform driver needs to be able
> > > > to specify which of these sequences to perform. Per your naming
> > > > suggestion above, this does sound like a "autonomous_recovery" boolean
> > > > to me.
> > > 
> > > Right, semantically "rproc->autonomous" would apply quite well.
> > > 
> > > In function rproc_crash_handler_work(), a call to rproc_set_sync_flag() 
> > > has been
> > > strategically placed to set the value of rproc->autonomous based on
> > > "after_crash".  From there the core knows which rproc_ops to use.  Here 
> > > too we
> > > have to rely on the rproc_ops provided by the platform to do the right 
> > > thing
> > > based on the scenario to enact.
> > > 
> > 
> > Do you think that autonomous_recovery would be something that changes
> > for a given remoteproc instance? I envisioned it as something that you
> > know at registration time, but perhaps I'm missing some details here.
> 
> I don't envision any of the transision flags to change once they are set by 
> the
> platform.   The same applies to the new rproc_ops, it can be set only once.
> Otherwise combination of possible scenarios becomes too hard to manage, 
> leading
> to situations where the core and MCU get out of sync and can't talk to each
> other.
> 

Sounds good, I share this expectation, just wanted to check with you.

> > 
> > > > 
> > > > > These flags are there to indicate how to set rproc::sync_with_rproc 
> > > > > after
> > > > > different events, that is when the remoteproc core boots, when the 
> > > > > remoteproc
> > > > > has been stopped or when it has crashed.
> > > > > 
> > > > 
> > > > Right, that was clear from your patches. Sorry that my reply didn't
> > > > convey the information that I had understood this.
> > > > 
> > > > > > 
> > > > > > > +};
> > > > > > > +
> > > > > > >  /**
> > > > > > >   * struct rproc_ops - platform-specific device handlers
> > > > > > >   * @start:   power on the device and boot it
> > > > > > > @@ -459,6 +476,9 @@ struct rproc_dump_segment {
> > > > > > >   * @firmware: name of firmware file to be loaded
> > > > > > >   * @priv: private data which belongs to the platform-specific 
> > > > > > > rproc module
> > > > > > >   * @ops: platform-specific start/stop rproc handlers
> > > > > > > + * @sync_ops: platform-specific start/stop rproc handlers when
> > > > > > > + * synchronising with a remote processor.
> > > > > > > + * @sync_flags: Determine the rproc_ops to choose in specific 
> > > > > > > states.
> > > > > > >   * @dev: virtual device for refcounting and common remoteproc 
> > > > > > > behavior
> > > > > > >   * @power: refcount of users who need this rproc powered up
> > > > > > >   * @state: state of the device
> > > > > > > @@ -482,6 +502,7 @@ struct rproc_dump_segment {
> > > > > > >   * @table_sz: size of @cached_table
> > > > > > >   * @has_iommu: flag to indicate if remote processor is behind an 
> > > > > > > MMU
> > > > > > >   * @auto_boot: flag to indicate if remote processor should be 
> > > > > > > 

Re: [RFC][PATCH 3/5] thermal: Add support for setting notification thresholds

2020-05-20 Thread Amit Kucheria
Hi Srinivas,

On Wed, May 20, 2020 at 11:46 PM Srinivas Pandruvada
 wrote:
>
> On Wed, 2020-05-20 at 09:58 +0530, Amit Kucheria wrote:
> > On Tue, May 19, 2020 at 5:10 AM Srinivas Pandruvada
> >  wrote:
> > > On Mon, 2020-05-18 at 18:37 +0200, Daniel Lezcano wrote:
> > > > On 04/05/2020 20:16, Srinivas Pandruvada wrote:
> > > > > Add new attributes in thermal syfs when a thermal drivers
> > > > > provides
> > > > > callbacks for them and CONFIG_THERMAL_USER_EVENT_INTERFACE is
> > > > > defined.
> > > > >
> > > > > These attribute allow user space to stop polling for
> > > > > temperature.
> > > > >
> > > > > These attributes are:
> > > > > - temp_thres_low: Specify a notification temperature for a low
> > > > > temperature threshold event.
> > > > > temp_thres_high: Specify a notification temperature for a high
> > > > > temperature threshold event.
> > > > > temp_thres_hyst: Specify a change in temperature to send
> > > > > notification
> > > > > again.
> > > > >
> > > > > This is implemented by adding additional sysfs attribute group.
> > > > > The
> > > > > changes in this patch are trivial to add new attributes in
> > > > > thermal
> > > > > sysfs as done for other attributes.
> > > >
> > > > Isn't it duplicate with the trip point?
> > > A trip point is where an in-kernel governor takes some action. This
> > > is
> > > not same as a notification temperature. For example at trip point
> > > configured by ACPI at 85C, the thermal governor may start
> > > aggressive
> > > throttling.
> > > But a user space can set a notification threshold at 80C and start
> > > some
> > > active controls like activate some fan to reduce the impact of
> > > passive
> > > control on performance.
> >
> > Then what is the use of thermal trip type "ACTIVE" ?
> This is an example.
> The defaults are set by the OEMs via ACPI. User can't modify that if
> they want to optimize for their usage on Linux. There are fan control
> daemon's which user use on top.

-ENOPARSE. Are you saying users "can" modify these?

In any case, how is what you described earlier not possible with an
ACTIVE trip point directly wired to the fan as a cooling device or
with a HOT trip point that causes the platform driver to send
notification to userspace where a fan control daemon can do what it
needs to?

Basically, I think the issue of polling is orthogonal to the
introduction of the new attributes introduced in this patch and I
don't understand the reason for these attributes from your commit
description.

> > > We need a way to distinguish between temperature notification
> > > threshold
> > > and actual trip point. Changing a trip point means that user wants
> > > kernel to throttle at temperature.
>


RE: [PATCH 2/2] soundwire: intel: transition to 3 steps initialization

2020-05-20 Thread Liao, Bard
> -Original Message-
> From: Vinod Koul 
> Sent: Thursday, May 21, 2020 12:37 PM
> To: Liao, Bard 
> Cc: Bard Liao ; alsa-de...@alsa-project.org;
> linux-kernel@vger.kernel.org; ti...@suse.de; broo...@kernel.org;
> gre...@linuxfoundation.org; j...@cadence.com;
> srinivas.kandaga...@linaro.org; rander.w...@linux.intel.com;
> ranjani.sridha...@linux.intel.com; hui.w...@canonical.com; pierre-
> louis.boss...@linux.intel.com; Kale, Sanyog R ;
> Blauciak, Slawomir ; Lin, Mengdong
> 
> Subject: Re: [PATCH 2/2] soundwire: intel: transition to 3 steps 
> initialization
> 
> On 21-05-20, 02:23, Liao, Bard wrote:
> > > -Original Message-
> > > From: Vinod Koul 
> > > Sent: Wednesday, May 20, 2020 9:54 PM
> > > To: Bard Liao 
> > > Cc: alsa-de...@alsa-project.org; linux-kernel@vger.kernel.org;
> > > ti...@suse.de; broo...@kernel.org; gre...@linuxfoundation.org;
> > > j...@cadence.com; srinivas.kandaga...@linaro.org;
> > > rander.w...@linux.intel.com; ranjani.sridha...@linux.intel.com;
> > > hui.w...@canonical.com; pierre- louis.boss...@linux.intel.com; Kale,
> > > Sanyog R ; Blauciak, Slawomir
> > > ; Lin, Mengdong
> > > ; Liao, Bard 
> > > Subject: Re: [PATCH 2/2] soundwire: intel: transition to 3 steps
> > > initialization
> > >
> > > On 20-05-20, 03:19, Bard Liao wrote:
> > > > From: Pierre-Louis Bossart 
> > > >
> > > > Rather than a plain-vanilla init/exit, this patch provides 3 steps
> > > > in the initialization (ACPI scan, probe, startup) which makes it
> > > > easier to detect platform support for SoundWire, allocate required
> > > > resources as early as possible, and conversely help make the
> > > > startup() callback lighter-weight with only hardware register setup.
> > >
> > > Okay but can you add details in changelog on what each step would do?
> >
> > Sure. Will do.
> >
> > >
> > > > @@ -1134,25 +1142,15 @@ static int intel_probe(struct
> > > > platform_device
> > > *pdev)
> > > >
> > > > intel_pdi_ch_update(sdw);
> > > >
> > > > -   /* Acquire IRQ */
> > > > -   ret = request_threaded_irq(sdw->link_res->irq,
> > > > -  sdw_cdns_irq, sdw_cdns_thread,
> > > > -  IRQF_SHARED, KBUILD_MODNAME, 
> > > >cdns);
> > >
> > > This is removed here but not added anywhere else, do we have no irq
> > > after this patch?
> >
> > We use a single irq for all Intel Audio DSP events and it will be
> > requested in the SOF driver.
> 
> And how will the irq be propagated to sdw/cdns drivers here?

We export the handler and call it on SOF driver.

> 
> --
> ~Vinod


powerpc64-linux-ld: mm/page_alloc.o:undefined reference to `node_reclaim_distance'

2020-05-20 Thread kbuild test robot
tree:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 
master
head:   b85051e755b0e9d6dd8f17ef1da083851b83287d
commit: a55c7454a8c887b226a01d7eed088ccb5374d81e sched/topology: Improve load 
balancing on AMD EPYC systems
date:   9 months ago
config: powerpc-randconfig-c004-20200520 (attached as .config)
compiler: powerpc64-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
git checkout a55c7454a8c887b226a01d7eed088ccb5374d81e
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross 
ARCH=powerpc 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kbuild test robot 

All errors (new ones prefixed by >>, old ones prefixed by <<):

powerpc64-linux-ld: warning: orphan section `.gnu.hash' from `linker stubs' 
being placed in section `.gnu.hash'
>> powerpc64-linux-ld: mm/page_alloc.o:(.toc+0x0): undefined reference to 
>> `node_reclaim_distance'

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org


.config.gz
Description: application/gzip


Re: Re: [PATCH] wlcore: fix runtime pm imbalance in wl1271_op_suspend

2020-05-20 Thread dinghao . liu
There is a check against ret after out_sleep tag. If 
wl1271_configure_suspend_ap()
returns an error code, ret will be caught by this check and a warning will be 
issued.


Tony Lindgren t...@atomide.com写道:
> * Dinghao Liu  [200520 12:58]:
> > When wlcore_hw_interrupt_notify() returns an error code,
> > a pairing runtime PM usage counter decrement is needed to
> > keep the counter balanced.
> 
> We should probably keep the warning though, nothing will
> get shown for wl1271_configure_suspend_ap() errors.
> 
> Otherwise looks good to me.
> 
> Regards,
> 
> Tony


Re: linux-next: manual merge of the rcu tree with the powerpc tree

2020-05-20 Thread Stephen Rothwell
Hi all,

On Tue, 19 May 2020 17:23:16 +1000 Stephen Rothwell  
wrote:
>
> Today's linux-next merge of the rcu tree got a conflict in:
> 
>   arch/powerpc/kernel/traps.c
> 
> between commit:
> 
>   116ac378bb3f ("powerpc/64s: machine check interrupt update NMI accounting")
> 
> from the powerpc tree and commit:
> 
>   187416eeb388 ("hardirq/nmi: Allow nested nmi_enter()")
> 
> from the rcu tree.
> 
> I fixed it up (I used the powerpc tree version for now) and can carry the
> fix as necessary. This is now fixed as far as linux-next is concerned,
> but any non trivial conflicts should be mentioned to your upstream
> maintainer when your tree is submitted for merging.  You may also want
> to consider cooperating with the maintainer of the conflicting tree to
> minimise any particularly complex conflicts.

This is now a conflict between the powerpc commit and commit

  69ea03b56ed2 ("hardirq/nmi: Allow nested nmi_enter()")

from the tip tree.  I assume that the rcu and tip trees are sharing
some patches (but not commits) :-(

-- 
Cheers,
Stephen Rothwell


pgpqCKNtba24A.pgp
Description: OpenPGP digital signature


Re: [PATCH v6 12/12] mmap locking API: convert mmap_sem comments

2020-05-20 Thread Michel Lespinasse
Looks good, thanks !

On Wed, May 20, 2020 at 8:22 PM Andrew Morton  wrote:
> On Tue, 19 May 2020 22:29:08 -0700 Michel Lespinasse  
> wrote:
> > Convert comments that reference mmap_sem to reference mmap_lock instead.
>
> This may not be complete..
>
> From: Andrew Morton 
> Subject: mmap-locking-api-convert-mmap_sem-comments-fix
>
> fix up linux-next leftovers
>
> Cc: Daniel Jordan 
> Cc: Davidlohr Bueso 
> Cc: David Rientjes 
> Cc: Hugh Dickins 
> Cc: Jason Gunthorpe 
> Cc: Jerome Glisse 
> Cc: John Hubbard 
> Cc: Laurent Dufour 
> Cc: Liam Howlett 
> Cc: Matthew Wilcox 
> Cc: Michel Lespinasse 
> Cc: Peter Zijlstra 
> Cc: Vlastimil Babka 
> Cc: Ying Han 
> Signed-off-by: Andrew Morton 

Reviewed-by: Michel Lespinasse 


Re: [PATCH v6 05/12] mmap locking API: convert mmap_sem call sites missed by coccinelle

2020-05-20 Thread Michel Lespinasse
Looks good. I'm not sure if you need a review, but just in case:

On Wed, May 20, 2020 at 8:23 PM Andrew Morton  wrote:
> On Tue, 19 May 2020 22:29:01 -0700 Michel Lespinasse  
> wrote:
>
> > Convert the last few remaining mmap_sem rwsem calls to use the new
> > mmap locking API. These were missed by coccinelle for some reason
> > (I think coccinelle does not support some of the preprocessor
> > constructs in these files ?)
>
> From: Andrew Morton 
> Subject: mmap-locking-api-convert-mmap_sem-call-sites-missed-by-coccinelle-fix
>
> convert linux-next leftovers
>
> Cc: Michel Lespinasse 
> Cc: Daniel Jordan 
> Cc: Laurent Dufour 
> Cc: Vlastimil Babka 
> Cc: Davidlohr Bueso 
> Cc: David Rientjes 
> Cc: Hugh Dickins 
> Cc: Jason Gunthorpe 
> Cc: Jerome Glisse 
> Cc: John Hubbard 
> Cc: Liam Howlett 
> Cc: Matthew Wilcox 
> Cc: Peter Zijlstra 
> Cc: Ying Han 
> Signed-off-by: Andrew Morton 

Reviewed-by: Michel Lespinasse 


Re: [PATCH] HID: intel-ish-hid: Replace PCI_DEV_FLAGS_NO_D3 with pci_save_state

2020-05-20 Thread Kai-Heng Feng
Hi Srinivas,

> On May 9, 2020, at 01:45, Srinivas Pandruvada 
>  wrote:
> 
> On Tue, 2020-05-05 at 21:17 +0800, Kai-Heng Feng wrote:
>> PCI_DEV_FLAGS_NO_D3 should not be used outside of PCI core.
>> 
>> Instead, we can use pci_save_state() to hint PCI core that the device
>> should stay at D0 during suspend.
> 
> Your changes are doing more than just changing the flag. Can you
> explain more about the other changes?

By using pci_save_state(), in addition to keeping the device itself at D0, the 
parent bridge will also stay at D0.
So it's a better approach to achieve the same thing.

> Also make sure that you test on both platforms which has regular S3 and
> S0ix (modern standby system).

Actually I don't have any physical hardware to test the patch, I found the 
issue when I search for D3 quirks through the source code.

Can you guys do a quick smoketest for this patch?

Kai-Heng

> 
> Thanks,
> Srinivas
> 
> 
>> 
>> Signed-off-by: Kai-Heng Feng 
>> ---
>> drivers/hid/intel-ish-hid/ipc/pci-ish.c | 15 ++-
>> 1 file changed, 10 insertions(+), 5 deletions(-)
>> 
>> diff --git a/drivers/hid/intel-ish-hid/ipc/pci-ish.c
>> b/drivers/hid/intel-ish-hid/ipc/pci-ish.c
>> index f491d8b4e24c..ab588b9c8d09 100644
>> --- a/drivers/hid/intel-ish-hid/ipc/pci-ish.c
>> +++ b/drivers/hid/intel-ish-hid/ipc/pci-ish.c
>> @@ -106,6 +106,11 @@ static inline bool ish_should_enter_d0i3(struct
>> pci_dev *pdev)
>>  return !pm_suspend_via_firmware() || pdev->device ==
>> CHV_DEVICE_ID;
>> }
>> 
>> +static inline bool ish_should_leave_d0i3(struct pci_dev *pdev)
>> +{
>> +return !pm_resume_via_firmware() || pdev->device ==
>> CHV_DEVICE_ID;
>> +}
>> +
>> /**
>>  * ish_probe() - PCI driver probe callback
>>  * @pdev:pci device
>> @@ -215,9 +220,7 @@ static void __maybe_unused
>> ish_resume_handler(struct work_struct *work)
>>  struct ishtp_device *dev = pci_get_drvdata(pdev);
>>  int ret;
>> 
>> -/* Check the NO_D3 flag to distinguish the resume paths */
>> -if (pdev->dev_flags & PCI_DEV_FLAGS_NO_D3) {
>> -pdev->dev_flags &= ~PCI_DEV_FLAGS_NO_D3;
>> +if (ish_should_leave_d0i3(pdev) && !dev->suspend_flag) {
>>  disable_irq_wake(pdev->irq);
>> 
>>  ishtp_send_resume(dev);
>> @@ -281,8 +284,10 @@ static int __maybe_unused ish_suspend(struct
>> device *device)
>>   */
>>  ish_disable_dma(dev);
>>  } else {
>> -/* Set the NO_D3 flag, the ISH would enter D0i3
>> */
>> -pdev->dev_flags |= PCI_DEV_FLAGS_NO_D3;
>> +/* Save state so PCI core will keep the device
>> at D0,
>> + * the ISH would enter D0i3
>> + */
>> +pci_save_state(pdev);
>> 
> Did you test on some C
> 
> 
>>  enable_irq_wake(pdev->irq);
>>  }



Re: [RFC PATCH 2/2] init: Allow multi-line output of kernel command line

2020-05-20 Thread Andrew Morton
On Thu, 21 May 2020 13:36:28 +0900 Sergey Senozhatsky 
 wrote:

> On (20/05/20 18:00), Andrew Morton wrote:
> [..]
> > I'm wondering if we shold add a kernel puts() (putsk()?  yuk) which can
> > puts() a string of any length.
> > 
> > I'm counting around 150 instances of printk("%s", ...) and pr_foo("%s",
> > ...) which could perhaps be converted, thus saving an argument.
> 
> Can you point me at some examples?
> 

./arch/powerpc/kernel/udbg.c:   printk("%s", s);
./arch/powerpc/xmon/nonstdio.c: printk("%s", xmon_outbuf);
./arch/um/os-Linux/drivers/ethertap_user.c: printk("%s", output);
./arch/um/os-Linux/drivers/ethertap_user.c: printk("%s", output);
./arch/um/os-Linux/drivers/tuntap_user.c:   printk("%s", out

etc.

My point is, if we created a length-unlimited puts() function for printing the
kernel command line, it could be reused in such places, resulting in a
smaller kernel.




Re: [PATCH v3] /dev/mem: Revoke mappings when a driver claims the region

2020-05-20 Thread Dan Williams
On Wed, May 20, 2020 at 9:37 PM Dan Williams  wrote:
>
> On Wed, May 20, 2020 at 7:26 PM Matthew Wilcox  wrote:
> >
> > On Wed, May 20, 2020 at 06:35:25PM -0700, Dan Williams wrote:
> > > +static struct inode *devmem_inode;
> > > +
> > > +#ifdef CONFIG_IO_STRICT_DEVMEM
> > > +void revoke_devmem(struct resource *res)
> > > +{
> > > + struct inode *inode = READ_ONCE(devmem_inode);
> > > +
> > > + /*
> > > +  * Check that the initialization has completed. Losing the race
> > > +  * is ok because it means drivers are claiming resources before
> > > +  * the fs_initcall level of init and prevent /dev/mem from
> > > +  * establishing mappings.
> > > +  */
> > > + smp_rmb();
> > > + if (!inode)
> > > + return;
> >
> > But we don't need the smp_rmb() here, right?  READ_ONCE and WRITE_ONCE
> > are a DATA DEPENDENCY barrier (in Documentation/memory-barriers.txt 
> > parlance)
> > so the smp_rmb() is superfluous ...
>
> Is it? I did not grok that from Documentation/memory-barriers.txt.
> READ_ONCE and WRITE_ONCE are certainly ordered with respect to each
> other in the same function, but I thought they still depend on
> barriers for smp ordering?
>
> >
> > > + /*
> > > +  * Use a unified address space to have a single point to manage
> > > +  * revocations when drivers want to take over a /dev/mem mapped
> > > +  * range.
> > > +  */
> > > + inode->i_mapping = devmem_inode->i_mapping;
> > > + inode->i_mapping->host = devmem_inode;
> >
> > umm ... devmem_inode->i_mapping->host doesn't already point to devmem_inode?
>
> Not if inode is coming from:
>
>  mknod ./newmem c 1 1
>
> ...that's the problem that a unified inode solves. You can mknod all
> you want, but mapping and mapping->host will point to a common
> instance.
>
> >
> > > +
> > > + /* publish /dev/mem initialized */
> > > + smp_wmb();
> > > + WRITE_ONCE(devmem_inode, inode);
> >
> > As above, unnecessary barrier, I think.
>
> Well, if you're not sure, how sure should I be?

I'm pretty sure they are needed, because I need the prior writes to
initialize the inode to be fenced before the final write to publish
the inode. I don't think WRITE_ONCE() enforces that prior writes have
completed.


Re: [PATCH v3] /dev/mem: Revoke mappings when a driver claims the region

2020-05-20 Thread Dan Williams
On Wed, May 20, 2020 at 7:26 PM Matthew Wilcox  wrote:
>
> On Wed, May 20, 2020 at 06:35:25PM -0700, Dan Williams wrote:
> > +static struct inode *devmem_inode;
> > +
> > +#ifdef CONFIG_IO_STRICT_DEVMEM
> > +void revoke_devmem(struct resource *res)
> > +{
> > + struct inode *inode = READ_ONCE(devmem_inode);
> > +
> > + /*
> > +  * Check that the initialization has completed. Losing the race
> > +  * is ok because it means drivers are claiming resources before
> > +  * the fs_initcall level of init and prevent /dev/mem from
> > +  * establishing mappings.
> > +  */
> > + smp_rmb();
> > + if (!inode)
> > + return;
>
> But we don't need the smp_rmb() here, right?  READ_ONCE and WRITE_ONCE
> are a DATA DEPENDENCY barrier (in Documentation/memory-barriers.txt parlance)
> so the smp_rmb() is superfluous ...

Is it? I did not grok that from Documentation/memory-barriers.txt.
READ_ONCE and WRITE_ONCE are certainly ordered with respect to each
other in the same function, but I thought they still depend on
barriers for smp ordering?

>
> > + /*
> > +  * Use a unified address space to have a single point to manage
> > +  * revocations when drivers want to take over a /dev/mem mapped
> > +  * range.
> > +  */
> > + inode->i_mapping = devmem_inode->i_mapping;
> > + inode->i_mapping->host = devmem_inode;
>
> umm ... devmem_inode->i_mapping->host doesn't already point to devmem_inode?

Not if inode is coming from:

 mknod ./newmem c 1 1

...that's the problem that a unified inode solves. You can mknod all
you want, but mapping and mapping->host will point to a common
instance.

>
> > +
> > + /* publish /dev/mem initialized */
> > + smp_wmb();
> > + WRITE_ONCE(devmem_inode, inode);
>
> As above, unnecessary barrier, I think.

Well, if you're not sure, how sure should I be?


Re: [PATCH] perf evsel: Get group fd from CPU0 for system wide event

2020-05-20 Thread Jin, Yao

Hi Jiri,

On 5/20/2020 3:50 PM, Jiri Olsa wrote:

On Wed, May 20, 2020 at 01:36:40PM +0800, Jin, Yao wrote:

Hi Jiri,

On 5/18/2020 11:28 AM, Jin, Yao wrote:

Hi Jiri,

On 5/15/2020 4:33 PM, Jiri Olsa wrote:

On Fri, May 15, 2020 at 02:04:57PM +0800, Jin, Yao wrote:

SNIP


I think I get the root cause. That should be a serious bug in get_group_fd, 
access violation!

For a group mixed with system-wide event and per-core event and the group
leader is system-wide event, access violation will happen.

perf_evsel__alloc_fd allocates one FD member for system-wide event (only 
FD(evsel, 0, 0) is valid).

But for per core event, perf_evsel__alloc_fd allocates N FD members (N =
ncpus). For example, for ncpus is 8, FD(evsel, 0, 0) to FD(evsel, 7, 0) are
valid.

get_group_fd(struct evsel *evsel, int cpu, int thread)
{
  struct evsel *leader = evsel->leader;

  fd = FD(leader, cpu, thread);    /* access violation may happen here */
}

If leader is system-wide event, only the FD(leader, 0, 0) is valid.

When get_group_fd accesses FD(leader, 1, 0), access violation happens.

My fix is:

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 28683b0eb738..db05b8a1e1a8 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1440,6 +1440,9 @@ static int get_group_fd(struct evsel *evsel, int cpu, int 
thread)
  if (evsel__is_group_leader(evsel))
  return -1;

+   if (leader->core.system_wide && !evsel->core.system_wide)
+   return -2;


so this effectively stops grouping system_wide events with others,
and I think it's correct, how about events that differ in cpumask?



My understanding for the events that differ in cpumasks is, if the
leader's cpumask is not fully matched with the evsel's cpumask then we
stop the grouping. Is this understanding correct?

I have done some tests and get some conclusions:

1. If the group is mixed with core and uncore events, the system_wide checking 
can distinguish them.

2. If the group is mixed with core and uncore events and "-a" is
specified, the system_wide for core event is also false. So system_wide
checking can distinguish them too

3. In my test, the issue only occurs when we collect the metric which is
mixed with uncore event and core event, so maybe checking the
system_wide is OK.


should we perhaps ensure this before we call open? go throught all
groups and check they are on the same cpus?



The issue doesn't happen most of the time (only for the metric
consisting of an uncore event and a core event), so falling back to stop
grouping if the open call fails looks reasonable.

Thanks
Jin Yao


thanks,
jirka



+
  /*
   * Leader must be already processed/open,
   * if not it's a bug.
@@ -1665,6 +1668,11 @@ static int evsel__open_cpu(struct evsel *evsel, struct 
perf_cpu_map *cpus,
  pid = perf_thread_map__pid(threads, thread);

  group_fd = get_group_fd(evsel, cpu, thread);
+   if (group_fd == -2) {
+   errno = EINVAL;
+   err = -EINVAL;
+   goto out_close;
+   }
   retry_open:
  test_attr__ready();

It enables the perf_evlist__reset_weak_group. And in the second_pass (in
__run_perf_stat), the events will be opened successfully.

I have tested OK for this fix on cascadelakex.

Thanks
Jin Yao





Is this fix OK?

Another thing is, do you think if we need to rename
"evsel->core.system_wide" to "evsel->core.has_cpumask".

The "system_wide" may misleading.

evsel->core.system_wide = pmu ? pmu->is_uncore : false;

"pmu->is_uncore" is true if PMU has a "cpumask". But it's not just uncore
PMU which has cpumask. Some other PMUs, e.g. cstate_pkg, also have cpumask.
So for this case, "has_cpumask" should be better.


so those flags are checked in many places in the code so I don't
think it's wise to mess with them

what I meant before was that the cpumask could be different for
different events so even when both events are 'system_wide' the
leader 'fd' might not exist for the groupped events and vice versa

so maybe we should ensure that we are groupping events with same
cpu maps before we go for open, so the get_group_fd stays simple



Thanks for the comments. I'm preparing the patch according to this idea.



But I'm not sure if the change is OK for other case, e.g. PT, which also
uses "evsel->core.system_wide".


plz CC Adrian Hunter  on next patches
if you are touching this



I will not touch "evsel->core.system_wide" in the new patch.

Thanks
Jin Yao


thanks,
jirka



Re: [PATCH 2/2] soundwire: intel: transition to 3 steps initialization

2020-05-20 Thread Vinod Koul
On 21-05-20, 02:23, Liao, Bard wrote:
> > -Original Message-
> > From: Vinod Koul 
> > Sent: Wednesday, May 20, 2020 9:54 PM
> > To: Bard Liao 
> > Cc: alsa-de...@alsa-project.org; linux-kernel@vger.kernel.org; 
> > ti...@suse.de;
> > broo...@kernel.org; gre...@linuxfoundation.org; j...@cadence.com;
> > srinivas.kandaga...@linaro.org; rander.w...@linux.intel.com;
> > ranjani.sridha...@linux.intel.com; hui.w...@canonical.com; pierre-
> > louis.boss...@linux.intel.com; Kale, Sanyog R ;
> > Blauciak, Slawomir ; Lin, Mengdong
> > ; Liao, Bard 
> > Subject: Re: [PATCH 2/2] soundwire: intel: transition to 3 steps 
> > initialization
> > 
> > On 20-05-20, 03:19, Bard Liao wrote:
> > > From: Pierre-Louis Bossart 
> > >
> > > Rather than a plain-vanilla init/exit, this patch provides 3 steps in
> > > the initialization (ACPI scan, probe, startup) which makes it easier to
> > > detect platform support for SoundWire, allocate required resources as
> > > early as possible, and conversely help make the startup() callback
> > > lighter-weight with only hardware register setup.
> > 
> > Okay but can you add details in changelog on what each step would do?
> 
> Sure. Will do.
> 
> > 
> > > @@ -1134,25 +1142,15 @@ static int intel_probe(struct platform_device
> > *pdev)
> > >
> > >   intel_pdi_ch_update(sdw);
> > >
> > > - /* Acquire IRQ */
> > > - ret = request_threaded_irq(sdw->link_res->irq,
> > > -sdw_cdns_irq, sdw_cdns_thread,
> > > -IRQF_SHARED, KBUILD_MODNAME, 
> > >cdns);
> > 
> > This is removed here but not added anywhere else, do we have no irq
> > after this patch?
> 
> We use a single irq for all Intel Audio DSP events and it will
> be requested in the SOF driver.

And how will the irq be propagated to sdw/cdns drivers here?

-- 
~Vinod


Re: [RFC PATCH 2/2] init: Allow multi-line output of kernel command line

2020-05-20 Thread Sergey Senozhatsky
On (20/05/20 18:00), Andrew Morton wrote:
[..]
> I'm wondering if we shold add a kernel puts() (putsk()?  yuk) which can
> puts() a string of any length.
> 
> I'm counting around 150 instances of printk("%s", ...) and pr_foo("%s",
> ...) which could perhaps be converted, thus saving an argument.

Can you point me at some examples?

-ss


Re: [PATCH 2/2] kvm/x86: don't expose MSR_IA32_UMWAIT_CONTROL unconditionally

2020-05-20 Thread Xiaoyao Li

On 5/21/2020 5:05 AM, Paolo Bonzini wrote:

On 20/05/20 18:07, Maxim Levitsky wrote:

This msr is only available when the host supports WAITPKG feature.

This breaks a nested guest, if the L1 hypervisor is set to ignore
unknown msrs, because the only other safety check that the
kernel does is that it attempts to read the msr and
rejects it if it gets an exception.

Fixes: 6e3ba4abce KVM: vmx: Emulate MSR IA32_UMWAIT_CONTROL

Signed-off-by: Maxim Levitsky 
---
  arch/x86/kvm/x86.c | 4 
  1 file changed, 4 insertions(+)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index fe3a24fd6b263..9c507b32b1b77 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5314,6 +5314,10 @@ static void kvm_init_msr_list(void)
if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_EVENTSEL0 >=
min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp))
continue;
+   break;
+   case MSR_IA32_UMWAIT_CONTROL:
+   if (!kvm_cpu_cap_has(X86_FEATURE_WAITPKG))
+   continue;
default:
break;
}


The patch is correct, and matches what is done for the other entries of
msrs_to_save_all.  However, while looking at it I noticed that
X86_FEATURE_WAITPKG is actually never added, and that is because it was
also not added to the supported CPUID in commit e69e72faa3a0 ("KVM: x86:
Add support for user wait instructions", 2019-09-24), which was before
the kvm_cpu_cap mechanism was added.

So while at it you should also fix that.  The right way to do that is to
add a

 if (vmx_waitpkg_supported())
 kvm_cpu_cap_check_and_set(X86_FEATURE_WAITPKG);


+ Tao

I remember there is certainly some reason why we don't expose WAITPKG to 
guest by default.


Tao, please help clarify it.

Thanks,
-Xiaoyao



in vmx_set_cpu_caps.

Thanks,

Paolo





[PATCH] kbuild: doc: remove documentation about copying Module.symvers around

2020-05-20 Thread Masahiro Yamada
This is a left-over of commit 39808e451fdf ("kbuild: do not read
$(KBUILD_EXTMOD)/Module.symvers").

Kbuild no longer supports this way.

Signed-off-by: Masahiro Yamada 
---

 Documentation/kbuild/modules.rst | 12 
 1 file changed, 12 deletions(-)

diff --git a/Documentation/kbuild/modules.rst b/Documentation/kbuild/modules.rst
index e0b45a257f21..a45cccff467d 100644
--- a/Documentation/kbuild/modules.rst
+++ b/Documentation/kbuild/modules.rst
@@ -528,18 +528,6 @@ build.
will then do the expected and compile both modules with
full knowledge of symbols from either module.
 
-   Use an extra Module.symvers file
-   When an external module is built, a Module.symvers file
-   is generated containing all exported symbols which are
-   not defined in the kernel. To get access to symbols
-   from bar.ko, copy the Module.symvers file from the
-   compilation of bar.ko to the directory where foo.ko is
-   built. During the module build, kbuild will read the
-   Module.symvers file in the directory of the external
-   module, and when the build is finished, a new
-   Module.symvers file is created containing the sum of
-   all symbols defined and not part of the kernel.
-
Use "make" variable KBUILD_EXTRA_SYMBOLS
If it is impractical to add a top-level kbuild file,
you can assign a space separated list
-- 
2.25.1



Re: [RFC PATCH 2/2] init: Allow multi-line output of kernel command line

2020-05-20 Thread Sergey Senozhatsky
On (20/05/20 13:36), Joe Perches wrote:
> > We can split command line in a loop - memchr(pos, ' ') - and
> > pr_cont() parts of the command line. pr_cont() has overflow
> > control and it flushes cont buffer before it overflows, so
> > we should not lose anything.
> 
> It doesn't matter much here, but I believe
> there's an 8k max buffer for pr_cont output.
> 
> include/linux/printk.h:#define CONSOLE_EXT_LOG_MAX  8192

This is for extended payload - the key:value dictionaries
which device core appends to normal printk() messages. We
don't have that many consoles that handle extended output
(netcon and, maybe, a few more).

-ss


[PATCH v2] libata: Use per port sync for detach

2020-05-20 Thread Kai-Heng Feng
Commit 130f4caf145c ("libata: Ensure ata_port probe has completed before
detach") may cause system freeze during suspend.

Using async_synchronize_full() in PM callbacks is wrong, since async
callbacks that are already scheduled may wait for not-yet-scheduled
callbacks, causes a circular dependency.

Instead of using big hammer like async_synchronize_full(), use async
cookie to make sure port probe are synced, without affecting other
scheduled PM callbacks.

Fixes: 130f4caf145c ("libata: Ensure ata_port probe has completed before 
detach")
BugLink: https://bugs.launchpad.net/bugs/1867983
Suggested-by: John Garry 
Signed-off-by: Kai-Heng Feng 
---
v2:
 - Sync up to cookie + 1.
 - Squash the synchronization into the same loop.

 drivers/ata/libata-core.c | 9 -
 include/linux/libata.h| 3 +++
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index beca5f91bb4c..b6be84f2cecb 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -42,7 +42,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -5778,7 +5777,7 @@ int ata_host_register(struct ata_host *host, struct 
scsi_host_template *sht)
/* perform each probe asynchronously */
for (i = 0; i < host->n_ports; i++) {
struct ata_port *ap = host->ports[i];
-   async_schedule(async_port_probe, ap);
+   ap->cookie = async_schedule(async_port_probe, ap);
}
 
return 0;
@@ -5921,10 +5920,10 @@ void ata_host_detach(struct ata_host *host)
int i;
 
/* Ensure ata_port probe has completed */
-   async_synchronize_full();
-
-   for (i = 0; i < host->n_ports; i++)
+   for (i = 0; i < host->n_ports; i++) {
+   async_synchronize_cookie(host->ports[i]->cookie + 1);
ata_port_detach(host->ports[i]);
+   }
 
/* the host is dead now, dissociate ACPI */
ata_acpi_dissociate(host);
diff --git a/include/linux/libata.h b/include/linux/libata.h
index cffa4714bfa8..ae6dfc107ea8 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -22,6 +22,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /*
  * Define if arch has non-standard setup.  This is a _PCI_ standard
@@ -872,6 +873,8 @@ struct ata_port {
struct timer_list   fastdrain_timer;
unsigned long   fastdrain_cnt;
 
+   async_cookie_t  cookie;
+
int em_message_type;
void*private_data;
 
-- 
2.17.1



Re: [PATCH RFC] sched: Add a per-thread core scheduling interface(Internet mail)

2020-05-20 Thread 蒋彪



> On May 21, 2020, at 6:26 AM, Joel Fernandes (Google)  
> wrote:
> 
> Add a per-thread core scheduling interface which allows a thread to tag
> itself and enable core scheduling. Based on discussion at OSPM with
> maintainers, we propose a prctl(2) interface accepting values of 0 or 1.
> 1 - enable core scheduling for the task.
> 0 - disable core scheduling for the task.
> 
> Special cases:
> (1)
> The core-scheduling patchset contains a CGroup interface as well. In
> order for us to respect users of that interface, we avoid overriding the
> tag if a task was CGroup-tagged because the task becomes inconsistent
> with the CGroup tag. Instead return -EBUSY.
> 
> (2)
> If a task is prctl-tagged, allow the CGroup interface to override
> the task's tag.
> 
> ChromeOS will use core-scheduling to securely enable hyperthreading.
> This cuts down the keypress latency in Google docs from 150ms to 50ms
> while improving the camera streaming frame rate by ~3%.
Hi,
Are the performance improvements compared to the hyperthreading disabled 
scenario or not?
Could you help to explain how the keypress latency improvement comes with 
core-scheduling?

Thanks a lot.

Regards,
Jiang



Re: [RFC V2] mm/vmstat: Add events for PMD based THP migration without split

2020-05-20 Thread Anshuman Khandual



On 05/20/2020 12:45 PM, HORIGUCHI NAOYA(堀口 直也) wrote:
> On Mon, May 18, 2020 at 12:12:36PM +0530, Anshuman Khandual wrote:
>> This adds the following two new VM events which will help in validating PMD
>> based THP migration without split. Statistics reported through these events
>> will help in performance debugging.
>>
>> 1. THP_PMD_MIGRATION_SUCCESS
>> 2. THP_PMD_MIGRATION_FAILURE
>>
>> Cc: Naoya Horiguchi 
>> Cc: Zi Yan 
>> Cc: John Hubbard 
>> Cc: Andrew Morton 
>> Cc: linux...@kvack.org
>> Cc: linux-kernel@vger.kernel.org
>> Signed-off-by: Anshuman Khandual 
> 
> Hi Anshuman,

Hi Naoya,

> 
> I'm neutral on additional lines in /proc/vmstat. It's a classic (so widely
> used) but inflexible interface. Users disabling thp are not happy with many
> thp-related lines, but judging from the fact that we already have many

Right, for similar reason, I am not too keen on enabling these counters
without migration being enabled with ARCH_ENABLE_THP_MIGRATION.

> thp-related lines some users really need them. So I feel hard to decide to
> agree or disagree with additional lines.

Currently these are conditional on ARCH_ENABLE_THP_MIGRATION. So we are
not adding these new lines unless migration is available and enabled.

> 
> I think that tracepoints are the more flexible interfaces for monitoring,
> so I'm interested more in whether thp migration could be monitorable via
> tracepoint. Do you have any idea/plan on it?

Sure, we can add some trace points as well which can give more granular
details regarding THP migration mechanism itself e.g setting and removing
PMD migration entries etc probably with (vaddr, pmdp, pmd) details.

But we will still need /proc/vmstat entries that will be available right
away without requiring additional steps. This simplicity is essential for
folks to consider using these events more often.

Sure, will look into what trace points can be added for THP migration but
in a subsequent patch.

- Anshuman


Hi

2020-05-20 Thread Jerry Machel



Hi,

I write to inform you of a great business opportunity. My names is 
Jerry Machel Ivoirien Français, there is a business proposal i will like 
to discuss with you or your ORG. If it interests you please let me know 
and let's work it together.

Regards

Jerry Machel.


Re: [PATCH 09/29] kbuild: disallow multi-word in M= or KBUILD_EXTMOD

2020-05-20 Thread Masahiro Yamada
On Sun, May 17, 2020 at 9:33 PM David Laight  wrote:
>
> From: Masahiro Yamada
> > Sent: 17 May 2020 10:49
> > $(firstword ...) in scripts/Makefile.modpost was added by commit
> > 3f3fd3c05585 ("[PATCH] kbuild: allow multi-word $M in Makefile.modpost")
> > to build multiple external module directories.
> >
> > This feature has been broken for a while. Remove the bitrotten code, and
> > stop parsing if M or KBUILD_EXTMOD contains multiple words.
>
> ISTR that one of the kernel documentation files says that it is possible
> to build multiple modules together in order to avoid 'faffing' with
> exported symbol lists.
>
> So the docs need updating to match.


Do you remember which doc mentions it?



> David
>
> -
> Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 
> 1PT, UK
> Registration No: 1397386 (Wales)
>


-- 
Best Regards
Masahiro Yamada


[PATCH] ASoC: dt-bindings: simple-card: care missing address #address-cells

2020-05-20 Thread Kuninori Morimoto
From: Kuninori Morimoto 

The current simple-card will get the error below,
because it doesn't handle #address-cells in some parts.

DTC 
Documentation/devicetree/bindings/sound/simple-card.example.dt.yaml

Documentation/devicetree/bindings/sound/simple-card.example.dts:171.46-173.15: \
Warning (unit_address_vs_reg): 
/example-4/sound/simple-audio-card,cpu@0: \
node has a unit name, but no reg or ranges property

Documentation/devicetree/bindings/sound/simple-card.example.dts:175.37-177.15: \
Warning (unit_address_vs_reg): 
/example-4/sound/simple-audio-card,cpu@1: \
node has a unit name, but no reg or ranges property
...

This patch fixup this issue.

Signed-off-by: Kuninori Morimoto 
---
 .../bindings/sound/simple-card.yaml   | 25 ++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/sound/simple-card.yaml 
b/Documentation/devicetree/bindings/sound/simple-card.yaml
index cb2bb5fac0e1..6c4c2c6d6d3c 100644
--- a/Documentation/devicetree/bindings/sound/simple-card.yaml
+++ b/Documentation/devicetree/bindings/sound/simple-card.yaml
@@ -208,6 +208,11 @@ patternProperties:
   reg:
 maxItems: 1
 
+  "#address-cells":
+const: 1
+  "#size-cells":
+const: 0
+
   # common properties
   frame-master:
 $ref: "#/definitions/frame-master"
@@ -288,7 +293,6 @@ examples:
 
 #address-cells = <1>;
 #size-cells = <0>;
-
 simple-audio-card,dai-link@0 { /* I2S - HDMI */
 reg = <0>;
 format = "i2s";
@@ -392,11 +396,15 @@ examples:
 simple-audio-card,routing = "ak4642 Playback", "DAI0 Playback",
 "ak4642 Playback", "DAI1 Playback";
 
+#address-cells = <1>;
+#size-cells = <0>;
 dpcmcpu: simple-audio-card,cpu@0 {
+reg = <0>;
 sound-dai = <_sound 0>;
 };
 
 simple-audio-card,cpu@1 {
+reg = <1>;
 sound-dai = <_sound 1>;
 };
 
@@ -427,7 +435,12 @@ examples:
 "pcm3168a Playback", "DAI3 Playback",
 "pcm3168a Playback", "DAI4 Playback";
 
+#address-cells = <1>;
+#size-cells = <0>;
+
 simple-audio-card,dai-link@0 {
+reg = <0>;
+
 format = "left_j";
 bitclock-master = <>;
 frame-master = <>;
@@ -441,22 +454,30 @@ examples:
 };
 
 simple-audio-card,dai-link@1 {
+reg = <1>;
+
 format = "i2s";
 bitclock-master = <>;
 frame-master = <>;
 
 convert-channels = <8>; /* TDM Split */
 
+#address-cells = <1>;
+#size-cells = <0>;
 sndcpu1: cpu@0 {
+reg = <0>;
 sound-dai = <_sound 1>;
 };
 cpu@1 {
+reg = <1>;
 sound-dai = <_sound 2>;
 };
 cpu@2 {
+reg = <2>;
 sound-dai = <_sound 3>;
 };
 cpu@3 {
+reg = <3>;
 sound-dai = <_sound 4>;
 };
 codec {
@@ -468,6 +489,8 @@ examples:
 };
 
 simple-audio-card,dai-link@2 {
+reg = <2>;
+
 format = "i2s";
 bitclock-master = <>;
 frame-master = <>;
-- 
2.17.1



Re: XHCI vs PCM2903B/PCM2904 part 2

2020-05-20 Thread Rik van Riel
On Wed, 2020-05-20 at 16:34 -0400, Alan Stern wrote:
> On Wed, May 20, 2020 at 03:21:44PM -0400, Rik van Riel wrote:
> > 
> > Interesting. That makes me really curious why things are
> > getting stuck, now...
> 
> This could be a bug in xhci-hcd.  Perhaps the controller's endpoint 
> state needs to be updated after one of these errors occurs.  Mathias 
> will know all about that.

I am seeing something potentially interesting in the
giant trace. First the final enqueue/dequeue before
the babble error:

  -0 [005] d.s. 776367.638233: xhci_inc_enq: ISOC
33a6879e: enq 0x001014070420(0x00101407) deq
0x001014070360(0x00101407) segs 2 stream 0 free_trbs 497
bounce 196 cycle 1

The next reference to 0x001014070360 is the babble error,
and some info on the ISOC buffer itself:

  -0 [005] d.h. 776367.639187: xhci_handle_event:
EVENT: TRB 001014070360 status 'Babble Detected' len 196 slot 15 ep
9 type 'Transfer Event' flags e:C
  -0 [005] d.h. 776367.639195: xhci_handle_transfer:
ISOC: Buffer 000e2676f400 length 196 TD size 0 intr 0 type 'Isoch'
flags b:i:I:c:s:I:e:C

Immediately after the babble error, the next request is enqueued,
and the doorbell is rung:

  -0 [005] d.h. 776367.639196: xhci_inc_deq: ISOC 
33a6879e: enq 0x001014070420(0x00101407) deq 
0x001014070370(0x00101407) segs 2 stream 0 free_trbs 498 bounce 196 
cycle 1
  -0 [005] d.h. 776367.639197: xhci_urb_giveback: ep4in-isoc: 
urb 72126553 pipe 135040 slot 15 length 196/196 sgs 0/0 stream 0 flags 
0206
  -0 [005] d.h. 776367.639197: xhci_inc_deq: EVENT 
97f84b16: enq 0x0010170b5000(0x0010170b5000) deq 
0x0010170b5670(0x0010170b5000) segs 1 stream 0 free_trbs 254 bounce 0 
cycle 1
  -0 [005] ..s. 776367.639212: xhci_urb_enqueue: ep4in-isoc: 
urb 72126553 pipe 135040 slot 15 length 0/196 sgs 0/0 stream 0 flags 
0206
  -0 [005] d.s. 776367.639214: xhci_queue_trb: ISOC: Buffer 
000e2676f400 length 196 TD size 0 intr 0 type 'Isoch' flags b:i:I:c:s:I:e:c
  -0 [005] d.s. 776367.639214: xhci_inc_enq: ISOC 
33a6879e: enq 0x001014070430(0x00101407) deq 
0x001014070370(0x00101407) segs 2 stream 0 free_trbs 497 bounce 196 
cycle 1
  -0 [005] d.s. 776367.639215: xhci_ring_ep_doorbell: Ring 
doorbell for Slot 15 ep4in

However, after that point, no more xhci_handle_transfer: ISOC
lines are seen in the log. The doorbell line above is the last
line in the log for ep4in.

Is this some area where USB3 and USB2 behave differently?

dmesg: 
https://drive.google.com/open?id=1S2Qc8lroqA5-RMukuLBLWFGx10vEjG-i

usb trace, as requested by Mathias: 
https://drive.google.com/open?id=1cbLcOnAtQRW0Chgak6PNC0l4yJv__4uO

-- 
All Rights Reversed.


signature.asc
Description: This is a digitally signed message part


Re: Re: [PATCH] media: staging: tegra-vde: fix runtime pm imbalance on error

2020-05-20 Thread dinghao . liu
Hi, Dan,

I agree the best solution is to fix __pm_runtime_resume(). But there are also 
many cases that assume pm_runtime_get_sync() will change PM usage 
counter on error. According to my static analysis results, the number of these 
"right" cases is larger. Adjusting __pm_runtime_resume() directly will 
introduce 
more new bugs. Therefore I think we should resolve the "bug" cases individually.

I think that Dmitry's patch is more reasonable than mine. 

Dinghao

Dan Carpenter dan.carpen...@oracle.com写道:
> On Wed, May 20, 2020 at 01:15:44PM +0300, Dmitry Osipenko wrote:
> > 20.05.2020 12:51, Dinghao Liu пишет:
> > > pm_runtime_get_sync() increments the runtime PM usage counter even
> > > it returns an error code. Thus a pairing decrement is needed on
> > > the error handling path to keep the counter balanced.
> > > 
> > > Signed-off-by: Dinghao Liu 
> > > ---
> > >  drivers/staging/media/tegra-vde/vde.c | 2 +-
> > >  1 file changed, 1 insertion(+), 1 deletion(-)
> > > 
> > > diff --git a/drivers/staging/media/tegra-vde/vde.c 
> > > b/drivers/staging/media/tegra-vde/vde.c
> > > index d3e63512a765..dd134a3a15c7 100644
> > > --- a/drivers/staging/media/tegra-vde/vde.c
> > > +++ b/drivers/staging/media/tegra-vde/vde.c
> > > @@ -777,7 +777,7 @@ static int tegra_vde_ioctl_decode_h264(struct 
> > > tegra_vde *vde,
> > >  
> > >   ret = pm_runtime_get_sync(dev);
> > >   if (ret < 0)
> > > - goto unlock;
> > > + goto put_runtime_pm;
> > >  
> > >   /*
> > >* We rely on the VDE registers reset value, otherwise VDE
> > > 
> > 
> > Hello Dinghao,
> > 
> > Thank you for the patch. I sent out a similar patch a week ago [1].
> > 
> > [1]
> > https://patchwork.ozlabs.org/project/linux-tegra/patch/20200514210847.9269-2-dig...@gmail.com/
> > 
> > The pm_runtime_put_noidle() should have the same effect as yours
> > variant, although my variant won't change the last_busy RPM time, which
> > I think is a bit more appropriate behavior.
> 
> I don't think either patch is correct.  The right thing to do is to fix
> __pm_runtime_resume() so it doesn't leak a reference count on error.
> 
> The problem is that a lot of functions don't check the return so
> possibly we are relying on that behavior.  We may need to introduce a
> new function which cleans up properly instead of leaking reference
> counts?
> 
> Also it's not documented that pm_runtime_get_sync() returns 1 sometimes
> on success so it leads to a few bugs.
> 
> drivers/gpu/drm/stm/ltdc.c: ret = pm_runtime_get_sync(ddev->dev);
> drivers/gpu/drm/stm/ltdc.c- if (ret) {
> --
> drivers/gpu/drm/stm/ltdc.c: ret = pm_runtime_get_sync(ddev->dev);
> drivers/gpu/drm/stm/ltdc.c- if (ret) {
> 
> drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_pm.c:  ret = 
> pm_runtime_get_sync(pm->dev);
> drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_pm.c-  if (ret)
> 
> drivers/media/platform/ti-vpe/cal.c:ret = pm_runtime_get_sync(>dev);
> drivers/media/platform/ti-vpe/cal.c-if (ret)
> 
> drivers/mfd/arizona-core.c: ret = 
> pm_runtime_get_sync(arizona->dev);
> drivers/mfd/arizona-core.c- if (ret != 0)
> 
> drivers/remoteproc/qcom_q6v5_adsp.c:ret = pm_runtime_get_sync(adsp->dev);
> drivers/remoteproc/qcom_q6v5_adsp.c-if (ret)
> 
> drivers/spi/spi-img-spfi.c: ret = pm_runtime_get_sync(dev);
> drivers/spi/spi-img-spfi.c- if (ret)
> 
> drivers/usb/dwc3/dwc3-pci.c:ret = pm_runtime_get_sync(>dev);
> drivers/usb/dwc3/dwc3-pci.c-if (ret)
> 
> drivers/watchdog/rti_wdt.c: ret = pm_runtime_get_sync(dev);
> drivers/watchdog/rti_wdt.c- if (ret) {
> 
> regards,
> dan carpenter
> 
> diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
> index 99c7da112c95..e280991a977d 100644
> --- a/drivers/base/power/runtime.c
> +++ b/drivers/base/power/runtime.c
> @@ -1082,6 +1082,9 @@ int __pm_runtime_resume(struct device *dev, int 
> rpmflags)
>   retval = rpm_resume(dev, rpmflags);
>   spin_unlock_irqrestore(>power.lock, flags);
>  
> + if (retval < 0 && rpmflags & RPM_GET_PUT)
> + atomic_dec(>power.usage_count);
> +
>   return retval;
>  }
>  EXPORT_SYMBOL_GPL(__pm_runtime_resume);


[PATCH] init/do_mounts: fix a coding style error

2020-05-20 Thread zhouchuangao
Fix code style errors reported by scripts/checkpatch.pl.

Signed-off-by: zhouchuangao 
---
 init/do_mounts.c | 52 ++--
 1 file changed, 26 insertions(+), 26 deletions(-)

diff --git a/init/do_mounts.c b/init/do_mounts.c
index 29d326b..2f8bd41 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -249,7 +249,7 @@ dev_t name_to_dev_t(const char *name)
 #endif
 
if (strncmp(name, "/dev/", 5) != 0) {
-   unsigned maj, min, offset;
+   unsigned int maj, min, offset;
char dummy;
 
if ((sscanf(name, "%u:%u%c", , , ) == 2) ||
@@ -412,8 +412,7 @@ static int __init do_mount_root(const char *name, const 
char *fs,
ksys_chdir("/root");
s = current->fs->pwd.dentry->d_sb;
ROOT_DEV = s->s_dev;
-   printk(KERN_INFO
-  "VFS: Mounted root (%s filesystem)%s on device %u:%u.\n",
+   pr_info("VFS: Mounted root (%s filesystem)%s on device %u:%u.\n",
   s->s_type->name,
   sb_rdonly(s) ? " readonly" : "",
   MAJOR(ROOT_DEV), MINOR(ROOT_DEV));
@@ -437,25 +436,26 @@ void __init mount_block_root(char *name, int flags)
 retry:
for (p = fs_names; *p; p += strlen(p)+1) {
int err = do_mount_root(name, p, flags, root_mount_data);
+
switch (err) {
-   case 0:
-   goto out;
-   case -EACCES:
-   case -EINVAL:
-   continue;
+   case 0:
+   goto out;
+   case -EACCES:
+   case -EINVAL:
+   continue;
}
-   /*
+   /*
 * Allow the user to distinguish between failed sys_open
 * and bad superblock on root device.
 * and give them a list of the available devices
 */
-   printk("VFS: Cannot open root device \"%s\" or %s: error %d\n",
+   pr_info("VFS: Cannot open root device \"%s\" or %s: error %d\n",
root_device_name, b, err);
-   printk("Please append a correct \"root=\" boot option; here are 
the available partitions:\n");
+   pr_info("Please append a correct \"root=\" boot option; here 
are the available partitions:\n");
 
printk_all_partitions();
 #ifdef CONFIG_DEBUG_BLOCK_EXT_DEVT
-   printk("DEBUG_BLOCK_EXT_DEVT is enabled, you need to specify "
+   pr_info("DEBUG_BLOCK_EXT_DEVT is enabled, you need to specify "
   "explicit textual name for \"root=\" boot option.\n");
 #endif
panic("VFS: Unable to mount root fs on %s", b);
@@ -465,17 +465,17 @@ void __init mount_block_root(char *name, int flags)
goto retry;
}
 
-   printk("List of all partitions:\n");
+   pr_info("List of all partitions:\n");
printk_all_partitions();
-   printk("No filesystem could mount root, tried: ");
+   pr_info("No filesystem could mount root, tried: ");
for (p = fs_names; *p; p += strlen(p)+1)
-   printk(" %s", p);
-   printk("\n");
+   pr_info(" %s", p);
+   pr_info("\n");
panic("VFS: Unable to mount root fs on %s", b);
 out:
put_page(page);
 }
- 
+
 #ifdef CONFIG_ROOT_NFS
 
 #define NFSROOT_TIMEOUT_MIN5
@@ -560,6 +560,7 @@ void __init change_floppy(char *fmt, ...)
char c;
int fd;
va_list args;
+
va_start(args, fmt);
vsprintf(buf, fmt, args);
va_end(args);
@@ -568,7 +569,7 @@ void __init change_floppy(char *fmt, ...)
ksys_ioctl(fd, FDEJECT, 0);
ksys_close(fd);
}
-   printk(KERN_NOTICE "VFS: Insert %s and press ENTER\n", buf);
+   pr_notice("VFS: Insert %s and press ENTER\n", buf);
fd = ksys_open("/dev/console", O_RDWR, 0);
if (fd >= 0) {
ksys_ioctl(fd, TCGETS, (long));
@@ -585,27 +586,27 @@ void __init change_floppy(char *fmt, ...)
 void __init mount_root(void)
 {
 #ifdef CONFIG_ROOT_NFS
-   if (ROOT_DEV == Root_NFS) {
+   if (Root_NFS == ROOT_DEV) {
if (mount_nfs_root())
return;
 
-   printk(KERN_ERR "VFS: Unable to mount root fs via NFS, trying 
floppy.\n");
+   pr_err("VFS: Unable to mount root fs via NFS, trying 
floppy.\n");
ROOT_DEV = Root_FD0;
}
 #endif
 #ifdef CONFIG_CIFS_ROOT
-   if (ROOT_DEV == Root_CIFS) {
+   if (Root_CIFS == ROOT_DEV) {
if (mount_cifs_root())
return;
 
-   printk(KERN_ERR "VFS: Unable to mount root fs via SMB, trying 
floppy.\n");
+   pr_err("VFS: Unable to mount root fs via SMB, trying 
floppy.\n");
ROOT_DEV = Root_FD0;
}
 #endif
 #ifdef 

Re: [PATCH] arm64: dts: qcom: sc7180: Move mss node to the right place

2020-05-20 Thread Sibi Sankar

On 2020-05-21 06:33, Stephen Boyd wrote:
The modem node has an address of 408 and thus should come after 
tlmm

and before gpu. Move the node to the right place to maintain proper
address sort order.

Cc: Evan Green 
Cc: Sibi Sankar 
Fixes: e14a15eba89a ("arm64: dts: qcom: sc7180: Add Q6V5 MSS node")
Signed-off-by: Stephen Boyd 


Reviewed-by: Sibi Sankar 


---
 arch/arm64/boot/dts/qcom/sc7180.dtsi | 102 +--
 1 file changed, 51 insertions(+), 51 deletions(-)

diff --git a/arch/arm64/boot/dts/qcom/sc7180.dtsi
b/arch/arm64/boot/dts/qcom/sc7180.dtsi
index 6b12c60c37fb..1027ef70f8db 100644
--- a/arch/arm64/boot/dts/qcom/sc7180.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc7180.dtsi
@@ -1459,6 +1459,57 @@ pinconf-sd-cd {
};
};

+   remoteproc_mpss: remoteproc@408 {
+   compatible = "qcom,sc7180-mpss-pas";
+   reg = <0 0x0408 0 0x4040>, <0 0x0418 0 0x48>;
+   reg-names = "qdsp6", "rmb";
+
+   interrupts-extended = < GIC_SPI 266 
IRQ_TYPE_EDGE_RISING>,
+ <_smp2p_in 0 
IRQ_TYPE_EDGE_RISING>,
+ <_smp2p_in 1 
IRQ_TYPE_EDGE_RISING>,
+ <_smp2p_in 2 
IRQ_TYPE_EDGE_RISING>,
+ <_smp2p_in 3 
IRQ_TYPE_EDGE_RISING>,
+ <_smp2p_in 7 
IRQ_TYPE_EDGE_RISING>;
+   interrupt-names = "wdog", "fatal", "ready", "handover",
+ "stop-ack", "shutdown-ack";
+
+   clocks = < GCC_MSS_CFG_AHB_CLK>,
+< GCC_MSS_Q6_MEMNOC_AXI_CLK>,
+< GCC_MSS_NAV_AXI_CLK>,
+< GCC_MSS_SNOC_AXI_CLK>,
+< GCC_MSS_MFAB_AXIS_CLK>,
+< RPMH_CXO_CLK>;
+   clock-names = "iface", "bus", "nav", "snoc_axi",
+ "mnoc_axi", "xo";
+
+   power-domains = <_qmp AOSS_QMP_LS_MODEM>,
+   < SC7180_CX>,
+   < SC7180_MX>,
+   < SC7180_MSS>;
+   power-domain-names = "load_state", "cx", "mx", "mss";
+
+   memory-region = <_mem>;
+
+   qcom,smem-states = <_smp2p_out 0>;
+   qcom,smem-state-names = "stop";
+
+   resets = <_reset AOSS_CC_MSS_RESTART>,
+<_reset PDC_MODEM_SYNC_RESET>;
+   reset-names = "mss_restart", "pdc_reset";
+
+   qcom,halt-regs = <_mutex_regs 0x23000 0x25000 
0x24000>;
+   qcom,spare-regs = <_regs 0xb3e4>;
+
+   status = "disabled";
+
+   glink-edge {
+   interrupts = ;
+   label = "modem";
+   qcom,remote-pid = <1>;
+   mboxes = <_shared 12>;
+   };
+   };
+
gpu: gpu@500 {
compatible = "qcom,adreno-618.0", "qcom,adreno";
#stream-id-cells = <16>;
@@ -2054,57 +2105,6 @@ apss_merge_funnel_in: endpoint {
};
};

-   remoteproc_mpss: remoteproc@408 {
-   compatible = "qcom,sc7180-mpss-pas";
-   reg = <0 0x0408 0 0x4040>, <0 0x0418 0 0x48>;
-   reg-names = "qdsp6", "rmb";
-
-   interrupts-extended = < GIC_SPI 266 
IRQ_TYPE_EDGE_RISING>,
- <_smp2p_in 0 
IRQ_TYPE_EDGE_RISING>,
- <_smp2p_in 1 
IRQ_TYPE_EDGE_RISING>,
- <_smp2p_in 2 
IRQ_TYPE_EDGE_RISING>,
- <_smp2p_in 3 
IRQ_TYPE_EDGE_RISING>,
- <_smp2p_in 7 
IRQ_TYPE_EDGE_RISING>;
-   interrupt-names = "wdog", "fatal", "ready", "handover",
- "stop-ack", "shutdown-ack";
-
-   clocks = < GCC_MSS_CFG_AHB_CLK>,
-< GCC_MSS_Q6_MEMNOC_AXI_CLK>,
-< GCC_MSS_NAV_AXI_CLK>,
-< GCC_MSS_SNOC_AXI_CLK>,
-< GCC_MSS_MFAB_AXIS_CLK>,
-< RPMH_CXO_CLK>;
-   clock-names = "iface", "bus", "nav", "snoc_axi",
- "mnoc_axi", "xo";
-
-   power-domains = <_qmp 

[PATCH v5 3/4] mm/memory.c: Add memory read privilege on page fault handling

2020-05-20 Thread Bibo Mao
Here add pte_sw_mkyoung function to make page readable on MIPS
platform during page fault handling. This patch improves page
fault latency about 10% on my MIPS machine with lmbench
lat_pagefault case.

It is noop function on other arches, there is no negative
influence on those architectures.

Signed-off-by: Bibo Mao 
---
 arch/mips/include/asm/pgtable.h |  2 ++
 include/asm-generic/pgtable.h   | 16 
 mm/memory.c |  3 +++
 3 files changed, 21 insertions(+)

diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index 5f610ec..9cd811e 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -414,6 +414,8 @@ static inline pte_t pte_mkyoung(pte_t pte)
return pte;
 }
 
+#define pte_sw_mkyoung pte_mkyoung
+
 #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
 static inline int pte_huge(pte_t pte)  { return pte_val(pte) & _PAGE_HUGE; }
 
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 329b8c8..7dcfa30 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -227,6 +227,22 @@ static inline void ptep_set_wrprotect(struct mm_struct 
*mm, unsigned long addres
 }
 #endif
 
+/*
+ * On some architectures hardware does not set page access bit when accessing
+ * memory page, it is responsibilty of software setting this bit. It brings
+ * out extra page fault penalty to track page access bit. For optimization page
+ * access bit can be set during all page fault flow on these arches.
+ * To be differentiate with macro pte_mkyoung, this macro is used on platforms
+ * where software maintains page access bit.
+ */
+#ifndef pte_sw_mkyoung
+static inline pte_t pte_sw_mkyoung(pte_t pte)
+{
+   return pte;
+}
+#define pte_sw_mkyoung pte_sw_mkyoung
+#endif
+
 #ifndef pte_savedwrite
 #define pte_savedwrite pte_write
 #endif
diff --git a/mm/memory.c b/mm/memory.c
index 9e2be4a..33d3b4c 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2704,6 +2704,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
}
flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte));
entry = mk_pte(new_page, vma->vm_page_prot);
+   entry = pte_sw_mkyoung(entry);
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
/*
 * Clear the pte entry and flush it first, before updating the
@@ -3378,6 +3379,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
__SetPageUptodate(page);
 
entry = mk_pte(page, vma->vm_page_prot);
+   entry = pte_sw_mkyoung(entry);
if (vma->vm_flags & VM_WRITE)
entry = pte_mkwrite(pte_mkdirty(entry));
 
@@ -3660,6 +3662,7 @@ vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct 
mem_cgroup *memcg,
 
flush_icache_page(vma, page);
entry = mk_pte(page, vma->vm_page_prot);
+   entry = pte_sw_mkyoung(entry);
if (write)
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
/* copy-on-write page */
-- 
1.8.3.1



[PATCH v5 1/4] MIPS: Do not flush tlb page when updating PTE entry

2020-05-20 Thread Bibo Mao
It is not necessary to flush tlb page on all CPUs if suitable PTE
entry exists already during page fault handling, just updating
TLB is fine.

Here redefine flush_tlb_fix_spurious_fault as empty on MIPS system.
V5:
- Define update_mmu_cache function specified on MIPS platform, and
  add page fault smp-race stats info
V4:
- add pte_sw_mkyoung function to implement readable privilege, and
  this function is  only in effect on MIPS system.
- add page valid bit judgement in function pte_modify
V3:
- add detailed changelog, modify typo issue in patch V2
v2:
- split flush_tlb_fix_spurious_fault and tlb update into two patches
- comments typo modification
- separate tlb update and add pte readable privilege into two patches

Signed-off-by: Bibo Mao 
---
 arch/mips/include/asm/pgtable.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index 9b01d2d..0d625c2 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -478,6 +478,8 @@ static inline pgprot_t pgprot_writecombine(pgprot_t _prot)
return __pgprot(prot);
 }
 
+#define flush_tlb_fix_spurious_fault(vma, address) do { } while (0)
+
 /*
  * Conversion functions: convert a page and protection to a page entry,
  * and a page entry and page directory to the page they refer to.
-- 
1.8.3.1



Re: [PATCH v3 4/4] PCI: cadence: Use "dma-ranges" instead of "cdns,no-bar-match-nbits" property

2020-05-20 Thread Kishon Vijay Abraham I
Hi Rob,

On 5/19/2020 10:41 PM, Rob Herring wrote:
> On Fri, May 8, 2020 at 7:07 AM Kishon Vijay Abraham I  wrote:
>>
>> Cadence PCIe core driver (host mode) uses "cdns,no-bar-match-nbits"
>> property to configure the number of bits passed through from PCIe
>> address to internal address in Inbound Address Translation register.
>> This only used the NO MATCH BAR.
>>
>> However standard PCI dt-binding already defines "dma-ranges" to
>> describe the address ranges accessible by PCIe controller. Add support
>> in Cadence PCIe host driver to parse dma-ranges and configure the
>> inbound regions for BAR0, BAR1 and NO MATCH BAR. Cadence IP specifies
>> maximum size for BAR0 as 256GB, maximum size for BAR1 as 2 GB, so if
>> the dma-ranges specifies a size larger than the maximum allowed, the
>> driver will split and configure the BARs.
> 
> Would be useful to know what your dma-ranges contains now.
> 
> 
>> Legacy device tree binding compatibility is maintained by retaining
>> support for "cdns,no-bar-match-nbits".
>>
>> Signed-off-by: Kishon Vijay Abraham I 
>> ---
>>  .../controller/cadence/pcie-cadence-host.c| 141 --
>>  drivers/pci/controller/cadence/pcie-cadence.h |  17 ++-
>>  2 files changed, 141 insertions(+), 17 deletions(-)
>>
>> diff --git a/drivers/pci/controller/cadence/pcie-cadence-host.c 
>> b/drivers/pci/controller/cadence/pcie-cadence-host.c
>> index 6ecebb79057a..2485ecd8434d 100644
>> --- a/drivers/pci/controller/cadence/pcie-cadence-host.c
>> +++ b/drivers/pci/controller/cadence/pcie-cadence-host.c
>> @@ -11,6 +11,12 @@
>>
>>  #include "pcie-cadence.h"
>>
>> +static u64 cdns_rp_bar_max_size[] = {
>> +   [RP_BAR0] = _ULL(128 * SZ_2G),
>> +   [RP_BAR1] = SZ_2G,
>> +   [RP_NO_BAR] = SZ_64T,
>> +};
>> +
>>  void __iomem *cdns_pci_map_bus(struct pci_bus *bus, unsigned int devfn,
>>int where)
>>  {
>> @@ -106,6 +112,117 @@ static int cdns_pcie_host_init_root_port(struct 
>> cdns_pcie_rc *rc)
>> return 0;
>>  }
>>
>> +static void cdns_pcie_host_bar_ib_config(struct cdns_pcie_rc *rc,
>> +enum cdns_pcie_rp_bar bar,
>> +u64 cpu_addr, u32 aperture)
>> +{
>> +   struct cdns_pcie *pcie = >pcie;
>> +   u32 addr0, addr1;
>> +
>> +   addr0 = CDNS_PCIE_AT_IB_RP_BAR_ADDR0_NBITS(aperture) |
>> +   (lower_32_bits(cpu_addr) & GENMASK(31, 8));
>> +   addr1 = upper_32_bits(cpu_addr);
>> +   cdns_pcie_writel(pcie, CDNS_PCIE_AT_IB_RP_BAR_ADDR0(bar), addr0);
>> +   cdns_pcie_writel(pcie, CDNS_PCIE_AT_IB_RP_BAR_ADDR1(bar), addr1);
>> +}
>> +
>> +static int cdns_pcie_host_bar_config(struct cdns_pcie_rc *rc,
>> +struct resource_entry *entry,
>> +enum cdns_pcie_rp_bar *index)
>> +{
>> +   u64 cpu_addr, pci_addr, size, winsize;
>> +   struct cdns_pcie *pcie = >pcie;
>> +   struct device *dev = pcie->dev;
>> +   enum cdns_pcie_rp_bar bar;
>> +   unsigned long flags;
>> +   u32 aperture;
>> +   u32 value;
>> +
>> +   cpu_addr = entry->res->start;
>> +   flags = entry->res->flags;
>> +   pci_addr = entry->res->start - entry->offset;
>> +   size = resource_size(entry->res);
>> +   bar = *index;
>> +
>> +   if (entry->offset) {
>> +   dev_err(dev, "Cannot map PCI addr: %llx to CPU addr: %llx\n",
>> +   pci_addr, cpu_addr);
> 
> Would be a bit more clear to say PCI addr must equal CPU addr.
> 
>> +   return -EINVAL;
>> +   }
>> +
>> +   value = cdns_pcie_readl(pcie, CDNS_PCIE_LM_RC_BAR_CFG);
>> +   while (size > 0) {
>> +   if (bar > RP_NO_BAR) {
>> +   dev_err(dev, "Failed to map inbound regions!\n");
>> +   return -EINVAL;
>> +   }
>> +
>> +   winsize = size;
>> +   if (size > cdns_rp_bar_max_size[bar])
>> +   winsize = cdns_rp_bar_max_size[bar];
>> +
>> +   aperture = ilog2(winsize);
>> +
>> +   cdns_pcie_host_bar_ib_config(rc, bar, cpu_addr, aperture);
>> +
>> +   if (bar == RP_NO_BAR)
>> +   break;
>> +
>> +   if (winsize + cpu_addr >= SZ_4G) {
>> +   if (!(flags & IORESOURCE_PREFETCH))
>> +   value |= LM_RC_BAR_CFG_CTRL_MEM_64BITS(bar);
>> +   value |= LM_RC_BAR_CFG_CTRL_PREF_MEM_64BITS(bar);
>> +   } else {
>> +   if (!(flags & IORESOURCE_PREFETCH))
>> +   value |= LM_RC_BAR_CFG_CTRL_MEM_32BITS(bar);
>> +   value |= LM_RC_BAR_CFG_CTRL_PREF_MEM_32BITS(bar);
>> +   }
>> +
>> +   value |= LM_RC_BAR_CFG_APERTURE(bar, aperture);
>> +
>> +   size -= winsize;
>> +   cpu_addr += winsize;
>> + 

Re: [tip: locking/kcsan] READ_ONCE: Use data_race() to avoid KCSAN instrumentation

2020-05-20 Thread Nathan Chancellor
On Thu, May 21, 2020 at 12:17:12AM +0200, Borislav Petkov wrote:
> Hi,
> 
> On Tue, May 12, 2020 at 02:36:53PM -, tip-bot2 for Will Deacon wrote:
> > The following commit has been merged into the locking/kcsan branch of tip:
> > 
> > Commit-ID: cdd28ad2d8110099e43527e96d059c5639809680
> > Gitweb:
> > https://git.kernel.org/tip/cdd28ad2d8110099e43527e96d059c5639809680
> > Author:Will Deacon 
> > AuthorDate:Mon, 11 May 2020 21:41:49 +01:00
> > Committer: Thomas Gleixner 
> > CommitterDate: Tue, 12 May 2020 11:04:17 +02:00
> > 
> > READ_ONCE: Use data_race() to avoid KCSAN instrumentation
> > 
> > Rather then open-code the disabling/enabling of KCSAN across the guts of
> > {READ,WRITE}_ONCE(), defer to the data_race() macro instead.
> > 
> > Signed-off-by: Will Deacon 
> > Signed-off-by: Thomas Gleixner 
> > Acked-by: Peter Zijlstra (Intel) 
> > Cc: Marco Elver 
> > Link: https://lkml.kernel.org/r/20200511204150.27858-18-w...@kernel.org
> 
> so this commit causes a kernel build slowdown depending on the .config
> of between 50% and over 100%. I just bisected locking/kcsan and got
> 
> NOT_OK:   cdd28ad2d811 READ_ONCE: Use data_race() to avoid KCSAN 
> instrumentation
> OK:   88f1be32068d kcsan: Rework data_race() so that it can be used by 
> READ_ONCE()
> 
> with a simple:
> 
> $ git clean -dqfx && mk defconfig
> $ time make -j
> 
> I'm not even booting the kernels - simply checking out the above commits
> and building the target kernels. I.e., something in that commit is
> making gcc go nuts in the compilation phases.
> 
> -- 
> Regards/Gruss,
> Boris.
> 
> https://people.kernel.org/tglx/notes-about-netiquette

For what it's worth, I also noticed the same thing with clang. I only
verified the issue in one of my first build targets, an arm defconfig
build, which regressed from 2.5 minutes to 10+ minutes.

More details available on our issue tracker (Nick did some more
profiling on other configs with both clang and gcc):

https://github.com/ClangBuiltLinux/linux/issues/1032

More than happy to do further triage as time permits. I do note Marco's
message about the upcoming series to eliminate this but it would be nice
if this did not regress in the meantime.

Cheers,
Nathan


[PATCH v5 2/4] mm/memory.c: Update local TLB if PTE entry exists

2020-05-20 Thread Bibo Mao
If two threads concurrently fault at the same address, the thread that
won the race updates the PTE and its local TLB. For now, the other
thread gives up, simply does nothing, and continues.

It could happen that this second thread triggers another fault, whereby
it only updates its local TLB while handling the fault. Instead of
triggering another fault, let's directly update the local TLB of the
second thread.

It is only useful to architectures where software can update TLB, it may
bring out some negative effect if update_mmu_cache is used for other
purpose also. It seldom happens where multiple threads access the same
page at the same time, so the negative effect is limited on other arches.

With specjvm2008 workload, smp-race pgfault counts is about 3% to 4%
of the total pgfault counts by watching /proc/vmstats information

Signed-off-by: Bibo Mao 
---
 arch/mips/include/asm/pgtable.h | 20 
 mm/memory.c | 27 +++
 2 files changed, 39 insertions(+), 8 deletions(-)

diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index 0d625c2..5f610ec 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -480,6 +480,26 @@ static inline pgprot_t pgprot_writecombine(pgprot_t _prot)
 
 #define flush_tlb_fix_spurious_fault(vma, address) do { } while (0)
 
+#define __HAVE_ARCH_PTE_SAME
+static inline int pte_same(pte_t pte_a, pte_t pte_b)
+{
+   return pte_val(pte_a) == pte_val(pte_b);
+}
+
+#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+static inline int ptep_set_access_flags(struct vm_area_struct *vma,
+   unsigned long address, pte_t *ptep,
+   pte_t entry, int dirty)
+{
+   if (!pte_same(*ptep, entry))
+   set_pte_at(vma->vm_mm, address, ptep, entry);
+   /*
+* update_mmu_cache will unconditionally execute, handling both
+* the case that the PTE changed and the spurious fault case.
+*/
+   return true;
+}
+
 /*
  * Conversion functions: convert a page and protection to a page entry,
  * and a page entry and page directory to the page they refer to.
diff --git a/mm/memory.c b/mm/memory.c
index f703fe8..9e2be4a 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2436,10 +2436,9 @@ static inline bool cow_user_page(struct page *dst, 
struct page *src,
if (!likely(pte_same(*vmf->pte, vmf->orig_pte))) {
/*
 * Other thread has already handled the fault
-* and we don't need to do anything. If it's
-* not the case, the fault will be triggered
-* again on the same address.
+* and update local tlb only
 */
+   update_mmu_cache(vma, addr, vmf->pte);
ret = false;
goto pte_unlock;
}
@@ -2463,7 +2462,8 @@ static inline bool cow_user_page(struct page *dst, struct 
page *src,
vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, >ptl);
locked = true;
if (!likely(pte_same(*vmf->pte, vmf->orig_pte))) {
-   /* The PTE changed under us. Retry page fault. */
+   /* The PTE changed under us, update local tlb */
+   update_mmu_cache(vma, addr, vmf->pte);
ret = false;
goto pte_unlock;
}
@@ -2752,6 +2752,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
new_page = old_page;
page_copied = 1;
} else {
+   update_mmu_cache(vma, vmf->address, vmf->pte);
mem_cgroup_cancel_charge(new_page, memcg, false);
}
 
@@ -2812,6 +2813,7 @@ vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf)
 * pte_offset_map_lock.
 */
if (!pte_same(*vmf->pte, vmf->orig_pte)) {
+   update_mmu_cache(vmf->vma, vmf->address, vmf->pte);
pte_unmap_unlock(vmf->pte, vmf->ptl);
return VM_FAULT_NOPAGE;
}
@@ -2936,6 +2938,7 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf)
vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
vmf->address, >ptl);
if (!pte_same(*vmf->pte, vmf->orig_pte)) {
+   update_mmu_cache(vma, vmf->address, vmf->pte);
unlock_page(vmf->page);
pte_unmap_unlock(vmf->pte, vmf->ptl);
put_page(vmf->page);
@@ -3341,8 +3344,10 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
vma->vm_page_prot));
vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,

[PATCH v5 4/4] MIPS: mm: add page valid judgement in function pte_modify

2020-05-20 Thread Bibo Mao
If original PTE has _PAGE_ACCESSED bit set, and new pte has no
_PAGE_NO_READ bit set, we can add _PAGE_SILENT_READ bit to enable
page valid bit.

Signed-off-by: Bibo Mao 
---
 arch/mips/include/asm/pgtable.h | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index 9cd811e..ef26552 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -529,8 +529,11 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 #else
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 {
-   return __pte((pte_val(pte) & _PAGE_CHG_MASK) |
-(pgprot_val(newprot) & ~_PAGE_CHG_MASK));
+   pte_val(pte) &= _PAGE_CHG_MASK;
+   pte_val(pte) |= pgprot_val(newprot) & ~_PAGE_CHG_MASK;
+   if ((pte_val(pte) & _PAGE_ACCESSED) && !(pte_val(pte) & _PAGE_NO_READ))
+   pte_val(pte) |= _PAGE_SILENT_READ;
+   return pte;
 }
 #endif
 
-- 
1.8.3.1



Re: [PATCH v14 03/11] soc: mediatek: Add basic_clk_name to scp_power_data

2020-05-20 Thread Weiyi Lu
On Mon, 2020-05-18 at 19:52 +0200, Enric Balletbo i Serra wrote:
> Hi Weiyi,
> 
> On 15/5/20 5:35, Weiyi Lu wrote:
> > On Mon, 2020-05-11 at 14:02 +0800, Weiyi Lu wrote:
> >> On Wed, 2020-05-06 at 23:01 +0200, Enric Balletbo i Serra wrote:
> >>> Hi Weiyi,
> >>>
> >>> Thank you for your patch.
> >>>
> >>> On 6/5/20 10:15, Weiyi Lu wrote:
>  Try to stop extending the clk_id or clk_names if there are
>  more and more new BASIC clocks. To get its own clocks by the
>  basic_clk_name of each power domain.
>  And then use basic_clk_name strings for all compatibles, instead of
>  mixing clk_id and clk_name.
> 
>  Signed-off-by: Weiyi Lu 
>  Reviewed-by: Nicolas Boichat 
>  ---
>   drivers/soc/mediatek/mtk-scpsys.c | 134 
>  --
>   1 file changed, 41 insertions(+), 93 deletions(-)
> 
>  diff --git a/drivers/soc/mediatek/mtk-scpsys.c 
>  b/drivers/soc/mediatek/mtk-scpsys.c
>  index f669d37..c9c3cf7 100644
>  --- a/drivers/soc/mediatek/mtk-scpsys.c
>  +++ b/drivers/soc/mediatek/mtk-scpsys.c
>  @@ -78,34 +78,6 @@
>   #define PWR_STATUS_HIF1 BIT(26) /* MT7622 */
>   #define PWR_STATUS_WB   BIT(27) /* MT7622 */
>   
>  -enum clk_id {
>  -CLK_NONE,
>  -CLK_MM,
>  -CLK_MFG,
>  -CLK_VENC,
>  -CLK_VENC_LT,
>  -CLK_ETHIF,
>  -CLK_VDEC,
>  -CLK_HIFSEL,
>  -CLK_JPGDEC,
>  -CLK_AUDIO,
>  -CLK_MAX,
>  -};
>  -
>  -static const char * const clk_names[] = {
>  -NULL,
>  -"mm",
>  -"mfg",
>  -"venc",
>  -"venc_lt",
>  -"ethif",
>  -"vdec",
>  -"hif_sel",
>  -"jpgdec",
>  -"audio",
>  -NULL,
>  -};
>  -
>   #define MAX_CLKS3
>   
>   /**
>  @@ -116,7 +88,7 @@ enum clk_id {
>    * @sram_pdn_bits: The mask for sram power control bits.
>    * @sram_pdn_ack_bits: The mask for sram power control acked bits.
>    * @bus_prot_mask: The mask for single step bus protection.
>  - * @clk_id: The basic clocks required by this power domain.
>  + * @basic_clk_name: The basic clocks required by this power domain.
>    * @caps: The flag for active wake-up action.
>    */
>   struct scp_domain_data {
>  @@ -126,7 +98,7 @@ struct scp_domain_data {
>   u32 sram_pdn_bits;
>   u32 sram_pdn_ack_bits;
>   u32 bus_prot_mask;
>  -enum clk_id clk_id[MAX_CLKS];
>  +const char *basic_clk_name[MAX_CLKS];
> >>>
> >>> I only reviewed v13, so sorry if this was already discussed. I am 
> >>> wondering if
> >>> would be better take advantage of the devm_clk_bulk_get() function 
> >>> instead of
> >>> kind of reimplementing the same, something like this
> >>>
> >>>   const struct clk_bulk_data *basic_clocks;
> >>>
> >>
> >> I thought it should be const struct clk_bulk_data
> >> basic_clocks[MAX_CLKS]; instead of const struct clk_bulk_data
> >> *basic_clocks; in struct scp_domain_data data type
> >>
>   u8 caps;
>   };
>   
>  @@ -411,12 +383,19 @@ static int scpsys_power_off(struct 
>  generic_pm_domain *genpd)
>   return ret;
>   }
>   
>  -static void init_clks(struct platform_device *pdev, struct clk **clk)
>  +static int init_basic_clks(struct platform_device *pdev, struct clk 
>  **clk,
>  +const char * const *name)
>   {
>   int i;
>   
>  -for (i = CLK_NONE + 1; i < CLK_MAX; i++)
>  -clk[i] = devm_clk_get(>dev, clk_names[i]);
>  +for (i = 0; i < MAX_CLKS && name[i]; i++) {
>  +clk[i] = devm_clk_get(>dev, name[i]);
>  +
>  +if (IS_ERR(clk[i]))
>  +return PTR_ERR(clk[i]);
>  +}
> >>>
> >>> You will be able to remove this function, see below ...
> >>>
>  +
>  +return 0;
>   }
>   
>   static struct scp *init_scp(struct platform_device *pdev,
>  @@ -426,9 +405,8 @@ static struct scp *init_scp(struct platform_device 
>  *pdev,
>   {
>   struct genpd_onecell_data *pd_data;
>   struct resource *res;
>  -int i, j;
>  +int i, ret;
>   struct scp *scp;
>  -struct clk *clk[CLK_MAX];
>   
>   scp = devm_kzalloc(>dev, sizeof(*scp), GFP_KERNEL);
>   if (!scp)
>  @@ -481,8 +459,6 @@ static struct scp *init_scp(struct platform_device 
>  *pdev,
>   
>   pd_data->num_domains = num;
>   
>  -init_clks(pdev, clk);
>  -
>   for (i = 0; i < num; i++) {
>   struct scp_domain *scpd = 

[PATCH v1 2/2] arm64: dts: imx8mn-ddr4-evk: correct ldo1/ldo2 voltage range

2020-05-20 Thread Robin Gong
Correct ldo1 voltage range from wrong high group(3.0v~3.3v) to low group
(1.6v~1.9v) because the ldo1 should be 1.8v. Actually, two voltage groups
have been supported at bd718x7-regulator driver, hence, just correct the
voltage range to 1.6v~3.3v. For ldo2@0.8v, correct voltage range too.
Otherwise, ldo1 would be kept @3.0v and ldo2@0.9v which violate i.mx8mm
datasheet as the below warning log in kernel:

[0.995524] LDO1: Bringing 180uV into 300-300uV
[0.999196] LDO2: Bringing 80uV into 90-90uV

Signed-off-by: Robin Gong 
---
 arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts | 4 ++--
 arch/arm64/boot/dts/freescale/imx8mn-evk.dts  | 9 +
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts 
b/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts
index d07e0e6..a1e5483 100644
--- a/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts
@@ -113,7 +113,7 @@
 
ldo1_reg: LDO1 {
regulator-name = "LDO1";
-   regulator-min-microvolt = <300>;
+   regulator-min-microvolt = <160>;
regulator-max-microvolt = <330>;
regulator-boot-on;
regulator-always-on;
@@ -121,7 +121,7 @@
 
ldo2_reg: LDO2 {
regulator-name = "LDO2";
-   regulator-min-microvolt = <90>;
+   regulator-min-microvolt = <80>;
regulator-max-microvolt = <90>;
regulator-boot-on;
regulator-always-on;
diff --git a/arch/arm64/boot/dts/freescale/imx8mn-evk.dts 
b/arch/arm64/boot/dts/freescale/imx8mn-evk.dts
index 61f3519..117ff4b 100644
--- a/arch/arm64/boot/dts/freescale/imx8mn-evk.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mn-evk.dts
@@ -13,6 +13,15 @@
compatible = "fsl,imx8mn-evk", "fsl,imx8mn";
 };
 
+ {
+   status = "okay";
+spidev0: spi@0 {
+   compatible = "ge,achc";
+   reg = <0>;
+   spi-max-frequency = <100>;
+   };
+};
+
 _0 {
/delete-property/operating-points-v2;
 };
-- 
2.7.4



[PATCH v1 1/2] arm64: dts: imx8mm-evk: correct ldo1/ldo2 voltage range

2020-05-20 Thread Robin Gong
Correct ldo1 voltage range from wrong high group(3.0v~3.3v) to low group
(1.6v~1.9v) because the ldo1 should be 1.8v. Actually, two voltage groups
have been supported at bd718x7-regulator driver, hence, just correct the
voltage range to 1.6v~3.3v. For ldo2@0.8v, correct voltage range too.
Otherwise, ldo1 would be kept @3.0v and ldo2@0.9v which violate i.mx8mm
datasheet as the below warning log in kernel:

[0.995524] LDO1: Bringing 180uV into 300-300uV
[0.999196] LDO2: Bringing 80uV into 90-90uV

Signed-off-by: Robin Gong 
---
 arch/arm64/boot/dts/freescale/imx8mm-evk.dts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/boot/dts/freescale/imx8mm-evk.dts 
b/arch/arm64/boot/dts/freescale/imx8mm-evk.dts
index e5ec832..0f1d7f8 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm-evk.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mm-evk.dts
@@ -208,7 +208,7 @@
 
ldo1_reg: LDO1 {
regulator-name = "LDO1";
-   regulator-min-microvolt = <300>;
+   regulator-min-microvolt = <160>;
regulator-max-microvolt = <330>;
regulator-boot-on;
regulator-always-on;
@@ -216,7 +216,7 @@
 
ldo2_reg: LDO2 {
regulator-name = "LDO2";
-   regulator-min-microvolt = <90>;
+   regulator-min-microvolt = <80>;
regulator-max-microvolt = <90>;
regulator-boot-on;
regulator-always-on;
-- 
2.7.4



Re: [PATCH] soc: fsl: qe: Replace one-element array and use struct_size() helper

2020-05-20 Thread Kees Cook
On Wed, May 20, 2020 at 06:52:21PM -0500, Li Yang wrote:
> On Mon, May 18, 2020 at 5:57 PM Kees Cook  wrote:
> > Hm, looking at this code, I see a few other things that need to be
> > fixed:
> >
> > 1) drivers/tty/serial/ucc_uart.c does not do a be32_to_cpu() conversion
> >on the length test (understandably, a little-endian system has never run
> >this code since it's ppc specific), but it's still wrong:
> >
> > if (firmware->header.length != fw->size) {
> >
> >compare to the firmware loader:
> >
> > length = be32_to_cpu(hdr->length);
> >
> > 2) drivers/soc/fsl/qe/qe.c does not perform bounds checking on the
> >per-microcode offsets, so the uploader might send data outside the
> >firmware buffer. Perhaps:
> 
> We do validate the CRC for each microcode, it is unlikely the CRC
> check can pass if the offset or length is not correct.  But you are
> probably right that it will be safer to check the boundary and fail

Right, but a malicious firmware file could still match CRC but trick the
kernel code.

> quicker before we actually start the CRC check.  Will you come up with
> a formal patch or you want us to deal with it?

It sounds like Gustavo will be sending one, though I don't think either
of us have the hardware to test it with, so if you could do that part,
that would be great! :)

-- 
Kees Cook


Re: [PATCH -V2] swap: Reduce lock contention on swap cache from swap slots allocation

2020-05-20 Thread Huang, Ying
Andrew Morton  writes:

> On Wed, 20 May 2020 11:15:02 +0800 Huang Ying  wrote:
>
>> In some swap scalability test, it is found that there are heavy lock
>> contention on swap cache even if we have split one swap cache radix
>> tree per swap device to one swap cache radix tree every 64 MB trunk in
>> commit 4b3ef9daa4fc ("mm/swap: split swap cache into 64MB trunks").
>> 
>> The reason is as follow.  After the swap device becomes fragmented so
>> that there's no free swap cluster, the swap device will be scanned
>> linearly to find the free swap slots.  swap_info_struct->cluster_next
>> is the next scanning base that is shared by all CPUs.  So nearby free
>> swap slots will be allocated for different CPUs.  The probability for
>> multiple CPUs to operate on the same 64 MB trunk is high.  This causes
>> the lock contention on the swap cache.
>> 
>> To solve the issue, in this patch, for SSD swap device, a percpu
>> version next scanning base (cluster_next_cpu) is added.  Every CPU
>> will use its own per-cpu next scanning base.  And after finishing
>> scanning a 64MB trunk, the per-cpu scanning base will be changed to
>> the beginning of another randomly selected 64MB trunk.  In this way,
>> the probability for multiple CPUs to operate on the same 64 MB trunk
>> is reduced greatly.  Thus the lock contention is reduced too.  For
>> HDD, because sequential access is more important for IO performance,
>> the original shared next scanning base is used.
>> 
>> To test the patch, we have run 16-process pmbench memory benchmark on
>> a 2-socket server machine with 48 cores.  One ram disk is configured
>
> What does "ram disk" mean here?  Which drivers(s) are in use and backed
> by what sort of memory?

We use the following kernel command line

memmap=48G!6G memmap=48G!68G

to create 2 DRAM based /dev/pmem disks (48GB each).  Then we use these
ram disks as swap devices.

>> as the swap device per socket.  The pmbench working-set size is much
>> larger than the available memory so that swapping is triggered.  The
>> memory read/write ratio is 80/20 and the accessing pattern is random.
>> In the original implementation, the lock contention on the swap cache
>> is heavy.  The perf profiling data of the lock contention code path is
>> as following,
>> 
>> _raw_spin_lock_irq.add_to_swap_cache.add_to_swap.shrink_page_list:  7.91
>> _raw_spin_lock_irqsave.__remove_mapping.shrink_page_list:   7.11
>> _raw_spin_lock.swapcache_free_entries.free_swap_slot.__swap_entry_free: 2.51
>> _raw_spin_lock_irqsave.swap_cgroup_record.mem_cgroup_uncharge_swap: 1.66
>> _raw_spin_lock_irq.shrink_inactive_list.shrink_lruvec.shrink_node:  1.29
>> _raw_spin_lock.free_pcppages_bulk.drain_pages_zone.drain_pages: 1.03
>> _raw_spin_lock_irq.shrink_active_list.shrink_lruvec.shrink_node:0.93
>> 
>> After applying this patch, it becomes,
>> 
>> _raw_spin_lock.swapcache_free_entries.free_swap_slot.__swap_entry_free: 3.58
>> _raw_spin_lock_irq.shrink_inactive_list.shrink_lruvec.shrink_node:  2.3
>> _raw_spin_lock_irqsave.swap_cgroup_record.mem_cgroup_uncharge_swap: 2.26
>> _raw_spin_lock_irq.shrink_active_list.shrink_lruvec.shrink_node:1.8
>> _raw_spin_lock.free_pcppages_bulk.drain_pages_zone.drain_pages: 1.19
>> 
>> The lock contention on the swap cache is almost eliminated.
>> 
>> And the pmbench score increases 18.5%.  The swapin throughput
>> increases 18.7% from 2.96 GB/s to 3.51 GB/s.  While the swapout
>> throughput increases 18.5% from 2.99 GB/s to 3.54 GB/s.
>
> If this was backed by plain old RAM, can we assume that the performance
> improvement on SSD swap is still good?

We need really fast disk to show the benefit.  I have tried this on 2
Intel P3600 NVMe disks.  The performance improvement is only about 1%.
The improvement should be better on the faster disks, such as Intel
Optane disk.  I will try to find some to test.

> Does the ram disk actually set SWP_SOLIDSTATE?

Yes.  "blk_queue_flag_set(QUEUE_FLAG_NONROT, q)" is called in
drivers/nvdimm/pmem.c.

Best Regards,
Huang, Ying


Re: [PATCH v6 05/12] mmap locking API: convert mmap_sem call sites missed by coccinelle

2020-05-20 Thread Andrew Morton
On Tue, 19 May 2020 22:29:01 -0700 Michel Lespinasse  wrote:

> Convert the last few remaining mmap_sem rwsem calls to use the new
> mmap locking API. These were missed by coccinelle for some reason
> (I think coccinelle does not support some of the preprocessor
> constructs in these files ?)


From: Andrew Morton 
Subject: mmap-locking-api-convert-mmap_sem-call-sites-missed-by-coccinelle-fix

convert linux-next leftovers

Cc: Michel Lespinasse 
Cc: Daniel Jordan 
Cc: Laurent Dufour 
Cc: Vlastimil Babka 
Cc: Davidlohr Bueso 
Cc: David Rientjes 
Cc: Hugh Dickins 
Cc: Jason Gunthorpe 
Cc: Jerome Glisse 
Cc: John Hubbard 
Cc: Liam Howlett 
Cc: Matthew Wilcox 
Cc: Peter Zijlstra 
Cc: Ying Han 
Signed-off-by: Andrew Morton 
---

 arch/arm64/kvm/mmu.c |   14 +++---
 lib/test_hmm.c   |   14 +++---
 2 files changed, 14 insertions(+), 14 deletions(-)

--- 
a/lib/test_hmm.c~mmap-locking-api-convert-mmap_sem-call-sites-missed-by-coccinelle-fix
+++ a/lib/test_hmm.c
@@ -243,9 +243,9 @@ static int dmirror_range_fault(struct dm
}
 
range->notifier_seq = mmu_interval_read_begin(range->notifier);
-   down_read(>mmap_sem);
+   mmap_read_lock(mm);
ret = hmm_range_fault(range);
-   up_read(>mmap_sem);
+   mmap_read_unlock(mm);
if (ret) {
if (ret == -EBUSY)
continue;
@@ -684,7 +684,7 @@ static int dmirror_migrate(struct dmirro
if (!mmget_not_zero(mm))
return -EINVAL;
 
-   down_read(>mmap_sem);
+   mmap_read_lock(mm);
for (addr = start; addr < end; addr = next) {
vma = find_vma(mm, addr);
if (!vma || addr < vma->vm_start ||
@@ -711,7 +711,7 @@ static int dmirror_migrate(struct dmirro
dmirror_migrate_finalize_and_map(, dmirror);
migrate_vma_finalize();
}
-   up_read(>mmap_sem);
+   mmap_read_unlock(mm);
mmput(mm);
 
/* Return the migrated data for verification. */
@@ -731,7 +731,7 @@ static int dmirror_migrate(struct dmirro
return ret;
 
 out:
-   up_read(>mmap_sem);
+   mmap_read_unlock(mm);
mmput(mm);
return ret;
 }
@@ -823,9 +823,9 @@ static int dmirror_range_snapshot(struct
 
range->notifier_seq = mmu_interval_read_begin(range->notifier);
 
-   down_read(>mmap_sem);
+   mmap_read_lock(mm);
ret = hmm_range_fault(range);
-   up_read(>mmap_sem);
+   mmap_read_unlock(mm);
if (ret) {
if (ret == -EBUSY)
continue;
--- 
a/arch/arm64/kvm/mmu.c~mmap-locking-api-convert-mmap_sem-call-sites-missed-by-coccinelle-fix
+++ a/arch/arm64/kvm/mmu.c
@@ -1084,7 +1084,7 @@ void stage2_unmap_vm(struct kvm *kvm)
int idx;
 
idx = srcu_read_lock(>srcu);
-   down_read(>mm->mmap_sem);
+   mmap_read_lock(current->mm);
spin_lock(>mmu_lock);
 
slots = kvm_memslots(kvm);
@@ -1092,7 +1092,7 @@ void stage2_unmap_vm(struct kvm *kvm)
stage2_unmap_memslot(kvm, memslot);
 
spin_unlock(>mmu_lock);
-   up_read(>mm->mmap_sem);
+   mmap_read_unlock(current->mm);
srcu_read_unlock(>srcu, idx);
 }
 
@@ -1848,11 +1848,11 @@ static int user_mem_abort(struct kvm_vcp
}
 
/* Let's check if we will get back a huge page backed by hugetlbfs */
-   down_read(>mm->mmap_sem);
+   mmap_read_lock(current->mm);
vma = find_vma_intersection(current->mm, hva, hva + 1);
if (unlikely(!vma)) {
kvm_err("Failed to find VMA for hva 0x%lx\n", hva);
-   up_read(>mm->mmap_sem);
+   mmap_read_unlock(current->mm);
return -EFAULT;
}
 
@@ -1879,7 +1879,7 @@ static int user_mem_abort(struct kvm_vcp
if (vma_pagesize == PMD_SIZE ||
(vma_pagesize == PUD_SIZE && kvm_stage2_has_pmd(kvm)))
gfn = (fault_ipa & huge_page_mask(hstate_vma(vma))) >> 
PAGE_SHIFT;
-   up_read(>mm->mmap_sem);
+   mmap_read_unlock(current->mm);
 
/* We need minimum second+third level pages */
ret = mmu_topup_memory_cache(memcache, kvm_mmu_cache_min_pages(kvm),
@@ -2456,7 +2456,7 @@ int kvm_arch_prepare_memory_region(struc
(kvm_phys_size(kvm) >> PAGE_SHIFT))
return -EFAULT;
 
-   down_read(>mm->mmap_sem);
+   mmap_read_lock(current->mm);
/*
 * A memory region could potentially cover multiple VMAs, and any holes
 * between them, so iterate over all of them to find out if we can map
@@ -2515,7 +2515,7 @@ int kvm_arch_prepare_memory_region(struc
stage2_flush_memslot(kvm, memslot);
spin_unlock(>mmu_lock);
 out:
-   up_read(>mm->mmap_sem);
+   mmap_read_unlock(current->mm);
return ret;
 }
 
_



Re: [PATCH v6 12/12] mmap locking API: convert mmap_sem comments

2020-05-20 Thread Andrew Morton
On Tue, 19 May 2020 22:29:08 -0700 Michel Lespinasse  wrote:

> Convert comments that reference mmap_sem to reference mmap_lock instead.

This may not be complete..

From: Andrew Morton 
Subject: mmap-locking-api-convert-mmap_sem-comments-fix

fix up linux-next leftovers

Cc: Daniel Jordan 
Cc: Davidlohr Bueso 
Cc: David Rientjes 
Cc: Hugh Dickins 
Cc: Jason Gunthorpe 
Cc: Jerome Glisse 
Cc: John Hubbard 
Cc: Laurent Dufour 
Cc: Liam Howlett 
Cc: Matthew Wilcox 
Cc: Michel Lespinasse 
Cc: Peter Zijlstra 
Cc: Vlastimil Babka 
Cc: Ying Han 
Signed-off-by: Andrew Morton 
---

 arch/powerpc/mm/fault.c |2 +-
 include/linux/pgtable.h |6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

--- a/arch/powerpc/mm/fault.c~mmap-locking-api-convert-mmap_sem-comments-fix
+++ a/arch/powerpc/mm/fault.c
@@ -138,7 +138,7 @@ static noinline int bad_access_pkey(stru
 * 2. T1   : set AMR to deny access to pkey=4, touches, page
 * 3. T1   : faults...
 * 4.T2: mprotect_key(foo, PAGE_SIZE, pkey=5);
-* 5. T1   : enters fault handler, takes mmap_sem, etc...
+* 5. T1   : enters fault handler, takes mmap_lock, etc...
 * 6. T1   : reaches here, sees vma_pkey(vma)=5, when we really
 *   faulted on a pte with its pkey=4.
 */
--- a/include/linux/pgtable.h~mmap-locking-api-convert-mmap_sem-comments-fix
+++ a/include/linux/pgtable.h
@@ -1101,11 +1101,11 @@ static inline pmd_t pmd_read_atomic(pmd_
 #endif
 /*
  * This function is meant to be used by sites walking pagetables with
- * the mmap_sem hold in read mode to protect against MADV_DONTNEED and
+ * the mmap_lock held in read mode to protect against MADV_DONTNEED and
  * transhuge page faults. MADV_DONTNEED can convert a transhuge pmd
  * into a null pmd and the transhuge page fault can convert a null pmd
  * into an hugepmd or into a regular pmd (if the hugepage allocation
- * fails). While holding the mmap_sem in read mode the pmd becomes
+ * fails). While holding the mmap_lock in read mode the pmd becomes
  * stable and stops changing under us only if it's not null and not a
  * transhuge pmd. When those races occurs and this function makes a
  * difference vs the standard pmd_none_or_clear_bad, the result is
@@ -1115,7 +1115,7 @@ static inline pmd_t pmd_read_atomic(pmd_
  *
  * For 32bit kernels with a 64bit large pmd_t this automatically takes
  * care of reading the pmd atomically to avoid SMP race conditions
- * against pmd_populate() when the mmap_sem is hold for reading by the
+ * against pmd_populate() when the mmap_lock is hold for reading by the
  * caller (a special atomic read not done by "gcc" as in the generic
  * version above, is also needed when THP is disabled because the page
  * fault can populate the pmd from under us).
_



Re: [PATCH] arm64/cpufeature: Move BUG_ON() inside get_arm64_ftr_reg()

2020-05-20 Thread Anshuman Khandual



On 05/20/2020 11:09 PM, Will Deacon wrote:
> On Wed, May 20, 2020 at 04:47:11PM +0100, Catalin Marinas wrote:
>> On Wed, May 20, 2020 at 01:20:13PM +0100, Will Deacon wrote:
>>> On Wed, May 20, 2020 at 06:52:54AM +0530, Anshuman Khandual wrote:
 There is no way to proceed when requested register could not be searched in
 arm64_ftr_reg[]. Requesting for a non present register would be an error as
 well. Hence lets just BUG_ON() when the search fails in get_arm64_ftr_reg()
 rather than checking for return value and doing the same in some individual
 callers.

 But there are some callers that dont BUG_ON() upon search failure. It adds
 an argument 'failsafe' that provides required switch between callers based
 on whether they could proceed or not.

 Cc: Catalin Marinas 
 Cc: Will Deacon 
 Cc: Suzuki K Poulose 
 Cc: Mark Brown 
 Cc: linux-arm-ker...@lists.infradead.org
 Cc: linux-kernel@vger.kernel.org

 Signed-off-by: Anshuman Khandual 
 ---
 Applies on next-20200518 that has recent cpufeature changes from Will.

  arch/arm64/kernel/cpufeature.c | 26 +-
  1 file changed, 13 insertions(+), 13 deletions(-)

 diff --git a/arch/arm64/kernel/cpufeature.c 
 b/arch/arm64/kernel/cpufeature.c
 index bc5048f152c1..62767cc540c3 100644
 --- a/arch/arm64/kernel/cpufeature.c
 +++ b/arch/arm64/kernel/cpufeature.c
 @@ -557,7 +557,7 @@ static int search_cmp_ftr_reg(const void *id, const 
 void *regp)
   * - NULL on failure. It is upto the caller to decide
   * the impact of a failure.
   */
 -static struct arm64_ftr_reg *get_arm64_ftr_reg(u32 sys_id)
 +static struct arm64_ftr_reg *get_arm64_ftr_reg(u32 sys_id, bool failsafe)
>>>
>>> Generally, I'm not a big fan of boolean arguments because they are really
>>> opaque at the callsite. It also seems bogus to me that we don't trust the
>>> caller to pass a valid sys_id, but we trust it to get "failsafe" right,
>>> which seems to mean "I promise to check the result isn't NULL before
>>> dereferencing it."
>>>
>>> So I don't see how this patch improves anything. I'd actually be more
>>> inclined to stick a WARN() in get_arm64_ftr_reg() when it returns NULL and
>>> have the callers handle NULL by returning early, getting rid of all the
>>> BUG_ONs in here. Sure, the system might end up in a funny state, but we
>>> WARN()d about it and tried to keep going (and Linus has some strong opinions
>>> on this too).
>>
>> Such WARN can be triggered by the user via emulate_sys_reg(), so we
>> can't really have it in get_arm64_ftr_reg() without a 'failsafe' option.
> 
> Ah yes, that would be bad. In which case, I don't think the existing code
> should change.

The existing code has BUG_ON() in three different callers doing exactly the
same thing that can easily be taken care of in get_arm64_ftr_reg() itself. As
mentioned before an enum variable (as preferred - over a bool) can still
preserve the existing behavior for emulate_sys_reg().

IMHO these are very good reasons for us to change the code which will make
it cleaner while also removing three redundant BUG_ON() instances. Hence I
will request you to please reconsider this proposal.

- Anshuman


[PATCH] [v2] PCI: tegra194: Fix runtime PM imbalance on error

2020-05-20 Thread Dinghao Liu
pm_runtime_get_sync() increments the runtime PM usage counter even
when it returns an error code. Thus a pairing decrement is needed on
the error handling path to keep the counter balanced.

Signed-off-by: Dinghao Liu 
---
 drivers/pci/controller/dwc/pcie-tegra194.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/pci/controller/dwc/pcie-tegra194.c 
b/drivers/pci/controller/dwc/pcie-tegra194.c
index ae30a2fd3716..2c0d2ce16b47 100644
--- a/drivers/pci/controller/dwc/pcie-tegra194.c
+++ b/drivers/pci/controller/dwc/pcie-tegra194.c
@@ -1623,7 +1623,7 @@ static int tegra_pcie_config_rp(struct tegra_pcie_dw 
*pcie)
ret = pinctrl_pm_select_default_state(dev);
if (ret < 0) {
dev_err(dev, "Failed to configure sideband pins: %d\n", ret);
-   goto fail_pinctrl;
+   goto fail_pm_get_sync;
}
 
tegra_pcie_init_controller(pcie);
@@ -1650,9 +1650,8 @@ static int tegra_pcie_config_rp(struct tegra_pcie_dw 
*pcie)
 
 fail_host_init:
tegra_pcie_deinit_controller(pcie);
-fail_pinctrl:
-   pm_runtime_put_sync(dev);
 fail_pm_get_sync:
+   pm_runtime_put_sync(dev);
pm_runtime_disable(dev);
return ret;
 }
-- 
2.17.1



Re: [PATCH bpf] security: Fix hook iteration for secid_to_secctx

2020-05-20 Thread Alexei Starovoitov
On Wed, May 20, 2020 at 7:02 PM James Morris  wrote:
>
> On Wed, 20 May 2020, Alexei Starovoitov wrote:
>
> > On Wed, May 20, 2020 at 8:15 AM Casey Schaufler  
> > wrote:
> > >
> > >
> > > On 5/20/2020 5:56 AM, KP Singh wrote:
> > > > From: KP Singh 
> > > >
> > > > secid_to_secctx is not stackable, and since the BPF LSM registers this
> > > > hook by default, the call_int_hook logic is not suitable which
> > > > "bails-on-fail" and casues issues when other LSMs register this hook and
> > > > eventually breaks Audit.
> > > >
> > > > In order to fix this, directly iterate over the security hooks instead
> > > > of using call_int_hook as suggested in:
> > > >
> > > > https: 
> > > > //lore.kernel.org/bpf/9d0eb6c6-803a-ff3a-5603-9ad6d9edf...@schaufler-ca.com/#t
> > > >
> > > > Fixes: 98e828a0650f ("security: Refactor declaration of LSM hooks")
> > > > Fixes: 625236ba3832 ("security: Fix the default value of 
> > > > secid_to_secctx hook"
> > > > Reported-by: Alexei Starovoitov 
> > > > Signed-off-by: KP Singh 
> > >
> > > This looks fine.
> >
> > Tested. audit works now.
> > I fixed missing ')' in the commit log
> > and applied to bpf tree.
> > It will be on the way to Linus tree soon.
>
> Please add:
>
>
> Acked-by: James Morris 

Thank you. Done.


Re: [PATCH] arm64/cpufeature: Move BUG_ON() inside get_arm64_ftr_reg()

2020-05-20 Thread Anshuman Khandual



On 05/20/2020 05:50 PM, Will Deacon wrote:
> Hi Anshuman,
> 
> On Wed, May 20, 2020 at 06:52:54AM +0530, Anshuman Khandual wrote:
>> There is no way to proceed when requested register could not be searched in
>> arm64_ftr_reg[]. Requesting for a non present register would be an error as
>> well. Hence lets just BUG_ON() when the search fails in get_arm64_ftr_reg()
>> rather than checking for return value and doing the same in some individual
>> callers.
>>
>> But there are some callers that don't BUG_ON() upon search failure. It adds
>> an argument 'failsafe' that provides required switch between callers based
>> on whether they could proceed or not.
>>
>> Cc: Catalin Marinas 
>> Cc: Will Deacon 
>> Cc: Suzuki K Poulose 
>> Cc: Mark Brown 
>> Cc: linux-arm-ker...@lists.infradead.org
>> Cc: linux-kernel@vger.kernel.org
>>
>> Signed-off-by: Anshuman Khandual 
>> ---
>> Applies on next-20200518 that has recent cpufeature changes from Will.
>>
>>  arch/arm64/kernel/cpufeature.c | 26 +-
>>  1 file changed, 13 insertions(+), 13 deletions(-)
>>
>> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
>> index bc5048f152c1..62767cc540c3 100644
>> --- a/arch/arm64/kernel/cpufeature.c
>> +++ b/arch/arm64/kernel/cpufeature.c
>> @@ -557,7 +557,7 @@ static int search_cmp_ftr_reg(const void *id, const void 
>> *regp)
>>   * - NULL on failure. It is upto the caller to decide
>>   *   the impact of a failure.
>>   */
>> -static struct arm64_ftr_reg *get_arm64_ftr_reg(u32 sys_id)
>> +static struct arm64_ftr_reg *get_arm64_ftr_reg(u32 sys_id, bool failsafe)
> 
> Generally, I'm not a big fan of boolean arguments because they are really
> opaque at the callsite. It also seems bogus to me that we don't trust the

If preferred, we could replace with an enum variable here with some
more context e.g

enum ftr_reg_search {
FTR_REG_SEARCH_SAFE,
FTR_REG_SEARCH_UNSAFE,
};

> caller to pass a valid sys_id, but we trust it to get "failsafe" right,

If we really trusted the callers, then why are BUG_ON() checks present in
the first place? Because it is always prudent to protect against the
unexpected.

> which seems to mean "I promise to check the result isn't NULL before
> dereferencing it."

Not sure I got this. Do you mean all the present BUG_ON() are trying to
check that returned arm64_ftr_reg is valid before dereferencing it ? If
there is real trust on the callers that a non present sys_id will never
get requested, then all present BUG_ON() instances should never be there.

Either we trust the callers - drop all BUG_ON() and WARN_ON() instances
or we dont - consolidate BUG_ON() and WARN_ON() instances appropriately.

> 
> So I don't see how this patch improves anything. I'd actually be more

It consolidates multiple BUG_ON() in various callers which are not really
required. Code consolidation and reduction especially BUG_ON() instances,
is invariably a good thing.

> inclined to stick a WARN() in get_arm64_ftr_reg() when it returns NULL and

AFAICS in emulate_sys_reg(), where the user can send non-present sys_id
registers that eventually get emulated, we should not expect a WARN_ON()
as the user did not do anything wrong.

> have the callers handle NULL by returning early, getting rid of all the
> BUG_ONs in here. Sure, the system might end up in a funny state, but we
> WARN()d about it and tried to keep going (and Linus has some strong opinions
> on this too).

Sure, we could go with a WARN_ON() instead, if acceptable and preferred.


Re: [PATCH v3 01/16] spi: dw: Add Tx/Rx finish wait methods to the MID DMA

2020-05-20 Thread Feng Tang
Hi Serge,

On Thu, May 21, 2020 at 04:21:51AM +0300, Serge Semin wrote:
> Since DMA transfers are performed asynchronously with actual SPI
> transaction, then even if DMA transfers are finished it doesn't mean
> all data is actually pushed to the SPI bus. Some data might still be
> in the controller FIFO. This is specifically true for Tx-only
> transfers. In this case if the next SPI transfer is recharged while
> a tail of the previous one is still in FIFO, we'll lose that tail
> data. In order to fix this lets add the wait procedure of the Tx/Rx
> SPI transfers completion after the corresponding DMA transactions
> are finished.
> 
> Co-developed-by: Georgy Vlasov 
> Signed-off-by: Georgy Vlasov 
> Signed-off-by: Serge Semin 
> Fixes: 7063c0d942a1 ("spi/dw_spi: add DMA support")
> Cc: Ramil Zaripov 
> Cc: Alexey Malahov 
> Cc: Thomas Bogendoerfer 
> Cc: Paul Burton 
> Cc: Ralf Baechle 
> Cc: Arnd Bergmann 
> Cc: Andy Shevchenko 
> Cc: Rob Herring 
> Cc: linux-m...@vger.kernel.org
> Cc: devicet...@vger.kernel.org
> 
> ---
> 
> Changelog v2:
> - Use conditional statement instead of the ternary operator in the ref
>   clock getter.
> - Move the patch to the head of the series so one could be picked up to
>   the stable kernels as a fix.
> 
> Changelog v3:
> - Use spi_delay_exec() method to wait for the current operation completion.
> ---
>  drivers/spi/spi-dw-mid.c | 69 
>  drivers/spi/spi-dw.h | 10 ++
>  2 files changed, 79 insertions(+)
> 
> diff --git a/drivers/spi/spi-dw-mid.c b/drivers/spi/spi-dw-mid.c
> index f9757a370699..3526b196a7fc 100644
> --- a/drivers/spi/spi-dw-mid.c
> +++ b/drivers/spi/spi-dw-mid.c
> @@ -17,6 +17,7 @@
>  #include 
>  #include 
>  
> +#define WAIT_RETRIES 5
>  #define RX_BUSY  0
>  #define TX_BUSY  1
>  
> @@ -143,6 +144,47 @@ static enum dma_slave_buswidth convert_dma_width(u32 
> dma_width) {
>   return DMA_SLAVE_BUSWIDTH_UNDEFINED;
>  }
>  
> +static void dw_spi_dma_calc_delay(struct dw_spi *dws, u32 nents,
> +   struct spi_delay *delay)
> +{
> + unsigned long ns, us;
> +
> + ns = (NSEC_PER_SEC / spi_get_clk(dws)) * nents * dws->n_bytes *
> +  BITS_PER_BYTE;
> +
> + if (ns <= NSEC_PER_USEC) {
> + delay->unit = SPI_DELAY_UNIT_NSECS;
> + delay->value = ns;
> + } else {
> + us = DIV_ROUND_UP(ns, NSEC_PER_USEC);
> + delay->unit = SPI_DELAY_UNIT_USECS;
> + delay->value = clamp_val(us, 0, USHRT_MAX);
> + }
> +}
> +
> +static inline bool dw_spi_dma_tx_busy(struct dw_spi *dws)
> +{
> + return !(dw_readl(dws, DW_SPI_SR) & SR_TF_EMPT);
> +}
> +
> +static void dw_spi_dma_wait_tx_done(struct dw_spi *dws)
> +{
> + int retry = WAIT_RETRIES;
> + struct spi_delay delay;
> + u32 nents;
> +
> + nents = dw_readl(dws, DW_SPI_TXFLR);
> + dw_spi_dma_calc_delay(dws, nents, );
> +
> + while (dw_spi_dma_tx_busy(dws) && retry--)
> + spi_delay_exec(, NULL);
> +
> + if (retry < 0) {
> + dev_err(>master->dev, "Tx hanged up\n");
> + dws->master->cur_msg->status = -EIO;
> + }
> +}
> +
>  /*
>   * dws->dma_chan_busy is set before the dma transfer starts, callback for tx
>   * channel will clear a corresponding bit.
> @@ -151,6 +193,8 @@ static void dw_spi_dma_tx_done(void *arg)
>  {
>   struct dw_spi *dws = arg;
>  
> + dw_spi_dma_wait_tx_done(dws);
> +
>   clear_bit(TX_BUSY, >dma_chan_busy);
>   if (test_bit(RX_BUSY, >dma_chan_busy))
>   return;
> @@ -192,6 +236,29 @@ static struct dma_async_tx_descriptor 
> *dw_spi_dma_prepare_tx(struct dw_spi *dws,
>   return txdesc;
>  }
>  
> +static inline bool dw_spi_dma_rx_busy(struct dw_spi *dws)
> +{
> + return !!(dw_readl(dws, DW_SPI_SR) & SR_RF_NOT_EMPT);
> +}
> +
> +static void dw_spi_dma_wait_rx_done(struct dw_spi *dws)
> +{
> + int retry = WAIT_RETRIES;
> + struct spi_delay delay;
> + u32 nents;
> +
> + nents = dw_readl(dws, DW_SPI_RXFLR);
> + dw_spi_dma_calc_delay(dws, nents, );
> +
> + while (dw_spi_dma_rx_busy(dws) && retry--)
> + spi_delay_exec(, NULL);
> +
> + if (retry < 0) {
> + dev_err(>master->dev, "Rx hanged up\n");
> + dws->master->cur_msg->status = -EIO;
> + }
> +}
> +
>  /*
>   * dws->dma_chan_busy is set before the dma transfer starts, callback for rx
>   * channel will clear a corresponding bit.
> @@ -200,6 +267,8 @@ static void dw_spi_dma_rx_done(void *arg)
>  {
>   struct dw_spi *dws = arg;
>  
> + dw_spi_dma_wait_rx_done(dws);

I can understand the problem about TX, but I don't see how RX
will get hurt, can you elaborate more? thanks

- Feng


> +
>   clear_bit(RX_BUSY, >dma_chan_busy);
>   if (test_bit(TX_BUSY, >dma_chan_busy))
>   return;
> diff --git a/drivers/spi/spi-dw.h b/drivers/spi/spi-dw.h
> index e92d43b9a9e6..81364f501b7e 100644
> --- 

Re: [PATCH v3 0/3] Even moar rpmh cleanups

2020-05-20 Thread Bjorn Andersson
On Wed 20 May 18:21 PDT 2020, Stephen Boyd wrote:

> We remove the tcs_is_free() API and then do super micro optimizations on
> the irq handler. I haven't tested anything here so most likely there's a
> bug (again again)!
> 
> Changes from v2:
>  * Went back in time and used the v1 patch for the first patch with
>the fixes to make it not so complicated
> 
> Changes from v1:
>  * First patch became even moar complicated because it combines
>find_free_tcs() with the check for a request in flight
>  * Fixed subject in patch 2
>  * Put back unsigned long for bitmap operation to silence compiler
>warning
>  * Picked up review tags
> 

Can you please resend this series with both linux-arm-msm and myself on
Cc for all three patches?

Thanks,
Bjorn

> Stephen Boyd (3):
>   soc: qcom: rpmh-rsc: Remove tcs_is_free() API
>   soc: qcom: rpmh-rsc: Loop over fewer bits in irq handler
>   soc: qcom: rpmh-rsc: Fold WARN_ON() into if condition
> 
>  drivers/soc/qcom/rpmh-rsc.c | 65 +
>  1 file changed, 22 insertions(+), 43 deletions(-)
> 
> Cc: Maulik Shah 
> Cc: Douglas Anderson 
> 
> base-commit: 1f7a3eb785e4a4e196729cd3d5ec97bd5f9f2940
> -- 
> Sent by a computer, using git, on the internet
> 


Re: Re: [PATCH] PCI: tegra: fix runtime pm imbalance on error

2020-05-20 Thread dinghao . liu
Thank you for your advice. I will fix these problems in the next version of the
patch.

Thierry Reding thierry.red...@gmail.com写道:
> On Wed, May 20, 2020 at 04:52:23PM +0800, Dinghao Liu wrote:
> > pm_runtime_get_sync() increments the runtime PM usage counter even
> > it returns an error code. Thus a pairing decrement is needed on
> 
> s/even it/even when it/
> 
> Might also be a good idea to use a different subject prefix because I
> was almost not going to look at the other patch, taking this to be a
> replacement for it.
> 
> Although, looking at the log we have used this same prefix for both
> drivers in the past...
> 
> > the error handling path to keep the counter balanced.
> > 
> > Signed-off-by: Dinghao Liu 
> > ---
> >  drivers/pci/controller/dwc/pcie-tegra194.c | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> > 
> > diff --git a/drivers/pci/controller/dwc/pcie-tegra194.c 
> > b/drivers/pci/controller/dwc/pcie-tegra194.c
> > index ae30a2fd3716..a69f9e49dcb5 100644
> > --- a/drivers/pci/controller/dwc/pcie-tegra194.c
> > +++ b/drivers/pci/controller/dwc/pcie-tegra194.c
> > @@ -1651,8 +1651,8 @@ static int tegra_pcie_config_rp(struct tegra_pcie_dw 
> > *pcie)
> >  fail_host_init:
> > tegra_pcie_deinit_controller(pcie);
> >  fail_pinctrl:
> > -   pm_runtime_put_sync(dev);
> >  fail_pm_get_sync:
> 
> Either of those two labels is now no longer needed. Of course it'll now
> be odd to jump to fail_pm_get_sync on pinctrl_pm_select_default_state()
> failure, but that's one of the reasons why label should have names
> describing what they do rather than describe the failure location. I
> guess we can live with that for now. I'll make a note to send a cleanup
> patch for that later on.
> 
> With the fixup in the commit message and either of the labels removed:
> 
> Acked-by: Thierry Reding 


RE: [PATCH 0/3] arm64: perf: Add support for Perf NMI interrupts

2020-05-20 Thread Song Bao Hua (Barry Song)


> -Original Message-
> From: linux-arm-kernel [mailto:linux-arm-kernel-boun...@lists.infradead.org]
> On Behalf Of Alexandru Elisei
> Sent: Wednesday, May 20, 2020 10:31 PM> 
> Hi,
> 
> On 5/18/20 12:17 PM, Alexandru Elisei wrote:
> > Hi,
> >
> > On 5/18/20 11:45 AM, Mark Rutland wrote:
> >> Hi all,
> >>
> >> On Mon, May 18, 2020 at 02:26:00PM +0800, Lecopzer Chen wrote:
> >>> HI Sumit,
> >>>
> >>> Thanks for your information.
> >>>
> >>> I've already implemented IPI (same as you did [1], little difference
> >>> in detail), hardlockup detector and perf in last year(2019) for
> >>> debuggability.
> >>> And now we tend to upstream to reduce kernel maintaining effort.
> >>> I'm glad if someone in ARM can do this work :)
> >>>
> >>> Hi Julien,
> >>>
> >>> Does any Arm maintainers can proceed this action?
> >> Alexandru (Cc'd) has been rebasing and reworking Julien's patches,
> >> which is my preferred approach.
> >>
> >> I understand that's not quite ready for posting since he's
> >> investigating some of the nastier subtleties (e.g. mutual exclusion
> >> with the NMI), but maybe we can put the work-in-progress patches
> >> somewhere in the mean time.
> >>
> >> Alexandru, do you have an idea of what needs to be done, and/or when
> >> you expect you could post that?
> > I'm currently working on rebasing the patches on top of 5.7-rc5, when
> > I have something usable I'll post a link (should be a couple of days).
> > After that I will address the review comments, and I plan to do a
> > thorough testing because I'm not 100% confident that some of the
> > assumptions around the locks that were removed are correct. My guess is
> this will take a few weeks.
> 
> Pushed a WIP branch on linux-arm.org [1]:
> 
> git clone -b WIP-pmu-nmi git://linux-arm.org/linux-ae
> 
> Practically untested, I only did perf record on a defconfig kernel running on 
> the
> model.
> 
> [1]
> http://www.linux-arm.org/git?p=linux-ae.git;a=shortlog;h=refs/heads/WIP-pm
> u-nmi

Fortunately, it does work. I used this tree to perf annotate 
arm_smmu_cmdq_issue_cmdlist(), which
completely disables IRQs. Luckily, it reports correct data. Before that, it 
reported that all time was spent in
the code which enabled IRQs.


Barry

> 
> Thanks,
> Alex
> >
> > Thanks,
> > Alex
> >> Thanks,
> >> Mark.
> >>
> >>> This is really useful in debugging.
> >>> Thank you!!
> >>>
> >>>
> >>>
> >>> [1] https://lkml.org/lkml/2020/4/24/328
> >>>
> >>>
> >>> Lecopzer
> >>>
> >>> Sumit Garg  於 2020年5月18日 週一 下午
> 1:46寫道:
>  + Julien
> 
>  Hi Lecopzer,
> 
>  On Sat, 16 May 2020 at 18:20, Lecopzer Chen 
> wrote:
> > These series implement Perf NMI funxtionality and depends on
> > Pseudo NMI [1] which has been upstreamed.
> >
> > In arm64 with GICv3, Pseudo NMI was implemented for NMI-like
> interruts.
> > That can be extended to Perf NMI which is the prerequisite for
> > hard-lockup detector which had already a standard interface inside
> Linux.
> >
> > Thus the first step we need to implement perf NMI interface and
> > make sure it works fine.
> >
>  This is something that is already implemented via Julien's
>  patch-set [1]. Its v4 has been floating since July, 2019 and I
>  couldn't find any major blocking comments but not sure why things
>  haven't progressed further.
> 
>  Maybe Julien or Arm maintainers can provide updates on existing
>  patch-set [1] and how we should proceed further with this
>  interesting feature.
> 
>  And regarding hard-lockup detection, I have been able to enable it
>  based on perf NMI events using Julien's perf patch-set [1]. Have a
>  look at the patch here [2].
> 
>  [1] https://patchwork.kernel.org/cover/11047407/
>  [2]
>  http://lists.infradead.org/pipermail/linux-arm-kernel/2020-May/7322
>  27.html
> 
>  -Sumit
> 
> > Perf NMI has been test by dd if=/dev/urandom of=/dev/null like the
> > link [2] did.
> >
> > [1] https://lkml.org/lkml/2019/1/31/535
> > [2] https://www.linaro.org/blog/debugging-arm-kernels-using-nmifiq
> >
> >
> > Lecopzer Chen (3):
> >   arm_pmu: Add support for perf NMI interrupts registration
> >   arm64: perf: Support NMI context for perf event ISR
> >   arm64: Kconfig: Add support for the Perf NMI
> >
> >  arch/arm64/Kconfig | 10 +++
> >  arch/arm64/kernel/perf_event.c | 36 ++--
> >  drivers/perf/arm_pmu.c | 51
> ++
> >  include/linux/perf/arm_pmu.h   |  6 
> >  4 files changed, 88 insertions(+), 15 deletions(-)
> >
> > --
> > 2.25.1



Re: [PATCH] KVM: PPC: Book3S HV: relax check on H_SVM_INIT_ABORT

2020-05-20 Thread Greg Kurz
On Wed, 20 May 2020 18:51:10 +0200
Laurent Dufour  wrote:

> The commit 8c47b6ff29e3 ("KVM: PPC: Book3S HV: Check caller of H_SVM_*
> Hcalls") added checks of secure bit of SRR1 to filter out the Hcall
> reserved to the Ultravisor.
> 
> However, the Hcall H_SVM_INIT_ABORT is made by the Ultravisor passing the
> context of the VM calling UV_ESM. This allows the Hypervisor to return to
> the guest without going through the Ultravisor. Thus the Secure bit of SRR1
> is not set in that particular case.
> 
> In the case a regular VM is calling H_SVM_INIT_ABORT, this hcall will be
> filtered out in kvmppc_h_svm_init_abort() because kvm->arch.secure_guest is
> not set in that case.
> 

Why not checking vcpu->kvm->arch.secure_guest then ?

> Fixes: 8c47b6ff29e3 ("KVM: PPC: Book3S HV: Check caller of H_SVM_* Hcalls")
> Signed-off-by: Laurent Dufour 
> ---
>  arch/powerpc/kvm/book3s_hv.c | 4 +---
>  1 file changed, 1 insertion(+), 3 deletions(-)
> 
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index 93493f0cbfe8..eb1f96cb7b72 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -1099,9 +1099,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
>   ret = kvmppc_h_svm_init_done(vcpu->kvm);
>   break;
>   case H_SVM_INIT_ABORT:
> - ret = H_UNSUPPORTED;
> - if (kvmppc_get_srr1(vcpu) & MSR_S)
> - ret = kvmppc_h_svm_init_abort(vcpu->kvm);

or at least put a comment to explain why H_SVM_INIT_ABORT
doesn't have the same sanity check as the other SVM hcalls.

> + ret = kvmppc_h_svm_init_abort(vcpu->kvm);
>   break;
>  
>   default:



Re: Re: [PATCH] PCI: tegra: fix runtime pm imbalance on error

2020-05-20 Thread dinghao . liu
Thank you for your advice. I think tegra194 is a good choice and 
I will use it in the next version of the patch.

Bjorn Helgaas helg...@kernel.org写道:
> On Wed, May 20, 2020 at 11:59:08AM +0200, Thierry Reding wrote:
> > On Wed, May 20, 2020 at 04:52:23PM +0800, Dinghao Liu wrote:
> > > pm_runtime_get_sync() increments the runtime PM usage counter even
> > > it returns an error code. Thus a pairing decrement is needed on
> > 
> > s/even it/even when it/
> > 
> > Might also be a good idea to use a different subject prefix because I
> > was almost not going to look at the other patch, taking this to be a
> > replacement for it.
> 
> Amen.  This would be a good change to start using "PCI: tegra194" or
> something for pcie-tegra194.c.  Or will there be tegra195, tegra 196,
> etc added to this driver?
> 
> Also, please capitalize the first word and "PM" in the subjects:
> 
>   PCI: tegra194: Fix runtime PM imbalance on error
> 
> Bjorn


Re: [PATCH -V2] swap: Reduce lock contention on swap cache from swap slots allocation

2020-05-20 Thread Andrew Morton
On Wed, 20 May 2020 11:15:02 +0800 Huang Ying  wrote:

> In some swap scalability test, it is found that there are heavy lock
> contention on swap cache even if we have split one swap cache radix
> tree per swap device to one swap cache radix tree every 64 MB trunk in
> commit 4b3ef9daa4fc ("mm/swap: split swap cache into 64MB trunks").
> 
> The reason is as follow.  After the swap device becomes fragmented so
> that there's no free swap cluster, the swap device will be scanned
> linearly to find the free swap slots.  swap_info_struct->cluster_next
> is the next scanning base that is shared by all CPUs.  So nearby free
> swap slots will be allocated for different CPUs.  The probability for
> multiple CPUs to operate on the same 64 MB trunk is high.  This causes
> the lock contention on the swap cache.
> 
> To solve the issue, in this patch, for SSD swap device, a percpu
> version next scanning base (cluster_next_cpu) is added.  Every CPU
> will use its own per-cpu next scanning base.  And after finishing
> scanning a 64MB trunk, the per-cpu scanning base will be changed to
> the beginning of another randomly selected 64MB trunk.  In this way,
> the probability for multiple CPUs to operate on the same 64 MB trunk
> is reduced greatly.  Thus the lock contention is reduced too.  For
> HDD, because sequential access is more important for IO performance,
> the original shared next scanning base is used.
> 
> To test the patch, we have run 16-process pmbench memory benchmark on
> a 2-socket server machine with 48 cores.  One ram disk is configured

What does "ram disk" mean here?  Which drivers(s) are in use and backed
by what sort of memory?

> as the swap device per socket.  The pmbench working-set size is much
> larger than the available memory so that swapping is triggered.  The
> memory read/write ratio is 80/20 and the accessing pattern is random.
> In the original implementation, the lock contention on the swap cache
> is heavy.  The perf profiling data of the lock contention code path is
> as following,
> 
> _raw_spin_lock_irq.add_to_swap_cache.add_to_swap.shrink_page_list:  7.91
> _raw_spin_lock_irqsave.__remove_mapping.shrink_page_list:   7.11
> _raw_spin_lock.swapcache_free_entries.free_swap_slot.__swap_entry_free: 2.51
> _raw_spin_lock_irqsave.swap_cgroup_record.mem_cgroup_uncharge_swap: 1.66
> _raw_spin_lock_irq.shrink_inactive_list.shrink_lruvec.shrink_node:  1.29
> _raw_spin_lock.free_pcppages_bulk.drain_pages_zone.drain_pages: 1.03
> _raw_spin_lock_irq.shrink_active_list.shrink_lruvec.shrink_node:0.93
> 
> After applying this patch, it becomes,
> 
> _raw_spin_lock.swapcache_free_entries.free_swap_slot.__swap_entry_free: 3.58
> _raw_spin_lock_irq.shrink_inactive_list.shrink_lruvec.shrink_node:  2.3
> _raw_spin_lock_irqsave.swap_cgroup_record.mem_cgroup_uncharge_swap: 2.26
> _raw_spin_lock_irq.shrink_active_list.shrink_lruvec.shrink_node:1.8
> _raw_spin_lock.free_pcppages_bulk.drain_pages_zone.drain_pages: 1.19
> 
> The lock contention on the swap cache is almost eliminated.
> 
> And the pmbench score increases 18.5%.  The swapin throughput
> increases 18.7% from 2.96 GB/s to 3.51 GB/s.  While the swapout
> throughput increases 18.5% from 2.99 GB/s to 3.54 GB/s.

If this was backed by plain old RAM, can we assume that the performance
improvement on SSD swap is still good?

Does the ram disk actually set SWP_SOLIDSTATE?


Re: [PATCH v2] Makefile: support compressed debug info

2020-05-20 Thread Masahiro Yamada
On Thu, May 21, 2020 at 4:36 AM 'Nick Desaulniers' via Clang Built
Linux  wrote:
>
> As debug information gets larger and larger, it helps significantly save
> the size of vmlinux images to compress the information in the debug
> information sections. Note: this debug info is typically split off from
> the final compressed kernel image, which is why vmlinux is what's used
> in conjunction with GDB. Minimizing the debug info size should have no
> impact on boot times, or final compressed kernel image size.
>
> All of the debug sections will have a `C` flag set.
> $ readelf -S 
>
> $ bloaty vmlinux.gcc75.compressed.dwarf4 -- \
> vmlinux.gcc75.uncompressed.dwarf4
>
> FILE SIZEVM SIZE
>  --  --
>   +0.0% +18  [ = ]   0[Unmapped]
>  -73.3%  -114Ki  [ = ]   0.debug_aranges
>  -76.2% -2.01Mi  [ = ]   0.debug_frame
>  -73.6% -2.89Mi  [ = ]   0.debug_str
>  -80.7% -4.66Mi  [ = ]   0.debug_abbrev
>  -82.9% -4.88Mi  [ = ]   0.debug_ranges
>  -70.5% -9.04Mi  [ = ]   0.debug_line
>  -79.3% -10.9Mi  [ = ]   0.debug_loc
>  -39.5% -88.6Mi  [ = ]   0.debug_info
>  -18.2%  -123Mi  [ = ]   0TOTAL
>
> $ bloaty vmlinux.clang11.compressed.dwarf4 -- \
> vmlinux.clang11.uncompressed.dwarf4
>
> FILE SIZEVM SIZE
>  --  --
>   +0.0% +23  [ = ]   0[Unmapped]
>  -65.6%-871  [ = ]   0.debug_aranges
>  -77.4% -1.84Mi  [ = ]   0.debug_frame
>  -82.9% -2.33Mi  [ = ]   0.debug_abbrev
>  -73.1% -2.43Mi  [ = ]   0.debug_str
>  -84.8% -3.07Mi  [ = ]   0.debug_ranges
>  -65.9% -8.62Mi  [ = ]   0.debug_line
>  -86.2% -40.0Mi  [ = ]   0.debug_loc
>  -42.0% -64.1Mi  [ = ]   0.debug_info
>  -22.1%  -122Mi  [ = ]   0TOTAL
>
> For x86_64 defconfig + LLVM=1 (before):
> Elapsed (wall clock) time (h:mm:ss or m:ss): 3:22.03
> Maximum resident set size (kbytes): 43856
>
> For x86_64 defconfig + LLVM=1 (after):
> Elapsed (wall clock) time (h:mm:ss or m:ss): 3:32.52
> Maximum resident set size (kbytes): 1566776
>
> Suggested-by: David Blaikie 
> Suggested-by: Fangrui Song 


Suggested-by -> Reviewed-by

https://patchwork.kernel.org/patch/11524939/#23349551



> Suggested-by: Nick Clifton 


I do not know where this tag came from.

Nick Clifton taught us the version rule of binutils,but did not state
anything about this patch itself.

https://patchwork.kernel.org/patch/11524939/#23355175


> Suggested-by: Sedat Dilek 

I do not see the source of this tag, either...



> Tested-by: Sedat Dilek 
> Signed-off-by: Nick Desaulniers 
> ---

 snip

> --- a/lib/Kconfig.debug
> +++ b/lib/Kconfig.debug
> @@ -225,6 +225,21 @@ config DEBUG_INFO_REDUCED
>   DEBUG_INFO build and compile times are reduced too.
>   Only works with newer gcc versions.
>
> +config DEBUG_INFO_COMPRESSED
> +   bool "Compressed debugging information"
> +   depends on DEBUG_INFO
> +   depends on $(cc-option,-gz=zlib)
> +   depends on $(as-option,-Wa,--compress-debug-sections=zlib)

This does not work. (always false)
You cannot enable this option.

The comma between -Wa and --compress-debug-sections=zlib
is eaten by Kconfig parser because commas are delimiters
of function parameters.


Please write like this.

depends on $(as-option,-Wa$(comma)--compress-debug-sections=zlib)





> +   depends on $(ld-option,--compress-debug-sections=zlib)
> +   help
> + Compress the debug information using zlib.  Requires GCC 5.0+ or 
> Clang
> + 5.0+, binutils 2.26+, and zlib.
> +
> + Users of dpkg-deb via scripts/package/builddeb may
> + wish to set the $KDEB_COMPRESS env var to "none" to avoid 
> recompressing
> + the debug info again with a different compression scheme, which can
> + result in larger binaries.

No. This is not correct.

CONFIG_DEBUG_INFO_COMPRESSED compresses the only debug info part.
The other parts still get by benefit from the default KDEB_COMPRESS=xz.


The numbers are here:


CONFIG_DEBUG_INFO_COMPRESSED=y
-rw-r--r-- 1 masahiro masahiro 209077584 May 21 11:19
linux-image-5.7.0-rc5+-dbg_5.7.0-rc5+-26_amd64.deb


CONFIG_DEBUG_INFO_COMPRESSED=y and KDEB_COMPRESS=none
-rw-r--r-- 1 masahiro masahiro 643051712 May 21 11:22
linux-image-5.7.0-rc5+-dbg_5.7.0-rc5+-27_amd64.deb


CONFIG_DEBUG_INFO_COMPRESSED=n
-rw-r--r-- 1 masahiro masahiro 112200308 May 21 11:40
linux-image-5.7.0-rc5+-dbg_5.7.0-rc5+-30_amd64.deb




For the deb package size perspective,
it is better to keep KDEB_COMPRESS as default.

The main motivation for changing KDEB_COMPRESS
is the build speed.  (see commit 1a7f0a34ea7d05)




CONFIG_DEBUG_INFO_COMPRESSED has a downside
for the debug deb package, but we need to accept it.









-- 
Best Regards
Masahiro Yamada


[PATCH] [v2] PCI: tegra: Fix runtime PM imbalance on error

2020-05-20 Thread Dinghao Liu
pm_runtime_get_sync() increments the runtime PM usage counter even
when it returns an error code. Thus a pairing decrement is needed on
the error handling path to keep the counter balanced.

Also, call pm_runtime_disable() when pm_runtime_get_sync() returns
an error code.

Signed-off-by: Dinghao Liu 
---
 drivers/pci/controller/pci-tegra.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/pci/controller/pci-tegra.c 
b/drivers/pci/controller/pci-tegra.c
index 3e64ba6a36a8..00236dd65b5b 100644
--- a/drivers/pci/controller/pci-tegra.c
+++ b/drivers/pci/controller/pci-tegra.c
@@ -2712,7 +2712,7 @@ static int tegra_pcie_probe(struct platform_device *pdev)
err = pm_runtime_get_sync(pcie->dev);
if (err < 0) {
dev_err(dev, "fail to enable pcie controller: %d\n", err);
-   goto teardown_msi;
+   goto pm_runtime_put;
}
 
host->busnr = bus->start;
@@ -2746,7 +2746,6 @@ static int tegra_pcie_probe(struct platform_device *pdev)
 pm_runtime_put:
pm_runtime_put_sync(pcie->dev);
pm_runtime_disable(pcie->dev);
-teardown_msi:
tegra_pcie_msi_teardown(pcie);
 put_resources:
tegra_pcie_put_resources(pcie);
-- 
2.17.1



Re: [PATCH 3.16 00/99] 3.16.84-rc1 review

2020-05-20 Thread Chen-Yu Tsai
On Thu, May 21, 2020 at 5:23 AM Guenter Roeck  wrote:
>
> On 5/20/20 7:13 AM, Ben Hutchings wrote:
> > This is the start of the stable review cycle for the 3.16.84 release.
> > There are 99 patches in this series, which will be posted as responses
> > to this one.  If anyone has any issues with these being applied, please
> > let me know.
> >
> > Responses should be made by Fri May 22 20:00:00 UTC 2020.
> > Anything received after that time might be too late.
> >
> Build results:
> total: 135 pass: 135 fail: 0
> Qemu test results:
> total: 230 pass: 227 fail: 3
> Failed tests:
> arm:cubieboard:multi_v7_defconfig:mem512:sun4i-a10-cubieboard:initrd
> 
> arm:cubieboard:multi_v7_defconfig:usb:mem512:sun4i-a10-cubieboard:rootfs
> 
> arm:cubieboard:multi_v7_defconfig:sata:mem512:sun4i-a10-cubieboard:rootfs
>
> The arm tests fail due to a compile error.
>
> drivers/clk/tegra/clk-tegra-periph.c:524:65: error: 'CLK_IS_CRITICAL' 
> undeclared here (not in a function); did you mean 'CLK_IS_BASIC'?

This looks like a result of having

  clk: tegra: Mark fuse clock as critical
 [bf83b96f87ae2abb1e535306ea53608e8de5dfbb]

In which case you probably need to add

32b9b1096186 clk: Allow clocks to be marked as CRITICAL

to the pile.


ChenYu


Re: mm: mkfs.ext4 invoked oom-killer on i386 - pagecache_get_page

2020-05-20 Thread Yafang Shao
On Thu, May 21, 2020 at 2:00 AM Naresh Kamboju
 wrote:
>
> On Wed, 20 May 2020 at 17:26, Naresh Kamboju  
> wrote:
> >
> >
> > This issue is specific on 32-bit architectures i386 and arm on linux-next 
> > tree.
> > As per the test results history this problem started happening from
> > Bad : next-20200430
> > Good : next-20200429
> >
> > steps to reproduce:
> > dd if=/dev/disk/by-id/ata-SanDisk_SSD_PLUS_120GB_190504A00573
> > of=/dev/null bs=1M count=2048
> > or
> > mkfs -t ext4 /dev/disk/by-id/ata-SanDisk_SSD_PLUS_120GB_190804A00BE5
> >
> >
> > Problem:
> > [   38.802375] dd invoked oom-killer: gfp_mask=0x100cc0(GFP_USER),
> > order=0, oom_score_adj=0
>
> As a part of investigation on this issue LKFT teammate Anders Roxell
> git bisected the problem and found bad commit(s) which caused this problem.
>
> The following two patches have been reverted on next-20200519 and retested the
> reproducible steps and confirmed the test case mkfs -t ext4 got PASS.
> ( invoked oom-killer is gone now)
>
> Revert "mm, memcg: avoid stale protection values when cgroup is above
> protection"
> This reverts commit 23a53e1c02006120f89383270d46cbd040a70bc6.
>
> Revert "mm, memcg: decouple e{low,min} state mutations from protection
> checks"
> This reverts commit 7b88906ab7399b58bb088c28befe50bcce076d82.
>

My guess is that we made the same mistake in commit "mm, memcg:
decouple e{low,min} state mutations from protection
checks": it reads a stale memcg protection in
mem_cgroup_below_low() and mem_cgroup_below_min().

Bellow is a possble fix,

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 7a2c56fc..6591b71 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -391,20 +391,28 @@ static inline unsigned long
mem_cgroup_protection(struct mem_cgroup *root,
 void mem_cgroup_calculate_protection(struct mem_cgroup *root,
 struct mem_cgroup *memcg);

-static inline bool mem_cgroup_below_low(struct mem_cgroup *memcg)
+static inline bool mem_cgroup_below_low(struct mem_cgroup *root,
+   struct mem_cgroup *memcg)
 {
if (mem_cgroup_disabled())
return false;

+   if (root == memcg)
+   return false;
+
return READ_ONCE(memcg->memory.elow) >=
page_counter_read(>memory);
 }

-static inline bool mem_cgroup_below_min(struct mem_cgroup *memcg)
+static inline bool mem_cgroup_below_min(struct mem_cgroup *root,
+   struct mem_cgroup *memcg)
 {
if (mem_cgroup_disabled())
return false;

+   if (root == memcg)
+   return false;
+
return READ_ONCE(memcg->memory.emin) >=
page_counter_read(>memory);
 }
@@ -896,12 +904,14 @@ static inline void
mem_cgroup_calculate_protection(struct mem_cgroup *root,
 {
 }

-static inline bool mem_cgroup_below_low(struct mem_cgroup *memcg)
+static inline bool mem_cgroup_below_low(struct mem_cgroup *root,
+   struct mem_cgroup *memcg)
 {
return false;
 }

-static inline bool mem_cgroup_below_min(struct mem_cgroup *memcg)
+static inline bool mem_cgroup_below_min(struct mem_cgroup *root,
+   struct mem_cgroup *memcg)
 {
return false;
 }
diff --git a/mm/vmscan.c b/mm/vmscan.c
index c71660e..fdcdd88 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2637,13 +2637,13 @@ static void shrink_node_memcgs(pg_data_t
*pgdat, struct scan_control *sc)

mem_cgroup_calculate_protection(target_memcg, memcg);

-   if (mem_cgroup_below_min(memcg)) {
+   if (mem_cgroup_below_min(target_memcg, memcg)) {
/*
 * Hard protection.
 * If there is no reclaimable memory, OOM.
 */
continue;
-   } else if (mem_cgroup_below_low(memcg)) {
+   } else if (mem_cgroup_below_low(target_memcg, memcg)) {
/*
 * Soft protection.
 * Respect the protection only as long as





> i386 test log shows mkfs -t ext4 pass
> https://lkft.validation.linaro.org/scheduler/job/1443405#L1200
>
> ref:
> https://lore.kernel.org/linux-mm/cover.1588092152.git.ch...@chrisdown.name/
> https://lore.kernel.org/linux-mm/ca+g9fyvzlm7n1be7ajxd8_49fogpgwwtiq7sxkvre_zoerj...@mail.gmail.com/T/#t
>
> --
> Linaro LKFT
> https://lkft.linaro.org



--
Thanks
Yafang


RE: [PATCH v3 0/2] Fix PCI HyperV device error handling

2020-05-20 Thread Michael Kelley
From: Lorenzo Pieralisi  Sent: Monday, May 11, 2020 
4:22 AM
> 
> On Thu, May 07, 2020 at 01:01:26PM +0800, Wei Hu wrote:
> > This series better handles some PCI HyperV error cases in general
> > and for kdump case. Some of review comments from previous individual
> > patch reviews, including splitting into separate patches, have already
> > been incorporated. Thanks Lorenzo Pieralisi for the review and
> > suggestions, as well as Michael Kelley's contribution to the commit
> > log.
> >
> > Thanks,
> > Wei
> >
> >
> > Wei Hu (2):
> >   PCI: hv: Fix the PCI HyperV probe failure path to release resource
> > properly
> >   PCI: hv: Retry PCI bus D0 entry when the first attempt failed with
> > invalid device state
> >
> >  drivers/pci/controller/pci-hyperv.c | 60 ++---
> >  1 file changed, 54 insertions(+), 6 deletions(-)
> 
> Applied to pci/hv, thanks.
> 

Lorenzo -- 

Will you be bringing these fixes into 5.7?  The main fix is the 2nd patch, but
there wasn't a clear "Fixes:" tag to add because the problem is due more to
how Hyper-V operates than a bug in a previous Linux commit.  We have a
customer experiencing the problem, so getting the fix into the main tree
sooner rather than later is helpful.

Thx,

Michael


Re: [PATCH v2 08/12] i2c: designware: Introduce platform drivers glue layer interface

2020-05-20 Thread Serge Semin
On Wed, May 20, 2020 at 03:46:11PM +0300, Jarkko Nikula wrote:
> Hi
> 
> On 5/10/20 12:50 PM, Serge Semin wrote:
> > Seeing the DW I2C platform driver is getting overcomplicated with a lot of
> > vendor-specific configs let's introduce a glue-layer interface so new
> > platforms which equipped with Synopsys Designware APB I2C IP-core would
> > be able to handle their peculiarities in the dedicated objects.
> > 
> Comment to this patch and patches 9/12 and 12/12:
> 
> Currently i2c-designware-platdrv.c is about 500 lines of code so I don't
> think it's too overcomplicated. But I feel we have already too many Kconfig
> options and source modules for i2c-designware and obviously would like to
> push back a little from adding more.
> 
> I don't think i2c-designware-platdrv.c becomes yet too complicated if Baikal
> related code is added there, perhaps under #ifdef CONFIG_OF like MSCC Ocelot
> code is currently.

Well, it's up to you to decide, what solution is more suitable for you to
maintain. My idea of detaching the MSCC and Baikal-T1 code to the dedicated
source files was to eventually move the whole i2c-designware-* set of files
into a dedicated directory drivers/i2c/buses/dw as it's done for some other
Synopsys DesignWare controllers: drivers/pci/controller/dwc/, drivers/usb/dwc2,
drivers/usb/dwc3, drivers/net/ethernet/synopsys/ . If you think, that it's too
early for Dw I2C code to live in a dedicated directory, fine with me. I can
merge the MSCC and Baikal-T1 code back into the i2c-designware-platdrv.c .
So what's your final word in this matter?

-Sergey

> 
> -- 
> Jarkko


Re: io_uring vs CPU hotplug, was Re: [PATCH 5/9] blk-mq: don't set data->ctx and data->hctx in blk_mq_alloc_request_hctx

2020-05-20 Thread Ming Lei
On Thu, May 21, 2020 at 12:14:18AM +0200, Thomas Gleixner wrote:
> Jens Axboe  writes:
> 
> > On 5/20/20 1:41 PM, Thomas Gleixner wrote:
> >> Jens Axboe  writes:
> >>> On 5/20/20 8:45 AM, Jens Axboe wrote:
>  It just uses kthread_create_on_cpu(), nothing home grown. Pretty sure
>  they just break affinity if that CPU goes offline.
> >>>
> >>> Just checked, and it works fine for me. If I create an SQPOLL ring with
> >>> SQ_AFF set and bound to CPU 3, if CPU 3 goes offline, then the kthread
> >>> just appears unbound but runs just fine. When CPU 3 comes online again,
> >>> the mask appears correct.
> >> 
> >> When exactly during the unplug operation is it unbound?
> >
> > When the CPU has been fully offlined. I check the affinity mask, it
> > reports 0. But it's still being scheduled, and it's processing work.
> > Here's an example, PID 420 is the thread in question:
> >
> > [root@archlinux cpu3]# taskset -p 420
> > pid 420's current affinity mask: 8
> > [root@archlinux cpu3]# echo 0 > online 
> > [root@archlinux cpu3]# taskset -p 420
> > pid 420's current affinity mask: 0
> > [root@archlinux cpu3]# echo 1 > online 
> > [root@archlinux cpu3]# taskset -p 420
> > pid 420's current affinity mask: 8
> >
> > So as far as I can tell, it's working fine for me with the goals
> > I have for that kthread.
> 
> Works for me is not really useful information and does not answer my
> question:
> 
> >> When exactly during the unplug operation is it unbound?
> 
> The problem Ming and Christoph are trying to solve requires that the
> thread is migrated _before_ the hardware queue is shut down and
> drained. That's why I asked for the exact point where this happens.
> 
> When the CPU is finally offlined, i.e. the CPU cleared the online bit in
> the online mask is definitely too late simply because it still runs on
> that outgoing CPU _after_ the hardware queue is shut down and drained.

IMO, the patch in Christoph's blk-mq-hotplug.2 still works for percpu
kthread.

It is just not optimal in the retrying, but it should be fine. When the
percpu kthread is scheduled on the CPU to be offlined:

- if the kthread doesn't observe the INACTIVE flag, the allocated request
will be drained.

- otherwise, the kthread just retries and retries to allocate & release,
and sooner or later, its time slice is consumed, and migrated out, and the
cpu hotplug handler will get chance to run and move on, then the cpu is
shutdown.

- After the cpu is shutdown, the percpu kthread becomes unbound, and
the allocation from new online cpu will succeed.

Thanks,
Ming



Re: [PATCH v3] /dev/mem: Revoke mappings when a driver claims the region

2020-05-20 Thread Matthew Wilcox
On Wed, May 20, 2020 at 06:35:25PM -0700, Dan Williams wrote:
> +static struct inode *devmem_inode;
> +
> +#ifdef CONFIG_IO_STRICT_DEVMEM
> +void revoke_devmem(struct resource *res)
> +{
> + struct inode *inode = READ_ONCE(devmem_inode);
> +
> + /*
> +  * Check that the initialization has completed. Losing the race
> +  * is ok because it means drivers are claiming resources before
> +  * the fs_initcall level of init and prevent /dev/mem from
> +  * establishing mappings.
> +  */
> + smp_rmb();
> + if (!inode)
> + return;

But we don't need the smp_rmb() here, right?  READ_ONCE and WRITE_ONCE
are a DATA DEPENDENCY barrier (in Documentation/memory-barriers.txt parlance)
so the smp_rmb() is superfluous ...

> + /*
> +  * Use a unified address space to have a single point to manage
> +  * revocations when drivers want to take over a /dev/mem mapped
> +  * range.
> +  */
> + inode->i_mapping = devmem_inode->i_mapping;
> + inode->i_mapping->host = devmem_inode;

umm ... devmem_inode->i_mapping->host doesn't already point to devmem_inode?

> +
> + /* publish /dev/mem initialized */
> + smp_wmb();
> + WRITE_ONCE(devmem_inode, inode);

As above, unnecessary barrier, I think.



Re: [PATCH net-next 1/2] net: hns3: adds support for dynamic VLAN mode

2020-05-20 Thread tanhuazhong




On 2020/5/21 9:36, Jakub Kicinski wrote:

On Thu, 21 May 2020 09:33:14 +0800 tanhuazhong wrote:

On 2020/5/21 5:06, Jakub Kicinski wrote:

On Wed, 20 May 2020 09:20:12 +0800 Huazhong Tan wrote:

From: GuoJia Liao 

There is a scenario which needs vNICs enable the VLAN filter
in access port, while disable the VLAN filter in trunk port.
Access port and trunk port can switch according to the user's
configuration.

This patch adds support for the dynamic VLAN mode. Then the
HNS3 driver can support two VLAN modes: default VLAN mode and
dynamic VLAN mode. User can switch the mode through the
configuration file.


What configuration file? Sounds like you're reimplementing trusted
VFs (ndo_set_vf_trust).
   


Hi, Jakub.

Maybe this configuration file here is a little misleading,
this VLAN mode is decided by the firmware, the driver will
query the VLAN mode from firmware during initialization.


And the FW got that configuration from?



It depends on the user's demand, the user can choose the firmware
which supports the default VLAN mode or the dynamic VLAN mode.


I will modify this description in V2. BTW, is there any
other suggestion about this patch?


The other suggestion was to trusted vf. What's the difference between
trusted VF and "dynamic VLAN mode"?



Trust VF is not related to dynamic VLAN mode. So far it's only
been used for privilege checking for the VF promisc. And dynamic
VLAN mode is designed to adapt specified scenario which want
enable/disable VLAN filter base on VLAN used.

Thanks.


In default VLAN mode, port based VLAN filter and VF VLAN
filter should always be enabled.

In dynamic VLAN mode, port based VLAN filter is disabled, and
VF VLAN filter is disabled defaultly, and should be enabled
when there is a non-zero VLAN ID. In addition, VF VLAN filter
is enabled if PVID is enabled for vNIC.

When enable promisc, VLAN filter should be disabled. When disable
promisc, VLAN filter's status depends on the value of
'vport->vf_vlan_en', which is used to record the VF VLAN filter
status.

In default VLAN mode, 'vport->vf_vlan_en' always be 'true', so
VF VLAN filter will set to be enabled after disabling promisc.

In dynamic VLAN mode, 'vport->vf_vlan_en' lies on whether there
is a non-zero VLAN ID.

Signed-off-by: GuoJia Liao 
Signed-off-by: Huazhong Tan 



.





Re: [patch V6 10/37] x86/entry: Switch XEN/PV hypercall entry to IDTENTRY

2020-05-20 Thread Boris Ostrovsky
On 5/20/20 3:16 PM, Thomas Gleixner wrote:


> +__visible noinstr void xen_pv_evtchn_do_upcall(struct pt_regs *regs)
> +{
> + struct pt_regs *old_regs;
> + bool inhcall;
> +
> + idtentry_enter(regs);
> + old_regs = set_irq_regs(regs);
> +
> + run_on_irqstack(__xen_pv_evtchn_do_upcall, NULL, regs);


We need to handle nested case (i.e. !irq_needs_irq_stack(), like in your
original version). Moving get_and_clear_inhcall() up should prevent
scheduling when this happens.


-boris


> +
> + set_irq_regs(old_regs);
> +
> + inhcall = get_and_clear_inhcall();
> + __idtentry_exit(regs, inhcall);
> + restore_inhcall(inhcall);
>  }



Re: [RFC][PATCHES] uaccess-related stuff in net/*

2020-05-20 Thread David Miller
From: Al Viro 
Date: Thu, 21 May 2020 01:36:57 +0100

> On Mon, May 11, 2020 at 05:02:51PM -0700, David Miller wrote:
>> From: Al Viro 
>> Date: Mon, 11 May 2020 05:43:28 +0100
>> 
>> >Assorted uaccess-related work in net/*.  First, there's
>> > getting rid of compat_alloc_user_space() mess in MCAST_...
>> > [gs]etsockopt() - no need to play with copying to/from temporary
>> > object on userland stack, etc., when ->compat_[sg]etsockopt()
>> > instances in question can easly do everything without that.
>> > That's the first 13 patches.  Then there's a trivial bit in
>> > net/batman-adv (completely unrelated to everything else) and
>> > finally getting the atm compat ioctls into simpler shape.
>> > 
>> >Please, review and comment.  Individual patches in followups,
>> > the entire branch (on top of current net/master) is in
>> > git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs.git #uaccess.net
>> 
>> I have no problems with this series:
>> 
>> Acked-by: David S. Miller 
> 
> OK, rebased on top of current net/master (no conflicts) and pushed out
> to the same branch.  Patches (for net-next) in followups

Looks good, pulled into net-next.


RE: [PATCH 2/2] soundwire: intel: transition to 3 steps initialization

2020-05-20 Thread Liao, Bard
> -Original Message-
> From: Vinod Koul 
> Sent: Wednesday, May 20, 2020 9:54 PM
> To: Bard Liao 
> Cc: alsa-de...@alsa-project.org; linux-kernel@vger.kernel.org; ti...@suse.de;
> broo...@kernel.org; gre...@linuxfoundation.org; j...@cadence.com;
> srinivas.kandaga...@linaro.org; rander.w...@linux.intel.com;
> ranjani.sridha...@linux.intel.com; hui.w...@canonical.com; pierre-
> louis.boss...@linux.intel.com; Kale, Sanyog R ;
> Blauciak, Slawomir ; Lin, Mengdong
> ; Liao, Bard 
> Subject: Re: [PATCH 2/2] soundwire: intel: transition to 3 steps 
> initialization
> 
> On 20-05-20, 03:19, Bard Liao wrote:
> > From: Pierre-Louis Bossart 
> >
> > Rather than a plain-vanilla init/exit, this patch provides 3 steps in
> > the initialization (ACPI scan, probe, startup) which makes it easier to
> > detect platform support for SoundWire, allocate required resources as
> > early as possible, and conversely help make the startup() callback
> > lighter-weight with only hardware register setup.
> 
> Okay but can you add details in changelog on what each step would do?

Sure. Will do.

> 
> > @@ -1134,25 +1142,15 @@ static int intel_probe(struct platform_device
> *pdev)
> >
> > intel_pdi_ch_update(sdw);
> >
> > -   /* Acquire IRQ */
> > -   ret = request_threaded_irq(sdw->link_res->irq,
> > -  sdw_cdns_irq, sdw_cdns_thread,
> > -  IRQF_SHARED, KBUILD_MODNAME, 
> >cdns);
> 
> This is removed here but not added anywhere else, do we have no irq
> after this patch?

We use a single irq for all Intel Audio DSP events and it will
be requested in the SOF driver.

> 
> > @@ -1205,5 +1201,5 @@ static struct platform_driver sdw_intel_drv = {
> >  module_platform_driver(sdw_intel_drv);
> >
> >  MODULE_LICENSE("Dual BSD/GPL");
> > -MODULE_ALIAS("platform:int-sdw");
> > +MODULE_ALIAS("sdw:intel-sdw");
> 
> it is still a platform device, so does sdw: tag make sense?
> This is used by modprobe to load the driver!

Will fix it

> 
> > +/**
> > + * sdw_intel_probe() - SoundWire Intel probe routine
> > + * @res: resource data
> > + *
> > + * This creates SoundWire Master and Slave devices below the controller.
> 
> I dont think the comment is correct, this is done in intel_master_probe
> which is platform device probe...

Thanks. Will fix it.

> 
> > + * All the information necessary is stored in the context, and the res
> > + * argument pointer can be freed after this step.
> > + */
> > +struct sdw_intel_ctx
> > +*sdw_intel_probe(struct sdw_intel_res *res)
> > +{
> > +   return sdw_intel_probe_controller(res);
> > +}
> > +EXPORT_SYMBOL(sdw_intel_probe);
> 
> I guess this would be called by SOF driver, question is when..?

Will document it, thanks.

> 
> > +/**
> > + * sdw_intel_startup() - SoundWire Intel startup
> > + * @ctx: SoundWire context allocated in the probe
> > + *
> > + */
> > +int sdw_intel_startup(struct sdw_intel_ctx *ctx)
> > +{
> > +   return sdw_intel_startup_controller(ctx);
> > +}
> > +EXPORT_SYMBOL(sdw_intel_startup);
> 
> when is this called, pls do document that

Will document it, thanks.

> 
> --
> ~Vinod


Re: [PATCH v2 07/12] i2c: designware: Move Baytrail sem config to the platform if-clause

2020-05-20 Thread Serge Semin
On Wed, May 20, 2020 at 03:16:14PM +0300, Jarkko Nikula wrote:
> On 5/10/20 12:50 PM, Serge Semin wrote:
> > Currently Intel Baytrail I2C semaphore is a feature of the DW APB I2C
> > platform driver. It's a bit confusing to see it's config in the menu at
> > some separated place with no reference to the platform code. Lets move the
> > config definition under the if-I2C_DESIGNWARE_PLATFORM clause. By doing so
> > the config menu will display the feature right below the DW I2C platform
> > driver item and will indent it to the right so signifying its belonging.
> > 
> > Signed-off-by: Serge Semin 
> > Cc: Alexey Malahov 
> > Cc: Thomas Bogendoerfer 
> > Cc: Paul Burton 
> > Cc: Ralf Baechle 
> > Cc: Andy Shevchenko 
> > Cc: Mika Westerberg 
> > Cc: Wolfram Sang 
> > Cc: Rob Herring 
> > Cc: Frank Rowand 
> > Cc: linux-m...@vger.kernel.org
> > Cc: devicet...@vger.kernel.org
> > ---
> >   drivers/i2c/busses/Kconfig | 30 +-
> >   1 file changed, 17 insertions(+), 13 deletions(-)
> > 
> > diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
> > index 368aa64e9266..ed6927c4c540 100644
> > --- a/drivers/i2c/busses/Kconfig
> > +++ b/drivers/i2c/busses/Kconfig
> > @@ -530,8 +530,8 @@ config I2C_DESIGNWARE_CORE
> >   config I2C_DESIGNWARE_PLATFORM
> > tristate "Synopsys DesignWare Platform"
> > -   select I2C_DESIGNWARE_CORE
> > depends on (ACPI && COMMON_CLK) || !ACPI
> > +   select I2C_DESIGNWARE_CORE
> > help
> >   If you say yes to this option, support will be included for the
> >   Synopsys DesignWare I2C adapter.
> > @@ -539,6 +539,22 @@ config I2C_DESIGNWARE_PLATFORM
> >   This driver can also be built as a module.  If so, the module
> >   will be called i2c-designware-platform.
> > +if I2C_DESIGNWARE_PLATFORM
> > +
> > +config I2C_DESIGNWARE_BAYTRAIL
> > +   bool "Intel Baytrail I2C semaphore support"
> > +   depends on ACPI
> > +   depends on (I2C_DESIGNWARE_PLATFORM=m && IOSF_MBI) || \
> > +  (I2C_DESIGNWARE_PLATFORM=y && IOSF_MBI=y)
> > +   help
> > + This driver enables managed host access to the PMIC I2C bus on select
> > + Intel BayTrail platforms using the X-Powers AXP288 PMIC. It allows
> > + the host to request uninterrupted access to the PMIC's I2C bus from
> > + the platform firmware controlling it. You should say Y if running on
> > + a BayTrail system using the AXP288.
> > +
> > +endif # I2C_DESIGNWARE_PLATFORM
> > +
> 
> Is the added "if I2C_DESIGNWARE_PLATFORM" needed here? Should the "depends
> on" be enough?

The idea was to add if-endif clause here for features possibly added sometime
in future. But using normal "depends on I2C_DESIGNWARE_PLATFORM" shall make
the config depicted as an indented sub-config as well. Would you like me to
remove the if-clause and use the depends on operator instead?

-Sergey

> 
> Jarkko


Re: [PATCH v2 1/1] dt-bindings: spi: Add schema for Cadence QSPI Controller driver

2020-05-20 Thread Ramuthevar, Vadivel MuruganX

Hi Mark,

 Thank you for the review comments...

On 20/5/2020 8:43 pm, Mark Brown wrote:

On Wed, May 20, 2020 at 08:36:12PM +0800, Ramuthevar,Vadivel MuruganX wrote:

From: Ramuthevar Vadivel Murugan 

Add dt-bindings documentation for Cadence-QSPI controller to support
spi based flash memories.

Signed-off-by: Ramuthevar Vadivel Murugan 

---
  .../devicetree/bindings/mtd/cadence-quadspi.txt|  67 ---
  .../devicetree/bindings/spi/cdns,qspi-nor.yaml | 133 +


The changelog says this is adding a new binding but the actual change is
mostly a conversion to YAML.  Please split the additions out into a
separate change, ideally doing that before the conversion since there is
a backlog on review of YAML conversions.


Initially I was sending only the YAML file alone, then reviewers suggested
I do it this way, so I did. Next I will split the patches like below...


1. remove the cadence-quadspi.txt (patch1)
2. convert txt to YAML (patch2)

Regards
Vadivel




[PATCH] MIPS: DTS: Only build subdir of current platform

2020-05-20 Thread Tiezhu Yang
Add config check in Makefile to only build the subdir of current platform.

E.g. without this patch:

  AR  arch/mips/built-in.a
  AR  arch/mips/boot/dts/brcm/built-in.a
  AR  arch/mips/boot/dts/cavium-octeon/built-in.a
  AR  arch/mips/boot/dts/img/built-in.a
  AR  arch/mips/boot/dts/ingenic/built-in.a
  AR  arch/mips/boot/dts/lantiq/built-in.a
  DTC arch/mips/boot/dts/loongson/loongson3_4core_rs780e.dtb
  DTB arch/mips/boot/dts/loongson/loongson3_4core_rs780e.dtb.S
  AS  arch/mips/boot/dts/loongson/loongson3_4core_rs780e.dtb.o
  DTC arch/mips/boot/dts/loongson/loongson3_8core_rs780e.dtb
  DTB arch/mips/boot/dts/loongson/loongson3_8core_rs780e.dtb.S
  AS  arch/mips/boot/dts/loongson/loongson3_8core_rs780e.dtb.o
  AR  arch/mips/boot/dts/loongson/built-in.a
  AR  arch/mips/boot/dts/mscc/built-in.a
  AR  arch/mips/boot/dts/mti/built-in.a
  AR  arch/mips/boot/dts/netlogic/built-in.a
  AR  arch/mips/boot/dts/ni/built-in.a
  AR  arch/mips/boot/dts/pic32/built-in.a
  AR  arch/mips/boot/dts/qca/built-in.a
  AR  arch/mips/boot/dts/ralink/built-in.a
  AR  arch/mips/boot/dts/xilfpga/built-in.a
  AR  arch/mips/boot/dts/built-in.a

With this patch:

  AR  arch/mips/built-in.a
  DTC arch/mips/boot/dts/loongson/loongson3_4core_rs780e.dtb
  DTB arch/mips/boot/dts/loongson/loongson3_4core_rs780e.dtb.S
  AS  arch/mips/boot/dts/loongson/loongson3_4core_rs780e.dtb.o
  DTC arch/mips/boot/dts/loongson/loongson3_8core_rs780e.dtb
  DTB arch/mips/boot/dts/loongson/loongson3_8core_rs780e.dtb.S
  AS  arch/mips/boot/dts/loongson/loongson3_8core_rs780e.dtb.o
  AR  arch/mips/boot/dts/loongson/built-in.a
  AR  arch/mips/boot/dts/built-in.a

Signed-off-by: Tiezhu Yang 
---
 arch/mips/boot/dts/Makefile | 28 ++--
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/arch/mips/boot/dts/Makefile b/arch/mips/boot/dts/Makefile
index d429a69..dce32d1 100644
--- a/arch/mips/boot/dts/Makefile
+++ b/arch/mips/boot/dts/Makefile
@@ -1,17 +1,17 @@
 # SPDX-License-Identifier: GPL-2.0
-subdir-y   += brcm
-subdir-y   += cavium-octeon
-subdir-y   += img
-subdir-y   += ingenic
-subdir-y   += lantiq
-subdir-y   += loongson
-subdir-y   += mscc
-subdir-y   += mti
-subdir-y   += netlogic
-subdir-y   += ni
-subdir-y   += pic32
-subdir-y   += qca
-subdir-y   += ralink
-subdir-y   += xilfpga
+subdir-$(CONFIG_BMIPS_GENERIC) += brcm
+subdir-$(CONFIG_CAVIUM_OCTEON_SOC) += cavium-octeon
+subdir-$(CONFIG_MACH_PISTACHIO)+= img
+subdir-$(CONFIG_MACH_INGENIC)  += ingenic
+subdir-$(CONFIG_LANTIQ)+= lantiq
+subdir-$(CONFIG_MACH_LOONGSON64)   += loongson
+subdir-$(CONFIG_MSCC_OCELOT)   += mscc
+subdir-$(CONFIG_MIPS_MALTA)+= mti
+subdir-$(CONFIG_NLM_XLP_BOARD) += netlogic
+subdir-$(CONFIG_FIT_IMAGE_FDT_NI169445)+= ni
+subdir-$(CONFIG_MACH_PIC32)+= pic32
+subdir-$(CONFIG_ATH79) += qca
+subdir-$(CONFIG_RALINK)+= ralink
+subdir-$(CONFIG_FIT_IMAGE_FDT_XILFPGA) += xilfpga
 
 obj-$(CONFIG_BUILTIN_DTB)  := $(addsuffix /, $(subdir-y))
-- 
2.1.0



Re: [PATCH 0/1] x86/boot: lld fix

2020-05-20 Thread Fangrui Song

On 2020-05-20, Arvind Sankar wrote:

On Wed, May 20, 2020 at 06:56:53PM -0400, Arvind Sankar wrote:

arch/x86/boot/setup.elf currently has an orphan section .text.startup,
and lld git as of ebf14d9b6d8b is breaking on 64-bit due to what seems
to be a change in behavior on orphan section placement (details in patch
commit message).

I'm not sure if this was an intentional change in lld, but it seems like
a good idea to explicitly include .text.startup anyway.

Arvind Sankar (1):
  x86/boot: Add .text.startup to setup.ld

 arch/x86/boot/setup.ld | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--
2.26.2



I found your PATCH 1/1 on https://lkml.org/lkml/2020/5/20/1491 


-   .text   : { *(.text) }
+   .text   : { *(.text.startup) *(.text) }

The LLD behavior change was introduced in
https://reviews.llvm.org/D75225 (will be included in 11.0.0)
It was intended to match GNU ld.

But yes, orphan section placement is still different in the two linkers.

Placing .text.startup before .text seems good.
In GNU ld's internal linker script (ld --verbose),
.text.startup is placed before .text

Reviewed-by: Fangrui Song 


Re: [RFC PATCH 2/2] init: Allow multi-line output of kernel command line

2020-05-20 Thread Joe Perches
On Wed, 2020-05-20 at 18:00 -0700, Andrew Morton wrote:
> On Wed, 20 May 2020 13:36:45 -0700 Joe Perches  wrote:
> 
> > On Wed, 2020-05-20 at 21:10 +0900, Sergey Senozhatsky wrote:
> > > On (20/05/19 21:58), Joe Perches wrote:
> > > [..]
> > > > >  Maybe we can
> > > > > use here something rather random and much shorter instead. E.g.
> > > > > 256 chars. Hmm. How 
> > > > 
> > > > min(some_max like 132/256, PRINTK_LOG_LINE_MAX)
> > > > 
> > > > would work.
> > > 
> > > An alternative approach would be to do what we do in the
> > > print_modules() (the list of modules which can definitely
> > > be longer than 1K chars).
> > > 
> > > We can split command line in a loop - memchr(pos, ' ') - and
> > > pr_cont() parts of the command line. pr_cont() has overflow
> > > control and it flushes cont buffer before it overflows, so
> > > we should not lose anything.
> > 
> > It doesn't matter much here, but I believe
> > there's an 8k max buffer for pr_cont output.
> > 
> > include/linux/printk.h:#define CONSOLE_EXT_LOG_MAX  8192
> > 
> > Anyway, no worries, it simplifies the loop if
> > done that way.
> 
> I'm wondering if we shold add a kernel puts() (putsk()?  yuk) which can
> puts() a string of any length.
> 
> I'm counting around 150 instances of printk("%s", ...) and pr_foo("%s",
> ...) which could perhaps be converted, thus saving an argument.

I'd expect that it hardly matters.
printk(KERN_CONT "string") works.




linux-next: build warning after merge of the v4l-dvb tree

2020-05-20 Thread Stephen Rothwell
Hi all,

After merging the v4l-dvb tree, today's linux-next build (x86_84
allmodconfig) produced this warning:

drivers/staging/media/atomisp/pci/atomisp_v4l2.c:764:12: warning: 
'atomisp_mrfld_power' defined but not used [-Wunused-function]
  764 | static int atomisp_mrfld_power(struct atomisp_device *isp, bool enable)
  |^~~

Introduced by commit

  95d1f398c4dc ("media: atomisp: keep the ISP powered on when setting it")

-- 
Cheers,
Stephen Rothwell


pgpRYTS_V1FQy.pgp
Description: OpenPGP digital signature


Re: [PATCH bpf] security: Fix hook iteration for secid_to_secctx

2020-05-20 Thread James Morris
On Wed, 20 May 2020, Alexei Starovoitov wrote:

> On Wed, May 20, 2020 at 8:15 AM Casey Schaufler  
> wrote:
> >
> >
> > On 5/20/2020 5:56 AM, KP Singh wrote:
> > > From: KP Singh 
> > >
> > > secid_to_secctx is not stackable, and since the BPF LSM registers this
> > > hook by default, the call_int_hook logic is not suitable which
> > > "bails-on-fail" and causes issues when other LSMs register this hook and
> > > eventually breaks Audit.
> > >
> > > In order to fix this, directly iterate over the security hooks instead
> > > of using call_int_hook as suggested in:
> > >
> > > https: 
> > > //lore.kernel.org/bpf/9d0eb6c6-803a-ff3a-5603-9ad6d9edf...@schaufler-ca.com/#t
> > >
> > > Fixes: 98e828a0650f ("security: Refactor declaration of LSM hooks")
> > > Fixes: 625236ba3832 ("security: Fix the default value of secid_to_secctx 
> > > hook"
> > > Reported-by: Alexei Starovoitov 
> > > Signed-off-by: KP Singh 
> >
> > This looks fine.
> 
> Tested. audit works now.
> I fixed missing ')' in the commit log
> and applied to bpf tree.
> It will be on the way to Linus tree soon.

Please add:


Acked-by: James Morris 


-- 
James Morris




Re: [PATCH v2 04/12] i2c: designware: Convert driver to using regmap API

2020-05-20 Thread Serge Semin
On Wed, May 20, 2020 at 03:16:07PM +0300, Jarkko Nikula wrote:
> On 5/10/20 12:50 PM, Serge Semin wrote:
> > Seeing the DW I2C driver is using flags-based accessors with two
> > conditional clauses it would be better to replace them with the regmap
> > API IO methods and to initialize the regmap object with read/write
> > callbacks specific to the controller registers map implementation. This
> > will be also handy for the drivers with non-standard registers mapping
> > (like an embedded into the Baikal-T1 System Controller DW I2C block, which
> > glue-driver is a part of this series).
> > 
> > As before the driver tries to detect the mapping setup at probe stage and
> > creates a regmap object accordingly, which will be used by the rest of the
> > code to correctly access the controller registers. In two places it was
> > appropriate to convert the hand-written read-modify-write and
> > read-poll-loop design patterns to the corresponding regmap API
> > ready-to-use methods.
> > 
> > Note the regmap IO methods return value is checked only at the probe
> > stage. The rest of the code won't do this because basically we have
> > MMIO-based regmap so non of the read/write methods can fail (this also
> > won't be needed for the Baikal-T1-specific I2C controller).
> > 
> > Suggested-by: Andy Shevchenko 
> > Signed-off-by: Serge Semin 
> > Cc: Alexey Malahov 
> > Cc: Thomas Bogendoerfer 
> > Cc: Paul Burton 
> > Cc: Ralf Baechle 
> > Cc: Wolfram Sang 
> > Cc: Rob Herring 
> > Cc: Frank Rowand 
> > Cc: devicet...@vger.kernel.org
> > Cc: linux-m...@vger.kernel.org
> > ---
> >   drivers/i2c/busses/Kconfig |   1 +
> >   drivers/i2c/busses/i2c-designware-common.c | 171 +++--
> >   drivers/i2c/busses/i2c-designware-core.h   |  18 +--
> >   drivers/i2c/busses/i2c-designware-master.c | 125 ---
> >   drivers/i2c/busses/i2c-designware-slave.c  |  77 +-
> >   5 files changed, 239 insertions(+), 153 deletions(-)
> > 
> Looking at patches 4/12-12/12 I think it would be good to move fixes and
> less invasive patches before this. Like
> 
> i2c: designware: slave: Set DW I2C core module dependency
> i2c: designware: Use `-y` to build multi-object modules
> i2c: designware: Move Baytrail sem config to the platform if-clause
> 
> That said, you may add:
> 
> Tested-by: Jarkko Nikula 
> Acked-by: Jarkko Nikula 

Ok. I'll move those three patches to be before this one in v3. Thanks.

-Sergey


Re: [PATCH v4 3/9] usb: dwc3: Increase timeout for CmdAct cleared by device controller

2020-05-20 Thread Thinh Nguyen
Thinh Nguyen wrote:
> Jun Li wrote:
>> Hi
>>
>>> -Original Message-
>>> From: Thinh Nguyen 
>>> Sent: 2020年5月19日 14:46
>>> To: Jun Li ; Felipe Balbi ; Jun Li
>>> 
>>> Cc: John Stultz ; lkml 
>>> ; Yu
>>> Chen ; Greg Kroah-Hartman 
>>> ; Rob
>>> Herring ; Mark Rutland ; ShuFan 
>>> Lee
>>> ; Heikki Krogerus ;
>>> Suzuki K Poulose ; Chunfeng Yun
>>> ; Hans de Goede ; Andy 
>>> Shevchenko
>>> ; Valentin Schneider 
>>> ;
>>> Jack Pham ; Linux USB List 
>>> ; open
>>> list:OPEN FIRMWARE AND FLATTENED DEVICE TREE BINDINGS 
>>> ;
>>> Peter Chen 
>>> Subject: Re: [PATCH v4 3/9] usb: dwc3: Increase timeout for CmdAct cleared 
>>> by device
>>> controller
>>>
>>> Thinh Nguyen wrote:
 Jun Li wrote:
>> -Original Message-
>> From: Felipe Balbi  On Behalf Of Felipe Balbi
>> Sent: 2020年5月16日 19:57
>> To: Jun Li ; Thinh Nguyen
>> ; Jun Li 
>> Cc: John Stultz ; lkml
>> ; Yu Chen ; Greg
>> Kroah-Hartman ; Rob Herring
>> ; Mark Rutland ; ShuFan
>> Lee ; Heikki Krogerus
>> ;
>> Suzuki K Poulose ; Chunfeng Yun
>> ; Hans de Goede ;
>> Andy Shevchenko ; Valentin Schneider
>> ; Jack Pham ;
>> Linux USB List ; open list:OPEN FIRMWARE
>> AND FLATTENED DEVICE TREE BINDINGS ;
>> Peter Chen ; Thinh Nguyen
>> 
>> Subject: RE: [PATCH v4 3/9] usb: dwc3: Increase timeout for CmdAct
>> cleared by device controller
>>
>>
>> Hi,
>>
>> Jun Li  writes:
>> Hi Thinh, could you comment this?
> You only need to wake up the usb2 phy when issuing the command
> while running in highspeed or below. If you're running in SS or
> higher, internally the controller does it for you for usb3 phy.
> In Jun's case, it seems like it takes longer for his phy to wake up.
>
> IMO, in this case, I think it's fine to increase the command timeout.
 Is there an upper limit to this? Is 32k clock the slowest that can
 be fed to the PHY as a suspend clock?
>>> Yes, 32K clock is the slowest, Per DWC3 document on Power Down
>>> Scale (bits 31:19 of GCTL):
>>>
>>> "Power Down Scale (PwrDnScale)
>>> The USB3 suspend_clk input replaces pipe3_rx_pclk as a clock source
>>> to a small part of the USB3 controller that operates when the SS
>>> PHY is in its lowest power (P3) state, and therefore does not provide a 
>>> clock.
>>> The Power Down Scale field specifies how many suspend_clk periods
>>> fit into a 16 kHz clock period. When performing the division, round
>>> up the remainder.
>>> For example, when using an 8-bit/16-bit/32-bit PHY and 25-MHz
>>> Suspend clock, Power Down Scale = 25000 kHz/16 kHz = 13'd1563
>>> (rounder up)
>>> Note:
>>> - Minimum Suspend clock frequency is 32 kHz
>>> - Maximum Suspend clock frequency is 125 MHz"
>> Cool, now do we have an upper bound for how many clock cycles it
>> takes to wake up the PHY?
> My understanding is this ep command does not wake up the SS PHY, the
> SS PHY still stays at P3 when execute this ep command. The time
> required here is to wait controller complete something for this ep
> command with 32K clock.
 Sorry I made a mistake. You're right. Just checked with one of the RTL
 engineers, and it doesn't need to wake up the phy. However, if it is
 eSS speed, it may take longer time as the command may be completing
 with the suspend clock.

>>> What's the value for GCTL[7:6]?
>> 2'b00
>>
>> Thanks
>> Li Jun
> (Sorry for the delay reply)
>
> If it's 0, then the ram clock should be the same as the bus_clk, which
> is odd since you mentioned that the suspend_clk is used instead while in P3.

Just checked with the RTL engineer, even if GCTL[7:6] is set to 0, 
internally it can still run with suspend clock during P3.

> Anyway, I was looking for a way maybe to improve the speed during
> issuing a command. One way is to set GUSB3PIPECTL[17]=0, and it should
> wakeup the phy anytime. I think Felipe suggested it. It's odd that it
> doesn't work for you. I don't have other ideas beside increasing the
> command timeout.
>

In any case, increasing the timeout should be fine with me. It may be 
difficult to determine the max timeout based on the slowest clock rate 
and number of cycles. Different controller and controller versions 
behave differently and may have different number of clock cycles to 
complete a command.

The RTL engineer recommended the timeout to be at least 1ms (which may be 
more than the polling rate of this patch). I'm fine with either the rate 
provided by this tested patch or higher.

BR,
Thinh


[PATCH] [PATCH]Fixed: line break of pr_cont not take effect in linux-rt

2020-05-20 Thread root
From: 汪勇10269566 

Line break of pr_cont does not take effect.
Use several pr_cont to print continuous paragraph, it is expected to
have line break when line ends up with  '\n', however the paragraph
does not have line break
-printk_kthread_func will not print info before log_store insert msg
 into printk_rb, and pr_cont calls cont_add to keep data in buffer.
 cont_add only when the following conditions are met insert msg to
 printk_rb
 1.cpu != c->cpu_owner || !(flags & LOG_CONT)
 2.c->len + len > sizeof(c->buf)

Signed-off-by: 汪勇10269566 
---
 kernel/printk/printk.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 0605a74..d898f50 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -1871,6 +1871,7 @@ static void cont_add(int ctx, int cpu, u32 caller_id, int 
facility, int level,
// but later continuations can add a newline.
if (flags & LOG_NEWLINE) {
c->flags |= LOG_NEWLINE;
+   cont_flush(ctx);
}
 }
 
-- 
2.15.2



[PATCH v3] /dev/mem: Revoke mappings when a driver claims the region

2020-05-20 Thread Dan Williams
Close the hole of holding a mapping over kernel driver takeover event of
a given address range.

Commit 90a545e98126 ("restrict /dev/mem to idle io memory ranges")
introduced CONFIG_IO_STRICT_DEVMEM with the goal of protecting the
kernel against scenarios where a /dev/mem user tramples memory that a
kernel driver owns. However, this protection only prevents *new* read(),
write() and mmap() requests. Established mappings prior to the driver
calling request_mem_region() are left alone.

Especially with persistent memory, and the core kernel metadata that is
stored there, there are plentiful scenarios for a /dev/mem user to
violate the expectations of the driver and cause amplified damage.

Teach request_mem_region() to find and shoot down active /dev/mem
mappings that it believes it has successfully claimed for the exclusive
use of the driver. Effectively a driver call to request_mem_region()
becomes a hole-punch on the /dev/mem device.

The typical usage of unmap_mapping_range() is part of
truncate_pagecache() to punch a hole in a file, but in this case the
implementation is only doing the "first half" of a hole punch. Namely it
is just evacuating current established mappings of the "hole", and it
relies on the fact that /dev/mem establishes mappings in terms of
absolute physical address offsets. Once existing mmap users are
invalidated they can attempt to re-establish the mapping, or attempt to
continue issuing read(2) / write(2) to the invalidated extent, but they
will then be subject to the CONFIG_IO_STRICT_DEVMEM checking that can
block those subsequent accesses.

Cc: Arnd Bergmann 
Cc: Ingo Molnar 
Cc: Kees Cook 
Cc: Matthew Wilcox 
Cc: Russell King 
Cc: Andrew Morton 
Cc: Greg Kroah-Hartman 
Fixes: 90a545e98126 ("restrict /dev/mem to idle io memory ranges")
Signed-off-by: Dan Williams 
---
Changes since v2 [1]:

- Fix smp_wmb() placement relative to publishing write (Matthew)

[1]: 
http://lore.kernel.org/r/158987153989.484.17143582803685077783.st...@dwillia2-desk3.amr.corp.intel.com

 drivers/char/mem.c |  104 +++-
 include/linux/ioport.h |6 +++
 include/uapi/linux/magic.h |1 
 kernel/resource.c  |5 ++
 4 files changed, 114 insertions(+), 2 deletions(-)

diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index 43dd0891ca1e..46bea7a25983 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -31,11 +31,15 @@
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 
 
 #ifdef CONFIG_IA64
 # include 
 #endif
 
+#define DEVMEM_MINOR   1
 #define DEVPORT_MINOR  4
 
 static inline unsigned long size_inside_page(unsigned long start,
@@ -805,12 +809,66 @@ static loff_t memory_lseek(struct file *file, loff_t 
offset, int orig)
return ret;
 }
 
+static struct inode *devmem_inode;
+
+#ifdef CONFIG_IO_STRICT_DEVMEM
+void revoke_devmem(struct resource *res)
+{
+   struct inode *inode = READ_ONCE(devmem_inode);
+
+   /*
+* Check that the initialization has completed. Losing the race
+* is ok because it means drivers are claiming resources before
+* the fs_initcall level of init and prevent /dev/mem from
+* establishing mappings.
+*/
+   smp_rmb();
+   if (!inode)
+   return;
+
+   /*
+* The expectation is that the driver has successfully marked
+* the resource busy by this point, so devmem_is_allowed()
+* should start returning false, however for performance this
+* does not iterate the entire resource range.
+*/
+   if (devmem_is_allowed(PHYS_PFN(res->start)) &&
+   devmem_is_allowed(PHYS_PFN(res->end))) {
+   /*
+* *cringe* iomem=relaxed says "go ahead, what's the
+* worst that can happen?"
+*/
+   return;
+   }
+
+   unmap_mapping_range(inode->i_mapping, res->start, resource_size(res), 
1);
+}
+#endif
+
 static int open_port(struct inode *inode, struct file *filp)
 {
+   int rc;
+
if (!capable(CAP_SYS_RAWIO))
return -EPERM;
 
-   return security_locked_down(LOCKDOWN_DEV_MEM);
+   rc = security_locked_down(LOCKDOWN_DEV_MEM);
+   if (rc)
+   return rc;
+
+   if (iminor(inode) != DEVMEM_MINOR)
+   return 0;
+
+   /*
+* Use a unified address space to have a single point to manage
+* revocations when drivers want to take over a /dev/mem mapped
+* range.
+*/
+   inode->i_mapping = devmem_inode->i_mapping;
+   inode->i_mapping->host = devmem_inode;
+   filp->f_mapping = inode->i_mapping;
+
+   return 0;
 }
 
 #define zero_lseek null_lseek
@@ -885,7 +943,7 @@ static const struct memdev {
fmode_t fmode;
 } devlist[] = {
 #ifdef CONFIG_DEVMEM
-[1] = { "mem", 0, _fops, FMODE_UNSIGNED_OFFSET },
+[DEVMEM_MINOR] = { "mem", 0, _fops, FMODE_UNSIGNED_OFFSET },
 #endif
 #ifdef 

Re: [PATCH v2 5/6] dmaengine: dw: Introduce max burst length hw config

2020-05-20 Thread Serge Semin
On Tue, May 19, 2020 at 10:37:14PM +0530, Vinod Koul wrote:
> On 17-05-20, 22:38, Serge Semin wrote:
> > On Fri, May 15, 2020 at 12:09:50PM +0530, Vinod Koul wrote:
> > > On 12-05-20, 22:12, Andy Shevchenko wrote:
> > > > On Tue, May 12, 2020 at 05:08:20PM +0300, Serge Semin wrote:
> > > > > On Fri, May 08, 2020 at 02:41:53PM +0300, Andy Shevchenko wrote:
> > > > > > On Fri, May 08, 2020 at 01:53:03PM +0300, Serge Semin wrote:

[nip]

> > > > > > But let's see what we can do better. Since maximum is defined on 
> > > > > > the slave side
> > > > > > device, it probably needs to define minimum as well, otherwise it's 
> > > > > > possible
> > > > > > that some hardware can't cope underrun bursts.
> > > > > 
> > > > > There is no need to define minimum if such limit doesn't exists 
> > > > > except a
> > > > > natural 1. Moreover it doesn't exist for all DMA controllers seeing 
> > > > > noone has
> > > > > added such capability into the generic DMA subsystem so far.
> > > > 
> > > > There is a contract between provider and consumer about DMA resource. 
> > > > That's
> > > > why both sides should participate in fulfilling it. Theoretically it 
> > > > may be a
> > > > hardware that doesn't support minimum burst available in DMA by a 
> > > > reason. For
> > > > such we would need minimum to be provided as well.
> > > 
> > > Agreed and if required caps should be extended to tell consumer the
> > > minimum values supported.
> > 
> > Sorry, it's not required by our hardware. Is there any, which actually has 
> > such
> > limitation? (minimum burst length)
> 
> IIUC the idea is that you will tell maximum and minimum values supported
> and client can pick the best value. Esp in case of slave transfers
> things like burst, msize are governed by client capability and usage. So
> exposing the set to pick from would make sense

Agreed. I'll add min_burst capability.

-Sergey

> 
> -- 
> ~Vinod


Re: [PATCH v5 1/4] rcu/kasan: record and print call_rcu() call stack

2020-05-20 Thread Walter Wu
> On Wed, May 20, 2020 at 2:34 PM Walter Wu  wrote:
> >
> > This feature will record the last two call_rcu() call stacks and
> > prints up to 2 call_rcu() call stacks in KASAN report.
> >
> > When call_rcu() is called, we store the call_rcu() call stack into
> > slub alloc meta-data, so that the KASAN report can print rcu stack.
> >
> > [1]https://bugzilla.kernel.org/show_bug.cgi?id=198437
> > [2]https://groups.google.com/forum/#!searchin/kasan-dev/better$20stack$20traces$20for$20rcu%7Csort:date/kasan-dev/KQsjT_88hDE/7rNUZprRBgAJ
> >
> > Signed-off-by: Walter Wu 
> > Suggested-by: Dmitry Vyukov 
> > Acked-by: Paul E. McKenney 
> > Cc: Andrey Ryabinin 
> > Cc: Dmitry Vyukov 
> > Cc: Alexander Potapenko 
> > Cc: Andrew Morton 
> > Cc: Josh Triplett 
> > Cc: Mathieu Desnoyers 
> > Cc: Lai Jiangshan 
> > Cc: Joel Fernandes 
> > Cc: Andrey Konovalov 
> > ---
> >  include/linux/kasan.h |  2 ++
> >  kernel/rcu/tree.c |  2 ++
> >  mm/kasan/common.c |  4 ++--
> >  mm/kasan/generic.c| 21 +
> >  mm/kasan/kasan.h  | 10 ++
> >  mm/kasan/report.c | 24 
> >  6 files changed, 61 insertions(+), 2 deletions(-)
> >
> > diff --git a/include/linux/kasan.h b/include/linux/kasan.h
> > index 31314ca7c635..23b7ee00572d 100644
> > --- a/include/linux/kasan.h
> > +++ b/include/linux/kasan.h
> > @@ -174,11 +174,13 @@ static inline size_t kasan_metadata_size(struct 
> > kmem_cache *cache) { return 0; }
> >
> >  void kasan_cache_shrink(struct kmem_cache *cache);
> >  void kasan_cache_shutdown(struct kmem_cache *cache);
> > +void kasan_record_aux_stack(void *ptr);
> >
> >  #else /* CONFIG_KASAN_GENERIC */
> >
> >  static inline void kasan_cache_shrink(struct kmem_cache *cache) {}
> >  static inline void kasan_cache_shutdown(struct kmem_cache *cache) {}
> > +static inline void kasan_record_aux_stack(void *ptr) {}
> >
> >  #endif /* CONFIG_KASAN_GENERIC */
> >
> > diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> > index 06548e2ebb72..36a4ff7f320b 100644
> > --- a/kernel/rcu/tree.c
> > +++ b/kernel/rcu/tree.c
> > @@ -57,6 +57,7 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> >  #include "../time/tick-internal.h"
> >
> >  #include "tree.h"
> > @@ -2668,6 +2669,7 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func)
> > head->func = func;
> > head->next = NULL;
> > local_irq_save(flags);
> > +   kasan_record_aux_stack(head);
> > rdp = this_cpu_ptr(_data);
> >
> > /* Add the callback to our list. */
> > diff --git a/mm/kasan/common.c b/mm/kasan/common.c
> > index 2906358e42f0..8bc618289bb1 100644
> > --- a/mm/kasan/common.c
> > +++ b/mm/kasan/common.c
> > @@ -41,7 +41,7 @@
> >  #include "kasan.h"
> >  #include "../slab.h"
> >
> > -static inline depot_stack_handle_t save_stack(gfp_t flags)
> > +depot_stack_handle_t kasan_save_stack(gfp_t flags)
> >  {
> > unsigned long entries[KASAN_STACK_DEPTH];
> > unsigned int nr_entries;
> > @@ -54,7 +54,7 @@ static inline depot_stack_handle_t save_stack(gfp_t flags)
> >  static inline void set_track(struct kasan_track *track, gfp_t flags)
> >  {
> > track->pid = current->pid;
> > -   track->stack = save_stack(flags);
> > +   track->stack = kasan_save_stack(flags);
> >  }
> >
> >  void kasan_enable_current(void)
> > diff --git a/mm/kasan/generic.c b/mm/kasan/generic.c
> > index 56ff8885fe2e..8acf48882ba2 100644
> > --- a/mm/kasan/generic.c
> > +++ b/mm/kasan/generic.c
> > @@ -325,3 +325,24 @@ DEFINE_ASAN_SET_SHADOW(f2);
> >  DEFINE_ASAN_SET_SHADOW(f3);
> >  DEFINE_ASAN_SET_SHADOW(f5);
> >  DEFINE_ASAN_SET_SHADOW(f8);
> > +
> > +void kasan_record_aux_stack(void *addr)
> > +{
> > +   struct page *page = kasan_addr_to_page(addr);
> > +   struct kmem_cache *cache;
> > +   struct kasan_alloc_meta *alloc_info;
> > +   void *object;
> > +
> > +   if (!(page && PageSlab(page)))
> > +   return;
> > +
> > +   cache = page->slab_cache;
> > +   object = nearest_obj(cache, page, addr);
> > +   alloc_info = get_alloc_info(cache, object);
> > +
> > +   /*
> > +* record the last two call_rcu() call stacks.
> > +*/
> > +   alloc_info->aux_stack[1] = alloc_info->aux_stack[0];
> > +   alloc_info->aux_stack[0] = kasan_save_stack(GFP_NOWAIT);
> > +}
> > diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h
> > index e8f37199d885..a7391bc83070 100644
> > --- a/mm/kasan/kasan.h
> > +++ b/mm/kasan/kasan.h
> > @@ -104,7 +104,15 @@ struct kasan_track {
> >
> >  struct kasan_alloc_meta {
> > struct kasan_track alloc_track;
> > +#ifdef CONFIG_KASAN_GENERIC
> > +   /*
> > +* call_rcu() call stack is stored into struct kasan_alloc_meta.
> > +* The free stack is stored into struct kasan_free_meta.
> > +*/
> > +   depot_stack_handle_t aux_stack[2];
> > +#else
> > struct kasan_track free_track[KASAN_NR_FREE_STACKS];
> > +#endif
> >  #ifdef 

[PATCH] mm/compaction: Fix the incorrect hole in fast_isolate_freepages()

2020-05-20 Thread Baoquan He
Qian reported that a crash happened in compaction.
http://lkml.kernel.org/r/8c537eb7-85ee-4dcf-943e-3cc0ed0df...@lca.pw

LTP: starting swapping01 (swapping01 -i 5)
page:eaaa refcount:1 mapcount:0 mapping:2243743b index:0x0
flags: 0x1fffe01000(reserved)
raw: 001fffe01000 eaaa0008 eaaa0008 
raw:   0001 
page dumped because: VM_BUG_ON_PAGE(!zone_spans_pfn(page_zone(page), pfn))
page_owner info is not present (never set?)
[ cut here ]
kernel BUG at mm/page_alloc.c:533!
...
CPU: 17 PID: 218 Comm: kcompactd0 Not tainted 5.7.0-rc2-next-20200423+ #7
...
RIP: 0010:set_pfnblock_flags_mask+0x150/0x210
...
RSP: 0018:c900042ff858 EFLAGS: 00010282
RAX:  RBX: 0001 RCX: 9a002382
RDX:  RSI: 0008 RDI: 8884535b8e6c
RBP: c900042ff8b8 R08: ed108a6b8459 R09: ed108a6b8459
R10: 8884535c22c7 R11: ed108a6b8458 R12: 0002a800
R13: eaaa R14: 88847fff3000 R15: 88847fff3040
FS:  () GS:88845358() knlGS:
CS:  0010 DS:  ES:  CR0: 80050033
CR2: 7fd1eb4a1000 CR3: 00083154c000 CR4: 003406e0
Call Trace:
 isolate_freepages+0xb20/0x1140
 ? isolate_freepages_block+0x730/0x730
 ? mark_held_locks+0x34/0xb0
 ? free_unref_page+0x7d/0x90
 ? free_unref_page+0x7d/0x90
 ? check_flags.part.28+0x86/0x220
 compaction_alloc+0xdd/0x100
 migrate_pages+0x304/0x17e0
 ? __ClearPageMovable+0x100/0x100
 ? isolate_freepages+0x1140/0x1140
 compact_zone+0x1249/0x1e90
 ? compaction_suitable+0x260/0x260
 kcompactd_do_work+0x231/0x650
 ? sysfs_compact_node+0x80/0x80
 ? finish_wait+0xe6/0x110
 kcompactd+0x162/0x490
 ? kcompactd_do_work+0x650/0x650
 ? finish_wait+0x110/0x110
 ? __kasan_check_read+0x11/0x20
 ? __kthread_parkme+0xd4/0xf0
 ? kcompactd_do_work+0x650/0x650
 kthread+0x1f7/0x220
 ? kthread_create_worker_on_cpu+0xc0/0xc0
 ret_from_fork+0x27/0x50

After investigation, it turns out that this is introduced by a linux-next
commit: commit f6edbdb71877 ("mm: memmap_init: iterate over memblock
regions rather that check each PFN").

Qian added debugging code. The debugging log shows that the fault page is
0x2a80. From the system e820 map which is pasted at bottom, the page
is in e820 reserved range:
BIOS-e820: [mem 0x29ffe000-0x2a80afff] reserved
And it's in section [0x2800, 0x2fff]. In that section, there are
several usable ranges and some e820 reserved ranges.

For this kind of e820 reserved range, it won't be added to memblock allocator.
However, init_unavailable_mem() will initialize to add them into node 0,
zone 0. Before that commit, later, memmap_init() will add e820 reserved
ranges into the zone where they are contained, because it can pass
the checking of early_pfn_valid() and early_pfn_in_nid(). In this case,
the e820 reserved range where fault page 0x2a80 is located is added
into the DMA32 zone. After that commit, the e820 reserved regions are kept
in node 0, zone 0, since we iterate over memblock regions to initialize
in memmap_init() instead, so their node and zone won't be changed.

Now, fast_isolate_freepages() will use the min mark directly as the migration
target if no page is found from buddy. However, the min mark is not checked
carefully; it could be in an e820 reserved range, and trigger the
VM_BUG_ON_PAGE(!zone_spans_pfn(page_zone(page), pfn)) when trying to mark it
as skip.

Here, let's call pageblock_pfn_to_page() to get page of min_pfn, since it
will do careful checks and return NULL if the pfn is not qualified.

[0.00] BIOS-provided physical RAM map:
[0.00] BIOS-e820: [mem 0x-0x0008bfff] usable
[0.00] BIOS-e820: [mem 0x0008c000-0x0009] reserved
[0.00] BIOS-e820: [mem 0x000e-0x000f] reserved
[0.00] BIOS-e820: [mem 0x0010-0x28328fff] usable
[0.00] BIOS-e820: [mem 0x28329000-0x28568fff] reserved
[0.00] BIOS-e820: [mem 0x28569000-0x28d85fff] usable
[0.00] BIOS-e820: [mem 0x28d86000-0x28ee5fff] reserved
[0.00] BIOS-e820: [mem 0x28ee6000-0x29a04fff] usable
[0.00] BIOS-e820: [mem 0x29a05000-0x29a08fff] reserved
[0.00] BIOS-e820: [mem 0x29a09000-0x29ee4fff] usable
[0.00] BIOS-e820: [mem 0x29ee5000-0x29ef2fff] ACPI data
[0.00] BIOS-e820: [mem 0x29ef3000-0x29f22fff] usable
[0.00] BIOS-e820: [mem 0x29f23000-0x29f23fff] ACPI data
[0.00] BIOS-e820: [mem 

Re: [PATCH v2 4/6] dmaengine: dw: Print warning if multi-block is unsupported

2020-05-20 Thread Serge Semin
On Tue, May 19, 2020 at 10:32:46PM +0530, Vinod Koul wrote:
> On 17-05-20, 22:23, Serge Semin wrote:
> > On Fri, May 15, 2020 at 12:00:39PM +0530, Vinod Koul wrote:
> > > Hi Serge,
> > > 
> > > On 12-05-20, 15:42, Serge Semin wrote:
> > > > Vinod,
> > > > 
> > > > Could you join the discussion for a little bit?
> > > > 
> > > > In order to properly fix the problem discussed in this topic, we need to
> > > > introduce an additional capability exported by DMA channel handlers on 
> > > > per-channel
> > > > basis. It must be a number, which would indicate an upper limitation of 
> > > > the SG list
> > > > entries amount.
> > > > Something like this would do it:
> > > > struct dma_slave_caps {
> > > > ...
> > > > unsigned int max_sg_nents;
> > > > ...
> > > 
> > > Looking at the discussion, I agree we should can this up in the
> > > interface. The max_dma_len suggests the length of a descriptor allowed,
> > > it does not convey the sg_nents supported which in the case of nollp is
> > > one.
> > > 
> > > Btw is this is a real hardware issue, I have found that value of such
> > > hardware is very less and people did fix it up in subsequent revs to add
> > > llp support.
> > 
> > Yes, it is. My DW DMAC doesn't support LLP and there isn't going to be new 
> > SoC
> > version produced.(
> 
> Ouch
> 
> > > Also, another question is why this cannot be handled in driver, I agree
> > > your hardware does not support llp but that does not stop you from
> > > breaking a multi_sg list into N hardware descriptors and keep submitting
> > > them (for this to work submission should be done in isr and not in bh,
> > > unfortunately very few driver take that route).
> > 
> > Current DW DMA driver does that, but this isn't enough. The problem is that
> > in order to fix the issue in the DMA hardware driver we need to introduce
> > an inter-dependent channels abstraction and synchronously feed both Tx and
> > Rx DMA channels with hardware descriptors (LLP entries) one-by-one. This 
> > hardly
> > needed by any slave device driver rather than SPI, which Tx and Rx buffers 
> > are
> > inter-dependent. So Andy's idea was to move the fix to the SPI driver (feed
> > the DMA engine channels with Tx and Rx data buffers synchronously), but DMA
> > engine would provide an info whether such fix is required. This can be
> > determined by the maximum SG entries capability.
> 
> Okay but having the sw limitation removed would also be a good idea, you
> can handle any user, I will leave it upto you, either way is okay
> 
> > 
> > (Note max_sg_ents isn't a limitation on the number of SG entries supported 
> > by
> > the DMA driver, but the number of SG entries handled by the DMA engine in a
> > single DMA transaction.)
> > 
> > > TBH the max_sg_nents or
> > > max_dma_len are HW restrictions and SW *can* deal with then :-)
> > 
> > Yes, it can, but it only works for the cases when individual DMA channels 
> > are
> > utilized. DMA hardware driver doesn't know that the target and source slave
> > device buffers (SPI Tx and Rx FIFOs) are inter-dependent, that writing to 
> > one
> > you will implicitly push data to another. So due to the interrupts handling
> > latency Tx DMA channel is restarted faster than Rx DMA channel is 
> > reinitialized.
> > This causes the SPI Rx FIFO overflow and data loss.
> > 
> > > 
> > > In an idea world, you should break the sw descriptor submitted into N hw
> > > descriptors and submit to hardware and let user know when the sw
> > > descriptor is completed. Of course we do not do that :(
> > 
> > Well, the current Dw DMA driver does that. But due to the two slave device
> > buffers inter-dependency this isn't enough to perform safe DMA transactions.
> > Due to the interrupts handling latency Tx DMA channel pushes data to the 
> > slave
> > device buffer faster than Rx DMA channel starts to handle incoming data. 
> > This
> > causes the SPI Rx FIFO overflow.
> > 
> > > 
> > > > };
> > > > As Andy suggested it's value should be interpreted as:
> > > > 0  - unlimited number of entries,
> > > > 1:MAX_UINT - actual limit to the number of entries.
> > > 
> > 
> > > Hmm why 0, why not MAX_UINT for unlimited?
> > 
> > 0 is much better for many reasons. First of all MAX_UINT is a lot, but it's
> > still a number. On x64 platform this might be actual limit if for instance
> > the block-size register is 32-bits wide. Secondly interpreting 0 as 
> > unlimited
> > number of entries would be more suitable since most of the drivers support
> > LLP functionality and we wouldn't need to update their code to set MAX_UINT.
> > Thirdly DMA engines, which don't support LLPs would need to set this 
> > parameter
> > as 1. So if we do as you say and interpret unlimited number of LLPs as 
> > MAX_UINT,
> > then 0 would left unused.
> > 
> > To sum up I also think that using 0 as unlimited number SG entries 
> > supported is
> > much better.
> 
> ok
> 
> > > > In addition to that seeing the dma_get_slave_caps() method 

Re: [PATCH net-next 1/2] net: hns3: adds support for dynamic VLAN mode

2020-05-20 Thread Jakub Kicinski
On Thu, 21 May 2020 09:33:14 +0800 tanhuazhong wrote:
> On 2020/5/21 5:06, Jakub Kicinski wrote:
> > On Wed, 20 May 2020 09:20:12 +0800 Huazhong Tan wrote:  
> >> From: GuoJia Liao 
> >>
> >> There is a scenario which needs vNICs enable the VLAN filter
> >> in access port, while disable the VLAN filter in trunk port.
> >> Access port and trunk port can switch according to the user's
> >> configuration.
> >>
> >> This patch adds support for the dynamic VLAN mode. then the
> >> HNS3 driver can support two VLAN modes: default VLAN mode and
> >> dynamic VLAN mode. User can switch the mode through the
> >> configuration file.  
> > 
> > What configuration file? Sounds like you're reimplementing trusted
> > VFs (ndo_set_vf_trust).
> >   
> 
> Hi, Jakub.
> 
> Maybe this configuration file here is a little misleading,
> this VLAN mode is decided by the firmware, the driver will
> query the VLAN mode from firmware during  intializing.

And the FW got that configuration from?

> I will modified this description in V2. BTW, is there any
> other suggestion about this patch?

The other suggestion was to trusted vf. What's the difference between
trusted VF and "dynamic VLAN mode"?

> >> In default VLAN mode, port based VLAN filter and VF VLAN
> >> filter should always be enabled.
> >>
> >> In dynamic VLAN mode, port based VLAN filter is disabled, and
> >> VF VLAN filter is disabled defaultly, and should be enabled
> >> when there is a non-zero VLAN ID. In addition, VF VLAN filter
> >> is enabled if PVID is enabled for vNIC.
> >>
> >> When enable promisc, VLAN filter should be disabled. When disable
> >> promisc, VLAN filter's status depends on the value of
> >> 'vport->vf_vlan_en', which is used to record the VF VLAN filter
> >> status.
> >>
> >> In default VLAN mode, 'vport->vf_vlan_en' always be 'true', so
> >> VF VLAN filter will set to be enabled after disabling promisc.
> >>
> >> In dynamic VLAN mode, 'vport->vf_vlan_en' lies on whether there
> >> is a non-zero VLAN ID.
> >>
> >> Signed-off-by: GuoJia Liao 
> >> Signed-off-by: Huazhong Tan   



Re: [PATCH v2 2/2] [media] mtk-mdp: use pm_runtime in MDP component driver

2020-05-20 Thread Eizan Miyamoto
On Thu, May 7, 2020 at 3:07 AM Enric Balletbo Serra  wrote:
>
> Hi Eizan,
>
> Thank you for the patch.
>
> Missatge de Eizan Miyamoto  del dia dc., 6 de maig
> 2020 a les 10:42:
> >
> > Without this change, the MDP components are not fully integrated into
> > the runtime power management subsystem, and the MDP driver does not
> > work.
> >
> > For each of the component device drivers to be able to call
> > pm_runtime_get/put_sync() a pointer to the component's device struct
> > had to be added to struct mtk_mdp_comp, set by mtk_mdp_comp_init().
> >
> > Note that the dev argument to mtk_mdp_comp_clock_on/off() has been
> > removed. Those functions used to be called from the "master" mdp driver
> > in mtk_mdp_core.c, but the component's device pointer no longer
> > corresponds to the mdp master device pointer, which is not the right
> > device to pass to pm_runtime_put/get_sync() which we had to add to get
> > the driver to work properly.
> >
> > Signed-off-by: Eizan Miyamoto 
> > ---
> >
> > Changes in v2:
>
> Ah, I guess this change log corresponds to the first patch.
>
> > - remove empty mtk_mdp_comp_init
> > - update documentation for enum mtk_mdp_comp_type
> > - remove comma after last element of mtk_mdp_comp_driver_dt_match
> >
> >  drivers/media/platform/mtk-mdp/mtk_mdp_comp.c | 22 ++-
> >  drivers/media/platform/mtk-mdp/mtk_mdp_comp.h |  6 +++--
> >  drivers/media/platform/mtk-mdp/mtk_mdp_core.c |  6 ++---
> >  3 files changed, 23 insertions(+), 11 deletions(-)
> >
> > diff --git a/drivers/media/platform/mtk-mdp/mtk_mdp_comp.c 
> > b/drivers/media/platform/mtk-mdp/mtk_mdp_comp.c
> > index 5b4d482df778..228c58f92c8c 100644
> > --- a/drivers/media/platform/mtk-mdp/mtk_mdp_comp.c
> > +++ b/drivers/media/platform/mtk-mdp/mtk_mdp_comp.c
> > @@ -15,6 +15,7 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> >
> >  #include "mtk_mdp_comp.h"
> >  #include "mtk_mdp_core.h"
> > @@ -53,7 +54,7 @@ static const struct of_device_id 
> > mtk_mdp_comp_driver_dt_match[] = {
> >  };
> >  MODULE_DEVICE_TABLE(of, mtk_mdp_comp_driver_dt_match);
> >
> > -void mtk_mdp_comp_clock_on(struct device *dev, struct mtk_mdp_comp *comp)
> > +void mtk_mdp_comp_clock_on(struct mtk_mdp_comp *comp)
> >  {
> > int i, err;
> >
> > @@ -62,25 +63,31 @@ void mtk_mdp_comp_clock_on(struct device *dev, struct 
> > mtk_mdp_comp *comp)
> > if (err) {
> > enum mtk_mdp_comp_type comp_type =
> > (enum mtk_mdp_comp_type)
> > -   of_device_get_match_data(dev);
> > -   dev_err(dev,
> > +   of_device_get_match_data(comp->dev);
> > +   dev_err(comp->dev,
> > "failed to get larb, err %d. type:%d\n",
> > err, comp_type);
> > }
> > }
> >
> > +   err = pm_runtime_get_sync(comp->dev);
> > +   if (err < 0)
> > +   dev_err(comp->dev,
> > +   "failed to runtime get, err %d.\n",
> > +   err);
>
> Should you really ignore this error? If that's the case I'd just call
> pm_runtime_get_sync() without adding the logic to just print an error
> message.

This is mostly consistent with style elsewhere, e.g., in mtk_mdp_m2m.c
mtk_mdp_m2m_start_streaming and mtk_mdp_m2m_stop_streaming.

>
> > +
> > for (i = 0; i < ARRAY_SIZE(comp->clk); i++) {
> > if (IS_ERR(comp->clk[i]))
> > continue;
> > err = clk_prepare_enable(comp->clk[i]);
> > if (err)
> > -   dev_err(dev,
> > +   dev_err(comp->dev,
> > "failed to enable clock, err %d. i:%d\n",
> > err, i);
>
> Although ignoring errors seems to be a common pattern in this file and
> I know you did not introduce this.

Maybe the issue is that since no action is taken, logging at the 'error' log
level is not the right thing? IOW, should it be changed to an informational
message instead? Nevertheless, I think we should defer these changes to a
follow-up patch instead.

>
> > }
> >  }
> >
> > -void mtk_mdp_comp_clock_off(struct device *dev, struct mtk_mdp_comp *comp)
> > +void mtk_mdp_comp_clock_off(struct mtk_mdp_comp *comp)
> >  {
> > int i;
> >
> > @@ -92,6 +99,8 @@ void mtk_mdp_comp_clock_off(struct device *dev, struct 
> > mtk_mdp_comp *comp)
> >
> > if (comp->larb_dev)
> > mtk_smi_larb_put(comp->larb_dev);
> > +
> > +   pm_runtime_put_sync(comp->dev);
> >  }
> >
> >  static int mtk_mdp_comp_bind(struct device *dev, struct device *master,
> > @@ -101,6 +110,7 @@ static int mtk_mdp_comp_bind(struct device *dev, struct 
> > device *master,
> > struct mtk_mdp_dev *mdp = data;
> >
> > mtk_mdp_register_component(mdp, comp);
> > +   

Re: [PATCH bpf] security: Fix hook iteration for secid_to_secctx

2020-05-20 Thread Alexei Starovoitov
On Wed, May 20, 2020 at 8:15 AM Casey Schaufler  wrote:
>
>
> On 5/20/2020 5:56 AM, KP Singh wrote:
> > From: KP Singh 
> >
> > secid_to_secctx is not stackable, and since the BPF LSM registers this
> > hook by default, the call_int_hook logic is not suitable which
> > "bails-on-fail" and casues issues when other LSMs register this hook and
> > eventually breaks Audit.
> >
> > In order to fix this, directly iterate over the security hooks instead
> > of using call_int_hook as suggested in:
> >
> > https: 
> > //lore.kernel.org/bpf/9d0eb6c6-803a-ff3a-5603-9ad6d9edf...@schaufler-ca.com/#t
> >
> > Fixes: 98e828a0650f ("security: Refactor declaration of LSM hooks")
> > Fixes: 625236ba3832 ("security: Fix the default value of secid_to_secctx 
> > hook"
> > Reported-by: Alexei Starovoitov 
> > Signed-off-by: KP Singh 
>
> This looks fine.

Tested. audit works now.
I fixed missing ')' in the commit log
and applied to bpf tree.
It will be on the way to Linus tree soon.

Thanks!


Re: [PATCH] kthread: Use TASK_IDLE state for newly created kernel threads

2020-05-20 Thread Pavan Kondeti
On Wed, May 20, 2020 at 08:18:58PM +0200, Greg Kroah-Hartman wrote:
> On Wed, May 20, 2020 at 05:25:09PM +0530, Pavankumar Kondeti wrote:
> > When kernel threads are created for later use, they will be in
> > TASK_UNINTERRUPTIBLE state until they are woken up. This results
> > in increased loadavg and false hung task reports. To fix this,
> > use TASK_IDLE state instead of TASK_UNINTERRUPTIBLE when
> > a kernel thread schedules out for the first time.
> > 
> > Signed-off-by: Pavankumar Kondeti 
> > ---
> >  kernel/kthread.c | 6 +++---
> >  1 file changed, 3 insertions(+), 3 deletions(-)
> > 
> > diff --git a/kernel/kthread.c b/kernel/kthread.c
> > index bfbfa48..b74ed8e 100644
> > --- a/kernel/kthread.c
> > +++ b/kernel/kthread.c
> > @@ -250,7 +250,7 @@ static int kthread(void *_create)
> > current->vfork_done = >exited;
> >  
> > /* OK, tell user we're spawned, wait for stop or wakeup */
> > -   __set_current_state(TASK_UNINTERRUPTIBLE);
> > +   __set_current_state(TASK_IDLE);
> > create->result = current;
> > /*
> >  * Thread is going to call schedule(), do not preempt it,
> > @@ -428,7 +428,7 @@ static void __kthread_bind(struct task_struct *p, 
> > unsigned int cpu, long state)
> >  
> >  void kthread_bind_mask(struct task_struct *p, const struct cpumask *mask)
> >  {
> > -   __kthread_bind_mask(p, mask, TASK_UNINTERRUPTIBLE);
> > +   __kthread_bind_mask(p, mask, TASK_IDLE);
> >  }
> >  
> >  /**
> > @@ -442,7 +442,7 @@ void kthread_bind_mask(struct task_struct *p, const 
> > struct cpumask *mask)
> >   */
> >  void kthread_bind(struct task_struct *p, unsigned int cpu)
> >  {
> > -   __kthread_bind(p, cpu, TASK_UNINTERRUPTIBLE);
> > +   __kthread_bind(p, cpu, TASK_IDLE);
> >  }
> >  EXPORT_SYMBOL(kthread_bind);
> 
> It's as if people never read mailing lists:
>   
> https://lore.kernel.org/r/dm6pr11mb3531d3b164357b2dc476102ddf...@dm6pr11mb3531.namprd11.prod.outlook.com
> 
> Given that this is an identical resend of the previous patch, why are
> you doing so, and what has changed since that original rejection?
> 
I did not know that it was attempted before. Thanks for pointing to the
previous discussion.

We have seen hung task reports from customers and it is due to a downstream
change which create bunch of kernel threads for later use. From Peter's
reply, I understood that one must wake up the kthread after creation and put
it in INTERRUPTIBLE sleep. I will pass on the message.

Thanks,
Pavan
-- 
Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project.


Re: [PATCH v2 2/6] dt-bindings: dma: dw: Add max burst transaction length property

2020-05-20 Thread Serge Semin
On Tue, May 19, 2020 at 10:43:04PM +0530, Vinod Koul wrote:
> On 17-05-20, 20:47, Serge Semin wrote:
> > On Fri, May 15, 2020 at 02:11:13PM +0300, Serge Semin wrote:
> > > On Fri, May 15, 2020 at 04:26:58PM +0530, Vinod Koul wrote:
> > > > On 15-05-20, 13:51, Andy Shevchenko wrote:
> > > > > On Fri, May 15, 2020 at 11:39:11AM +0530, Vinod Koul wrote:
> > > > > > On 12-05-20, 15:38, Andy Shevchenko wrote:
> > > > > > > On Tue, May 12, 2020 at 02:49:46PM +0300, Serge Semin wrote:
> > > > > > > > On Tue, May 12, 2020 at 12:08:04PM +0300, Andy Shevchenko wrote:
> > > > > > > > > On Tue, May 12, 2020 at 12:35:31AM +0300, Serge Semin wrote:
> > > > > > > > > > On Tue, May 12, 2020 at 12:01:38AM +0300, Andy Shevchenko 
> > > > > > > > > > wrote:
> > > > > > > > > > > On Mon, May 11, 2020 at 11:05:28PM +0300, Serge Semin 
> > > > > > > > > > > wrote:
> > > > > > > > > > > > On Fri, May 08, 2020 at 02:12:42PM +0300, Andy 
> > > > > > > > > > > > Shevchenko wrote:
> > > > > > > > > > > > > On Fri, May 08, 2020 at 01:53:00PM +0300, Serge Semin 
> > > > > > > > > > > > > wrote:
> > > > > 
> > > > > ...
> > > > > 
> > > > > > > I leave it to Rob and Vinod.
> > > > > > > It won't break our case, so, feel free with your approach.
> > > > > > 
> > > > > > I agree the DT is about describing the hardware and looks like 
> > > > > > value of
> > > > > > 1 is not allowed. If allowed it should be added..
> > > > > 
> > > > > It's allowed at *run time*, it's illegal in *pre-silicon stage* when
> > > > > synthesizing the IP.
> > > > 
> > > > Then it should be added ..
> > > 
> > > Vinod, max-burst-len is "MAXimum" burst length not "run-time or current 
> > > or any
> > > other" burst length. It's a constant defined at the IP-core synthesis 
> > > stage and
> > > according to the Data Book, MAX burst length can't be 1. The allowed 
> > > values are
> > > exactly as I described in the binding [4, 8, 16, 32, ...]. MAX burst 
> > > length
> > > defines the upper limit of the run-time burst length. So setting it to 1 
> > > isn't
> > > about describing a hardware, but using DT for the software convenience.
> > > 
> > > -Sergey
> > 
> > Vinod, to make this completely clear. According to the DW DMAC data book:
> > - In general, run-time parameter of the DMA transaction burst length (set in
> >   the SRC_MSIZE/DST_MSIZE fields of the channel control register) may belong
> >   to the set [1, 4, 8, 16, 32, 64, 128, 256].
> 
> so 1 is valid value for msize

Right.

> 
> > - Actual upper limit of the burst length run-time parameter is limited by a
> >   constant defined at the IP-synthesize stage (it's called 
> > DMAH_CHx_MAX_MULT_SIZE)
> >   and this constant belongs to the set [4, 8, 16, 32, 64, 128, 256]. (See, 
> > no 1
> >   in this set).
> 
> maximum can be 4 onwards, but in my configuration I can choose 1 as
> value for msize

It's true for all configurations. msize can be at least 0 or 1, which correspond
to 1 and 4 burst length respectively.

> 
> > So the run-time burst length in a case of particular DW DMA controller 
> > belongs
> > to the range:
> > 1 <= SRC_MSIZE <= DMAH_CHx_MAX_MULT_SIZE
> > and
> > 1 <= DST_MSIZE <= DMAH_CHx_MAX_MULT_SIZE
> > 
> > See. No matter which DW DMA controller we get each of them will at least 
> > support
> > the burst length of 1 and 4 transfer words. This is determined by design of 
> > the
> > DW DMA controller IP since DMAH_CHx_MAX_MULT_SIZE constant set starts with 
> > 4.
> > 
> > In this patch I suggest to add the max-burst-len property, which specifies
> > the upper limit for the run-time burst length. Since the maximum burst 
> > length
> > capable to be set to the SRC_MSIZE/DST_MSIZE fields of the DMA channel 
> > control
> > register is determined by the DMAH_CHx_MAX_MULT_SIZE constant (which can't 
> > be 1
> > by the DW DMA IP design), max-burst-len property as being also responsible 
> > for
> > the maximum burst length setting should be associated with 
> > DMAH_CHx_MAX_MULT_SIZE
> > thus should belong to the same set [4, 8, 16, 32, 64, 128, 256].
> > 
> > So 1 shouldn't be in the enum of the max-burst-len property constraint, 
> > because
> > hardware doesn't support such limitation by design, while setting 1 as
> > max-burst-len would mean incorrect description of the DMA controller.
> > 
> > Vinod, could you take a look at the info I provided above and say your 
> > final word
> > whether 1 should be really allowed to be in the max-burst-len enum 
> > constraints?
> > I'll do as you say in the next version of the patchset.
> 
> You are specifying the parameter which will be used to pick, i think
> starting with 4 makes sense as we are specifying maximum allowed values
> for msize. Values lesser than or equal to this would be allowed, I guess
> that should be added to documentation.

Right. Thanks. I'll add a proper description to the property in the binding file.

-Sergey

> 
> thanks
> -- 
> ~Vinod


Re: [PATCH net-next 1/2] net: hns3: adds support for dynamic VLAN mode

2020-05-20 Thread tanhuazhong




On 2020/5/21 5:06, Jakub Kicinski wrote:

On Wed, 20 May 2020 09:20:12 +0800 Huazhong Tan wrote:

From: GuoJia Liao 

There is a scenario which needs vNICs enable the VLAN filter
in access port, while disable the VLAN filter in trunk port.
Access port and trunk port can switch according to the user's
configuration.

This patch adds support for the dynamic VLAN mode. Then the
HNS3 driver can support two VLAN modes: default VLAN mode and
dynamic VLAN mode. User can switch the mode through the
configuration file.


What configuration file? Sounds like you're reimplementing trusted
VFs (ndo_set_vf_trust).



Hi, Jakub.

Maybe this configuration file here is a little misleading,
this VLAN mode is decided by the firmware, the driver will
query the VLAN mode from firmware during initialization.

I will modify this description in V2. BTW, is there any
other suggestion about this patch?

Thanks:)



In default VLAN mode, port based VLAN filter and VF VLAN
filter should always be enabled.

In dynamic VLAN mode, port based VLAN filter is disabled, and
VF VLAN filter is disabled by default, and should be enabled
when there is a non-zero VLAN ID. In addition, VF VLAN filter
is enabled if PVID is enabled for vNIC.

When enable promisc, VLAN filter should be disabled. When disable
promisc, VLAN filter's status depends on the value of
'vport->vf_vlan_en', which is used to record the VF VLAN filter
status.

In default VLAN mode, 'vport->vf_vlan_en' is always 'true', so
VF VLAN filter will set to be enabled after disabling promisc.

In dynamic VLAN mode, 'vport->vf_vlan_en' depends on whether there
is a non-zero VLAN ID.

Signed-off-by: GuoJia Liao 
Signed-off-by: Huazhong Tan 


.





Re: [PATCH v2 5/7] mm: parallelize deferred_init_memmap()

2020-05-20 Thread Alexander Duyck
On Wed, May 20, 2020 at 11:27 AM Daniel Jordan
 wrote:
>
> Deferred struct page init is a significant bottleneck in kernel boot.
> Optimizing it maximizes availability for large-memory systems and allows
> spinning up short-lived VMs as needed without having to leave them
> running.  It also benefits bare metal machines hosting VMs that are
> sensitive to downtime.  In projects such as VMM Fast Restart[1], where
> guest state is preserved across kexec reboot, it helps prevent
> application and network timeouts in the guests.
>
> Multithread to take full advantage of system memory bandwidth.
>
> The maximum number of threads is capped at the number of CPUs on the
> node because speedups always improve with additional threads on every
> system tested, and at this phase of boot, the system is otherwise idle
> and waiting on page init to finish.
>
> Helper threads operate on section-aligned ranges to both avoid false
> sharing when setting the pageblock's migrate type and to avoid accessing
> uninitialized buddy pages, though max order alignment is enough for the
> latter.
>
> The minimum chunk size is also a section.  There was benefit to using
> multiple threads even on relatively small memory (1G) systems, and this
> is the smallest size that the alignment allows.
>
> The time (milliseconds) is the slowest node to initialize since boot
> blocks until all nodes finish.  intel_pstate is loaded in active mode
> without hwp and with turbo enabled, and intel_idle is active as well.
>
> Intel(R) Xeon(R) Platinum 8167M CPU @ 2.00GHz (Skylake, bare metal)
>   2 nodes * 26 cores * 2 threads = 104 CPUs
>   384G/node = 768G memory
>
>kernel boot deferred init
>
> node% (thr)speedup  time_ms (stdev)speedup  time_ms (stdev)
>   (  0) --   4078.0 (  9.0) --   1779.0 (  8.7)
>2% (  1)   1.4%   4021.3 (  2.9)   3.4%   1717.7 (  7.8)
>   12% (  6)  35.1%   2644.7 ( 35.3)  80.8%341.0 ( 35.5)
>   25% ( 13)  38.7%   2498.0 ( 34.2)  89.1%193.3 ( 32.3)
>   37% ( 19)  39.1%   2482.0 ( 25.2)  90.1%175.3 ( 31.7)
>   50% ( 26)  38.8%   2495.0 (  8.7)  89.1%193.7 (  3.5)
>   75% ( 39)  39.2%   2478.0 ( 21.0)  90.3%172.7 ( 26.7)
>  100% ( 52)  40.0%   2448.0 (  2.0)  91.9%143.3 (  1.5)
>
> Intel(R) Xeon(R) CPU E5-2699C v4 @ 2.20GHz (Broadwell, bare metal)
>   1 node * 16 cores * 2 threads = 32 CPUs
>   192G/node = 192G memory
>
>kernel boot deferred init
>
> node% (thr)speedup  time_ms (stdev)speedup  time_ms (stdev)
>   (  0) --   1996.0 ( 18.0) --   1104.3 (  6.7)
>3% (  1)   1.4%   1968.0 (  3.0)   2.7%   1074.7 (  9.0)
>   12% (  4)  40.1%   1196.0 ( 22.7)  72.4%305.3 ( 16.8)
>   25% (  8)  47.4%   1049.3 ( 17.2)  84.2%174.0 ( 10.6)
>   37% ( 12)  48.3%   1032.0 ( 14.9)  86.8%145.3 (  2.5)
>   50% ( 16)  48.9%   1020.3 (  2.5)  88.0%133.0 (  1.7)
>   75% ( 24)  49.1%   1016.3 (  8.1)  88.4%128.0 (  1.7)
>  100% ( 32)  49.4%   1009.0 (  8.5)  88.6%126.3 (  0.6)
>
> Intel(R) Xeon(R) CPU E5-2699 v3 @ 2.30GHz (Haswell, bare metal)
>   2 nodes * 18 cores * 2 threads = 72 CPUs
>   128G/node = 256G memory
>
>kernel boot deferred init
>
> node% (thr)speedup  time_ms (stdev)speedup  time_ms (stdev)
>   (  0) --   1682.7 (  6.7) --630.0 (  4.6)
>3% (  1)   0.4%   1676.0 (  2.0)   0.7%625.3 (  3.2)
>   12% (  4)  25.8%   1249.0 (  1.0)  68.2%200.3 (  1.2)
>   25% (  9)  30.0%   1178.0 (  5.2)  79.7%128.0 (  3.5)
>   37% ( 13)  30.6%   1167.7 (  3.1)  81.3%117.7 (  1.2)
>   50% ( 18)  30.6%   1167.3 (  2.3)  81.4%117.0 (  1.0)
>   75% ( 27)  31.0%   1161.3 (  4.6)  82.5%110.0 (  6.9)
>  100% ( 36)  32.1%   1142.0 (  3.6)  85.7% 90.0 (  1.0)
>
> AMD EPYC 7551 32-Core Processor (Zen, kvm guest)
>   1 node * 8 cores * 2 threads = 16 CPUs
>   64G/node = 64G memory
>
>kernel boot deferred init
>
> node% (thr)speedup  time_ms (stdev)speedup  time_ms (stdev)
>   (  0) --   1003.7 ( 16.6) --243.3 (  8.1)
>6% (  1)   1.4%990.0 (  4.6)   1.2%240.3 (  1.5)
>   12% (  2)  11.4%889.3 ( 16.7)  44.5%135.0 (  3.0)
>   25% (  4)  16.8%835.3 (  9.0)   

[PATCH v3 12/16] spi: dw: Add DW SPI DMA/PCI/MMIO dependency on the DW SPI core

2020-05-20 Thread Serge Semin
Seeing all of the DW SPI driver components like DW SPI DMA/PCI/MMIO
depend on the DW SPI core code it's better to use the if-endif
conditional kernel config statement to signify that common dependency.

Co-developed-by: Georgy Vlasov 
Signed-off-by: Georgy Vlasov 
Co-developed-by: Ramil Zaripov 
Signed-off-by: Ramil Zaripov 
Signed-off-by: Serge Semin 
Reviewed-by: Andy Shevchenko 
Cc: Alexey Malahov 
Cc: Thomas Bogendoerfer 
Cc: Paul Burton 
Cc: Ralf Baechle 
Cc: Arnd Bergmann 
Cc: Andy Shevchenko 
Cc: Rob Herring 
Cc: linux-m...@vger.kernel.org
Cc: devicet...@vger.kernel.org
---
 drivers/spi/Kconfig | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index 6a84f3dad35c..3cdf8310d185 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -226,17 +226,20 @@ config SPI_DESIGNWARE
help
  general driver for SPI controller core from DesignWare
 
+if SPI_DESIGNWARE
+
 config SPI_DW_DMA
bool "DMA support for DW SPI controller"
-   depends on SPI_DESIGNWARE
 
 config SPI_DW_PCI
tristate "PCI interface driver for DW SPI core"
-   depends on SPI_DESIGNWARE && PCI
+   depends on PCI
 
 config SPI_DW_MMIO
tristate "Memory-mapped io interface driver for DW SPI core"
-   depends on SPI_DESIGNWARE
+   depends on HAS_IOMEM
+
+endif
 
 config SPI_DLN2
tristate "Diolan DLN-2 USB SPI adapter"
-- 
2.25.1



[PATCH v3 14/16] spi: dw: Add DMA support to the DW SPI MMIO driver

2020-05-20 Thread Serge Semin
Since the common code in the spi-dw-dma.c driver is ready to be used
by the MMIO driver and now provides a method to generically (on any
DT or ACPI-based platforms) retrieve the Tx/Rx DMA channel handlers,
we can use it and a set of the common DW SPI DMA callbacks to enable
DMA at least for generic "snps,dw-apb-ssi" and "snps,dwc-ssi-1.01a"
devices.

Co-developed-by: Georgy Vlasov 
Signed-off-by: Georgy Vlasov 
Co-developed-by: Ramil Zaripov 
Signed-off-by: Ramil Zaripov 
Signed-off-by: Serge Semin 
Reviewed-by: Andy Shevchenko 
Cc: Alexey Malahov 
Cc: Thomas Bogendoerfer 
Cc: Paul Burton 
Cc: Ralf Baechle 
Cc: Arnd Bergmann 
Cc: Andy Shevchenko 
Cc: Rob Herring 
Cc: linux-m...@vger.kernel.org
Cc: devicet...@vger.kernel.org
---
 drivers/spi/spi-dw-mmio.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/spi/spi-dw-mmio.c b/drivers/spi/spi-dw-mmio.c
index 0894b4c09496..e23d0c53a664 100644
--- a/drivers/spi/spi-dw-mmio.c
+++ b/drivers/spi/spi-dw-mmio.c
@@ -149,6 +149,8 @@ static int dw_spi_dw_apb_init(struct platform_device *pdev,
/* Register hook to configure CTRLR0 */
dwsmmio->dws.update_cr0 = dw_spi_update_cr0;
 
+   dw_spi_dma_setup_generic(>dws);
+
return 0;
 }
 
@@ -158,6 +160,8 @@ static int dw_spi_dwc_ssi_init(struct platform_device *pdev,
/* Register hook to configure CTRLR0 */
dwsmmio->dws.update_cr0 = dw_spi_update_cr0_v1_01a;
 
+   dw_spi_dma_setup_generic(>dws);
+
return 0;
 }
 
-- 
2.25.1



[PATCH v3 11/16] spi: dw: Remove DW DMA code dependency from DW_DMAC_PCI

2020-05-20 Thread Serge Semin
Since there is a generic method available to initialize the DW SPI DMA
interface on any DT and ACPI-based platforms, which in general can be
designed with not only DW DMAC but with any DMA engine on board, we can
freely remove the CONFIG_DW_DMAC_PCI config from dependency list of
CONFIG_SPI_DW_DMA. Especially seeing that we don't use anything DW DMAC
specific in the new driver.

Co-developed-by: Georgy Vlasov 
Signed-off-by: Georgy Vlasov 
Co-developed-by: Ramil Zaripov 
Signed-off-by: Ramil Zaripov 
Signed-off-by: Serge Semin 
Reviewed-by: Andy Shevchenko 
Cc: Alexey Malahov 
Cc: Thomas Bogendoerfer 
Cc: Paul Burton 
Cc: Ralf Baechle 
Cc: Rob Herring 
Cc: Arnd Bergmann 
Cc: Andy Shevchenko 
Cc: linux-m...@vger.kernel.org
Cc: devicet...@vger.kernel.org
---
 drivers/spi/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index 03b061975f70..6a84f3dad35c 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -228,7 +228,7 @@ config SPI_DESIGNWARE
 
 config SPI_DW_DMA
bool "DMA support for DW SPI controller"
-   depends on SPI_DESIGNWARE && DW_DMAC_PCI
+   depends on SPI_DESIGNWARE
 
 config SPI_DW_PCI
tristate "PCI interface driver for DW SPI core"
-- 
2.25.1



  1   2   3   4   5   6   7   8   9   10   >