Re: [PATCH v1 1/2] perf/core: Use sysctl to turn on/off dropping leaked kernel samples

2018-06-14 Thread Stephane Eranian
On Thu, Jun 14, 2018 at 7:10 PM Jin Yao  wrote:
>
> When doing sampling, for example:
>
> perf record -e cycles:u ...
>
> On workloads that do a lot of kernel entry/exits we see kernel
> samples, even though :u is specified. This is due to skid existing.
>
> This might be a security issue because it can leak kernel addresses even
> though kernel sampling support is disabled.
>
> One patch "perf/core: Drop kernel samples even though :u is specified"
> was posted last year but it was reverted because it introduced a
> regression issue that broke the rr-project, which used sampling
> events to receive a signal on overflow. These signals were critical
> to the correct operation of rr.
>
> See '6a8a75f32357 ("Revert "perf/core: Drop kernel samples even
> though :u is specified"")' for detail.
>
> Now the idea is to use sysctl to control the dropping of leaked
> kernel samples.
>
> /sys/devices/cpu/perf_allow_sample_leakage:
>
> 0 - default, drop the leaked kernel samples.
> 1 - don't drop the leaked kernel samples.
>
> For rr it can write 1 to /sys/devices/cpu/perf_allow_sample_leakage.
>
> For example,
>
> root@skl:/tmp# cat /sys/devices/cpu/perf_allow_sample_leakage
> 0
> root@skl:/tmp# perf record -e cycles:u ./div
> root@skl:/tmp# perf report --stdio
>
>   ...  .  
>
> 47.01%  div  div[.] main
> 20.74%  div  libc-2.23.so   [.] __random_r
> 15.59%  div  libc-2.23.so   [.] __random
>  8.68%  div  div[.] compute_flag
>  4.48%  div  libc-2.23.so   [.] rand
>  3.50%  div  div[.] rand@plt
>  0.00%  div  ld-2.23.so [.] do_lookup_x
>  0.00%  div  ld-2.23.so [.] memcmp
>  0.00%  div  ld-2.23.so [.] _dl_start
>  0.00%  div  ld-2.23.so [.] _start
>
> There is no kernel symbol reported.
>
> root@skl:/tmp# echo 1 > /sys/devices/cpu/perf_allow_sample_leakage
> root@skl:/tmp# cat /sys/devices/cpu/perf_allow_sample_leakage
> 1
> root@skl:/tmp# perf record -e cycles:u ./div
> root@skl:/tmp# perf report --stdio
>
>   ...    .
>
> 47.53%  div  div   [.] main
> 20.62%  div  libc-2.23.so  [.] __random_r
> 15.32%  div  libc-2.23.so  [.] __random
>  8.66%  div  div   [.] compute_flag
>  4.53%  div  libc-2.23.so  [.] rand
>  3.34%  div  div   [.] rand@plt
>  0.00%  div  [kernel.vmlinux]  [k] apic_timer_interrupt
>  0.00%  div  libc-2.23.so  [.] intel_check_word
>  0.00%  div  ld-2.23.so[.] brk
>  0.00%  div  [kernel.vmlinux]  [k] page_fault
>  0.00%  div  ld-2.23.so[.] _start
>
> We can see the kernel symbols apic_timer_interrupt and page_fault.
>
These kernel symbols do not match your description here. How much skid
do you think you have here?
You're saying you are at the user level, you get a counter overflow,
and the interrupted IP lands in the kernel
because you were there by the time the interrupt is delivered. How
many instructions does it take to get
from user land to apic_timer_interrupt() or page_fault()? These
functions are not right at the kernel entry,
I believe. So how did you get there, the skid must have been VERY big
or symbolization has a problem.

> Signed-off-by: Jin Yao 
> ---
>  kernel/events/core.c | 58 
> 
>  1 file changed, 58 insertions(+)
>
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index 80cca2b..7867541 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -7721,6 +7721,28 @@ int perf_event_account_interrupt(struct perf_event 
> *event)
> return __perf_event_account_interrupt(event, 1);
>  }
>
> +static int perf_allow_sample_leakage __read_mostly;
> +
> +static bool sample_is_allowed(struct perf_event *event, struct pt_regs *regs)
> +{
> +   int allow_leakage = READ_ONCE(perf_allow_sample_leakage);
> +
> +   if (allow_leakage)
> +   return true;
> +
> +   /*
> +* Due to interrupt latency (AKA "skid"), we may enter the
> +* kernel before taking an overflow, even if the PMU is only
> +* counting user events.
> +* To avoid leaking information to userspace, we must always
> +* reject kernel samples when exclude_kernel is set.
> +*/
> +   if (event->attr.exclude_kernel && !user_mode(regs))
> +   return false;
> +
> +   return true;
> +}
> +
>  /*
>   * Generic event overflow handling, sampling.
>   */
> @@ -7742,6 +7764,12 @@ static int __perf_event_overflow(struct perf_event 
> *event,
> ret = __perf_event_account_interrupt(event, throttle);
>
> /*
> +* For security, drop the skid kernel samples if necessary.
> +*/
> +   if (!sample_is_allowed(event, regs))
> +   return ret;
> +
> +   /*
>  * XXX 

Re: [PATCH v1 1/2] perf/core: Use sysctl to turn on/off dropping leaked kernel samples

2018-06-14 Thread Stephane Eranian
On Thu, Jun 14, 2018 at 7:10 PM Jin Yao  wrote:
>
> When doing sampling, for example:
>
> perf record -e cycles:u ...
>
> On workloads that do a lot of kernel entry/exits we see kernel
> samples, even though :u is specified. This is due to skid existing.
>
> This might be a security issue because it can leak kernel addresses even
> though kernel sampling support is disabled.
>
> One patch "perf/core: Drop kernel samples even though :u is specified"
> was posted last year but it was reverted because it introduced a
> regression issue that broke the rr-project, which used sampling
> events to receive a signal on overflow. These signals were critical
> to the correct operation of rr.
>
> See '6a8a75f32357 ("Revert "perf/core: Drop kernel samples even
> though :u is specified"")' for detail.
>
> Now the idea is to use sysctl to control the dropping of leaked
> kernel samples.
>
> /sys/devices/cpu/perf_allow_sample_leakage:
>
> 0 - default, drop the leaked kernel samples.
> 1 - don't drop the leaked kernel samples.
>
> For rr it can write 1 to /sys/devices/cpu/perf_allow_sample_leakage.
>
> For example,
>
> root@skl:/tmp# cat /sys/devices/cpu/perf_allow_sample_leakage
> 0
> root@skl:/tmp# perf record -e cycles:u ./div
> root@skl:/tmp# perf report --stdio
>
>   ...  .  
>
> 47.01%  div  div[.] main
> 20.74%  div  libc-2.23.so   [.] __random_r
> 15.59%  div  libc-2.23.so   [.] __random
>  8.68%  div  div[.] compute_flag
>  4.48%  div  libc-2.23.so   [.] rand
>  3.50%  div  div[.] rand@plt
>  0.00%  div  ld-2.23.so [.] do_lookup_x
>  0.00%  div  ld-2.23.so [.] memcmp
>  0.00%  div  ld-2.23.so [.] _dl_start
>  0.00%  div  ld-2.23.so [.] _start
>
> There is no kernel symbol reported.
>
> root@skl:/tmp# echo 1 > /sys/devices/cpu/perf_allow_sample_leakage
> root@skl:/tmp# cat /sys/devices/cpu/perf_allow_sample_leakage
> 1
> root@skl:/tmp# perf record -e cycles:u ./div
> root@skl:/tmp# perf report --stdio
>
>   ...    .
>
> 47.53%  div  div   [.] main
> 20.62%  div  libc-2.23.so  [.] __random_r
> 15.32%  div  libc-2.23.so  [.] __random
>  8.66%  div  div   [.] compute_flag
>  4.53%  div  libc-2.23.so  [.] rand
>  3.34%  div  div   [.] rand@plt
>  0.00%  div  [kernel.vmlinux]  [k] apic_timer_interrupt
>  0.00%  div  libc-2.23.so  [.] intel_check_word
>  0.00%  div  ld-2.23.so[.] brk
>  0.00%  div  [kernel.vmlinux]  [k] page_fault
>  0.00%  div  ld-2.23.so[.] _start
>
> We can see the kernel symbols apic_timer_interrupt and page_fault.
>
These kernel symbols do not match your description here. How much skid
do you think you have here?
You're saying you are at the user level, you get a counter overflow,
and the interrupted IP lands in the kernel
because you were there by the time the interrupt is delivered. How
many instructions does it take to get
from user land to apic_timer_interrupt() or page_fault()? These
functions are not right at the kernel entry,
I believe. So how did you get there, the skid must have been VERY big
or symbolization has a problem.

> Signed-off-by: Jin Yao 
> ---
>  kernel/events/core.c | 58 
> 
>  1 file changed, 58 insertions(+)
>
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index 80cca2b..7867541 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -7721,6 +7721,28 @@ int perf_event_account_interrupt(struct perf_event 
> *event)
> return __perf_event_account_interrupt(event, 1);
>  }
>
> +static int perf_allow_sample_leakage __read_mostly;
> +
> +static bool sample_is_allowed(struct perf_event *event, struct pt_regs *regs)
> +{
> +   int allow_leakage = READ_ONCE(perf_allow_sample_leakage);
> +
> +   if (allow_leakage)
> +   return true;
> +
> +   /*
> +* Due to interrupt latency (AKA "skid"), we may enter the
> +* kernel before taking an overflow, even if the PMU is only
> +* counting user events.
> +* To avoid leaking information to userspace, we must always
> +* reject kernel samples when exclude_kernel is set.
> +*/
> +   if (event->attr.exclude_kernel && !user_mode(regs))
> +   return false;
> +
> +   return true;
> +}
> +
>  /*
>   * Generic event overflow handling, sampling.
>   */
> @@ -7742,6 +7764,12 @@ static int __perf_event_overflow(struct perf_event 
> *event,
> ret = __perf_event_account_interrupt(event, throttle);
>
> /*
> +* For security, drop the skid kernel samples if necessary.
> +*/
> +   if (!sample_is_allowed(event, regs))
> +   return ret;
> +
> +   /*
>  * XXX 

Re: [PATCH 14/39] ovl: stack file ops

2018-06-14 Thread Al Viro
On Wed, Jun 13, 2018 at 11:21:30AM +0200, Miklos Szeredi wrote:
> On Tue, Jun 12, 2018 at 8:31 PM, Al Viro  wrote:
> > On Tue, Jun 12, 2018 at 07:24:23PM +0100, Al Viro wrote:
> 
> >> I hate it, but... consider path_open() objections withdrawn for now.
> 
> Is that an ACK for the pull if I follow up with fixes for mmap botch, etc?

Yes.

> >> Uses of ->vm_file (and rules for those) are too convoluted to untangle
> >> at the moment.  I still would love to get that straightened out, but
> >> it's not this cycle fodder, more's the pity...
> 
> Looked at some other options...  What coda mmap does looks very
> dubious.  It only sets f_mapping, not vm_file.  That's going to get
> into all sorts of trouble when underlying fs tries to look at
> file_inode() or worse, ->private_data.  Looks like that should be
> converted to what overlayfs does, to have a remote chance of actually
> not crashing on most filesystems.  Does anybody actually use coda
> still?

Keep in mind that coda is using the local fs only as cache; IOW, its needs
are much more limited than those of overlayfs - local r/w filesystem,
disk-backed or tmpfs, used pretty much as a scratch space.

> > PS: conversion of ->f_path.dentry is easy and that can probably go this
> > cycle - it's a fairly trivial change, with no functional changes unless
> > overlayfs is used with , fixing really bad shit if it ever
> > gets used thus.  I'm not asking to put that into overlayfs pull *and*
> > it's independent from the "want to kill that fucking kludge" stuff.
> > The latter is too hard for this cycle, unfortunately.
> 
> So this is about adding a file_dentry_check() (or whatever we want to
> call it) helper to be used by all filesystems when dereferencing
> f_path.dentry, right?

file_dentry(), and some of the users should be converted to file_inode().
There's also a missing helper for debugfs uses - more or less a combination
of file_dentry() and debugfs_file_get() (if not a conversion of
debugfs_file_get() to taking struct file - almost all users are of that
form, if not entirely all of them).  I've some of that done in local
branch...


Re: [PATCH 14/39] ovl: stack file ops

2018-06-14 Thread Al Viro
On Wed, Jun 13, 2018 at 11:21:30AM +0200, Miklos Szeredi wrote:
> On Tue, Jun 12, 2018 at 8:31 PM, Al Viro  wrote:
> > On Tue, Jun 12, 2018 at 07:24:23PM +0100, Al Viro wrote:
> 
> >> I hate it, but... consider path_open() objections withdrawn for now.
> 
> Is that an ACK for the pull if I follow up with fixes for mmap botch, etc?

Yes.

> >> Uses of ->vm_file (and rules for those) are too convoluted to untangle
> >> at the moment.  I still would love to get that straightened out, but
> >> it's not this cycle fodder, more's the pity...
> 
> Looked at some other options...  What coda mmap does looks very
> dubious.  It only sets f_mapping, not vm_file.  That's going to get
> into all sorts of trouble when underlying fs tries to look at
> file_inode() or worse, ->private_data.  Looks like that should be
> converted to what overlayfs does, to have a remote chance of actually
> not crashing on most filesystems.  Does anybody actually use coda
> still?

Keep in mind that coda is using the local fs only as cache; IOW, its needs
are much more limited than those of overlayfs - local r/w filesystem,
disk-backed or tmpfs, used pretty much as a scratch space.

> > PS: conversion of ->f_path.dentry is easy and that can probably go this
> > cycle - it's a fairly trivial change, with no functional changes unless
> > overlayfs is used with , fixing really bad shit if it ever
> > gets used thus.  I'm not asking to put that into overlayfs pull *and*
> > it's independent from the "want to kill that fucking kludge" stuff.
> > The latter is too hard for this cycle, unfortunately.
> 
> So this is about adding a file_dentry_check() (or whatever we want to
> call it) helper to be used by all filesystems when dereferencing
> f_path.dentry, right?

file_dentry(), and some of the users should be converted to file_inode().
There's also a missing helper for debugfs uses - more or less a combination
of file_dentry() and debugfs_file_get() (if not a conversion of
debugfs_file_get() to taking struct file - almost all users are of that
form, if not entirely all of them).  I've some of that done in local
branch...


Re: [PATCH] x86/pti: don't report XenPV as vulnerable

2018-06-14 Thread Juergen Gross
On 15/06/18 00:32, Jiri Kosina wrote:
> From: Jiri Kosina 
> 
> Xen PV domain is not by design affected by meltdown as it's enforcing 
> split CR3 itself. Let's not report such systems as "Vulnerable" in sysfs 
> (we're also already forcing PTI to off in X86_HYPER_XEN_PV cases)
> 
> Reported-and-tested-by: Mike Latimer 
> Signed-off-by: Jiri Kosina 
> ---
> 
> I originally wanted to just not set X86_BUG_CPU_MELTDOWN in 
> cpu_set_bug_bits() in the first place, but that has two issues:
> 
> - cpu_set_bug_bits() gets invoked from early_identify_cpu() before 
>   init_hypervisor_platform() had a chance to run, and therefore the
>   hypervisor type check doesn't work there
> 
> - it'd actually be inaccurate; the CPU *does* have the bug at the end
>   of the day (so it's properly kept being reported in cpuinfo), it's
>   "just a setup matter" that we don't need any additional mitigation to
>   be applied by the kernel
> 
> So let's not overcomplicate it.
> 
>  arch/x86/kernel/cpu/bugs.c |4 
>  1 file changed, 4 insertions(+)
> 
> --- a/arch/x86/kernel/cpu/bugs.c
> +++ b/arch/x86/kernel/cpu/bugs.c
> @@ -26,6 +26,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  static void __init spectre_v2_select_mitigation(void);
>  static void __init ssb_select_mitigation(void);
> @@ -685,6 +686,9 @@ static ssize_t cpu_show_common(struct de
>   if (boot_cpu_has(X86_FEATURE_PTI))
>   return sprintf(buf, "Mitigation: PTI\n");
>  
> + if (hypervisor_is_type(X86_HYPER_XEN_PV))
> + return sprintf(buf, "Not affected\n");

I don't like this. This is wrong for 32-bit guests and maybe wrong for
64-bit, too, in case the mitigation is disabled at hypervisor level.

So the test should be done only for CONFIG_X86_64 and the returned
string should be e.g. "Mitigation: XEN".


Juergen



Re: [PATCH] x86/pti: don't report XenPV as vulnerable

2018-06-14 Thread Juergen Gross
On 15/06/18 00:32, Jiri Kosina wrote:
> From: Jiri Kosina 
> 
> Xen PV domain is not by design affected by meltdown as it's enforcing 
> split CR3 itself. Let's not report such systems as "Vulnerable" in sysfs 
> (we're also already forcing PTI to off in X86_HYPER_XEN_PV cases)
> 
> Reported-and-tested-by: Mike Latimer 
> Signed-off-by: Jiri Kosina 
> ---
> 
> I originally wanted to just not set X86_BUG_CPU_MELTDOWN in 
> cpu_set_bug_bits() in the first place, but that has two issues:
> 
> - cpu_set_bug_bits() gets invoked from early_identify_cpu() before 
>   init_hypervisor_platform() had a chance to run, and therefore the
>   hypervisor type check doesn't work there
> 
> - it'd actually be inaccurate; the CPU *does* have the bug at the end
>   of the day (so it's properly kept being reported in cpuinfo), it's
>   "just a setup matter" that we don't need any additional mitigation to
>   be applied by the kernel
> 
> So let's not overcomplicate it.
> 
>  arch/x86/kernel/cpu/bugs.c |4 
>  1 file changed, 4 insertions(+)
> 
> --- a/arch/x86/kernel/cpu/bugs.c
> +++ b/arch/x86/kernel/cpu/bugs.c
> @@ -26,6 +26,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  static void __init spectre_v2_select_mitigation(void);
>  static void __init ssb_select_mitigation(void);
> @@ -685,6 +686,9 @@ static ssize_t cpu_show_common(struct de
>   if (boot_cpu_has(X86_FEATURE_PTI))
>   return sprintf(buf, "Mitigation: PTI\n");
>  
> + if (hypervisor_is_type(X86_HYPER_XEN_PV))
> + return sprintf(buf, "Not affected\n");

I don't like this. This is wrong for 32-bit guests and maybe wrong for
64-bit, too, in case the mitigation is disabled at hypervisor level.

So the test should be done only for CONFIG_X86_64 and the returned
string should be e.g. "Mitigation: XEN".


Juergen



Re: [PATCH V2 4/4] mmc: host: Register changes for sdcc V5

2018-06-14 Thread Vijay Viswanath




On 6/13/2018 4:55 AM, Stephen Boyd wrote:

Quoting Vijay Viswanath (2018-05-29 02:52:41)

@@ -137,6 +125,12 @@
  /* Timeout value to avoid infinite waiting for pwr_irq */
  #define MSM_PWR_IRQ_TIMEOUT_MS 5000
  
+#define MSM_HOST_READL(msm_host, host, offset) \

+   msm_host->var_ops->msm_readl_relaxed(host, offset)
+
+#define MSM_HOST_WRITEL(msm_host, val, host, offset) \
+   msm_host->var_ops->msm_writel_relaxed(val, host, offset)


Is there a reason these macros are capitalized? We don't have READL and
WRITEL macros in the kernel because function-like macros are typically
lowercase.



will change them to lower case. Didn't notice that...


+
  struct sdhci_msm_offset {
 u32 core_hc_mode;
 u32 core_mci_data_cnt;
@@ -268,6 +262,14 @@ struct sdhci_msm_host {
 const struct sdhci_msm_offset *offset;
  };
  
+const struct sdhci_msm_offset *sdhci_priv_msm_offset(struct sdhci_host *host)


static?



will do


+{
+   struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+   struct sdhci_msm_host *msm_host = sdhci_pltfm_priv(pltfm_host);
+
+   return msm_host->offset;
+}
+

--
To unsubscribe from this list: send the line "unsubscribe linux-mmc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html



Re: [PATCH V2 4/4] mmc: host: Register changes for sdcc V5

2018-06-14 Thread Vijay Viswanath




On 6/13/2018 4:55 AM, Stephen Boyd wrote:

Quoting Vijay Viswanath (2018-05-29 02:52:41)

@@ -137,6 +125,12 @@
  /* Timeout value to avoid infinite waiting for pwr_irq */
  #define MSM_PWR_IRQ_TIMEOUT_MS 5000
  
+#define MSM_HOST_READL(msm_host, host, offset) \

+   msm_host->var_ops->msm_readl_relaxed(host, offset)
+
+#define MSM_HOST_WRITEL(msm_host, val, host, offset) \
+   msm_host->var_ops->msm_writel_relaxed(val, host, offset)


Is there a reason these macros are capitalized? We don't have READL and
WRITEL macros in the kernel because function-like macros are typically
lowercase.



will change them to lower case. Didn't notice that...


+
  struct sdhci_msm_offset {
 u32 core_hc_mode;
 u32 core_mci_data_cnt;
@@ -268,6 +262,14 @@ struct sdhci_msm_host {
 const struct sdhci_msm_offset *offset;
  };
  
+const struct sdhci_msm_offset *sdhci_priv_msm_offset(struct sdhci_host *host)


static?



will do


+{
+   struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+   struct sdhci_msm_host *msm_host = sdhci_pltfm_priv(pltfm_host);
+
+   return msm_host->offset;
+}
+

--
To unsubscribe from this list: send the line "unsubscribe linux-mmc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[GIT PULL] modules for v4.18

2018-06-14 Thread Jessica Yu
Hi Linus, 


The following changes since commit 60cc43fc888428bb2f18f08997432d426a243338:

 Linux 4.17-rc1 (2018-04-15 18:24:20 -0700)

are available in the git repository at:

 ssh://g...@gitolite.kernel.org/pub/scm/linux/kernel/git/jeyu/linux.git 
tags/modules-for-v4.18

for you to fetch changes up to c554b89868015d86cd330d9cc10656c3756352a5:

 module: Allow to always show the status of modsign (2018-04-16 23:49:33 +0200)


Modules updates for v4.18

Summary of modules changes for the 4.18 merge window:

- Minor code cleanup and also allow sig_enforce param to be shown in
 sysfs with CONFIG_MODULE_SIG_FORCE

Signed-off-by: Jessica Yu 


Jia Zhang (2):
 module: Do not access sig_enforce directly
 module: Allow to always show the status of modsign

kernel/module.c | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)


[GIT PULL] modules for v4.18

2018-06-14 Thread Jessica Yu
Hi Linus, 


The following changes since commit 60cc43fc888428bb2f18f08997432d426a243338:

 Linux 4.17-rc1 (2018-04-15 18:24:20 -0700)

are available in the git repository at:

 ssh://g...@gitolite.kernel.org/pub/scm/linux/kernel/git/jeyu/linux.git 
tags/modules-for-v4.18

for you to fetch changes up to c554b89868015d86cd330d9cc10656c3756352a5:

 module: Allow to always show the status of modsign (2018-04-16 23:49:33 +0200)


Modules updates for v4.18

Summary of modules changes for the 4.18 merge window:

- Minor code cleanup and also allow sig_enforce param to be shown in
 sysfs with CONFIG_MODULE_SIG_FORCE

Signed-off-by: Jessica Yu 


Jia Zhang (2):
 module: Do not access sig_enforce directly
 module: Allow to always show the status of modsign

kernel/module.c | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)


RE: [PATCH 4.4 10/24] x86/fpu: Hard-disable lazy FPU mode

2018-06-14 Thread Daniel Sangorrin
> -Original Message-
> From: stable-ow...@vger.kernel.org [mailto:stable-ow...@vger.kernel.org] On
> Behalf Of 'Greg Kroah-Hartman'
> Sent: Friday, June 15, 2018 1:56 PM
> To: Daniel Sangorrin 
> Cc: linux-kernel@vger.kernel.org; sta...@vger.kernel.org; 'Andy Lutomirski'
> ; 'Rik van Riel' ; 'Borislav Petkov'
> ; 'Brian Gerst' ; 'Dave Hansen'
> ; 'Denys Vlasenko' ;
> 'Fenghua Yu' ; 'H. Peter Anvin' ; 'Josh
> Poimboeuf' ; 'Linus Torvalds'
> ; 'Oleg Nesterov' ; 'Peter
> Zijlstra' ; 'Quentin Casasnovas'
> ; 'Thomas Gleixner' ;
> pbonz...@redhat.com; 'Ingo Molnar' 
> Subject: Re: [PATCH 4.4 10/24] x86/fpu: Hard-disable lazy FPU mode
> 
> On Fri, Jun 15, 2018 at 01:24:27PM +0900, Daniel Sangorrin wrote:
> > Hi Greg,
> >
> > >  /* Intel-defined CPU features, CPUID level 0x0001 (ecx), word 4 */
> > > --- a/arch/x86/include/asm/fpu/internal.h
> > > +++ b/arch/x86/include/asm/fpu/internal.h
> > > @@ -58,7 +58,7 @@ extern u64 fpu__get_supported_xfeatures_
> > >   */
> > >  static __always_inline __pure bool use_eager_fpu(void)
> > >  {
> > > - return static_cpu_has_safe(X86_FEATURE_EAGER_FPU);
> > > + return true;
> > >  }
> >
> > Since this function returns always true then we can remove the code 
> > depending on
> lazy FPU mode.
> > Actually this has already been done in "x86/fpu: Remove use_eager_fpu()"
> > Ref: https://patchwork.kernel.org/patch/9365883/
> >
> > >  static void __init fpu__init_parse_early_param(void)
> > >  {
> > > - if (cmdline_find_option_bool(boot_command_line, "eagerfpu=off")) {
> > > - eagerfpu = DISABLE;
> > > - fpu__clear_eager_fpu_features();
> > > - }
> >
> > Since this patch removes the kernel boot parameter "eagerfpu", maybe we 
> > should
> remove it from the Documentation.
> > This has also been done by commit "x86/fpu: Finish excising 'eagerfpu'"
> > Ref: https://patchwork.kernel.org/patch/9380673/
> >
> > I will try backporting those patches unless anyone has any objections.
> 
> What are the git commit ids of those patches in Linus's tree?  No need
> to point to patchwork links, I don't use that tool.

OK, I got it.

"x86/fpu: Remove use_eager_fpu()": c592b57347069abfc0dcad3b3a302cf882602597
"x86/fpu: Finish excising 'eagerfpu'": e63650840e8b053aa09ad934877e87e9941ed135

Unfortunately, they don't apply cleanly to stable kernels.

Thanks,
Daniel Sangorrin





RE: [PATCH 4.4 10/24] x86/fpu: Hard-disable lazy FPU mode

2018-06-14 Thread Daniel Sangorrin
> -Original Message-
> From: stable-ow...@vger.kernel.org [mailto:stable-ow...@vger.kernel.org] On
> Behalf Of 'Greg Kroah-Hartman'
> Sent: Friday, June 15, 2018 1:56 PM
> To: Daniel Sangorrin 
> Cc: linux-kernel@vger.kernel.org; sta...@vger.kernel.org; 'Andy Lutomirski'
> ; 'Rik van Riel' ; 'Borislav Petkov'
> ; 'Brian Gerst' ; 'Dave Hansen'
> ; 'Denys Vlasenko' ;
> 'Fenghua Yu' ; 'H. Peter Anvin' ; 'Josh
> Poimboeuf' ; 'Linus Torvalds'
> ; 'Oleg Nesterov' ; 'Peter
> Zijlstra' ; 'Quentin Casasnovas'
> ; 'Thomas Gleixner' ;
> pbonz...@redhat.com; 'Ingo Molnar' 
> Subject: Re: [PATCH 4.4 10/24] x86/fpu: Hard-disable lazy FPU mode
> 
> On Fri, Jun 15, 2018 at 01:24:27PM +0900, Daniel Sangorrin wrote:
> > Hi Greg,
> >
> > >  /* Intel-defined CPU features, CPUID level 0x0001 (ecx), word 4 */
> > > --- a/arch/x86/include/asm/fpu/internal.h
> > > +++ b/arch/x86/include/asm/fpu/internal.h
> > > @@ -58,7 +58,7 @@ extern u64 fpu__get_supported_xfeatures_
> > >   */
> > >  static __always_inline __pure bool use_eager_fpu(void)
> > >  {
> > > - return static_cpu_has_safe(X86_FEATURE_EAGER_FPU);
> > > + return true;
> > >  }
> >
> > Since this function returns always true then we can remove the code 
> > depending on
> lazy FPU mode.
> > Actually this has already been done in "x86/fpu: Remove use_eager_fpu()"
> > Ref: https://patchwork.kernel.org/patch/9365883/
> >
> > >  static void __init fpu__init_parse_early_param(void)
> > >  {
> > > - if (cmdline_find_option_bool(boot_command_line, "eagerfpu=off")) {
> > > - eagerfpu = DISABLE;
> > > - fpu__clear_eager_fpu_features();
> > > - }
> >
> > Since this patch removes the kernel boot parameter "eagerfpu", maybe we 
> > should
> remove it from the Documentation.
> > This has also been done by commit "x86/fpu: Finish excising 'eagerfpu'"
> > Ref: https://patchwork.kernel.org/patch/9380673/
> >
> > I will try backporting those patches unless anyone has any objections.
> 
> What are the git commit ids of those patches in Linus's tree?  No need
> to point to patchwork links, I don't use that tool.

OK, I got it.

"x86/fpu: Remove use_eager_fpu()": c592b57347069abfc0dcad3b3a302cf882602597
"x86/fpu: Finish excising 'eagerfpu'": e63650840e8b053aa09ad934877e87e9941ed135

Unfortunately, they don't apply cleanly to stable kernels.

Thanks,
Daniel Sangorrin





Re: [PATCH 4.17 00/45] 4.17.2-stable review

2018-06-14 Thread Greg Kroah-Hartman
On Thu, Jun 14, 2018 at 04:31:26PM -0600, Shuah Khan wrote:
> On 06/14/2018 08:03 AM, Greg Kroah-Hartman wrote:
> > This is the start of the stable review cycle for the 4.17.2 release.
> > There are 45 patches in this series, all will be posted as a response
> > to this one.  If anyone has any issues with these being applied, please
> > let me know.
> > 
> > Responses should be made by Sat Jun 16 13:21:05 UTC 2018.
> > Anything received after that time might be too late.
> > 
> > The whole patch series can be found in one patch at:
> > 
> > https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.17.2-rc1.gz
> > or in the git tree and branch at:
> > 
> > git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
> > linux-4.17.y
> > and the diffstat can be found below.
> > 
> > thanks,
> > 
> > greg k-h
> > 
> 
> Compiled and booted on my test system. No dmesg regressions.

Thanks for testing all of these and letting me know.

greg k-h


Re: [PATCH 4.17 00/45] 4.17.2-stable review

2018-06-14 Thread Greg Kroah-Hartman
On Thu, Jun 14, 2018 at 04:31:26PM -0600, Shuah Khan wrote:
> On 06/14/2018 08:03 AM, Greg Kroah-Hartman wrote:
> > This is the start of the stable review cycle for the 4.17.2 release.
> > There are 45 patches in this series, all will be posted as a response
> > to this one.  If anyone has any issues with these being applied, please
> > let me know.
> > 
> > Responses should be made by Sat Jun 16 13:21:05 UTC 2018.
> > Anything received after that time might be too late.
> > 
> > The whole patch series can be found in one patch at:
> > 
> > https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.17.2-rc1.gz
> > or in the git tree and branch at:
> > 
> > git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
> > linux-4.17.y
> > and the diffstat can be found below.
> > 
> > thanks,
> > 
> > greg k-h
> > 
> 
> Compiled and booted on my test system. No dmesg regressions.

Thanks for testing all of these and letting me know.

greg k-h


Re: [PATCH 4.17 00/45] 4.17.2-stable review

2018-06-14 Thread Greg Kroah-Hartman
On Fri, Jun 15, 2018 at 06:15:48AM +0530, Naresh Kamboju wrote:
> On 14 June 2018 at 19:33, Greg Kroah-Hartman  
> wrote:
> > This is the start of the stable review cycle for the 4.17.2 release.
> > There are 45 patches in this series, all will be posted as a response
> > to this one.  If anyone has any issues with these being applied, please
> > let me know.
> >
> > Responses should be made by Sat Jun 16 13:21:05 UTC 2018.
> > Anything received after that time might be too late.
> >
> > The whole patch series can be found in one patch at:
> > 
> > https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.17.2-rc1.gz
> > or in the git tree and branch at:
> > 
> > git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
> > linux-4.17.y
> > and the diffstat can be found below.
> >
> > thanks,
> >
> > greg k-h
> 
> Results from Linaro’s test farm.
> No regressions on arm64, arm and x86_64.

Thanks for testing all of these and letting me know.

greg k-h


Re: [PATCH 4.17 00/45] 4.17.2-stable review

2018-06-14 Thread Greg Kroah-Hartman
On Fri, Jun 15, 2018 at 06:15:48AM +0530, Naresh Kamboju wrote:
> On 14 June 2018 at 19:33, Greg Kroah-Hartman  
> wrote:
> > This is the start of the stable review cycle for the 4.17.2 release.
> > There are 45 patches in this series, all will be posted as a response
> > to this one.  If anyone has any issues with these being applied, please
> > let me know.
> >
> > Responses should be made by Sat Jun 16 13:21:05 UTC 2018.
> > Anything received after that time might be too late.
> >
> > The whole patch series can be found in one patch at:
> > 
> > https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.17.2-rc1.gz
> > or in the git tree and branch at:
> > 
> > git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
> > linux-4.17.y
> > and the diffstat can be found below.
> >
> > thanks,
> >
> > greg k-h
> 
> Results from Linaro’s test farm.
> No regressions on arm64, arm and x86_64.

Thanks for testing all of these and letting me know.

greg k-h


Re: [PATCH v1 0/2] perf: Drop leaked kernel samples

2018-06-14 Thread Jin, Yao




On 6/15/2018 11:35 AM, Kyle Huey wrote:

I strongly object to this patch as written. As I said when I
originally reported[0] the regression introduced by the previous
version of this patch a year ago:

"It seems like this change should, at a bare minimum, be limited to
counters that actually perform sampling of register state when the
interrupt fires.  In our case, with the retired conditional branches
counter restricted to counting userspace events only, it makes no
difference that the PMU interrupt happened to be delivered in the
kernel."

This means identifying which values of `perf_event_attr::sample_type`
are security concerns (presumably PERF_SAMPLE_IP is, and
PERF_SAMPLE_TIME is not, and someone needs to go through and decide on
all of them) and filtering on those values for this new behavior.

And because rr sets its sample_type to 0, once you do that, the sysctl
will not be necessary.

- Kyle



Since rr sets sample_type to 0, the easiest way is to add checking like:

if (event->attr.sample_type) {
if (event->attr.exclude_kernel && !user_mode(regs))
return false;
}

So the rr doesn't need to be changed and for other use cases the leaked 
kernel samples will be dropped.


But I don't like this because:

1. It's too specific for rr case.

2. If we create a new sample_type, e.g. PERF_SAMPLE_ALLOW_LEAKAGE, the 
code will be:


if !(event->attr.sample_type & PERF_SAMPLE_ALLOW_LEAKAGE) {
if (event->attr.exclude_kernel && !user_mode(regs))
return false;
}

But rr needs to add PERF_SAMPLE_ALLOW_LEAKAGE to sample_type since by 
default the bit is not set.


3. Sysctl is a more flexible way. It gives us an option: when we want to 
see whether skid exists, we can use the sysctl to turn that on.


Thanks
Jin Yao



Re: [PATCH v1 0/2] perf: Drop leaked kernel samples

2018-06-14 Thread Jin, Yao




On 6/15/2018 11:35 AM, Kyle Huey wrote:

I strongly object to this patch as written. As I said when I
originally reported[0] the regression introduced by the previous
version of this patch a year ago.

"It seems like this change should, at a bare minimum, be limited to
counters that actually perform sampling of register state when the
interrupt fires.  In our case, with the retired conditional branches
counter restricted to counting userspace events only, it makes no
difference that the PMU interrupt happened to be delivered in the
kernel."

This means identifying which values of `perf_event_attr::sample_type`
are security concerns (presumably PERF_SAMPLE_IP is, and
PERF_SAMPLE_TIME is not, and someone needs to go through and decide on
all of them) and filtering on those values for this new behavior.

And because rr sets its sample_type to 0, once you do that, the sysctl
will not be necessary.

- Kyle



Since rr sets sample_type to 0, the easiest way is to add checking like:

if (event->attr.sample_type) {
if (event->attr.exclude_kernel && !user_mode(regs))
return false;
}

So the rr doesn't need to be changed and for other use cases the leaked 
kernel samples will be dropped.


But I don't like this because:

1. It's too specific for rr case.

2. If we create a new sample_type, e.g. PERF_SAMPLE_ALLOW_LEAKAGE, the 
code will be:


if !(event->attr.sample_type & PERF_SAMPLE_ALLOW_LEAKAGE) {
if (event->attr.exclude_kernel && !user_mode(regs))
return false;
}

But rr needs to add PERF_SAMPLE_ALLOW_LEAKAGE to sample_type since by 
default the bit is not set.


3. Sysctl is a more flexible way. It gives us an option: when we want to 
see whether skid exists, we can use the sysctl to turn that on.


Thanks
Jin Yao



Re: Restartable Sequences system call merged into Linux

2018-06-14 Thread Florian Weimer

On 06/14/2018 03:46 PM, Mathieu Desnoyers wrote:

This would allow registering various TLS data structures with a single
system call without hindering flexibility on the user-space side. For
instance, we could still use initial-exec and the __rseq_abi symbol for
rseq with this approach.

Thoughts ?


Isn't this just a very narrow case of the usual batched syscalls 
proposal? 8-)


Florian


Re: Restartable Sequences system call merged into Linux

2018-06-14 Thread Florian Weimer

On 06/14/2018 03:46 PM, Mathieu Desnoyers wrote:

This would allow registering various TLS data structures with a single
system call without hindering flexibility on the user-space side. For
instance, we could still use initial-exec and the __rseq_abi symbol for
rseq with this approach.

Thoughts ?


Isn't this just a very narrow case of the usual batched syscalls 
proposal? 8-)


Florian


Re: Restartable Sequences system call merged into Linux

2018-06-14 Thread Florian Weimer

On 06/14/2018 03:01 PM, Mathieu Desnoyers wrote:

Another alternative would be to somehow let glibc handle the registration,
perhaps only doing it for applications expressing their interest for rseq.


That's not really possible.  We can't rely on the visibility of symbol 
bindings due to lazy binding and hidden visibility.  Registration of 
intent by other means will not work because if it is done from user 
code, some other library may have already launched a thread at this point.


(It's also a moot point if we want to use restartable sequences in glibc 
itself.)


Thanks,
Florian


Re: Restartable Sequences system call merged into Linux

2018-06-14 Thread Florian Weimer

On 06/14/2018 03:01 PM, Mathieu Desnoyers wrote:

Another alternative would be to somehow let glibc handle the registration,
perhaps only doing it for applications expressing their interest for rseq.


That's not really possible.  We can't rely on the visibility of symbol 
bindings due to lazy binding and hidden visibility.  Registration of 
intent by other means will not work because if it is done from user 
code, some other library may have already launched a thread at this point.


(It's also a moot point if we want to use restartable sequences in glibc 
itself.)


Thanks,
Florian


Re: [RFC PATCH 5/6] arm64: dts: ti: Add Support for AM654 SoC

2018-06-14 Thread Tony Lindgren
* Nishanth Menon  [180614 13:07]:
> On 12:38-20180614, Tony Lindgren wrote:
> > Some comments on the ranges below.
> 
> Thanks for reviewing in detail (I understand we are in the middle of
> merge window, so thanks for the extra effort).
> 
> > 
> > * Nishanth Menon  [180607 16:41]:
> > > + soc0: soc0 {
> > > + compatible = "simple-bus";
> > > + #address-cells = <2>;
> > > + #size-cells = <2>;
> > > + ranges;
> > 
> > I suggest you leave out the soc0, that's not real. Just make
> 
> Why is that so, on a more complex board representation with multiple
> SoCs, this is a clear node indicating what the main SoC is in the final
> dtb representation.

It does not have a real reg or range.

> > the cbass@0 the top level interconnect. It can then provide
> > ranges to mcu interconnect which can provide ranges to the wkup
> > interconnect. So just model it after what's in the hardware :)
> 
> That might blow up things quite a bit - it is like the comment in:
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/arm/boot/dts/dra7.dtsi#n141

That comment at the link above is not true, I've found. What we have
there as "ocp" should be just "l3" and then the "l4" instances are
children of "l3". The direct ports from some "l4" devices are just
ranges at the parent "l3". And this will get changed slowly over
next few merge cycles.

> The trees are pretty deep with many interconnections (example main does
> have direct connections to wkup as well, which is simplified off in
> top level diagram) - basically it is not a direct one dimensional
> relationship. But then, the same is the case for other SoCs..

In the above example the connection from main to wkup is just a
range provided by main so not a problem.

> we can represent NAVSS as a bus segment as well.

Well ideally each module on the interconnects would be set up
separately to prevent drivers trying to ioremap ranges from
multiple modules. This is important as flushing posted write to
one module will not flush it for the other module.

> > I found the following ranges based on a quick look at the TRM,
> > they could be split further if needed for power domains for
> > genpd for example.
> 
> genpd is not really an issue, since it is handled in system firmware and
> OSes don't have visibility into the permitted ranges that the OS is
> allowed to use.

There are other reasons beyond genpd too. Flushing posted writes
to modules is one. Getting rid of pointless deferred probe is
another one. Preventing device drivers trying to ioremap multiple
modules is yet another one.

> I think it is just how accurate a representation is it worth.

The dts really is intended to describe the hardware :) So
let's not repeat the same mistake again with imaginary ranges.

> > 
> > main covers
> > 0x00 - 0x540200
> > 
> > main provides at least the following ranges for mcu
> > 0x002838 - 0x002bc0
> > 0x004008 - 0x0041c8
> > 0x004510 - 0x004518
> > 0x004560 - 0x004564
> > 0x004581 - 0x004586
> > 0x004595 - 0x0045950400
> > 0x0045a5 - 0x0045a50400
> > 0x0045b04000 - 0x0045b06400
> > 0x0045d1 - 0x0045d24000
> > 0x004600 - 0x006000
> > 0x04 - 0x08
> > 0x4c3c02 - 0x4c3c03
> > 0x4c3e00 - 0x4c3e04
> > 0x54 - 0x540200
> > 
> > then mcu provides the following ranges for wkup
> > 0x004200 - 0x0044410020
> > 0x004500 - 0x004503
> > 0x004508 - 0x00450a
> > 0x0045808000 - 0x0045808800
> > 0x0045b0 - 0x0045b02400
> > 
> > This is based on looking at "figure 1-1. device top-level
> > block diagram" and the memory map in TRM.
> 
> Thanks for researching. I did debate something like:
> 
> From A53 view, a more accurate view might be  - from an interconnect
> view of the world (still simplified - i have ignored the sub bus
> segments in the representations below):
> 
> msmc {
>   navss_main {
>   cbass_main{
>   cbass_mcu {
>   navss_mcu {
>   };
>   cbass_wkup{
>   };
>   };
>   };
>   };
> };
> 
> From R5 view, the view will be very different of course:
> view of the world (still simplified):
> 
> cbass_mcu {
>   navss_mcu {
>   };
>   cbass_wkup{
>   };
>   cbass_main{
>   navss_

Re: [RFC PATCH 5/6] arm64: dts: ti: Add Support for AM654 SoC

2018-06-14 Thread Tony Lindgren
* Nishanth Menon  [180614 13:07]:
> On 12:38-20180614, Tony Lindgren wrote:
> > Some comments on the ranges below.
> 
> Thanks for reviewing in detail (I understand we are in the middle of
> merge window, so thanks for the extra effort).
> 
> > 
> > * Nishanth Menon  [180607 16:41]:
> > > + soc0: soc0 {
> > > + compatible = "simple-bus";
> > > + #address-cells = <2>;
> > > + #size-cells = <2>;
> > > + ranges;
> > 
> > I suggest you leave out the soc0, that's not real. Just make
> 
> Why is that so, on a more complex board representation with multiple
> SoCs, this is a clear node indicating what the main SoC is in the final
> dtb representation.

It does not have a real reg or range.

> > the cbass@0 the top level interconnect. It can then provide
> > ranges to mcu interconnect which can provide ranges to the wkup
> > interconnect. So just model it after what's in the hardware :)
> 
> That might blow up things quite a bit - it is like the comment in:
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/arm/boot/dts/dra7.dtsi#n141

That comment at the link above is not true, I've found. What we have
there as "ocp" should be just "l3" and then the "l4" instances are
children of "l3". The direct ports from some "l4" devices are just
ranges at the parent "l3". And this will get changed slowly over
next few merge cycles.

> The trees are pretty deep with many interconnections (example main does
> have direct connections to wkup as well, which is simplified off in
> top level diagram) - basically it is not a direct one dimensional
> relationship. But then, the same is the case for other SoCs..

In the above example the connection from main to wkup is just a
range provided by main so not a problem.

> we can represent NAVSS as a bus segment as well.

Well ideally each module on the interconnects would be set up
separately to prevent drivers trying to ioremap ranges from
multiple modules. This is important as flushing posted write to
one module will not flush it for the other module.

> > I found the following ranges based on a quick look at the TRM,
> > they could be split further if needed for power domains for
> > genpd for example.
> 
> genpd is not really an issue, since it is handled in system firmware and
> OSes don't have visibility into the permitted ranges that the OS is
> allowed to use.

There are other reasons beyond genpd too. Flushing posted writes
to modules is one. Getting rid of pointless deferred probe is
another one. Preventing device drivers trying to ioremap multiple
modules is yet another one.

> I think it is just how accurate a representation is it worth.

The dts really is intended to describe the hardware :) So
let's not repeat the same mistake again with imaginary ranges.

> > 
> > main covers
> > 0x00 - 0x540200
> > 
> > main provides at least the following ranges for mcu
> > 0x002838 - 0x002bc0
> > 0x004008 - 0x0041c8
> > 0x004510 - 0x004518
> > 0x004560 - 0x004564
> > 0x004581 - 0x004586
> > 0x004595 - 0x0045950400
> > 0x0045a5 - 0x0045a50400
> > 0x0045b04000 - 0x0045b06400
> > 0x0045d1 - 0x0045d24000
> > 0x004600 - 0x006000
> > 0x04 - 0x08
> > 0x4c3c02 - 0x4c3c03
> > 0x4c3e00 - 0x4c3e04
> > 0x54 - 0x540200
> > 
> > then mcu provides the following ranges for wkup
> > 0x004200 - 0x0044410020
> > 0x004500 - 0x004503
> > 0x004508 - 0x00450a
> > 0x0045808000 - 0x0045808800
> > 0x0045b0 - 0x0045b02400
> > 
> > This is based on looking at "figure 1-1. device top-level
> > block diagram" and the memory map in TRM.
> 
> Thanks for researching. I did debate something like:
> 
> From A53 view, a more accurate view might be  - from an interconnect
> view of the world (still simplified - i have ignored the sub bus
> segments in the representations below):
> 
> msmc {
>   navss_main {
>   cbass_main{
>   cbass_mcu {
>   navss_mcu {
>   };
>   cbass_wkup{
>   };
>   };
>   };
>   };
> };
> 
> From R5 view, the view will be very different of course:
> view of the world (still simplified):
> 
> cbass_mcu {
>   navss_mcu {
>   };
>   cbass_wkup{
>   };
>   cbass_main{
>   navss_

Re: Restartable Sequences system call merged into Linux

2018-06-14 Thread Florian Weimer

On 06/14/2018 02:27 PM, Pavel Machek wrote:


Should we treat it the same way?  Always allocate it for each new thread
and register it with the kernel?


That would be an efficient way to do it, indeed. There is very little
performance overhead to have rseq registered for all threads, whether or
not they intend to run rseq critical sections.


People with slow / low memory machines would prefer not to see
overhead they don't need...


I can try to get rid of the >500 byte per-thread area for the stub 
resolver.  That should compensate for the overhead introduced.


Thanks,
Florian


Re: Restartable Sequences system call merged into Linux

2018-06-14 Thread Florian Weimer

On 06/14/2018 02:27 PM, Pavel Machek wrote:


Should we treat it the same way?  Always allocate it for each new thread
and register it with the kernel?


That would be an efficient way to do it, indeed. There is very little
performance overhead to have rseq registered for all threads, whether or
not they intend to run rseq critical sections.


People with slow / low memory machines would prefer not to see
overhead they don't need...


I can try to get rid of the >500 byte per-thread area for the stub 
resolver.  That should compensate for the overhead introduced.


Thanks,
Florian


Re: [PATCH V2 2/4] mmc: sdhci-msm: Add msm version specific ops and data structures

2018-06-14 Thread Vijay Viswanath

Hi Stephen,

On 6/13/2018 5:06 AM, Stephen Boyd wrote:

Quoting Vijay Viswanath (2018-05-29 02:52:39)

diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c
index 4050c99..2a66aa0 100644
--- a/drivers/mmc/host/sdhci-msm.c
+++ b/drivers/mmc/host/sdhci-msm.c
@@ -226,6 +226,24 @@ struct sdhci_msm_offset {
 .core_ddr_config_2 = 0x1bc,
  };
  
+struct sdhci_msm_variant_ops {

+   u8 (*msm_readb_relaxed)(struct sdhci_host *host, u32 offset);
+   u32 (*msm_readl_relaxed)(struct sdhci_host *host, u32 offset);
+   void (*msm_writeb_relaxed)(u8 val, struct sdhci_host *host, u32 offset);
+   void (*msm_writel_relaxed)(u32 val, struct sdhci_host *host,
+   u32 offset);
+};
+
+/*
+ * From V5, register spaces have changed. Wrap this info in a structure
+ * and choose the data_structure based on version info mentioned in DT.
+ */


This is sort of odd. Usually we have a read/write function that swizzles
based on register variants, and that's contained with that function. Now
it's the other way.


+struct sdhci_msm_variant_info {
+   bool mci_removed;
+   const struct sdhci_msm_variant_ops *var_ops;
+   const struct sdhci_msm_offset *offset;
+};
+
  struct sdhci_msm_host {
 struct platform_device *pdev;
 void __iomem *core_mem; /* MSM SDCC mapped address */
@@ -245,8 +263,45 @@ struct sdhci_msm_host {
 wait_queue_head_t pwr_irq_wait;
 bool pwr_irq_flag;
 u32 caps_0;
+   bool mci_removed;
+   const struct sdhci_msm_variant_ops *var_ops;
+   const struct sdhci_msm_offset *offset;
  };
  
+/*

+ * APIs to read/write to vendor specific registers which were there in the
+ * core_mem region before MCI was removed.
+ */
+static u32 sdhci_msm_mci_variant_readl_relaxed(struct sdhci_host *host,
+   u32 offset)
+{
+   struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+   struct sdhci_msm_host *msm_host = sdhci_pltfm_priv(pltfm_host);
+
+   return readl_relaxed(msm_host->core_mem + offset);


Is core_mem assigned in the new hardware? Maybe that needs to be
'repurposed' for vendor specific registers on v5 and renamed to
something like msm_host::vendor_base or something like that.



There is no core_mem in the new hardware. We can assign hc_mem address 
to core_mem variable (if SDCC5) and do away with the need of special 
read/write functions, but I feel that's a bad approach and misleading.



+}
+
+static u32 sdhci_msm_v5_variant_readl_relaxed(struct sdhci_host *host,
+   u32 offset)
+{
+   return readl_relaxed(host->ioaddr + offset);
+}
+
+static void sdhci_msm_mci_variant_writel_relaxed(u32 val,
+   struct sdhci_host *host, u32 offset)
+{
+   struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+   struct sdhci_msm_host *msm_host = sdhci_pltfm_priv(pltfm_host);
+
+   writel_relaxed(val, msm_host->core_mem + offset);
+}
+
+static void sdhci_msm_v5_variant_writel_relaxed(u32 val,
+   struct sdhci_host *host, u32 offset)
+{
+   writel_relaxed(val, host->ioaddr + offset);
+}
+
  static unsigned int msm_get_clock_rate_for_bus_mode(struct sdhci_host *host,
 unsigned int clock)
  {
@@ -1481,6 +1536,28 @@ static void sdhci_msm_set_regulator_caps(struct 
sdhci_msm_host *msm_host)
 pr_debug("%s: supported caps: 0x%08x\n", mmc_hostname(mmc), caps);
  }
  
+static const struct sdhci_msm_variant_ops mci_var_ops = {

+   .msm_readl_relaxed = sdhci_msm_mci_variant_readl_relaxed,
+   .msm_writel_relaxed = sdhci_msm_mci_variant_writel_relaxed,
+};
+
+static const struct sdhci_msm_variant_ops v5_var_ops = {
+   .msm_readl_relaxed = sdhci_msm_v5_variant_readl_relaxed,
+   .msm_writel_relaxed = sdhci_msm_v5_variant_writel_relaxed,
+};
+
+static const struct sdhci_msm_variant_info sdhci_msm_mci_var = {
+   .mci_removed = 0,


Please use true and false instead of 0 and 1 when the type is bool.



Will do


+   .var_ops = &mci_var_ops,
+   .offset = &sdhci_msm_mci_offset,
+};
+
+static const struct sdhci_msm_variant_info sdhci_msm_v5_var = {
+   .mci_removed = 1,
+   .var_ops = &v5_var_ops,
+   .offset = &sdhci_msm_v5_offset,
+};

--
To unsubscribe from this list: send the line "unsubscribe linux-mmc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html



Re: [PATCH V2 2/4] mmc: sdhci-msm: Add msm version specific ops and data structures

2018-06-14 Thread Vijay Viswanath

Hi Stephen,

On 6/13/2018 5:06 AM, Stephen Boyd wrote:

Quoting Vijay Viswanath (2018-05-29 02:52:39)

diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c
index 4050c99..2a66aa0 100644
--- a/drivers/mmc/host/sdhci-msm.c
+++ b/drivers/mmc/host/sdhci-msm.c
@@ -226,6 +226,24 @@ struct sdhci_msm_offset {
 .core_ddr_config_2 = 0x1bc,
  };
  
+struct sdhci_msm_variant_ops {

+   u8 (*msm_readb_relaxed)(struct sdhci_host *host, u32 offset);
+   u32 (*msm_readl_relaxed)(struct sdhci_host *host, u32 offset);
+   void (*msm_writeb_relaxed)(u8 val, struct sdhci_host *host, u32 offset);
+   void (*msm_writel_relaxed)(u32 val, struct sdhci_host *host,
+   u32 offset);
+};
+
+/*
+ * From V5, register spaces have changed. Wrap this info in a structure
+ * and choose the data_structure based on version info mentioned in DT.
+ */


This is sort of odd. Usually we have a read/write function that swizzles
based on register variants, and that's contained with that function. Now
it's the other way.


+struct sdhci_msm_variant_info {
+   bool mci_removed;
+   const struct sdhci_msm_variant_ops *var_ops;
+   const struct sdhci_msm_offset *offset;
+};
+
  struct sdhci_msm_host {
 struct platform_device *pdev;
 void __iomem *core_mem; /* MSM SDCC mapped address */
@@ -245,8 +263,45 @@ struct sdhci_msm_host {
 wait_queue_head_t pwr_irq_wait;
 bool pwr_irq_flag;
 u32 caps_0;
+   bool mci_removed;
+   const struct sdhci_msm_variant_ops *var_ops;
+   const struct sdhci_msm_offset *offset;
  };
  
+/*

+ * APIs to read/write to vendor specific registers which were there in the
+ * core_mem region before MCI was removed.
+ */
+static u32 sdhci_msm_mci_variant_readl_relaxed(struct sdhci_host *host,
+   u32 offset)
+{
+   struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+   struct sdhci_msm_host *msm_host = sdhci_pltfm_priv(pltfm_host);
+
+   return readl_relaxed(msm_host->core_mem + offset);


Is core_mem assigned in the new hardware? Maybe that needs to be
'repurposed' for vendor specific registers on v5 and renamed to
something like msm_host::vendor_base or something like that.



There is no core_mem in the new hardware. We can assign hc_mem address 
to core_mem variable (if SDCC5) and do away with the need of special 
read/write functions, but I feel that's a bad approach and misleading.



+}
+
+static u32 sdhci_msm_v5_variant_readl_relaxed(struct sdhci_host *host,
+   u32 offset)
+{
+   return readl_relaxed(host->ioaddr + offset);
+}
+
+static void sdhci_msm_mci_variant_writel_relaxed(u32 val,
+   struct sdhci_host *host, u32 offset)
+{
+   struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+   struct sdhci_msm_host *msm_host = sdhci_pltfm_priv(pltfm_host);
+
+   writel_relaxed(val, msm_host->core_mem + offset);
+}
+
+static void sdhci_msm_v5_variant_writel_relaxed(u32 val,
+   struct sdhci_host *host, u32 offset)
+{
+   writel_relaxed(val, host->ioaddr + offset);
+}
+
  static unsigned int msm_get_clock_rate_for_bus_mode(struct sdhci_host *host,
 unsigned int clock)
  {
@@ -1481,6 +1536,28 @@ static void sdhci_msm_set_regulator_caps(struct 
sdhci_msm_host *msm_host)
 pr_debug("%s: supported caps: 0x%08x\n", mmc_hostname(mmc), caps);
  }
  
+static const struct sdhci_msm_variant_ops mci_var_ops = {

+   .msm_readl_relaxed = sdhci_msm_mci_variant_readl_relaxed,
+   .msm_writel_relaxed = sdhci_msm_mci_variant_writel_relaxed,
+};
+
+static const struct sdhci_msm_variant_ops v5_var_ops = {
+   .msm_readl_relaxed = sdhci_msm_v5_variant_readl_relaxed,
+   .msm_writel_relaxed = sdhci_msm_v5_variant_writel_relaxed,
+};
+
+static const struct sdhci_msm_variant_info sdhci_msm_mci_var = {
+   .mci_removed = 0,


Please use true and false instead of 0 and 1 when the type is bool.



Will do


+   .var_ops = &mci_var_ops,
+   .offset = &sdhci_msm_mci_offset,
+};
+
+static const struct sdhci_msm_variant_info sdhci_msm_v5_var = {
+   .mci_removed = 1,
+   .var_ops = &v5_var_ops,
+   .offset = &sdhci_msm_v5_offset,
+};

--
To unsubscribe from this list: send the line "unsubscribe linux-mmc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html



Re: [PATCH 4.4 10/24] x86/fpu: Hard-disable lazy FPU mode

2018-06-14 Thread 'Greg Kroah-Hartman'
On Fri, Jun 15, 2018 at 01:24:27PM +0900, Daniel Sangorrin wrote:
> Hi Greg,
> 
> >  /* Intel-defined CPU features, CPUID level 0x0001 (ecx), word 4 */
> > --- a/arch/x86/include/asm/fpu/internal.h
> > +++ b/arch/x86/include/asm/fpu/internal.h
> > @@ -58,7 +58,7 @@ extern u64 fpu__get_supported_xfeatures_
> >   */
> >  static __always_inline __pure bool use_eager_fpu(void)
> >  {
> > -   return static_cpu_has_safe(X86_FEATURE_EAGER_FPU);
> > +   return true;
> >  }
> 
> Since this function returns always true then we can remove the code depending 
> on lazy FPU mode.
> Actually this has already been done in "x86/fpu: Remove use_eager_fpu()"
> Ref: https://patchwork.kernel.org/patch/9365883/
> 
> >  static void __init fpu__init_parse_early_param(void)
> >  {
> > -   if (cmdline_find_option_bool(boot_command_line, "eagerfpu=off")) {
> > -   eagerfpu = DISABLE;
> > -   fpu__clear_eager_fpu_features();
> > -   }
> 
> Since this patch removes the kernel boot parameter "eagerfpu", maybe we 
> should remove it from the Documentation.
> This has also been done by commit "x86/fpu: Finish excising 'eagerfpu'"
> Ref: https://patchwork.kernel.org/patch/9380673/
> 
> I will try backporting those patches unless anyone has any objections.

What are the git commit ids of those patches in Linus's tree?  No need
to point to patchwork links, I don't use that tool.

thanks,

greg k-h


Re: [PATCH 4.4 10/24] x86/fpu: Hard-disable lazy FPU mode

2018-06-14 Thread 'Greg Kroah-Hartman'
On Fri, Jun 15, 2018 at 01:24:27PM +0900, Daniel Sangorrin wrote:
> Hi Greg,
> 
> >  /* Intel-defined CPU features, CPUID level 0x0001 (ecx), word 4 */
> > --- a/arch/x86/include/asm/fpu/internal.h
> > +++ b/arch/x86/include/asm/fpu/internal.h
> > @@ -58,7 +58,7 @@ extern u64 fpu__get_supported_xfeatures_
> >   */
> >  static __always_inline __pure bool use_eager_fpu(void)
> >  {
> > -   return static_cpu_has_safe(X86_FEATURE_EAGER_FPU);
> > +   return true;
> >  }
> 
> Since this function returns always true then we can remove the code depending 
> on lazy FPU mode.
> Actually this has already been done in "x86/fpu: Remove use_eager_fpu()"
> Ref: https://patchwork.kernel.org/patch/9365883/
> 
> >  static void __init fpu__init_parse_early_param(void)
> >  {
> > -   if (cmdline_find_option_bool(boot_command_line, "eagerfpu=off")) {
> > -   eagerfpu = DISABLE;
> > -   fpu__clear_eager_fpu_features();
> > -   }
> 
> Since this patch removes the kernel boot parameter "eagerfpu", maybe we 
> should remove it from the Documentation.
> This has also been done by commit "x86/fpu: Finish excising 'eagerfpu'"
> Ref: https://patchwork.kernel.org/patch/9380673/
> 
> I will try backporting those patches unless anyone has any objections.

What are the git commit ids of those patches in Linus's tree?  No need
to point to patchwork links, I don't use that tool.

thanks,

greg k-h


RESEARCHERS

2018-06-14 Thread Sarah Paige
Greetings,
 
We are contracted probate researchers. This is an investigation about a late 
client with whom you share the same surname; your assistance will be greatly 
appreciated. Are you aware of any investment made by such a person with us? 
Please clarify,

EmaiL Reply to : research...@mail2consultant.com

 Sarah Paige,
For Research Firm.


RESEARCHERS

2018-06-14 Thread Sarah Paige
Greetings,
 
We are contracted probate researchers. This is an investigation about a late 
client with whom you share the same surname; your assistance will be greatly 
appreciated. Are you aware of any investment made by such a person with us? 
Please clarify,

EmaiL Reply to : research...@mail2consultant.com

 Sarah Paige,
For Research Firm.


linux-next: Tree for Jun 15

2018-06-14 Thread Stephen Rothwell
Hi all,

Note: please do *not* add any v4.19 material to your linux-next included
branches until after v4.18-rc1 has been released.

Changes since 20180614:

The overlayfs tree gained conflicts against Linus' tree.

Non-merge commits (relative to Linus' tree): 778
 600 files changed, 13285 insertions(+), 9945 deletions(-)



I have created today's linux-next tree at
git://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
(patches at http://www.kernel.org/pub/linux/kernel/next/ ).  If you
are tracking the linux-next tree using git, you should not use "git pull"
to do so as that will try to merge the new linux-next release with the
old one.  You should use "git fetch" and checkout or reset to the new
master.

You can see which trees have been included by looking in the Next/Trees
file in the source.  There are also quilt-import.log and merge.log
files in the Next directory.  Between each merge, the tree was built
with a ppc64_defconfig for powerpc, an allmodconfig for x86_64, a
multi_v7_defconfig for arm and a native build of tools/perf. After
the final fixups (if any), I do an x86_64 modules_install followed by
builds for x86_64 allnoconfig, powerpc allnoconfig (32 and 64 bit),
ppc44x_defconfig, allyesconfig and pseries_le_defconfig and i386, sparc
and sparc64 defconfig. And finally, a simple boot test of the powerpc
pseries_le_defconfig kernel in qemu (with and without kvm enabled).

Below is a summary of the state of the merge.

I am currently merging 278 trees (counting Linus' and 64 trees of bug
fix patches pending for the current merge release).

Stats about the size of the tree over time can be seen at
http://neuling.org/linux-next-size.html .

Status of my local build tests will be at
http://kisskb.ellerman.id.au/linux-next .  If maintainers want to give
advice about cross compilers/configs that work, we are always open to add
more builds.

Thanks to Randy Dunlap for doing many randconfig builds.  And to Paul
Gortmaker for triage and bug fixes.

-- 
Cheers,
Stephen Rothwell

$ git checkout master
$ git reset --hard stable
Merging origin/master (b5d903c2d656 Merge branch 'akpm' (patches from Andrew))
Merging fixes/master (147a89bc71e7 Merge tag 'kconfig-v4.17' of 
git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-kbuild)
Merging kbuild-current/fixes (2837461dbe6f Merge tag 'scsi-fixes' of 
git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi)
Merging arc-current/for-curr (e6c62399504c ARCv2: support manual regfile save 
on interrupts)
Merging arm-current/fixes (92d44a42af81 ARM: fix kill( ,SIGFPE) breakage)
Merging arm64-fixes/for-next/fixes (82034c23fcbc arm64: Make sure permission 
updates happen for pmd/pud)
Merging m68k-current/for-linus (b12c8a70643f m68k: Set default dma mask for 
platform devices)
Merging powerpc-fixes/fixes (faf37c44a105 powerpc/64s: Clear PCR on boot)
Merging sparc/master (1aaccb5fa0ea Merge tag 'rtc-4.18' of 
git://git.kernel.org/pub/scm/linux/kernel/git/abelloni/linux)
Merging fscrypt-current/for-stable (ae64f9bd1d36 Linux 4.15-rc2)
Merging net/master (7f6afc338405 Merge branch 'l2tp-fixes')
Merging bpf/master (3bce593ac06b selftests: bpf: config: add config fragments)
Merging ipsec/master (d6990976af7c vti6: fix PMTU caching and reporting on xmit)
Merging netfilter/master (60d061e34703 Merge 
git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf)
Merging ipvs/master (312564269535 net: netsec: reduce DMA mask to 40 bits)
Merging wireless-drivers/master (ab1068d6866e iwlwifi: pcie: compare with 
number of IRQs requested for, not number of CPUs)
Merging mac80211/master (885892fb378d mlx4_core: restore optimal ICM memory 
allocation)
Merging rdma-fixes/for-rc (1eb9364ce81d IB/uverbs: Fix ordering of ucontext 
check in ib_uverbs_write)
Merging sound-current/for-linus (ad6baae62381 ALSA: usb-audio: Always create 
the interrupt pipe for the mixer)
Merging sound-asoc-fixes/for-linus (2858e2cfc2ef Merge branch 'asoc-4.17' into 
asoc-linus)
Merging regmap-fixes/for-linus (97fe106a8027 Merge branch 'regmap-4.17' into 
regmap-linus)
Merging regulator-fixes/for-linus (59ce5f3e5530 Merge branch 'regulator-4.17' 
into regulator-linus)
Merging spi-fixes/for-linus (5d3257b8ea48 Merge branch 'spi-4.17' into 
spi-linus)
Merging pci-current/for-linus (0cf22d6b317c PCI: Add "PCIe" to 
pcie_print_link_status() messages)
Merging driver-core.current/driver-core-linus (3ca24ce9ff76 Merge branch 
'proc-cmdline')
Merging tty.current/tty-linus (3ca24ce9ff76 Merge branch 'proc-cmdline')
Merging usb.current/usb-linus (3ca24ce9ff76 Merge branch 'proc-cmdline')
Merging usb-gadget-fixes/fixes (6d08b06e67cd Linux 4.17-rc2)
Merging usb-serial-fixes/usb-linus (75bc37fefc44 Linux 4.17-rc4)
Merging usb-chipidea-fixes/ci-for-usb-stable (964728f9f407 USB: chipidea: msm: 
fix ulpi-node lookup)
Merging phy/fixes (60cc43fc8884 Linux 4.17-rc1)
Merging staging.current/staging-linus (3

linux-next: Tree for Jun 15

2018-06-14 Thread Stephen Rothwell
Hi all,

Note: please do *not* add any v4.19 material to your linux-next included
branches until after v4.18-rc1 has been released.

Changes since 20180614:

The overlayfs tree gained conflicts against Linus' tree.

Non-merge commits (relative to Linus' tree): 778
 600 files changed, 13285 insertions(+), 9945 deletions(-)



I have created today's linux-next tree at
git://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
(patches at http://www.kernel.org/pub/linux/kernel/next/ ).  If you
are tracking the linux-next tree using git, you should not use "git pull"
to do so as that will try to merge the new linux-next release with the
old one.  You should use "git fetch" and checkout or reset to the new
master.

You can see which trees have been included by looking in the Next/Trees
file in the source.  There are also quilt-import.log and merge.log
files in the Next directory.  Between each merge, the tree was built
with a ppc64_defconfig for powerpc, an allmodconfig for x86_64, a
multi_v7_defconfig for arm and a native build of tools/perf. After
the final fixups (if any), I do an x86_64 modules_install followed by
builds for x86_64 allnoconfig, powerpc allnoconfig (32 and 64 bit),
ppc44x_defconfig, allyesconfig and pseries_le_defconfig and i386, sparc
and sparc64 defconfig. And finally, a simple boot test of the powerpc
pseries_le_defconfig kernel in qemu (with and without kvm enabled).

Below is a summary of the state of the merge.

I am currently merging 278 trees (counting Linus' and 64 trees of bug
fix patches pending for the current merge release).

Stats about the size of the tree over time can be seen at
http://neuling.org/linux-next-size.html .

Status of my local build tests will be at
http://kisskb.ellerman.id.au/linux-next .  If maintainers want to give
advice about cross compilers/configs that work, we are always open to add
more builds.

Thanks to Randy Dunlap for doing many randconfig builds.  And to Paul
Gortmaker for triage and bug fixes.

-- 
Cheers,
Stephen Rothwell

$ git checkout master
$ git reset --hard stable
Merging origin/master (b5d903c2d656 Merge branch 'akpm' (patches from Andrew))
Merging fixes/master (147a89bc71e7 Merge tag 'kconfig-v4.17' of 
git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-kbuild)
Merging kbuild-current/fixes (2837461dbe6f Merge tag 'scsi-fixes' of 
git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi)
Merging arc-current/for-curr (e6c62399504c ARCv2: support manual regfile save 
on interrupts)
Merging arm-current/fixes (92d44a42af81 ARM: fix kill( ,SIGFPE) breakage)
Merging arm64-fixes/for-next/fixes (82034c23fcbc arm64: Make sure permission 
updates happen for pmd/pud)
Merging m68k-current/for-linus (b12c8a70643f m68k: Set default dma mask for 
platform devices)
Merging powerpc-fixes/fixes (faf37c44a105 powerpc/64s: Clear PCR on boot)
Merging sparc/master (1aaccb5fa0ea Merge tag 'rtc-4.18' of 
git://git.kernel.org/pub/scm/linux/kernel/git/abelloni/linux)
Merging fscrypt-current/for-stable (ae64f9bd1d36 Linux 4.15-rc2)
Merging net/master (7f6afc338405 Merge branch 'l2tp-fixes')
Merging bpf/master (3bce593ac06b selftests: bpf: config: add config fragments)
Merging ipsec/master (d6990976af7c vti6: fix PMTU caching and reporting on xmit)
Merging netfilter/master (60d061e34703 Merge 
git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf)
Merging ipvs/master (312564269535 net: netsec: reduce DMA mask to 40 bits)
Merging wireless-drivers/master (ab1068d6866e iwlwifi: pcie: compare with 
number of IRQs requested for, not number of CPUs)
Merging mac80211/master (885892fb378d mlx4_core: restore optimal ICM memory 
allocation)
Merging rdma-fixes/for-rc (1eb9364ce81d IB/uverbs: Fix ordering of ucontext 
check in ib_uverbs_write)
Merging sound-current/for-linus (ad6baae62381 ALSA: usb-audio: Always create 
the interrupt pipe for the mixer)
Merging sound-asoc-fixes/for-linus (2858e2cfc2ef Merge branch 'asoc-4.17' into 
asoc-linus)
Merging regmap-fixes/for-linus (97fe106a8027 Merge branch 'regmap-4.17' into 
regmap-linus)
Merging regulator-fixes/for-linus (59ce5f3e5530 Merge branch 'regulator-4.17' 
into regulator-linus)
Merging spi-fixes/for-linus (5d3257b8ea48 Merge branch 'spi-4.17' into 
spi-linus)
Merging pci-current/for-linus (0cf22d6b317c PCI: Add "PCIe" to 
pcie_print_link_status() messages)
Merging driver-core.current/driver-core-linus (3ca24ce9ff76 Merge branch 
'proc-cmdline')
Merging tty.current/tty-linus (3ca24ce9ff76 Merge branch 'proc-cmdline')
Merging usb.current/usb-linus (3ca24ce9ff76 Merge branch 'proc-cmdline')
Merging usb-gadget-fixes/fixes (6d08b06e67cd Linux 4.17-rc2)
Merging usb-serial-fixes/usb-linus (75bc37fefc44 Linux 4.17-rc4)
Merging usb-chipidea-fixes/ci-for-usb-stable (964728f9f407 USB: chipidea: msm: 
fix ulpi-node lookup)
Merging phy/fixes (60cc43fc8884 Linux 4.17-rc1)
Merging staging.current/staging-linus (3

RE: [PATCH 4.4 10/24] x86/fpu: Hard-disable lazy FPU mode

2018-06-14 Thread Daniel Sangorrin
Hi Greg,

>  /* Intel-defined CPU features, CPUID level 0x0001 (ecx), word 4 */
> --- a/arch/x86/include/asm/fpu/internal.h
> +++ b/arch/x86/include/asm/fpu/internal.h
> @@ -58,7 +58,7 @@ extern u64 fpu__get_supported_xfeatures_
>   */
>  static __always_inline __pure bool use_eager_fpu(void)
>  {
> - return static_cpu_has_safe(X86_FEATURE_EAGER_FPU);
> + return true;
>  }

Since this function now always returns true, we can remove the code that depends 
on lazy FPU mode.
Actually this has already been done in "x86/fpu: Remove use_eager_fpu()"
Ref: https://patchwork.kernel.org/patch/9365883/

>  static void __init fpu__init_parse_early_param(void)
>  {
> - if (cmdline_find_option_bool(boot_command_line, "eagerfpu=off")) {
> - eagerfpu = DISABLE;
> - fpu__clear_eager_fpu_features();
> - }

Since this patch removes the kernel boot parameter "eagerfpu", maybe we should 
remove it from the Documentation.
This has also been done by commit "x86/fpu: Finish excising 'eagerfpu'"
Ref: https://patchwork.kernel.org/patch/9380673/

I will try backporting those patches unless anyone has any objections.

Thanks,
Daniel Sangorrin





RE: [PATCH 4.4 10/24] x86/fpu: Hard-disable lazy FPU mode

2018-06-14 Thread Daniel Sangorrin
Hi Greg,

>  /* Intel-defined CPU features, CPUID level 0x0001 (ecx), word 4 */
> --- a/arch/x86/include/asm/fpu/internal.h
> +++ b/arch/x86/include/asm/fpu/internal.h
> @@ -58,7 +58,7 @@ extern u64 fpu__get_supported_xfeatures_
>   */
>  static __always_inline __pure bool use_eager_fpu(void)
>  {
> - return static_cpu_has_safe(X86_FEATURE_EAGER_FPU);
> + return true;
>  }

Since this function now always returns true, we can remove the code that depends 
on lazy FPU mode.
Actually this has already been done in "x86/fpu: Remove use_eager_fpu()"
Ref: https://patchwork.kernel.org/patch/9365883/

>  static void __init fpu__init_parse_early_param(void)
>  {
> - if (cmdline_find_option_bool(boot_command_line, "eagerfpu=off")) {
> - eagerfpu = DISABLE;
> - fpu__clear_eager_fpu_features();
> - }

Since this patch removes the kernel boot parameter "eagerfpu", maybe we should 
remove it from the Documentation.
This has also been done by commit "x86/fpu: Finish excising 'eagerfpu'"
Ref: https://patchwork.kernel.org/patch/9380673/

I will try backporting those patches unless anyone has any objections.

Thanks,
Daniel Sangorrin





Re: [PATCH 00/32] VFS: Introduce filesystem context [ver #8]

2018-06-14 Thread Eric W. Biederman
David Howells  writes:

> Here are a set of patches to create a filesystem context prior to setting
> up a new mount, populating it with the parsed options/binary data, creating
> the superblock and then effecting the mount.  This is also used for remount
> since much of the parsing stuff is common in many filesystems.

Dave,
I have read through these patches and I noticed a significant issue.

Today in mount_bdev we do something that looks like:

mount_bdev(...)
{
s = sget(..., bdev);
if (s->s_root) {
/* Noop */
} else {
err = fill_super(s, ...);
if (err) {
deactivate_locked_super(s);
return ERR_PTR(err);
}
s->s_flags |= SB_ACTIVE;
bdev->bd_super = s;
}
return dget(s->s_root);
}

The key point is that we don't process the mount options at all if
a super block already exists in the kernel.  Similar to what
your fscontext changes are doing (after parsing the options).

Your fscontext changes do not improve upon this area of the mount api at
all and that concerns me.  This is an area where people can and already
do shoot themselves in their feet.

The real-world security issue we had with this involved devpts.  The
devpts filesystem requires the mode and gid parameters for new ttys to
be specified to be posix compliant.  People were setting up chroot
environments and mounting devpts with the wrong arguments.  As these two
devpts mounts shared a super block a change of arguments on one was a
change of arguments on the other.  Which meant the chroots were
periodically breaking the primary devpts and causing new terminals to be
opened with essentially unusable permissions.  Fun when you are trying
to ssh in to a box.

Creating a new mount and finding an old mount are the same operation in
the kernel today.  This is fundamentally confusing.  In the new api
could we please separate these two operations?

Perhaps something like:
x create
x find

With the "x create" case failing if the filesystem already exists,
still allowing "x find"?  And with the "x find" case failing if
the superblock is not already created in the kernel.

That should make it clear to a userspace program what is going on
and give it a chance to mount a filesystem anyway.



In a similar vein, could we please clarify what the rules for changing mount
options for an existing superblock are in the new api?

Today mount assumes that it has to provide all of the existing options
to reconfigure a mount.  What people want to do and what most
filesystems support is just specifying the options that need to be
changed.  Can we please make this the rule for how things are expected
to work for fscontext?  That is, only the mount options being changed need to
be specified before: "x reconfigure"

Eric




Re: [PATCH 00/32] VFS: Introduce filesystem context [ver #8]

2018-06-14 Thread Eric W. Biederman
David Howells  writes:

> Here are a set of patches to create a filesystem context prior to setting
> up a new mount, populating it with the parsed options/binary data, creating
> the superblock and then effecting the mount.  This is also used for remount
> since much of the parsing stuff is common in many filesystems.

Dave,
I have read through these patches and I noticed a significant issue.

Today in mount_bdev we do something that looks like:

mount_bdev(...)
{
s = sget(..., bdev);
if (s->s_root) {
/* Noop */
} else {
err = fill_super(s, ...);
if (err) {
deactivate_locked_super(s);
return ERR_PTR(err);
}
s->s_flags |= SB_ACTIVE;
bdev->bd_super = s;
}
return dget(s->s_root);
}

The key point is that we don't process the mount options at all if
a super block already exists in the kernel.  Similar to what
your fscontext changes are doing (after parsing the options).

Your fscontext changes do not improve upon this area of the mount api at
all and that concerns me.  This is an area where people can and already
do shoot themselves in their feet.

The real-world security issue we had with this involved devpts.  The
devpts filesystem requires the mode and gid parameters for new ttys to
be specified to be posix compliant.  People were setting up chroot
environments and mounting devpts with the wrong arguments.  As these two
devpts mounts shared a super block a change of arguments on one was a
change of arguments on the other.  Which meant the chroots were
periodically breaking the primary devpts and causing new terminals to be
opened with essentially unusable permissions.  Fun when you are trying
to ssh in to a box.

Creating a new mount and finding an old mount are the same operation in
the kernel today.  This is fundamentally confusing.  In the new api
could we please separate these two operations?

Perhaps something like:
x create
x find

With the "x create" case failing if the filesystem already exists,
still allowing "x find"?  And with the "x find" case failing if
the superblock is not already created in the kernel.

That should make it clear to a userspace program what is going on
and give it a chance to mount a filesystem anyway.



In a similar vein, could we please clarify what the rules for changing mount
options for an existing superblock are in the new api?

Today mount assumes that it has to provide all of the existing options
to reconfigure a mount.  What people want to do and what most
filesystems support is just specifying the options that need to be
changed.  Can we please make this the rule for how things are expected
to work for fscontext?  That is, only the mount options being changed need to
be specified before: "x reconfigure"

Eric




[PATCH] vfs: discard ATTR_ATTR_FLAG

2018-06-14 Thread NeilBrown

This flag was introduced in 2.1.37pre1 and the only place it was tested
was removed in 2.1.43pre1.  The flag was never set.

Let's discard it properly.

Signed-off-by: NeilBrown 
---
 fs/hostfs/hostfs.h | 3 +--
 include/linux/fs.h | 1 -
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h
index cb8374af08a6..b16619b8e298 100644
--- a/fs/hostfs/hostfs.h
+++ b/fs/hostfs/hostfs.h
@@ -19,9 +19,8 @@
 #define HOSTFS_ATTR_ATIME_SET  128
 #define HOSTFS_ATTR_MTIME_SET  256
 
-/* These two are unused by hostfs. */
+/* This one is unused by hostfs. */
 #define HOSTFS_ATTR_FORCE  512 /* Not a change, but a change it */
-#define HOSTFS_ATTR_ATTR_FLAG  1024
 
 /*
  * If you are very careful, you'll notice that these two are missing:
diff --git a/include/linux/fs.h b/include/linux/fs.h
index bacd33e3a4af..c1ad4346fab9 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -176,7 +176,6 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t 
offset,
 #define ATTR_ATIME_SET (1 << 7)
 #define ATTR_MTIME_SET (1 << 8)
 #define ATTR_FORCE (1 << 9) /* Not a change, but a change it */
-#define ATTR_ATTR_FLAG (1 << 10)
 #define ATTR_KILL_SUID (1 << 11)
 #define ATTR_KILL_SGID (1 << 12)
 #define ATTR_FILE  (1 << 13)
-- 
2.14.0.rc0.dirty



signature.asc
Description: PGP signature


[PATCH] vfs: discard ATTR_ATTR_FLAG

2018-06-14 Thread NeilBrown

This flag was introduced in 2.1.37pre1 and the only place it was tested
was removed in 2.1.43pre1.  The flag was never set.

Let's discard it properly.

Signed-off-by: NeilBrown 
---
 fs/hostfs/hostfs.h | 3 +--
 include/linux/fs.h | 1 -
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h
index cb8374af08a6..b16619b8e298 100644
--- a/fs/hostfs/hostfs.h
+++ b/fs/hostfs/hostfs.h
@@ -19,9 +19,8 @@
 #define HOSTFS_ATTR_ATIME_SET  128
 #define HOSTFS_ATTR_MTIME_SET  256
 
-/* These two are unused by hostfs. */
+/* This one is unused by hostfs. */
 #define HOSTFS_ATTR_FORCE  512 /* Not a change, but a change it */
-#define HOSTFS_ATTR_ATTR_FLAG  1024
 
 /*
  * If you are very careful, you'll notice that these two are missing:
diff --git a/include/linux/fs.h b/include/linux/fs.h
index bacd33e3a4af..c1ad4346fab9 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -176,7 +176,6 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t 
offset,
 #define ATTR_ATIME_SET (1 << 7)
 #define ATTR_MTIME_SET (1 << 8)
 #define ATTR_FORCE (1 << 9) /* Not a change, but a change it */
-#define ATTR_ATTR_FLAG (1 << 10)
 #define ATTR_KILL_SUID (1 << 11)
 #define ATTR_KILL_SGID (1 << 12)
 #define ATTR_FILE  (1 << 13)
-- 
2.14.0.rc0.dirty



signature.asc
Description: PGP signature


Differences in cpu utilization reported by sar, emon

2018-06-14 Thread Solio Sarabia
Hello --

I'm running into an issue where sar, mpstat, top, and other tools show
less cpu utilization compared to emon [1]. Sar uses /proc/stat as its
source, and was configured to collect in 1s intervals. Emon reads
hardware counter MSRs in the PMU in timer intervals, 0.1s for this
scenario.

The platform is based on Xeon E5-2699 v3 (Haswell) 2.3GHz, 2_sockets,
18_cores/socket, 36_cores in total, running Ubuntu 16.04, Linux
4.4.0-128-generic. A network micro workload, ntttcp-for-linux [2],
sends packets from client to server, through a 40GbE direct link.
Numbers below are from server side.

                 total %util
            CPU11   CPU21   CPU22   CPU25
emon        99.99   15.90   36.22   36.82
sar         99.99    0.06    0.36    0.35

                 interrupts/sec
            CPU11   CPU21   CPU22   CPU25
intrs/sec     846   28923   12844    6304
Contributors to /proc/interrupts:
CPU11: Local timer interrupts and Rescheduling interrupts
CPU21-CPU25: PCI MSI vector from network driver

                 softirqs/sec
            CPU11   CPU21   CPU22   CPU25
TIMER         198       1       2       1
NET_RX          1   28889   23553   18546
TASKLET         0   28889   11676    6249


Somehow hardware irqs and softirqs do not have an effect on the core's
utilization. Another observation is that as more cores are used to
process packets, the emon/sar gap increases.

Kernels used default HZ=250. I also tried HZ=1000, which helped improve
throughput, but difference in util is still there. Same for newer
kernels 4.13, 4.15. I would appreciate pointers to debug this, or
insights as to what could cause this behavior.

[1] https://software.intel.com/en-us/download/emon-users-guide
[2] https://github.com/simonxiaoss/ntttcp-for-linux

Thanks,
-Solio


Differences in cpu utilization reported by sar, emon

2018-06-14 Thread Solio Sarabia
Hello --

I'm running into an issue where sar, mpstat, top, and other tools show
less cpu utilization compared to emon [1]. Sar uses /proc/stat as its
source, and was configured to collect in 1s intervals. Emon reads
hardware counter MSRs in the PMU in timer intervals, 0.1s for this
scenario.

The platform is based on Xeon E5-2699 v3 (Haswell) 2.3GHz, 2_sockets,
18_cores/socket, 36_cores in total, running Ubuntu 16.04, Linux
4.4.0-128-generic. A network micro workload, ntttcp-for-linux [2],
sends packets from client to server, through a 40GbE direct link.
Numbers below are from server side.

                 total %util
            CPU11   CPU21   CPU22   CPU25
emon        99.99   15.90   36.22   36.82
sar         99.99    0.06    0.36    0.35

                 interrupts/sec
            CPU11   CPU21   CPU22   CPU25
intrs/sec     846   28923   12844    6304
Contributors to /proc/interrupts:
CPU11: Local timer interrupts and Rescheduling interrupts
CPU21-CPU25: PCI MSI vector from network driver

                 softirqs/sec
            CPU11   CPU21   CPU22   CPU25
TIMER         198       1       2       1
NET_RX          1   28889   23553   18546
TASKLET         0   28889   11676    6249


Somehow hardware irqs and softirqs do not have an effect on the core's
utilization. Another observation is that as more cores are used to
process packets, the emon/sar gap increases.

Kernels used default HZ=250. I also tried HZ=1000, which helped improve
throughput, but difference in util is still there. Same for newer
kernels 4.13, 4.15. I would appreciate pointers to debug this, or
insights as to what could cause this behavior.

[1] https://software.intel.com/en-us/download/emon-users-guide
[2] https://github.com/simonxiaoss/ntttcp-for-linux

Thanks,
-Solio


Re: [PATCH] autofs - fix directory and symlink access

2018-06-14 Thread Ian Kent
On Fri, 2018-06-15 at 08:06 +0800, Ian Kent wrote:

Oops, missing Signed-off-by, please add it!

> Depending on how it is configured the autofs user space daemon can
> leave in use mounts mounted at exit and re-connect to them at start
> up. But for this to work best the state of the autofs file system
> needs to be left intact over the restart.
> 
> Also, at system shutdown, mounts in an autofs file system might be
> umounted exposing a mount point trigger for which subsequent access
> can lead to a hang. So recent versions of automount(8) now do their
> best to set autofs file system mounts catatonic at shutdown.
> 
> When autofs file system mounts are catatonic it's currently possible
> to create and remove directories and symlinks which can be a problem
> at restart, as described above.
> 
> So return EACCES in the directory, symlink and unlink methods if the
> autofs file system is catatonic.

Signed-off-by: Ian Kent 

> ---
>  fs/autofs/root.c |   33 ++---
>  1 file changed, 30 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/autofs/root.c b/fs/autofs/root.c
> index a3d414150578..782e57b911ab 100644
> --- a/fs/autofs/root.c
> +++ b/fs/autofs/root.c
> @@ -559,6 +559,13 @@ static int autofs_dir_symlink(struct inode *dir,
>   if (!autofs_oz_mode(sbi))
>   return -EACCES;
>  
> + /* autofs_oz_mode() needs to allow path walks when the
> +  * autofs mount is catatonic but the state of an autofs
> +  * file system needs to be preserved over restarts.
> +  */
> + if (sbi->catatonic)
> + return -EACCES;
> +
>   BUG_ON(!ino);
>  
>   autofs_clean_ino(ino);
> @@ -612,9 +619,15 @@ static int autofs_dir_unlink(struct inode *dir, struct
> dentry *dentry)
>   struct autofs_info *ino = autofs_dentry_ino(dentry);
>   struct autofs_info *p_ino;
>  
> - /* This allows root to remove symlinks */
> - if (!autofs_oz_mode(sbi) && !capable(CAP_SYS_ADMIN))
> - return -EPERM;
> + if (!autofs_oz_mode(sbi))
> + return -EACCES;
> +
> + /* autofs_oz_mode() needs to allow path walks when the
> +  * autofs mount is catatonic but the state of an autofs
> +  * file system needs to be preserved over restarts.
> +  */
> + if (sbi->catatonic)
> + return -EACCES;
>  
>   if (atomic_dec_and_test(&ino->count)) {
>   p_ino = autofs_dentry_ino(dentry->d_parent);
> @@ -697,6 +710,13 @@ static int autofs_dir_rmdir(struct inode *dir, struct
> dentry *dentry)
>   if (!autofs_oz_mode(sbi))
>   return -EACCES;
>  
> + /* autofs_oz_mode() needs to allow path walks when the
> +  * autofs mount is catatonic but the state of an autofs
> +  * file system needs to be preserved over restarts.
> +  */
> + if (sbi->catatonic)
> + return -EACCES;
> +
>   spin_lock(&sbi->lookup_lock);
>   if (!simple_empty(dentry)) {
>   spin_unlock(&sbi->lookup_lock);
> @@ -735,6 +755,13 @@ static int autofs_dir_mkdir(struct inode *dir,
>   if (!autofs_oz_mode(sbi))
>   return -EACCES;
>  
> + /* autofs_oz_mode() needs to allow path walks when the
> +  * autofs mount is catatonic but the state of an autofs
> +  * file system needs to be preserved over restarts.
> +  */
> + if (sbi->catatonic)
> + return -EACCES;
> +
>   pr_debug("dentry %p, creating %pd\n", dentry, dentry);
>  
>   BUG_ON(!ino);
> 
> --
> To unsubscribe from this list: send the line "unsubscribe autofs" in


Re: [PATCH] autofs - fix directory and symlink access

2018-06-14 Thread Ian Kent
On Fri, 2018-06-15 at 08:06 +0800, Ian Kent wrote:

Oops, missing Signed-off-by, please add it!

> Depending on how it is configured the autofs user space daemon can
> leave in use mounts mounted at exit and re-connect to them at start
> up. But for this to work best the state of the autofs file system
> needs to be left intact over the restart.
> 
> Also, at system shutdown, mounts in an autofs file system might be
> umounted exposing a mount point trigger for which subsequent access
> can lead to a hang. So recent versions of automount(8) now do their
> best to set autofs file system mounts catatonic at shutdown.
> 
> When autofs file system mounts are catatonic it's currently possible
> to create and remove directories and symlinks which can be a problem
> at restart, as described above.
> 
> So return EACCES in the directory, symlink and unlink methods if the
> autofs file system is catatonic.

Signed-off-by: Ian Kent 

> ---
>  fs/autofs/root.c |   33 ++---
>  1 file changed, 30 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/autofs/root.c b/fs/autofs/root.c
> index a3d414150578..782e57b911ab 100644
> --- a/fs/autofs/root.c
> +++ b/fs/autofs/root.c
> @@ -559,6 +559,13 @@ static int autofs_dir_symlink(struct inode *dir,
>   if (!autofs_oz_mode(sbi))
>   return -EACCES;
>  
> + /* autofs_oz_mode() needs to allow path walks when the
> +  * autofs mount is catatonic but the state of an autofs
> +  * file system needs to be preserved over restarts.
> +  */
> + if (sbi->catatonic)
> + return -EACCES;
> +
>   BUG_ON(!ino);
>  
>   autofs_clean_ino(ino);
> @@ -612,9 +619,15 @@ static int autofs_dir_unlink(struct inode *dir, struct
> dentry *dentry)
>   struct autofs_info *ino = autofs_dentry_ino(dentry);
>   struct autofs_info *p_ino;
>  
> - /* This allows root to remove symlinks */
> - if (!autofs_oz_mode(sbi) && !capable(CAP_SYS_ADMIN))
> - return -EPERM;
> + if (!autofs_oz_mode(sbi))
> + return -EACCES;
> +
> + /* autofs_oz_mode() needs to allow path walks when the
> +  * autofs mount is catatonic but the state of an autofs
> +  * file system needs to be preserved over restarts.
> +  */
> + if (sbi->catatonic)
> + return -EACCES;
>  
>   if (atomic_dec_and_test(&ino->count)) {
>   p_ino = autofs_dentry_ino(dentry->d_parent);
> @@ -697,6 +710,13 @@ static int autofs_dir_rmdir(struct inode *dir, struct
> dentry *dentry)
>   if (!autofs_oz_mode(sbi))
>   return -EACCES;
>  
> + /* autofs_oz_mode() needs to allow path walks when the
> +  * autofs mount is catatonic but the state of an autofs
> +  * file system needs to be preserved over restarts.
> +  */
> + if (sbi->catatonic)
> + return -EACCES;
> +
>   spin_lock(&sbi->lookup_lock);
>   if (!simple_empty(dentry)) {
>   spin_unlock(&sbi->lookup_lock);
> @@ -735,6 +755,13 @@ static int autofs_dir_mkdir(struct inode *dir,
>   if (!autofs_oz_mode(sbi))
>   return -EACCES;
>  
> + /* autofs_oz_mode() needs to allow path walks when the
> +  * autofs mount is catatonic but the state of an autofs
> +  * file system needs to be preserved over restarts.
> +  */
> + if (sbi->catatonic)
> + return -EACCES;
> +
>   pr_debug("dentry %p, creating %pd\n", dentry, dentry);
>  
>   BUG_ON(!ino);
> 
> --
> To unsubscribe from this list: send the line "unsubscribe autofs" in


Re: [PATCH v1 0/2] perf: Drop leaked kernel samples

2018-06-14 Thread Kyle Huey
I strongly object to this patch as written. As I said when I
originally reported[0] the regression introduced by the previous
version of this patch a year ago:

"It seems like this change should, at a bare minimum, be limited to
counters that actually perform sampling of register state when the
interrupt fires.  In our case, with the retired conditional branches
counter restricted to counting userspace events only, it makes no
difference that the PMU interrupt happened to be delivered in the
kernel."

This means identifying which values of `perf_event_attr::sample_type`
are security concerns (presumably PERF_SAMPLE_IP is, and
PERF_SAMPLE_TIME is not, and someone needs to go through and decide on
all of them) and filtering on those values for this new behavior.

And because rr sets its sample_type to 0, once you do that, the sysctl
will not be necessary.

- Kyle

On Fri, Jun 15, 2018 at 3:03 AM, Jin Yao  wrote:
> On workloads that do a lot of kernel entry/exits we see kernel
> samples, even though :u is specified. This is due to skid existing.
>
> This might be a security issue because it can leak kernel addresses even
> though kernel sampling support is disabled.
>
> One patch "perf/core: Drop kernel samples even though :u is specified"
> was posted in last year but it was reverted because it introduced a
> regression issue that broke the rr-project.
>
> Now this patch set uses sysctl to control the dropping of leaked
> kernel samples.
>
> /sys/devices/cpu/perf_allow_sample_leakage:
>
> 0 - default, drop the leaked kernel samples.
> 1 - don't drop the leaked kernel samples.
>
> For rr it can write 1 to /sys/devices/cpu/perf_allow_sample_leakage to
> keep original system behavior.
>
> Jin Yao (2):
>   perf/core: Use sysctl to turn on/off dropping leaked kernel samples
>   perf Documentation: Introduce the sysctl perf_allow_sample_leakage
>
>  kernel/events/core.c | 58 
> 
>  tools/perf/Documentation/perf-record.txt | 14 
>  2 files changed, 72 insertions(+)
>
> --
> 2.7.4
>

[0] https://lkml.org/lkml/2017/6/27/1159


Re: [PATCH v1 0/2] perf: Drop leaked kernel samples

2018-06-14 Thread Kyle Huey
I strongly object to this patch as written. As I said when I
originally reported[0] the regression introduced by the previous
version of this patch a year ago:

"It seems like this change should, at a bare minimum, be limited to
counters that actually perform sampling of register state when the
interrupt fires.  In our case, with the retired conditional branches
counter restricted to counting userspace events only, it makes no
difference that the PMU interrupt happened to be delivered in the
kernel."

This means identifying which values of `perf_event_attr::sample_type`
are security concerns (presumably PERF_SAMPLE_IP is, and
PERF_SAMPLE_TIME is not, and someone needs to go through and decide on
all of them) and filtering on those values for this new behavior.

And because rr sets its sample_type to 0, once you do that, the sysctl
will not be necessary.

- Kyle

On Fri, Jun 15, 2018 at 3:03 AM, Jin Yao  wrote:
> On workloads that do a lot of kernel entry/exits we see kernel
> samples, even though :u is specified. This is due to skid existing.
>
> This might be a security issue because it can leak kernel addresses even
> though kernel sampling support is disabled.
>
> One patch "perf/core: Drop kernel samples even though :u is specified"
> was posted in last year but it was reverted because it introduced a
> regression issue that broke the rr-project.
>
> Now this patch set uses sysctl to control the dropping of leaked
> kernel samples.
>
> /sys/devices/cpu/perf_allow_sample_leakage:
>
> 0 - default, drop the leaked kernel samples.
> 1 - don't drop the leaked kernel samples.
>
> For rr it can write 1 to /sys/devices/cpu/perf_allow_sample_leakage to
> keep original system behavior.
>
> Jin Yao (2):
>   perf/core: Use sysctl to turn on/off dropping leaked kernel samples
>   perf Documentation: Introduce the sysctl perf_allow_sample_leakage
>
>  kernel/events/core.c | 58 
> 
>  tools/perf/Documentation/perf-record.txt | 14 
>  2 files changed, 72 insertions(+)
>
> --
> 2.7.4
>

[0] https://lkml.org/lkml/2017/6/27/1159


Re: [PATCH] infiniband: fix a subtle race condition

2018-06-14 Thread Jason Gunthorpe
On Thu, Jun 14, 2018 at 04:14:13PM -0700, Cong Wang wrote:
> On Thu, Jun 14, 2018 at 10:24 AM, Jason Gunthorpe  wrote:
> > On Thu, Jun 14, 2018 at 10:03:09AM -0700, Cong Wang wrote:
> >> On Thu, Jun 14, 2018 at 7:24 AM, Jason Gunthorpe  wrote:
> >> >
> >> > This was my brief reaction too, this code path almost certainly has a
> >> > use-after-free, and we should fix the concurrency between the two
> >> > places in some correct way..
> >>
> >> First of all, why use-after-free could trigger an imbalance unlock?
> >> IOW, why do we have to solve use-after-free to fix this imbalance
> >> unlock?
> >
> > The issue syzkaller hit is that accessing ctx->file does not seem
> > locked in any way and can race with other manipulations of ctx->file.
> >
> > So.. for this patch to be correct we need to understand how this
> > statement:
> >
> >f = ctx->file
> >
> > Avoids f becoming a dangling pointer - and without locking, my
> 
> It doesn't, because this is not the point, this is not the cause
> of the unlock imbalance either. syzbot didn't report use-after-free
> or a kernel segfault here.

No, it *is* the point - you've proposed a solution, one of many, and
we need to see an actual sensible design for how the locking around
ctx->file should work correctly.

We need solutions that solve the underlying problem, not just paper
over the symptoms.

Stated another way, for a syzkaller report like this there are a few
really obvious fixes.

1) Capture the lock pointer on the stack:
  f = ctx->file
  mutex_lock(&f->mut);
  mutex_unlock(&f->mut);

2) Prevent ctx->file from changing, eg add more locking:
  mutex_lock(&mut);
  mutex_lock(&ctx->file->mut);
  mutex_unlock(&ctx->file->mut);
  mutex_unlock(&mut);

3) Prevent ctx->file from being changed/freed by flushing the
   WQ at the right times:

   rdma_addr_cancel(...);
   ctx->file = XYZ;

This patch proposed #1. An explanation is required why that is a
correct locking design for this code. It sure looks like it isn't.

Looking at this *just a bit*, I wonder why not do something like
this:

  mutex_lock(&mut);
  f = ctx->file;
  mutex_lock(&f->mutex);
  mutex_unlock(&mut);
 
? At least that *might* make sense. Though probably it deadlocks as it
looks like we call rdma_addr_cancel() while holding mut. Yuk.

But maybe that sequence could be done before launching the work..

> > I'm not sure that race exists, there should be something that flushes
> > the WQ on the path to close... (though I have another email that
> > perhaps that is broken, sigh)
> 
> This is not related to my patch, but to convince you, let me explain:
> 
> struct ucma_file is not refcnt'ed, I know you cancel the work in
> rdma_destroy_id(), but after ucma_migrate_id() the ctx has already
> been moved to the new file, for the old file, it won't cancel the
> ctx flying with workqueue. So, I think the following use-after-free
> could happen:
> 
> ucma_event_handler():
> cur_file = ctx->file; // old file
> 
> ucma_migrate_id():
> lock();
> list_move_tail(&ctx->list, &new_file->ctx_list);
> ctx->file = new_file;
> unlock();
> 
> ucma_close():
> // retrieve old file via filp->private_data
> // the loop won't cover the ctx moved to the new_file
> kfree(file);

Yep. That sure seems like the right analysis!

> This is _not_ the cause of the unlock imbalance, and is _not_ expected
> to solve by patch either.

What do you mean? Not calling rdma_addr_cancel() prior to freeing the
file is *exactly* the cause of the lock imbalance.

The design of this code *assumes* that rdma_addr_cancel() will be
called before altering/freeing/etc any of the state it is working on,
migration makes a state change that violates that invariant.

Jason


Re: [PATCH] infiniband: fix a subtle race condition

2018-06-14 Thread Jason Gunthorpe
On Thu, Jun 14, 2018 at 04:14:13PM -0700, Cong Wang wrote:
> On Thu, Jun 14, 2018 at 10:24 AM, Jason Gunthorpe  wrote:
> > On Thu, Jun 14, 2018 at 10:03:09AM -0700, Cong Wang wrote:
> >> On Thu, Jun 14, 2018 at 7:24 AM, Jason Gunthorpe  wrote:
> >> >
> >> > This was my brief reaction too, this code path almost certainly has a
> >> > use-after-free, and we should fix the concurrency between the two
> >> > places in some correct way..
> >>
> >> First of all, why use-after-free could trigger an imbalance unlock?
> >> IOW, why do we have to solve use-after-free to fix this imbalance
> >> unlock?
> >
> > The issue syzkaller hit is that accessing ctx->file does not seem
> > locked in any way and can race with other manipulations of ctx->file.
> >
> > So.. for this patch to be correct we need to understand how this
> > statement:
> >
> >f = ctx->file
> >
> > Avoids f becoming a dangling pointer - and without locking, my
> 
> It doesn't, because this is not the point, this is not the cause
> of the unlock imbalance either. syzbot didn't report use-after-free
> or a kernel segfault here.

No, it *is* the point - you've proposed a solution, one of many, and
we need to see an actual sensible design for how the locking around
ctx->file should work correctly.

We need solutions that solve the underlying problem, not just paper
over the symptoms.

Stated another way, for a syzkaller report like this there are a few
really obvious fixes.

1) Capture the lock pointer on the stack:
  f = ctx->file
  mutex_lock(&f->mut);
  mutex_unlock(&f->mut);

2) Prevent ctx->file from changing, eg add more locking:
  mutex_lock(&mut);
  mutex_lock(&ctx->file->mut);
  mutex_unlock(&ctx->file->mut);
  mutex_unlock(&mut);

3) Prevent ctx->file from being changed/freed by flushing the
   WQ at the right times:

   rdma_addr_cancel(...);
   ctx->file = XYZ;

This patch proposed #1. An explanation is required why that is a
correct locking design for this code. It sure looks like it isn't.

Looking at this *just a bit*, I wonder why not do something like
this:

  mutex_lock(&mut);
  f = ctx->file;
  mutex_lock(&f->mutex);
  mutex_unlock(&mut);
 
? At least that *might* make sense. Though probably it deadlocks as it
looks like we call rdma_addr_cancel() while holding mut. Yuk.

But maybe that sequence could be done before launching the work..

> > I'm not sure that race exists, there should be something that flushes
> > the WQ on the path to close... (though I have another email that
> > perhaps that is broken, sigh)
> 
> This is not related to my patch, but to convince you, let me explain:
> 
> struct ucma_file is not refcnt'ed, I know you cancel the work in
> rdma_destroy_id(), but after ucma_migrate_id() the ctx has already
> been moved to the new file, for the old file, it won't cancel the
> ctx flying with workqueue. So, I think the following use-after-free
> could happen:
> 
> ucma_event_handler():
> cur_file = ctx->file; // old file
> 
> ucma_migrate_id():
> lock();
> list_move_tail(&ctx->list, &new_file->ctx_list);
> ctx->file = new_file;
> unlock();
> 
> ucma_close():
> // retrieve old file via filp->private_data
> // the loop won't cover the ctx moved to the new_file
> kfree(file);

Yep. That sure seems like the right analysis!

> This is _not_ the cause of the unlock imbalance, and is _not_ expected
> to solve by patch either.

What do you mean? Not calling rdma_addr_cancel() prior to freeing the
file is *exactly* the cause of the lock imbalance.

The design of this code *assumes* that rdma_addr_cancel() will be
called before altering/freeing/etc any of the state it is working on,
migration makes a state change that violates that invariant.

Jason


Re: [PATCH v3] PCI: mediatek: Add system pm support for MT2712

2018-06-14 Thread Honghui Zhang
On Fri, 2018-06-01 at 13:52 +0300, Andy Shevchenko wrote:
> On Fri, Jun 1, 2018 at 1:49 PM, Honghui Zhang
>  wrote:
> > On Fri, 2018-06-01 at 13:17 +0300, Andy Shevchenko wrote:
> >> On Fri, Jun 1, 2018 at 6:04 AM,   wrote:
> >> > From: Honghui Zhang 
> >>
> >> > +#ifdef CONFIG_PM_SLEEP
> >> > +static int mtk_pcie_suspend_noirq(struct device *dev)
> >>
> >> __maybe_unused
> >>
> >
> > Hi, Andy, thanks for your review.
> > > Bjorn had pointed this out at:
> > https://www.spinics.net/lists/arm-kernel/msg656774.html
> 
> Nice, one more maintainer with strong opinion here.
> 
> Arnd, that's what I mentioned as a split in opinions earlier.
> Any new developer or even existing contributor would now be really
> confused, since maintainers asked for two different approaches on the
> same matter.
> 

Hi, Arnd, Do you have any comments about this patch? Sorry I missed the
previous discussion about which should be used for this approach.

thanks.



Re: [PATCH v3] PCI: mediatek: Add system pm support for MT2712

2018-06-14 Thread Honghui Zhang
On Fri, 2018-06-01 at 13:52 +0300, Andy Shevchenko wrote:
> On Fri, Jun 1, 2018 at 1:49 PM, Honghui Zhang
>  wrote:
> > On Fri, 2018-06-01 at 13:17 +0300, Andy Shevchenko wrote:
> >> On Fri, Jun 1, 2018 at 6:04 AM,   wrote:
> >> > From: Honghui Zhang 
> >>
> >> > +#ifdef CONFIG_PM_SLEEP
> >> > +static int mtk_pcie_suspend_noirq(struct device *dev)
> >>
> >> __maybe_unused
> >>
> >
> > Hi, Andy, thanks for your review.
> > > Bjorn had pointed this out at:
> > https://www.spinics.net/lists/arm-kernel/msg656774.html
> 
> Nice, one more maintainer with strong opinion here.
> 
> Arnd, that's what I mentioned as a split in opinions earlier.
> Any new developer or even existing contributor would now be really
> confused, since maintainers asked for two different approaches on the
> same matter.
> 

Hi, Arnd, Do you have any comments about this patch? Sorry I missed the
previous discussion about which should be used for this approach.

thanks.



Re: [PATCH] cpufreq: kryo: Fix possible error code dereference

2018-06-14 Thread Viresh Kumar
On 14-06-18, 22:29, ilia@gmail.com wrote:
> From: Ilia Lin 
> 
> In event of error returned by the nvmem_cell_read() non-pointer value
> may be dereferenced. Fix this with error handling.
> 
> Signed-off-by: Ilia Lin 

Fixes tag ?

> ---
>  drivers/cpufreq/qcom-cpufreq-kryo.c | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/drivers/cpufreq/qcom-cpufreq-kryo.c 
> b/drivers/cpufreq/qcom-cpufreq-kryo.c
> index d049fe4b80c4..5e9511223ce9 100644
> --- a/drivers/cpufreq/qcom-cpufreq-kryo.c
> +++ b/drivers/cpufreq/qcom-cpufreq-kryo.c
> @@ -115,6 +115,8 @@ static int qcom_cpufreq_kryo_probe(struct platform_device 
> *pdev)
>  
>   speedbin = nvmem_cell_read(speedbin_nvmem, &len);
>   nvmem_cell_put(speedbin_nvmem);
> + if (IS_ERR(speedbin))
> + return PTR_ERR(speedbin);
>  
>   switch (msm8996_version) {
>   case MSM8996_V3:
> -- 
> 2.11.0

-- 
viresh


Re: [PATCH] cpufreq: kryo: Fix possible error code dereference

2018-06-14 Thread Viresh Kumar
On 14-06-18, 22:29, ilia@gmail.com wrote:
> From: Ilia Lin 
> 
> In event of error returned by the nvmem_cell_read() non-pointer value
> may be dereferenced. Fix this with error handling.
> 
> Signed-off-by: Ilia Lin 

Fixes tag ?

> ---
>  drivers/cpufreq/qcom-cpufreq-kryo.c | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/drivers/cpufreq/qcom-cpufreq-kryo.c 
> b/drivers/cpufreq/qcom-cpufreq-kryo.c
> index d049fe4b80c4..5e9511223ce9 100644
> --- a/drivers/cpufreq/qcom-cpufreq-kryo.c
> +++ b/drivers/cpufreq/qcom-cpufreq-kryo.c
> @@ -115,6 +115,8 @@ static int qcom_cpufreq_kryo_probe(struct platform_device 
> *pdev)
>  
>   speedbin = nvmem_cell_read(speedbin_nvmem, &len);
>   nvmem_cell_put(speedbin_nvmem);
> + if (IS_ERR(speedbin))
> + return PTR_ERR(speedbin);
>  
>   switch (msm8996_version) {
>   case MSM8996_V3:
> -- 
> 2.11.0

-- 
viresh


Re: [PATCH v2] cpufreq: kryo: Add module remove and exit

2018-06-14 Thread Viresh Kumar
On 14-06-18, 22:42, ilia@gmail.com wrote:
> From: Ilia Lin 
> 
> Add device remove and module exit code to make the driver
> functioning as a loadable module.
> 
> Fixes: ac28927659be (cpufreq: kryo: allow building as a loadable module)
> Signed-off-by: Ilia Lin 
> ---
>  drivers/cpufreq/qcom-cpufreq-kryo.c | 23 ---
>  1 file changed, 20 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/cpufreq/qcom-cpufreq-kryo.c 
> b/drivers/cpufreq/qcom-cpufreq-kryo.c
> index 5e9511223ce9..01bddacf5c3b 100644
> --- a/drivers/cpufreq/qcom-cpufreq-kryo.c
> +++ b/drivers/cpufreq/qcom-cpufreq-kryo.c
> @@ -42,6 +42,8 @@ enum _msm8996_version {
>   NUM_OF_MSM8996_VERSIONS,
>  };
>  
> +struct platform_device *cpufreq_dt_pdev, *kryo_cpufreq_pdev;
> +
>  static enum _msm8996_version __init qcom_cpufreq_kryo_get_msm_id(void)
>  {
>   size_t len;
> @@ -74,7 +76,6 @@ static enum _msm8996_version __init 
> qcom_cpufreq_kryo_get_msm_id(void)
>  static int qcom_cpufreq_kryo_probe(struct platform_device *pdev)
>  {
>   struct opp_table *opp_tables[NR_CPUS] = {0};
> - struct platform_device *cpufreq_dt_pdev;
>   enum _msm8996_version msm8996_version;
>   struct nvmem_cell *speedbin_nvmem;
>   struct device_node *np;
> @@ -129,6 +130,7 @@ static int qcom_cpufreq_kryo_probe(struct platform_device 
> *pdev)
>   BUG();
>   break;
>   }
> + kfree(speedbin);

Again, stuff like this can't go in this patch itself. It is a separate
bug fix and you should send it with a matching fixes tag separately.

>  
>   for_each_possible_cpu(cpu) {
>   cpu_dev = get_cpu_device(cpu);
> @@ -164,8 +166,15 @@ static int qcom_cpufreq_kryo_probe(struct 
> platform_device *pdev)
>   return ret;
>  }
>  
> +static int qcom_cpufreq_kryo_remove(struct platform_device *pdev)
> +{
> + platform_device_unregister(cpufreq_dt_pdev);
> + return 0;
> +}
> +
>  static struct platform_driver qcom_cpufreq_kryo_driver = {
>   .probe = qcom_cpufreq_kryo_probe,
> + .remove = qcom_cpufreq_kryo_remove,
>   .driver = {
>   .name = "qcom-cpufreq-kryo",
>   },
> @@ -200,8 +209,9 @@ static int __init qcom_cpufreq_kryo_init(void)
>   if (unlikely(ret < 0))
>   return ret;
>  
> - ret = PTR_ERR_OR_ZERO(platform_device_register_simple(
> - "qcom-cpufreq-kryo", -1, NULL, 0));
> + kryo_cpufreq_pdev = platform_device_register_simple(
> + "qcom-cpufreq-kryo", -1, NULL, 0);
> + ret = PTR_ERR_OR_ZERO(kryo_cpufreq_pdev);
>   if (0 == ret)
>   return 0;
>  
> @@ -210,5 +220,12 @@ static int __init qcom_cpufreq_kryo_init(void)
>  }
>  module_init(qcom_cpufreq_kryo_init);
>  
> +static void __init qcom_cpufreq_kryo_exit(void)
> +{
> + platform_device_unregister(kryo_cpufreq_pdev);
> + platform_driver_unregister(&qcom_cpufreq_kryo_driver);
> +}
> +module_exit(qcom_cpufreq_kryo_exit);
> +
>  MODULE_DESCRIPTION("Qualcomm Technologies, Inc. Kryo CPUfreq driver");
>  MODULE_LICENSE("GPL v2");
> -- 
> 2.11.0

-- 
viresh


Re: [PATCH v2] cpufreq: kryo: Add module remove and exit

2018-06-14 Thread Viresh Kumar
On 14-06-18, 22:42, ilia@gmail.com wrote:
> From: Ilia Lin 
> 
> Add device remove and module exit code to make the driver
> functioning as a loadable module.
> 
> Fixes: ac28927659be (cpufreq: kryo: allow building as a loadable module)
> Signed-off-by: Ilia Lin 
> ---
>  drivers/cpufreq/qcom-cpufreq-kryo.c | 23 ---
>  1 file changed, 20 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/cpufreq/qcom-cpufreq-kryo.c 
> b/drivers/cpufreq/qcom-cpufreq-kryo.c
> index 5e9511223ce9..01bddacf5c3b 100644
> --- a/drivers/cpufreq/qcom-cpufreq-kryo.c
> +++ b/drivers/cpufreq/qcom-cpufreq-kryo.c
> @@ -42,6 +42,8 @@ enum _msm8996_version {
>   NUM_OF_MSM8996_VERSIONS,
>  };
>  
> +struct platform_device *cpufreq_dt_pdev, *kryo_cpufreq_pdev;
> +
>  static enum _msm8996_version __init qcom_cpufreq_kryo_get_msm_id(void)
>  {
>   size_t len;
> @@ -74,7 +76,6 @@ static enum _msm8996_version __init 
> qcom_cpufreq_kryo_get_msm_id(void)
>  static int qcom_cpufreq_kryo_probe(struct platform_device *pdev)
>  {
>   struct opp_table *opp_tables[NR_CPUS] = {0};
> - struct platform_device *cpufreq_dt_pdev;
>   enum _msm8996_version msm8996_version;
>   struct nvmem_cell *speedbin_nvmem;
>   struct device_node *np;
> @@ -129,6 +130,7 @@ static int qcom_cpufreq_kryo_probe(struct platform_device 
> *pdev)
>   BUG();
>   break;
>   }
> + kfree(speedbin);

Again, stuff like this can't go in this patch itself. It is a separate
bug fix and you should send it with a matching fixes tag separately.

>  
>   for_each_possible_cpu(cpu) {
>   cpu_dev = get_cpu_device(cpu);
> @@ -164,8 +166,15 @@ static int qcom_cpufreq_kryo_probe(struct 
> platform_device *pdev)
>   return ret;
>  }
>  
> +static int qcom_cpufreq_kryo_remove(struct platform_device *pdev)
> +{
> + platform_device_unregister(cpufreq_dt_pdev);
> + return 0;
> +}
> +
>  static struct platform_driver qcom_cpufreq_kryo_driver = {
>   .probe = qcom_cpufreq_kryo_probe,
> + .remove = qcom_cpufreq_kryo_remove,
>   .driver = {
>   .name = "qcom-cpufreq-kryo",
>   },
> @@ -200,8 +209,9 @@ static int __init qcom_cpufreq_kryo_init(void)
>   if (unlikely(ret < 0))
>   return ret;
>  
> - ret = PTR_ERR_OR_ZERO(platform_device_register_simple(
> - "qcom-cpufreq-kryo", -1, NULL, 0));
> + kryo_cpufreq_pdev = platform_device_register_simple(
> + "qcom-cpufreq-kryo", -1, NULL, 0);
> + ret = PTR_ERR_OR_ZERO(kryo_cpufreq_pdev);
>   if (0 == ret)
>   return 0;
>  
> @@ -210,5 +220,12 @@ static int __init qcom_cpufreq_kryo_init(void)
>  }
>  module_init(qcom_cpufreq_kryo_init);
>  
> +static void __init qcom_cpufreq_kryo_exit(void)
> +{
> + platform_device_unregister(kryo_cpufreq_pdev);
> + platform_driver_unregister(&qcom_cpufreq_kryo_driver);
> +}
> +module_exit(qcom_cpufreq_kryo_exit);
> +
>  MODULE_DESCRIPTION("Qualcomm Technologies, Inc. Kryo CPUfreq driver");
>  MODULE_LICENSE("GPL v2");
> -- 
> 2.11.0

-- 
viresh


Re: [PATCH] optoe: driver to read/write SFP/QSFP EEPROMs

2018-06-14 Thread Don Bollinger
On Thu, Jun 14, 2018 at 09:46:36AM +0200, Arnd Bergmann wrote:
> On Thu, Jun 14, 2018 at 2:40 AM, Don Bollinger  wrote:
> > On Mon, Jun 11, 2018 at 03:43:02PM +0200, Arnd Bergmann wrote:
> >> On Mon, Jun 11, 2018 at 6:25 AM, Don Bollinger  
> >> wrote:
> 
> >>
> >> I don't understand this part: I see some older patches introducing an
> >> EEPROM_CLASS, but nothing ever seems to have made it into the
> >> mainline kernel.
> >>
> >> If that class isn't there, this code shouldn't be either. You can always
> >> add it back in case we decide to introduce that class later, but then
> >> I wouldn't make it a compile-time option but just a hard dependency
> >> instead.
> >
> > Thanks for the feedback.
> >
> > Some background will explain how optoe got here...
> 
> Ok, I see. For the upstream submission of course, none of the forked
> kernel code bases matter at all, what we want is a driver that makes
> sense by itself, and none of it should depend on any third party code.

Got it.

> For traditional devices, we would use a header in
> include/linux/platform_data/, but a more modern way of doing this
> would be to use named device properties that are either put
> in the devicetree file (on embedded machines) or added through
> the .properties field when statically declaring an i2c device from
> a PCI device parent.
> 
>   Arnd

Thanks for the guidance.  It turns out that getting into mainline makes
it easier for my partners to consume a header in
include/linux/platform_data.  I'll restore that file and remove all of the
unnecessary items, which should address the concerns you have raised.  

Rev 2 coming soon.

Don


Re: [PATCH] optoe: driver to read/write SFP/QSFP EEPROMs

2018-06-14 Thread Don Bollinger
On Thu, Jun 14, 2018 at 09:46:36AM +0200, Arnd Bergmann wrote:
> On Thu, Jun 14, 2018 at 2:40 AM, Don Bollinger  wrote:
> > On Mon, Jun 11, 2018 at 03:43:02PM +0200, Arnd Bergmann wrote:
> >> On Mon, Jun 11, 2018 at 6:25 AM, Don Bollinger  
> >> wrote:
> 
> >>
> >> I don't understand this part: I see some older patches introducing an
> >> EEPROM_CLASS, but nothing ever seems to have made it into the
> >> mainline kernel.
> >>
> >> If that class isn't there, this code shouldn't be either. You can always
> >> add it back in case we decide to introduce that class later, but then
> >> I wouldn't make it a compile-time option but just a hard dependency
> >> instead.
> >
> > Thanks for the feedback.
> >
> > Some background will explain how optoe got here...
> 
> Ok, I see. For the upstream submission of course, none of the forked
> kernel code bases matter at all, what we want is a driver that makes
> sense by itself, and none of it should depend on any third party code.

Got it.

> For traditional devices, we would use a header in
> include/linux/platform_data/, but a more modern way of doing this
> would be to use named device properties that are either put
> in the devicetree file (on embedded machines) or added through
> the .properties field when statically declaring an i2c device from
> a PCI device parent.
> 
>   Arnd

Thanks for the guidance.  It turns out that getting into mainline makes
it easier for my partners to consume a header in
include/linux/platform_data.  I'll restore that file and remove all of the
unnecessary items, which should address the concerns you have raised.  

Rev 2 coming soon.

Don


Re: linux-next: manual merge of the y2038 tree with the overlayfs tree

2018-06-14 Thread Stephen Rothwell
Hi all,

This is now a conflict between the overlayfs tree and Linus' tree.  (I
restarted my merging after I noticed that Linus merged more stuff.)

On Fri, 15 Jun 2018 10:43:24 +1000 Stephen Rothwell  
wrote:
>
> Today's linux-next merge of the y2038 tree got conflicts in:
> 
>   fs/inode.c
>   fs/overlayfs/inode.c
>   fs/overlayfs/overlayfs.h
> 
> between various commits from the overlayfs tree and commits:
> 
>   8efd6894ff08 ("fs: add timespec64_truncate()")
>   95582b008388 ("vfs: change inode times to use struct timespec64")
> 
> from the y2038 tree.
> 
> I fixed it up (I copied the resolutions that used to be in the merge of
> the overlayfs into the y2038 tree - see below) and can carry the fix as
> necessary. This is now fixed as far as linux-next is concerned, but any
> non trivial conflicts should be mentioned to your upstream maintainer
> when your tree is submitted for merging.  You may also want to consider
> cooperating with the maintainer of the conflicting tree to minimise any
> particularly complex conflicts.
> 
> -- 
> Cheers,
> Stephen Rothwell
> 
> diff --cc fs/inode.c
> index 9a6fc2f2d220,9fe1f941be02..55373fcba3a5
> --- a/fs/inode.c
> +++ b/fs/inode.c
> @@@ -1635,10 -1681,11 +1635,10 @@@ static int update_time(struct inode *in
>*  This function automatically handles read only file systems and media,
>*  as well as the "noatime" flag and inode specific "noatime" markers.
>*/
>  -bool __atime_needs_update(const struct path *path, struct inode *inode,
>  -  bool rcu)
>  +bool atime_needs_update(const struct path *path, struct inode *inode)
>   {
>   struct vfsmount *mnt = path->mnt;
> - struct timespec now;
> + struct timespec64 now;
>   
>   if (inode->i_flags & S_NOATIME)
>   return false;
> @@@ -1661,10 -1708,10 +1661,10 @@@
>   
>   now = current_time(inode);
>   
> - if (!relatime_need_update(mnt, inode, now))
>  -if (!relatime_need_update(path, inode, timespec64_to_timespec(now), 
> rcu))
> ++if (!relatime_need_update(mnt, inode, timespec64_to_timespec(now)))
>   return false;
>   
> - if (timespec_equal(&inode->i_atime, &now))
> + if (timespec64_equal(&inode->i_atime, &now))
>   return false;
>   
>   return true;
> @@@ -1674,9 -1721,9 +1674,9 @@@ void touch_atime(const struct path *pat
>   {
>   struct vfsmount *mnt = path->mnt;
>   struct inode *inode = d_inode(path->dentry);
> - struct timespec now;
> + struct timespec64 now;
>   
>  -if (!__atime_needs_update(path, inode, false))
>  +if (!atime_needs_update(path, inode))
>   return;
>   
>   if (!sb_start_write_trylock(inode->i_sb))
> diff --cc fs/overlayfs/file.c
> index f801e1175a0b,..c6bce11ac6d3
> mode 100644,00..100644
> --- a/fs/overlayfs/file.c
> +++ b/fs/overlayfs/file.c
> @@@ -1,508 -1,0 +1,508 @@@
>  +/*
>  + * Copyright (C) 2017 Red Hat, Inc.
>  + *
>  + * This program is free software; you can redistribute it and/or modify it
>  + * under the terms of the GNU General Public License version 2 as published 
> by
>  + * the Free Software Foundation.
>  + */
>  +
>  +#include 
>  +#include 
>  +#include 
>  +#include 
>  +#include 
>  +#include "overlayfs.h"
>  +
>  +static char ovl_whatisit(struct inode *inode, struct inode *realinode)
>  +{
>  +if (realinode != ovl_inode_upper(inode))
>  +return 'l';
>  +if (ovl_has_upperdata(inode))
>  +return 'u';
>  +else
>  +return 'm';
>  +}
>  +
>  +static struct file *ovl_open_realfile(const struct file *file,
>  +  struct inode *realinode)
>  +{
>  +struct inode *inode = file_inode(file);
>  +struct file *realfile;
>  +const struct cred *old_cred;
>  +
>  +old_cred = ovl_override_creds(inode->i_sb);
>  +realfile = path_open(&file->f_path, file->f_flags | O_NOATIME,
>  + realinode, current_cred(), false);
>  +revert_creds(old_cred);
>  +
>  +pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n",
>  + file, file, ovl_whatisit(inode, realinode), file->f_flags,
>  + realfile, IS_ERR(realfile) ? 0 : realfile->f_flags);
>  +
>  +return realfile;
>  +}
>  +
>  +#define OVL_SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT)
>  +
>  +static int ovl_change_flags(struct file *file, unsigned int flags)
>  +{
>  +struct inode *inode = file_inode(file);
>  +int err;
>  +
>  +/* No atime modificaton on underlying */
>  +flags |= O_NOATIME;
>  +
>  +/* If some flag changed that cannot be changed then something's amiss */
>  +if (WARN_ON((file->f_flags ^ flags) & ~OVL_SETFL_MASK))
>  +return -EIO;
>  +
>  +flags &= OVL_SETFL_MASK;
>  +
>  +if (((flags ^ file->f_flags) & O_APPEND) && IS_APPEND(inode))
>  +return -EPERM;
>  +
>  +if (flags & O_DIRECT) {
>  +if (!file->f_mapping->a_ops ||
>  +

Re: linux-next: manual merge of the y2038 tree with the overlayfs tree

2018-06-14 Thread Stephen Rothwell
Hi all,

This is now a conflict between the overlayfs tree and Linus' tree.  (I
restarted my merging after I noticed that Linus merged more stuff.)

On Fri, 15 Jun 2018 10:43:24 +1000 Stephen Rothwell  
wrote:
>
> Today's linux-next merge of the y2038 tree got conflicts in:
> 
>   fs/inode.c
>   fs/overlayfs/inode.c
>   fs/overlayfs/overlayfs.h
> 
> between various commits from the overlayfs tree and commits:
> 
>   8efd6894ff08 ("fs: add timespec64_truncate()")
>   95582b008388 ("vfs: change inode times to use struct timespec64")
> 
> from the y2038 tree.
> 
> I fixed it up (I copied the resolutions that used to be in the merge of
> the overlayfs into the y2038 tree - see below) and can carry the fix as
> necessary. This is now fixed as far as linux-next is concerned, but any
> non trivial conflicts should be mentioned to your upstream maintainer
> when your tree is submitted for merging.  You may also want to consider
> cooperating with the maintainer of the conflicting tree to minimise any
> particularly complex conflicts.
> 
> -- 
> Cheers,
> Stephen Rothwell
> 
> diff --cc fs/inode.c
> index 9a6fc2f2d220,9fe1f941be02..55373fcba3a5
> --- a/fs/inode.c
> +++ b/fs/inode.c
> @@@ -1635,10 -1681,11 +1635,10 @@@ static int update_time(struct inode *in
>*  This function automatically handles read only file systems and media,
>*  as well as the "noatime" flag and inode specific "noatime" markers.
>*/
>  -bool __atime_needs_update(const struct path *path, struct inode *inode,
>  -  bool rcu)
>  +bool atime_needs_update(const struct path *path, struct inode *inode)
>   {
>   struct vfsmount *mnt = path->mnt;
> - struct timespec now;
> + struct timespec64 now;
>   
>   if (inode->i_flags & S_NOATIME)
>   return false;
> @@@ -1661,10 -1708,10 +1661,10 @@@
>   
>   now = current_time(inode);
>   
> - if (!relatime_need_update(mnt, inode, now))
>  -if (!relatime_need_update(path, inode, timespec64_to_timespec(now), 
> rcu))
> ++if (!relatime_need_update(mnt, inode, timespec64_to_timespec(now)))
>   return false;
>   
> - if (timespec_equal(&inode->i_atime, &now))
> + if (timespec64_equal(&inode->i_atime, &now))
>   return false;
>   
>   return true;
> @@@ -1674,9 -1721,9 +1674,9 @@@ void touch_atime(const struct path *pat
>   {
>   struct vfsmount *mnt = path->mnt;
>   struct inode *inode = d_inode(path->dentry);
> - struct timespec now;
> + struct timespec64 now;
>   
>  -if (!__atime_needs_update(path, inode, false))
>  +if (!atime_needs_update(path, inode))
>   return;
>   
>   if (!sb_start_write_trylock(inode->i_sb))
> diff --cc fs/overlayfs/file.c
> index f801e1175a0b,..c6bce11ac6d3
> mode 100644,00..100644
> --- a/fs/overlayfs/file.c
> +++ b/fs/overlayfs/file.c
> @@@ -1,508 -1,0 +1,508 @@@
>  +/*
>  + * Copyright (C) 2017 Red Hat, Inc.
>  + *
>  + * This program is free software; you can redistribute it and/or modify it
>  + * under the terms of the GNU General Public License version 2 as published 
> by
>  + * the Free Software Foundation.
>  + */
>  +
>  +#include 
>  +#include 
>  +#include 
>  +#include 
>  +#include 
>  +#include "overlayfs.h"
>  +
>  +static char ovl_whatisit(struct inode *inode, struct inode *realinode)
>  +{
>  +if (realinode != ovl_inode_upper(inode))
>  +return 'l';
>  +if (ovl_has_upperdata(inode))
>  +return 'u';
>  +else
>  +return 'm';
>  +}
>  +
>  +static struct file *ovl_open_realfile(const struct file *file,
>  +  struct inode *realinode)
>  +{
>  +struct inode *inode = file_inode(file);
>  +struct file *realfile;
>  +const struct cred *old_cred;
>  +
>  +old_cred = ovl_override_creds(inode->i_sb);
>  +realfile = path_open(&file->f_path, file->f_flags | O_NOATIME,
>  + realinode, current_cred(), false);
>  +revert_creds(old_cred);
>  +
>  +pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n",
>  + file, file, ovl_whatisit(inode, realinode), file->f_flags,
>  + realfile, IS_ERR(realfile) ? 0 : realfile->f_flags);
>  +
>  +return realfile;
>  +}
>  +
>  +#define OVL_SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT)
>  +
>  +static int ovl_change_flags(struct file *file, unsigned int flags)
>  +{
>  +struct inode *inode = file_inode(file);
>  +int err;
>  +
>  +/* No atime modificaton on underlying */
>  +flags |= O_NOATIME;
>  +
>  +/* If some flag changed that cannot be changed then something's amiss */
>  +if (WARN_ON((file->f_flags ^ flags) & ~OVL_SETFL_MASK))
>  +return -EIO;
>  +
>  +flags &= OVL_SETFL_MASK;
>  +
>  +if (((flags ^ file->f_flags) & O_APPEND) && IS_APPEND(inode))
>  +return -EPERM;
>  +
>  +if (flags & O_DIRECT) {
>  +if (!file->f_mapping->a_ops ||
>  +

Re: [PATCH v3 2/7] serdev: add dev_pm_domain_attach|detach()

2018-06-14 Thread Sean Wang
On Thu, 2018-06-14 at 10:58 +0200, Ulf Hansson wrote:
> On Thu, 14 Jun 2018 at 09:14,  wrote:
> >
> > From: Sean Wang 
> >
> > In order to open up the required power gate before any operation can be
> > effectively performed over the serial bus between CPU and serdev, it's
> > clearly essential to add common attach functions for PM domains to serdev
> > at the probe phase.
> >
> > Similarly, the relevant detach function for the PM domains should be
> > properly and reversely added at the remove phase.
> >
> > Signed-off-by: Sean Wang 
> > Cc: Rob Herring 
> > Cc: Ulf Hansson 
> > Cc: Greg Kroah-Hartman 
> > Cc: Jiri Slaby 
> > Cc: linux-ser...@vger.kernel.org
> > ---
> >  drivers/tty/serdev/core.c | 14 +-
> >  1 file changed, 13 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/tty/serdev/core.c b/drivers/tty/serdev/core.c
> > index df93b72..c93d8ee 100644
> > --- a/drivers/tty/serdev/core.c
> > +++ b/drivers/tty/serdev/core.c
> > @@ -13,6 +13,7 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> >  #include 
> >  #include 
> >
> > @@ -330,8 +331,16 @@ EXPORT_SYMBOL_GPL(serdev_device_set_tiocm);
> >  static int serdev_drv_probe(struct device *dev)
> >  {
> > const struct serdev_device_driver *sdrv = 
> > to_serdev_device_driver(dev->driver);
> > +   int ret;
> > +
> > +   ret = dev_pm_domain_attach(dev, true);
> > +   if (ret != -EPROBE_DEFER) {
> 
> From 4.18 rc1 via commit 919b7308fcc4, dev_pm_domain_attach() will
> return better error codes.
> 
> I suggest to change the above error path to:
> if (ret)
>  return ret;
> 
> Then continue with the probing below.

Thanks for sharing the information with me. I'll happily respin using the
patch because it makes the most sense.

> 
> > +   ret = sdrv->probe(to_serdev_device(dev));
> > +   if (ret)
> > +   dev_pm_domain_detach(dev, true);
> > +   }
> >
> > -   return sdrv->probe(to_serdev_device(dev));
> > +   return ret;
> >  }
> >
> >  static int serdev_drv_remove(struct device *dev)
> > @@ -339,6 +348,9 @@ static int serdev_drv_remove(struct device *dev)
> > const struct serdev_device_driver *sdrv = 
> > to_serdev_device_driver(dev->driver);
> > if (sdrv->remove)
> > sdrv->remove(to_serdev_device(dev));
> > +
> > +   dev_pm_domain_detach(dev, true);
> > +
> > return 0;
> >  }
> >
> > --
> > 2.7.4
> >
> 
> Otherwise, this makes sense to me!
> 

really thanks for your review!

> Kind regards
> Uffe




Re: [PATCH v3 2/7] serdev: add dev_pm_domain_attach|detach()

2018-06-14 Thread Sean Wang
On Thu, 2018-06-14 at 10:58 +0200, Ulf Hansson wrote:
> On Thu, 14 Jun 2018 at 09:14,  wrote:
> >
> > From: Sean Wang 
> >
> > In order to open up the required power gate before any operation can be
> > effectively performed over the serial bus between CPU and serdev, it's
> > clearly essential to add common attach functions for PM domains to serdev
> > at the probe phase.
> >
> > Similarly, the relevant detach function for the PM domains should be
> > properly and reversely added at the remove phase.
> >
> > Signed-off-by: Sean Wang 
> > Cc: Rob Herring 
> > Cc: Ulf Hansson 
> > Cc: Greg Kroah-Hartman 
> > Cc: Jiri Slaby 
> > Cc: linux-ser...@vger.kernel.org
> > ---
> >  drivers/tty/serdev/core.c | 14 +-
> >  1 file changed, 13 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/tty/serdev/core.c b/drivers/tty/serdev/core.c
> > index df93b72..c93d8ee 100644
> > --- a/drivers/tty/serdev/core.c
> > +++ b/drivers/tty/serdev/core.c
> > @@ -13,6 +13,7 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> >  #include 
> >  #include 
> >
> > @@ -330,8 +331,16 @@ EXPORT_SYMBOL_GPL(serdev_device_set_tiocm);
> >  static int serdev_drv_probe(struct device *dev)
> >  {
> > const struct serdev_device_driver *sdrv = 
> > to_serdev_device_driver(dev->driver);
> > +   int ret;
> > +
> > +   ret = dev_pm_domain_attach(dev, true);
> > +   if (ret != -EPROBE_DEFER) {
> 
> From 4.18 rc1 via commit 919b7308fcc4, dev_pm_domain_attach() will
> return better error codes.
> 
> I suggest to change the above error path to:
> if (ret)
>  return ret;
> 
> Then continue with the probing below.

Thanks for sharing the information with me. I'll happily respin using the
patch because it makes the most sense.

> 
> > +   ret = sdrv->probe(to_serdev_device(dev));
> > +   if (ret)
> > +   dev_pm_domain_detach(dev, true);
> > +   }
> >
> > -   return sdrv->probe(to_serdev_device(dev));
> > +   return ret;
> >  }
> >
> >  static int serdev_drv_remove(struct device *dev)
> > @@ -339,6 +348,9 @@ static int serdev_drv_remove(struct device *dev)
> > const struct serdev_device_driver *sdrv = 
> > to_serdev_device_driver(dev->driver);
> > if (sdrv->remove)
> > sdrv->remove(to_serdev_device(dev));
> > +
> > +   dev_pm_domain_detach(dev, true);
> > +
> > return 0;
> >  }
> >
> > --
> > 2.7.4
> >
> 
> Otherwise, this makes sense to me!
> 

Thanks a lot for your review!

> Kind regards
> Uffe




[PATCH v3 0/4] clk: rockchip: support clock controller for px30 SoC

2018-06-14 Thread Elaine Zhang
Change in V3:
[PATCH v3 1/4]: Correct description
[PATCH v3 2/4]: Use an SPDX tag instead.
[PATCH v3 3/4]: Use an SPDX tag instead,
parent_rate might overflow and fix it.
fix up the checkpatch warning.
add more COMPOSITE_xxx_HALFDIV.

Change in V2:
[PATCH v2 2/4]: modify the Author name
[PATCH v2 3/4]: provide a bit more explanation for commit message

Elaine Zhang (4):
  dt-bindings: add bindings for px30 clock controller
  clk: rockchip: add dt-binding header for px30
  clk: rockchip: add support for half divider
  clk: rockchip: add clock controller for px30

 .../bindings/clock/rockchip,px30-cru.txt   |   66 ++
 drivers/clk/rockchip/Makefile  |2 +
 drivers/clk/rockchip/clk-half-divider.c|  231 +
 drivers/clk/rockchip/clk-px30.c| 1080 
 drivers/clk/rockchip/clk.c |   10 +
 drivers/clk/rockchip/clk.h |  126 ++-
 include/dt-bindings/clock/px30-cru.h   |  389 +++
 7 files changed, 1903 insertions(+), 1 deletion(-)
 create mode 100644 
Documentation/devicetree/bindings/clock/rockchip,px30-cru.txt
 create mode 100644 drivers/clk/rockchip/clk-half-divider.c
 create mode 100644 drivers/clk/rockchip/clk-px30.c
 create mode 100644 include/dt-bindings/clock/px30-cru.h

-- 
1.9.1




[PATCH v3 0/4] clk: rockchip: support clock controller for px30 SoC

2018-06-14 Thread Elaine Zhang
Change in V3:
[PATCH v3 1/4]: Correct description
[PATCH v3 2/4]: Use an SPDX tag instead.
[PATCH v3 3/4]: Use an SPDX tag instead,
parent_rate might overflow and fix it.
fix up the checkpatch warning.
add more COMPOSITE_xxx_HALFDIV.

Change in V2:
[PATCH v2 2/4]: modify the Author name
[PATCH v2 3/4]: provide a bit more explanation for commit message

Elaine Zhang (4):
  dt-bindings: add bindings for px30 clock controller
  clk: rockchip: add dt-binding header for px30
  clk: rockchip: add support for half divider
  clk: rockchip: add clock controller for px30

 .../bindings/clock/rockchip,px30-cru.txt   |   66 ++
 drivers/clk/rockchip/Makefile  |2 +
 drivers/clk/rockchip/clk-half-divider.c|  231 +
 drivers/clk/rockchip/clk-px30.c| 1080 
 drivers/clk/rockchip/clk.c |   10 +
 drivers/clk/rockchip/clk.h |  126 ++-
 include/dt-bindings/clock/px30-cru.h   |  389 +++
 7 files changed, 1903 insertions(+), 1 deletion(-)
 create mode 100644 
Documentation/devicetree/bindings/clock/rockchip,px30-cru.txt
 create mode 100644 drivers/clk/rockchip/clk-half-divider.c
 create mode 100644 drivers/clk/rockchip/clk-px30.c
 create mode 100644 include/dt-bindings/clock/px30-cru.h

-- 
1.9.1




[PATCH v3 1/4] dt-bindings: add bindings for px30 clock controller

2018-06-14 Thread Elaine Zhang
Add devicetree bindings for the Rockchip CRU which is found on
Rockchip SoCs.

Signed-off-by: Elaine Zhang 
---
 .../bindings/clock/rockchip,px30-cru.txt   | 66 ++
 1 file changed, 66 insertions(+)
 create mode 100644 
Documentation/devicetree/bindings/clock/rockchip,px30-cru.txt

diff --git a/Documentation/devicetree/bindings/clock/rockchip,px30-cru.txt 
b/Documentation/devicetree/bindings/clock/rockchip,px30-cru.txt
new file mode 100644
index ..1077692d6489
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/rockchip,px30-cru.txt
@@ -0,0 +1,66 @@
+* Rockchip PX30 Clock and Reset Unit
+
+The PX30 clock controller generates and supplies clock to various
+controllers within the SoC and also implements a reset controller for SoC
+peripherals.
+
+Required Properties:
+
+- compatible: PMU for CRU should be "rockchip,px30-pmucru"
+- compatible: CRU should be "rockchip,px30-cru"
+- reg: physical base address of the controller and length of memory mapped
+  region.
+- #clock-cells: should be 1.
+- #reset-cells: should be 1.
+
+Optional Properties:
+
+- rockchip,grf: phandle to the syscon managing the "general register files"
+  If missing, pll rates are not changeable, due to the missing pll lock status.
+
+Each clock is assigned an identifier and client nodes can use this identifier
+to specify the clock which they consume. All available clocks are defined as
+preprocessor macros in the dt-bindings/clock/px30-cru.h headers and can be
+used in device tree sources. Similar macros exist for the reset sources in
+these files.
+
+External clocks:
+
+There are several clocks that are generated outside the SoC. It is expected
+that they are defined using standard clock bindings with following
+clock-output-names:
+ - "xin24m" - crystal input - required,
+ - "xin32k" - rtc clock - optional,
+ - "i2sx_clkin" - external I2S clock - optional,
+ - "gmac_clkin" - external GMAC clock - optional
+
+Example: Clock controller node:
+
+   pmucru: clock-controller@ff2bc000 {
+   compatible = "rockchip,px30-pmucru";
+   reg = <0x0 0xff2bc000 0x0 0x1000>;
+   #clock-cells = <1>;
+   #reset-cells = <1>;
+   };
+
+   cru: clock-controller@ff2b {
+   compatible = "rockchip,px30-cru";
+   reg = <0x0 0xff2b 0x0 0x1000>;
+   rockchip,grf = <>;
+   #clock-cells = <1>;
+   #reset-cells = <1>;
+   };
+
+Example: UART controller node that consumes the clock generated by the clock
+  controller:
+
+   uart0: serial@ff03 {
+   compatible = "rockchip,px30-uart", "snps,dw-apb-uart";
+   reg = <0x0 0xff03 0x0 0x100>;
+   interrupts = ;
+   clocks = < SCLK_UART0_PMU>, < PCLK_UART0_PMU>;
+   clock-names = "baudclk", "apb_pclk";
+   reg-shift = <2>;
+   reg-io-width = <4>;
+   };
+
-- 
1.9.1




[PATCH v3 1/4] dt-bindings: add bindings for px30 clock controller

2018-06-14 Thread Elaine Zhang
Add devicetree bindings for the Rockchip CRU which is found on
Rockchip SoCs.

Signed-off-by: Elaine Zhang 
---
 .../bindings/clock/rockchip,px30-cru.txt   | 66 ++
 1 file changed, 66 insertions(+)
 create mode 100644 
Documentation/devicetree/bindings/clock/rockchip,px30-cru.txt

diff --git a/Documentation/devicetree/bindings/clock/rockchip,px30-cru.txt 
b/Documentation/devicetree/bindings/clock/rockchip,px30-cru.txt
new file mode 100644
index ..1077692d6489
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/rockchip,px30-cru.txt
@@ -0,0 +1,66 @@
+* Rockchip PX30 Clock and Reset Unit
+
+The PX30 clock controller generates and supplies clock to various
+controllers within the SoC and also implements a reset controller for SoC
+peripherals.
+
+Required Properties:
+
+- compatible: PMU for CRU should be "rockchip,px30-pmucru"
+- compatible: CRU should be "rockchip,px30-cru"
+- reg: physical base address of the controller and length of memory mapped
+  region.
+- #clock-cells: should be 1.
+- #reset-cells: should be 1.
+
+Optional Properties:
+
+- rockchip,grf: phandle to the syscon managing the "general register files"
+  If missing, pll rates are not changeable, due to the missing pll lock status.
+
+Each clock is assigned an identifier and client nodes can use this identifier
+to specify the clock which they consume. All available clocks are defined as
+preprocessor macros in the dt-bindings/clock/px30-cru.h headers and can be
+used in device tree sources. Similar macros exist for the reset sources in
+these files.
+
+External clocks:
+
+There are several clocks that are generated outside the SoC. It is expected
+that they are defined using standard clock bindings with following
+clock-output-names:
+ - "xin24m" - crystal input - required,
+ - "xin32k" - rtc clock - optional,
+ - "i2sx_clkin" - external I2S clock - optional,
+ - "gmac_clkin" - external GMAC clock - optional
+
+Example: Clock controller node:
+
+   pmucru: clock-controller@ff2bc000 {
+   compatible = "rockchip,px30-pmucru";
+   reg = <0x0 0xff2bc000 0x0 0x1000>;
+   #clock-cells = <1>;
+   #reset-cells = <1>;
+   };
+
+   cru: clock-controller@ff2b {
+   compatible = "rockchip,px30-cru";
+   reg = <0x0 0xff2b 0x0 0x1000>;
+   rockchip,grf = <>;
+   #clock-cells = <1>;
+   #reset-cells = <1>;
+   };
+
+Example: UART controller node that consumes the clock generated by the clock
+  controller:
+
+   uart0: serial@ff03 {
+   compatible = "rockchip,px30-uart", "snps,dw-apb-uart";
+   reg = <0x0 0xff03 0x0 0x100>;
+   interrupts = ;
+   clocks = < SCLK_UART0_PMU>, < PCLK_UART0_PMU>;
+   clock-names = "baudclk", "apb_pclk";
+   reg-shift = <2>;
+   reg-io-width = <4>;
+   };
+
-- 
1.9.1




[PATCH v3 4/4] clk: rockchip: add clock controller for px30

2018-06-14 Thread Elaine Zhang
Add the clock tree definition for the new px30 SoC.

Signed-off-by: Elaine Zhang 
---
 drivers/clk/rockchip/Makefile   |1 +
 drivers/clk/rockchip/clk-px30.c | 1080 +++
 drivers/clk/rockchip/clk.h  |   41 +-
 3 files changed, 1121 insertions(+), 1 deletion(-)
 create mode 100644 drivers/clk/rockchip/clk-px30.c

diff --git a/drivers/clk/rockchip/Makefile b/drivers/clk/rockchip/Makefile
index 2b380fafd232..e8fb6069649c 100644
--- a/drivers/clk/rockchip/Makefile
+++ b/drivers/clk/rockchip/Makefile
@@ -14,6 +14,7 @@ obj-y += clk-muxgrf.o
 obj-y  += clk-ddr.o
 obj-$(CONFIG_RESET_CONTROLLER) += softrst.o
 
+obj-y  += clk-px30.o
 obj-y  += clk-rv1108.o
 obj-y  += clk-rk3036.o
 obj-y  += clk-rk3128.o
diff --git a/drivers/clk/rockchip/clk-px30.c b/drivers/clk/rockchip/clk-px30.c
new file mode 100644
index ..b4a0223e83ca
--- /dev/null
+++ b/drivers/clk/rockchip/clk-px30.c
@@ -0,0 +1,1080 @@
+/*
+ * Copyright (c) 2018 Rockchip Electronics Co. Ltd.
+ * Author: Elaine Zhang
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include "clk.h"
+
+#define PX30_GRF_SOC_STATUS0   0x480
+
+enum px30_plls {
+   apll, dpll, cpll, npll, apll_b_h, apll_b_l,
+};
+
+enum px30_pmu_plls {
+   gpll,
+};
+
+static struct rockchip_pll_rate_table px30_pll_rates[] = {
+   /* _mhz, _refdiv, _fbdiv, _postdiv1, _postdiv2, _dsmpd, _frac */
+   RK3036_PLL_RATE(160800, 1, 67, 1, 1, 1, 0),
+   RK3036_PLL_RATE(158400, 1, 66, 1, 1, 1, 0),
+   RK3036_PLL_RATE(156000, 1, 65, 1, 1, 1, 0),
+   RK3036_PLL_RATE(153600, 1, 64, 1, 1, 1, 0),
+   RK3036_PLL_RATE(151200, 1, 63, 1, 1, 1, 0),
+   RK3036_PLL_RATE(148800, 1, 62, 1, 1, 1, 0),
+   RK3036_PLL_RATE(146400, 1, 61, 1, 1, 1, 0),
+   RK3036_PLL_RATE(144000, 1, 60, 1, 1, 1, 0),
+   RK3036_PLL_RATE(141600, 1, 59, 1, 1, 1, 0),
+   RK3036_PLL_RATE(139200, 1, 58, 1, 1, 1, 0),
+   RK3036_PLL_RATE(136800, 1, 57, 1, 1, 1, 0),
+   RK3036_PLL_RATE(134400, 1, 56, 1, 1, 1, 0),
+   RK3036_PLL_RATE(132000, 1, 55, 1, 1, 1, 0),
+   RK3036_PLL_RATE(129600, 1, 54, 1, 1, 1, 0),
+   RK3036_PLL_RATE(127200, 1, 53, 1, 1, 1, 0),
+   RK3036_PLL_RATE(124800, 1, 52, 1, 1, 1, 0),
+   RK3036_PLL_RATE(12, 1, 50, 1, 1, 1, 0),
+   RK3036_PLL_RATE(118800, 2, 99, 1, 1, 1, 0),
+   RK3036_PLL_RATE(110400, 1, 46, 1, 1, 1, 0),
+   RK3036_PLL_RATE(11, 12, 550, 1, 1, 1, 0),
+   RK3036_PLL_RATE(100800, 1, 84, 2, 1, 1, 0),
+   RK3036_PLL_RATE(10, 6, 500, 2, 1, 1, 0),
+   RK3036_PLL_RATE(98400, 1, 82, 2, 1, 1, 0),
+   RK3036_PLL_RATE(96000, 1, 80, 2, 1, 1, 0),
+   RK3036_PLL_RATE(93600, 1, 78, 2, 1, 1, 0),
+   RK3036_PLL_RATE(91200, 1, 76, 2, 1, 1, 0),
+   RK3036_PLL_RATE(9, 4, 300, 2, 1, 1, 0),
+   RK3036_PLL_RATE(88800, 1, 74, 2, 1, 1, 0),
+   RK3036_PLL_RATE(86400, 1, 72, 2, 1, 1, 0),
+   RK3036_PLL_RATE(84000, 1, 70, 2, 1, 1, 0),
+   RK3036_PLL_RATE(81600, 1, 68, 2, 1, 1, 0),
+   RK3036_PLL_RATE(8, 6, 400, 2, 1, 1, 0),
+   RK3036_PLL_RATE(7, 6, 350, 2, 1, 1, 0),
+   RK3036_PLL_RATE(69600, 1, 58, 2, 1, 1, 0),
+   RK3036_PLL_RATE(62400, 1, 52, 2, 1, 1, 0),
+   RK3036_PLL_RATE(6, 1, 75, 3, 1, 1, 0),
+   RK3036_PLL_RATE(59400, 2, 99, 2, 1, 1, 0),
+   RK3036_PLL_RATE(50400, 1, 63, 3, 1, 1, 0),
+   RK3036_PLL_RATE(5, 6, 250, 2, 1, 1, 0),
+   RK3036_PLL_RATE(40800, 1, 68, 2, 2, 1, 0),
+   RK3036_PLL_RATE(31200, 1, 52, 2, 2, 1, 0),
+   RK3036_PLL_RATE(21600, 1, 72, 4, 2, 1, 0),
+   RK3036_PLL_RATE(9600, 1, 64, 4, 4, 1, 0),
+   { /* sentinel */ },
+};
+
+#define PX30_DIV_ACLKM_MASK0x7
+#define PX30_DIV_ACLKM_SHIFT   12
+#define PX30_DIV_PCLK_DBG_MASK 0xf
+#define PX30_DIV_PCLK_DBG_SHIFT8
+
+#define PX30_CLKSEL0(_aclk_core, _pclk_dbg)\
+{  \
+   .reg = PX30_CLKSEL_CON(0),  \
+   .val = HIWORD_UPDATE(_aclk_core, PX30_DIV_ACLKM_MASK,   \
+PX30_DIV_ACLKM_SHIFT) |\
+  HIWORD_UPDATE(_pclk_dbg, PX30_DIV_PCLK_DBG_MASK, \
+

[PATCH v3 4/4] clk: rockchip: add clock controller for px30

2018-06-14 Thread Elaine Zhang
Add the clock tree definition for the new px30 SoC.

Signed-off-by: Elaine Zhang 
---
 drivers/clk/rockchip/Makefile   |1 +
 drivers/clk/rockchip/clk-px30.c | 1080 +++
 drivers/clk/rockchip/clk.h  |   41 +-
 3 files changed, 1121 insertions(+), 1 deletion(-)
 create mode 100644 drivers/clk/rockchip/clk-px30.c

diff --git a/drivers/clk/rockchip/Makefile b/drivers/clk/rockchip/Makefile
index 2b380fafd232..e8fb6069649c 100644
--- a/drivers/clk/rockchip/Makefile
+++ b/drivers/clk/rockchip/Makefile
@@ -14,6 +14,7 @@ obj-y += clk-muxgrf.o
 obj-y  += clk-ddr.o
 obj-$(CONFIG_RESET_CONTROLLER) += softrst.o
 
+obj-y  += clk-px30.o
 obj-y  += clk-rv1108.o
 obj-y  += clk-rk3036.o
 obj-y  += clk-rk3128.o
diff --git a/drivers/clk/rockchip/clk-px30.c b/drivers/clk/rockchip/clk-px30.c
new file mode 100644
index ..b4a0223e83ca
--- /dev/null
+++ b/drivers/clk/rockchip/clk-px30.c
@@ -0,0 +1,1080 @@
+/*
+ * Copyright (c) 2018 Rockchip Electronics Co. Ltd.
+ * Author: Elaine Zhang
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include "clk.h"
+
+#define PX30_GRF_SOC_STATUS0   0x480
+
+enum px30_plls {
+   apll, dpll, cpll, npll, apll_b_h, apll_b_l,
+};
+
+enum px30_pmu_plls {
+   gpll,
+};
+
+static struct rockchip_pll_rate_table px30_pll_rates[] = {
+   /* _mhz, _refdiv, _fbdiv, _postdiv1, _postdiv2, _dsmpd, _frac */
+   RK3036_PLL_RATE(160800, 1, 67, 1, 1, 1, 0),
+   RK3036_PLL_RATE(158400, 1, 66, 1, 1, 1, 0),
+   RK3036_PLL_RATE(156000, 1, 65, 1, 1, 1, 0),
+   RK3036_PLL_RATE(153600, 1, 64, 1, 1, 1, 0),
+   RK3036_PLL_RATE(151200, 1, 63, 1, 1, 1, 0),
+   RK3036_PLL_RATE(148800, 1, 62, 1, 1, 1, 0),
+   RK3036_PLL_RATE(146400, 1, 61, 1, 1, 1, 0),
+   RK3036_PLL_RATE(144000, 1, 60, 1, 1, 1, 0),
+   RK3036_PLL_RATE(141600, 1, 59, 1, 1, 1, 0),
+   RK3036_PLL_RATE(139200, 1, 58, 1, 1, 1, 0),
+   RK3036_PLL_RATE(136800, 1, 57, 1, 1, 1, 0),
+   RK3036_PLL_RATE(134400, 1, 56, 1, 1, 1, 0),
+   RK3036_PLL_RATE(132000, 1, 55, 1, 1, 1, 0),
+   RK3036_PLL_RATE(129600, 1, 54, 1, 1, 1, 0),
+   RK3036_PLL_RATE(127200, 1, 53, 1, 1, 1, 0),
+   RK3036_PLL_RATE(124800, 1, 52, 1, 1, 1, 0),
+   RK3036_PLL_RATE(12, 1, 50, 1, 1, 1, 0),
+   RK3036_PLL_RATE(118800, 2, 99, 1, 1, 1, 0),
+   RK3036_PLL_RATE(110400, 1, 46, 1, 1, 1, 0),
+   RK3036_PLL_RATE(11, 12, 550, 1, 1, 1, 0),
+   RK3036_PLL_RATE(100800, 1, 84, 2, 1, 1, 0),
+   RK3036_PLL_RATE(10, 6, 500, 2, 1, 1, 0),
+   RK3036_PLL_RATE(98400, 1, 82, 2, 1, 1, 0),
+   RK3036_PLL_RATE(96000, 1, 80, 2, 1, 1, 0),
+   RK3036_PLL_RATE(93600, 1, 78, 2, 1, 1, 0),
+   RK3036_PLL_RATE(91200, 1, 76, 2, 1, 1, 0),
+   RK3036_PLL_RATE(9, 4, 300, 2, 1, 1, 0),
+   RK3036_PLL_RATE(88800, 1, 74, 2, 1, 1, 0),
+   RK3036_PLL_RATE(86400, 1, 72, 2, 1, 1, 0),
+   RK3036_PLL_RATE(84000, 1, 70, 2, 1, 1, 0),
+   RK3036_PLL_RATE(81600, 1, 68, 2, 1, 1, 0),
+   RK3036_PLL_RATE(8, 6, 400, 2, 1, 1, 0),
+   RK3036_PLL_RATE(7, 6, 350, 2, 1, 1, 0),
+   RK3036_PLL_RATE(69600, 1, 58, 2, 1, 1, 0),
+   RK3036_PLL_RATE(62400, 1, 52, 2, 1, 1, 0),
+   RK3036_PLL_RATE(6, 1, 75, 3, 1, 1, 0),
+   RK3036_PLL_RATE(59400, 2, 99, 2, 1, 1, 0),
+   RK3036_PLL_RATE(50400, 1, 63, 3, 1, 1, 0),
+   RK3036_PLL_RATE(5, 6, 250, 2, 1, 1, 0),
+   RK3036_PLL_RATE(40800, 1, 68, 2, 2, 1, 0),
+   RK3036_PLL_RATE(31200, 1, 52, 2, 2, 1, 0),
+   RK3036_PLL_RATE(21600, 1, 72, 4, 2, 1, 0),
+   RK3036_PLL_RATE(9600, 1, 64, 4, 4, 1, 0),
+   { /* sentinel */ },
+};
+
+#define PX30_DIV_ACLKM_MASK0x7
+#define PX30_DIV_ACLKM_SHIFT   12
+#define PX30_DIV_PCLK_DBG_MASK 0xf
+#define PX30_DIV_PCLK_DBG_SHIFT8
+
+#define PX30_CLKSEL0(_aclk_core, _pclk_dbg)\
+{  \
+   .reg = PX30_CLKSEL_CON(0),  \
+   .val = HIWORD_UPDATE(_aclk_core, PX30_DIV_ACLKM_MASK,   \
+PX30_DIV_ACLKM_SHIFT) |\
+  HIWORD_UPDATE(_pclk_dbg, PX30_DIV_PCLK_DBG_MASK, \
+

[PATCH v3 2/4] clk: rockchip: add dt-binding header for px30

2018-06-14 Thread Elaine Zhang
Add the dt-bindings header for the px30, that gets shared between
the clock controller and the clock references in the dts.
Add softreset ID for px30.

Signed-off-by: Elaine Zhang 
Reviewed-by: Rob Herring 
---
 include/dt-bindings/clock/px30-cru.h | 389 +++
 1 file changed, 389 insertions(+)
 create mode 100644 include/dt-bindings/clock/px30-cru.h

diff --git a/include/dt-bindings/clock/px30-cru.h 
b/include/dt-bindings/clock/px30-cru.h
new file mode 100644
index ..00101479f7c4
--- /dev/null
+++ b/include/dt-bindings/clock/px30-cru.h
@@ -0,0 +1,389 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _DT_BINDINGS_CLK_ROCKCHIP_PX30_H
+#define _DT_BINDINGS_CLK_ROCKCHIP_PX30_H
+
+/* core clocks */
+#define PLL_APLL   1
+#define PLL_DPLL   2
+#define PLL_CPLL   3
+#define PLL_NPLL   4
+#define APLL_BOOST_H   5
+#define APLL_BOOST_L   6
+#define ARMCLK 7
+
+/* sclk gates (special clocks) */
+#define USB480M14
+#define SCLK_PDM   15
+#define SCLK_I2S0_TX   16
+#define SCLK_I2S0_TX_OUT   17
+#define SCLK_I2S0_RX   18
+#define SCLK_I2S0_RX_OUT   19
+#define SCLK_I2S1  20
+#define SCLK_I2S1_OUT  21
+#define SCLK_I2S2  22
+#define SCLK_I2S2_OUT  23
+#define SCLK_UART1 24
+#define SCLK_UART2 25
+#define SCLK_UART3 26
+#define SCLK_UART4 27
+#define SCLK_UART5 28
+#define SCLK_I2C0  29
+#define SCLK_I2C1  30
+#define SCLK_I2C2  31
+#define SCLK_I2C3  32
+#define SCLK_I2C4  33
+#define SCLK_PWM0  34
+#define SCLK_PWM1  35
+#define SCLK_SPI0  36
+#define SCLK_SPI1  37
+#define SCLK_TIMER038
+#define SCLK_TIMER139
+#define SCLK_TIMER240
+#define SCLK_TIMER341
+#define SCLK_TIMER442
+#define SCLK_TIMER543
+#define SCLK_TSADC 44
+#define SCLK_SARADC45
+#define SCLK_OTP   46
+#define SCLK_OTP_USR   47
+#define SCLK_CRYPTO48
+#define SCLK_CRYPTO_APK49
+#define SCLK_DDRC  50
+#define SCLK_ISP   51
+#define SCLK_CIF_OUT   52
+#define SCLK_RGA_CORE  53
+#define SCLK_VOPB_PWM  54
+#define SCLK_NANDC 55
+#define SCLK_SDIO  56
+#define SCLK_EMMC  57
+#define SCLK_SFC   58
+#define SCLK_SDMMC 59
+#define SCLK_OTG_ADP   60
+#define SCLK_GMAC_SRC  61
+#define SCLK_GMAC  62
+#define SCLK_GMAC_RX_TX63
+#define SCLK_MAC_REF   64
+#define SCLK_MAC_REFOUT65
+#define SCLK_MAC_OUT   66
+#define SCLK_SDMMC_DRV 67
+#define SCLK_SDMMC_SAMPLE  68
+#define SCLK_SDIO_DRV  69
+#define SCLK_SDIO_SAMPLE   70
+#define SCLK_EMMC_DRV  71
+#define SCLK_EMMC_SAMPLE   72
+#define SCLK_GPU   73
+#define SCLK_PVTM  74
+#define SCLK_CORE_VPU  75
+#define SCLK_GMAC_RMII 76
+#define SCLK_UART2_SRC 77
+#define SCLK_NANDC_DIV 78
+#define SCLK_NANDC_DIV50   79
+#define SCLK_SDIO_DIV  80
+#define SCLK_SDIO_DIV5081
+#define SCLK_EMMC_DIV  82
+#define SCLK_EMMC_DIV5083
+#define SCLK_DDRCLK84
+#define SCLK_UART1_SRC 85
+
+/* dclk gates */
+#define DCLK_VOPB  150
+#define DCLK_VOPL  151
+
+/* aclk gates */
+#define ACLK_GPU   170
+#define ACLK_BUS_PRE   171
+#define ACLK_CRYPTO172
+#define ACLK_VI_PRE173
+#define ACLK_VO_PRE174
+#define ACLK_VPU   175
+#define ACLK_PERI_PRE  176
+#define ACLK_GMAC  178
+#define ACLK_CIF   179
+#define ACLK_ISP   180
+#define ACLK_VOPB  181
+#define ACLK_VOPL  182
+#define ACLK_RGA   183
+#define ACLK_GIC   184
+#define ACLK_DCF   186
+#define ACLK_DMAC  187
+#define ACLK_BUS_SRC   188
+#define ACLK_PERI_SRC  189
+
+/* hclk gates */
+#define HCLK_BUS_PRE   240
+#define HCLK_CRYPTO241
+#define HCLK_VI_PRE242
+#define HCLK_VO_PRE243
+#define HCLK_VPU   244
+#define HCLK_PERI_PRE  245
+#define HCLK_MMC_NAND  246
+#define HCLK_SDMMC 247
+#define HCLK_USB   248
+#define HCLK_CIF   249
+#define HCLK_ISP   250
+#define HCLK_VOPB  251
+#define HCLK_VOPL  252
+#define HCLK_RGA   253
+#define HCLK_NANDC 254
+#define HCLK_SDIO  255
+#define HCLK_EMMC  256
+#define 

[PATCH v3 3/4] clk: rockchip: add support for half divider

2018-06-14 Thread Elaine Zhang
The new Rockchip SoCs have an optional half divider.
The formula is:
freq_out = 2*freq_in / (2*div + 3)
This is the same for all of the new SoCs.

So we use "branch_half_divider" + "COMPOSITE_NOMUX_HALFDIV \
DIV_HALF \ COMPOSITE_HALFDIV \ COMPOSITE_NOGATE_HALFDIV"
to hook that special divider clock-type into our clock-tree.

Signed-off-by: Elaine Zhang 
---
 drivers/clk/rockchip/Makefile   |   1 +
 drivers/clk/rockchip/clk-half-divider.c | 230 
 drivers/clk/rockchip/clk.c  |  10 ++
 drivers/clk/rockchip/clk.h  |  85 
 4 files changed, 326 insertions(+)
 create mode 100644 drivers/clk/rockchip/clk-half-divider.c

diff --git a/drivers/clk/rockchip/Makefile b/drivers/clk/rockchip/Makefile
index 59b8d320960a..2b380fafd232 100644
--- a/drivers/clk/rockchip/Makefile
+++ b/drivers/clk/rockchip/Makefile
@@ -7,6 +7,7 @@ obj-y   += clk-rockchip.o
 obj-y  += clk.o
 obj-y  += clk-pll.o
 obj-y  += clk-cpu.o
+obj-y  += clk-half-divider.o
 obj-y  += clk-inverter.o
 obj-y  += clk-mmc-phase.o
 obj-y  += clk-muxgrf.o
diff --git a/drivers/clk/rockchip/clk-half-divider.c 
b/drivers/clk/rockchip/clk-half-divider.c
new file mode 100644
index ..fb7a6501e0c1
--- /dev/null
+++ b/drivers/clk/rockchip/clk-half-divider.c
@@ -0,0 +1,230 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018 Fuzhou Rockchip Electronics Co., Ltd
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include "clk.h"
+
+#define div_mask(width)((1 << (width)) - 1)
+
+static bool _is_best_half_div(unsigned long rate, unsigned long now,
+ unsigned long best, unsigned long flags)
+{
+   if (flags & CLK_DIVIDER_ROUND_CLOSEST)
+   return abs(rate - now) < abs(rate - best);
+
+   return now <= rate && now > best;
+}
+
+static unsigned long clk_half_divider_recalc_rate(struct clk_hw *hw,
+ unsigned long parent_rate)
+{
+   struct clk_divider *divider = to_clk_divider(hw);
+   unsigned int val;
+
+   val = clk_readl(divider->reg) >> divider->shift;
+   val &= div_mask(divider->width);
+   val = val * 2 + 3;
+
+   return DIV_ROUND_UP_ULL(((u64)parent_rate * 2), val);
+}
+
+static int clk_half_divider_bestdiv(struct clk_hw *hw, unsigned long rate,
+   unsigned long *best_parent_rate, u8 width,
+   unsigned long flags)
+{
+   unsigned int i, bestdiv = 0;
+   unsigned long parent_rate, best = 0, now, maxdiv;
+   unsigned long parent_rate_saved = *best_parent_rate;
+
+   if (!rate)
+   rate = 1;
+
+   maxdiv = div_mask(width);
+
+   if (!(clk_hw_get_flags(hw) & CLK_SET_RATE_PARENT)) {
+   parent_rate = *best_parent_rate;
+   bestdiv = DIV_ROUND_UP_ULL(((u64)parent_rate * 2), rate);
+   if (bestdiv < 3)
+   bestdiv = 0;
+   else
+   bestdiv = (bestdiv - 3) / 2;
+   bestdiv = bestdiv > maxdiv ? maxdiv : bestdiv;
+   return bestdiv;
+   }
+
+   /*
+* The maximum divider we can use without overflowing
+* unsigned long in rate * i below
+*/
+   maxdiv = min(ULONG_MAX / rate, maxdiv);
+
+   for (i = 0; i <= maxdiv; i++) {
+   if (((u64)rate * (i * 2 + 3)) == ((u64)parent_rate_saved * 2)) {
+   /*
+* It's the most ideal case if the requested rate can be
+* divided from parent clock without needing to change
+* parent rate, so return the divider immediately.
+*/
+   *best_parent_rate = parent_rate_saved;
+   return i;
+   }
+   parent_rate = clk_hw_round_rate(clk_hw_get_parent(hw),
+   ((u64)rate * (i * 2 + 3)) / 2);
+   now = DIV_ROUND_UP_ULL(((u64)parent_rate * 2),
+  (i * 2 + 3));
+
+   if (_is_best_half_div(rate, now, best, flags)) {
+   bestdiv = i;
+   best = now;
+   *best_parent_rate = parent_rate;
+   }
+   }
+
+   if (!bestdiv) {
+   bestdiv = div_mask(width);
+   *best_parent_rate = clk_hw_round_rate(clk_hw_get_parent(hw), 1);
+   }
+
+   return bestdiv;
+}
+
+static long clk_half_divider_round_rate(struct clk_hw *hw, unsigned long rate,
+   unsigned long *prate)
+{
+   struct clk_divider *divider = to_clk_divider(hw);
+   int div;
+
+   div = clk_half_divider_bestdiv(hw, rate, prate,
+  divider->width,
+  divider->flags);
+
+   return 

[PATCH v3 2/4] clk: rockchip: add dt-binding header for px30

2018-06-14 Thread Elaine Zhang
Add the dt-bindings header for the px30, that gets shared between
the clock controller and the clock references in the dts.
Add softreset ID for px30.

Signed-off-by: Elaine Zhang 
Reviewed-by: Rob Herring 
---
 include/dt-bindings/clock/px30-cru.h | 389 +++
 1 file changed, 389 insertions(+)
 create mode 100644 include/dt-bindings/clock/px30-cru.h

diff --git a/include/dt-bindings/clock/px30-cru.h 
b/include/dt-bindings/clock/px30-cru.h
new file mode 100644
index ..00101479f7c4
--- /dev/null
+++ b/include/dt-bindings/clock/px30-cru.h
@@ -0,0 +1,389 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _DT_BINDINGS_CLK_ROCKCHIP_PX30_H
+#define _DT_BINDINGS_CLK_ROCKCHIP_PX30_H
+
+/* core clocks */
+#define PLL_APLL   1
+#define PLL_DPLL   2
+#define PLL_CPLL   3
+#define PLL_NPLL   4
+#define APLL_BOOST_H   5
+#define APLL_BOOST_L   6
+#define ARMCLK 7
+
+/* sclk gates (special clocks) */
+#define USB480M14
+#define SCLK_PDM   15
+#define SCLK_I2S0_TX   16
+#define SCLK_I2S0_TX_OUT   17
+#define SCLK_I2S0_RX   18
+#define SCLK_I2S0_RX_OUT   19
+#define SCLK_I2S1  20
+#define SCLK_I2S1_OUT  21
+#define SCLK_I2S2  22
+#define SCLK_I2S2_OUT  23
+#define SCLK_UART1 24
+#define SCLK_UART2 25
+#define SCLK_UART3 26
+#define SCLK_UART4 27
+#define SCLK_UART5 28
+#define SCLK_I2C0  29
+#define SCLK_I2C1  30
+#define SCLK_I2C2  31
+#define SCLK_I2C3  32
+#define SCLK_I2C4  33
+#define SCLK_PWM0  34
+#define SCLK_PWM1  35
+#define SCLK_SPI0  36
+#define SCLK_SPI1  37
+#define SCLK_TIMER038
+#define SCLK_TIMER139
+#define SCLK_TIMER240
+#define SCLK_TIMER341
+#define SCLK_TIMER442
+#define SCLK_TIMER543
+#define SCLK_TSADC 44
+#define SCLK_SARADC45
+#define SCLK_OTP   46
+#define SCLK_OTP_USR   47
+#define SCLK_CRYPTO48
+#define SCLK_CRYPTO_APK49
+#define SCLK_DDRC  50
+#define SCLK_ISP   51
+#define SCLK_CIF_OUT   52
+#define SCLK_RGA_CORE  53
+#define SCLK_VOPB_PWM  54
+#define SCLK_NANDC 55
+#define SCLK_SDIO  56
+#define SCLK_EMMC  57
+#define SCLK_SFC   58
+#define SCLK_SDMMC 59
+#define SCLK_OTG_ADP   60
+#define SCLK_GMAC_SRC  61
+#define SCLK_GMAC  62
+#define SCLK_GMAC_RX_TX63
+#define SCLK_MAC_REF   64
+#define SCLK_MAC_REFOUT65
+#define SCLK_MAC_OUT   66
+#define SCLK_SDMMC_DRV 67
+#define SCLK_SDMMC_SAMPLE  68
+#define SCLK_SDIO_DRV  69
+#define SCLK_SDIO_SAMPLE   70
+#define SCLK_EMMC_DRV  71
+#define SCLK_EMMC_SAMPLE   72
+#define SCLK_GPU   73
+#define SCLK_PVTM  74
+#define SCLK_CORE_VPU  75
+#define SCLK_GMAC_RMII 76
+#define SCLK_UART2_SRC 77
+#define SCLK_NANDC_DIV 78
+#define SCLK_NANDC_DIV50   79
+#define SCLK_SDIO_DIV  80
+#define SCLK_SDIO_DIV5081
+#define SCLK_EMMC_DIV  82
+#define SCLK_EMMC_DIV5083
+#define SCLK_DDRCLK84
+#define SCLK_UART1_SRC 85
+
+/* dclk gates */
+#define DCLK_VOPB  150
+#define DCLK_VOPL  151
+
+/* aclk gates */
+#define ACLK_GPU   170
+#define ACLK_BUS_PRE   171
+#define ACLK_CRYPTO172
+#define ACLK_VI_PRE173
+#define ACLK_VO_PRE174
+#define ACLK_VPU   175
+#define ACLK_PERI_PRE  176
+#define ACLK_GMAC  178
+#define ACLK_CIF   179
+#define ACLK_ISP   180
+#define ACLK_VOPB  181
+#define ACLK_VOPL  182
+#define ACLK_RGA   183
+#define ACLK_GIC   184
+#define ACLK_DCF   186
+#define ACLK_DMAC  187
+#define ACLK_BUS_SRC   188
+#define ACLK_PERI_SRC  189
+
+/* hclk gates */
+#define HCLK_BUS_PRE   240
+#define HCLK_CRYPTO241
+#define HCLK_VI_PRE242
+#define HCLK_VO_PRE243
+#define HCLK_VPU   244
+#define HCLK_PERI_PRE  245
+#define HCLK_MMC_NAND  246
+#define HCLK_SDMMC 247
+#define HCLK_USB   248
+#define HCLK_CIF   249
+#define HCLK_ISP   250
+#define HCLK_VOPB  251
+#define HCLK_VOPL  252
+#define HCLK_RGA   253
+#define HCLK_NANDC 254
+#define HCLK_SDIO  255
+#define HCLK_EMMC  256
+#define 

[PATCH v3 3/4] clk: rockchip: add support for half divider

2018-06-14 Thread Elaine Zhang
The new Rockchip SoCs have an optional half divider.
The formula is:
freq_out = 2*freq_in / (2*div + 3)
This is the same for all of the new SoCs.

So we use "branch_half_divider" + "COMPOSITE_NOMUX_HALFDIV \
DIV_HALF \ COMPOSITE_HALFDIV \ COMPOSITE_NOGATE_HALFDIV"
to hook that special divider clock-type into our clock-tree.

Signed-off-by: Elaine Zhang 
---
 drivers/clk/rockchip/Makefile   |   1 +
 drivers/clk/rockchip/clk-half-divider.c | 230 
 drivers/clk/rockchip/clk.c  |  10 ++
 drivers/clk/rockchip/clk.h  |  85 
 4 files changed, 326 insertions(+)
 create mode 100644 drivers/clk/rockchip/clk-half-divider.c

diff --git a/drivers/clk/rockchip/Makefile b/drivers/clk/rockchip/Makefile
index 59b8d320960a..2b380fafd232 100644
--- a/drivers/clk/rockchip/Makefile
+++ b/drivers/clk/rockchip/Makefile
@@ -7,6 +7,7 @@ obj-y   += clk-rockchip.o
 obj-y  += clk.o
 obj-y  += clk-pll.o
 obj-y  += clk-cpu.o
+obj-y  += clk-half-divider.o
 obj-y  += clk-inverter.o
 obj-y  += clk-mmc-phase.o
 obj-y  += clk-muxgrf.o
diff --git a/drivers/clk/rockchip/clk-half-divider.c 
b/drivers/clk/rockchip/clk-half-divider.c
new file mode 100644
index ..fb7a6501e0c1
--- /dev/null
+++ b/drivers/clk/rockchip/clk-half-divider.c
@@ -0,0 +1,230 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018 Fuzhou Rockchip Electronics Co., Ltd
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include "clk.h"
+
+#define div_mask(width)((1 << (width)) - 1)
+
+static bool _is_best_half_div(unsigned long rate, unsigned long now,
+ unsigned long best, unsigned long flags)
+{
+   if (flags & CLK_DIVIDER_ROUND_CLOSEST)
+   return abs(rate - now) < abs(rate - best);
+
+   return now <= rate && now > best;
+}
+
+static unsigned long clk_half_divider_recalc_rate(struct clk_hw *hw,
+ unsigned long parent_rate)
+{
+   struct clk_divider *divider = to_clk_divider(hw);
+   unsigned int val;
+
+   val = clk_readl(divider->reg) >> divider->shift;
+   val &= div_mask(divider->width);
+   val = val * 2 + 3;
+
+   return DIV_ROUND_UP_ULL(((u64)parent_rate * 2), val);
+}
+
+static int clk_half_divider_bestdiv(struct clk_hw *hw, unsigned long rate,
+   unsigned long *best_parent_rate, u8 width,
+   unsigned long flags)
+{
+   unsigned int i, bestdiv = 0;
+   unsigned long parent_rate, best = 0, now, maxdiv;
+   unsigned long parent_rate_saved = *best_parent_rate;
+
+   if (!rate)
+   rate = 1;
+
+   maxdiv = div_mask(width);
+
+   if (!(clk_hw_get_flags(hw) & CLK_SET_RATE_PARENT)) {
+   parent_rate = *best_parent_rate;
+   bestdiv = DIV_ROUND_UP_ULL(((u64)parent_rate * 2), rate);
+   if (bestdiv < 3)
+   bestdiv = 0;
+   else
+   bestdiv = (bestdiv - 3) / 2;
+   bestdiv = bestdiv > maxdiv ? maxdiv : bestdiv;
+   return bestdiv;
+   }
+
+   /*
+* The maximum divider we can use without overflowing
+* unsigned long in rate * i below
+*/
+   maxdiv = min(ULONG_MAX / rate, maxdiv);
+
+   for (i = 0; i <= maxdiv; i++) {
+   if (((u64)rate * (i * 2 + 3)) == ((u64)parent_rate_saved * 2)) {
+   /*
+* It's the most ideal case if the requested rate can be
+* divided from parent clock without needing to change
+* parent rate, so return the divider immediately.
+*/
+   *best_parent_rate = parent_rate_saved;
+   return i;
+   }
+   parent_rate = clk_hw_round_rate(clk_hw_get_parent(hw),
+   ((u64)rate * (i * 2 + 3)) / 2);
+   now = DIV_ROUND_UP_ULL(((u64)parent_rate * 2),
+  (i * 2 + 3));
+
+   if (_is_best_half_div(rate, now, best, flags)) {
+   bestdiv = i;
+   best = now;
+   *best_parent_rate = parent_rate;
+   }
+   }
+
+   if (!bestdiv) {
+   bestdiv = div_mask(width);
+   *best_parent_rate = clk_hw_round_rate(clk_hw_get_parent(hw), 1);
+   }
+
+   return bestdiv;
+}
+
+static long clk_half_divider_round_rate(struct clk_hw *hw, unsigned long rate,
+   unsigned long *prate)
+{
+   struct clk_divider *divider = to_clk_divider(hw);
+   int div;
+
+   div = clk_half_divider_bestdiv(hw, rate, prate,
+  divider->width,
+  divider->flags);
+
+   return 

[PATCH v1 0/2] perf: Drop leaked kernel samples

2018-06-14 Thread Jin Yao
On workloads that do a lot of kernel entry/exits we see kernel
samples, even though :u is specified. This is due to skid existing.

This might be a security issue because it can leak kernel addresses even
though kernel sampling support is disabled.

One patch "perf/core: Drop kernel samples even though :u is specified"
was posted last year, but it was reverted because it introduced a
regression issue that broke the rr-project.

Now this patch set uses sysctl to control the dropping of leaked
kernel samples.

/sys/devices/cpu/perf_allow_sample_leakage:

0 - default, drop the leaked kernel samples.
1 - don't drop the leaked kernel samples.

For rr it can write 1 to /sys/devices/cpu/perf_allow_sample_leakage to
keep original system behavior.

Jin Yao (2):
  perf/core: Use sysctl to turn on/off dropping leaked kernel samples
  perf Documentation: Introduce the sysctl perf_allow_sample_leakage

 kernel/events/core.c | 58 
 tools/perf/Documentation/perf-record.txt | 14 
 2 files changed, 72 insertions(+)

-- 
2.7.4



[PATCH v1 1/2] perf/core: Use sysctl to turn on/off dropping leaked kernel samples

2018-06-14 Thread Jin Yao
When doing sampling, for example:

perf record -e cycles:u ...

On workloads that do a lot of kernel entry/exits we see kernel
samples, even though :u is specified. This is due to skid existing.

This might be a security issue because it can leak kernel addresses even
though kernel sampling support is disabled.

One patch "perf/core: Drop kernel samples even though :u is specified"
was posted last year, but it was reverted because it introduced a
regression issue that broke the rr-project, which used sampling
events to receive a signal on overflow. These signals were critical
to the correct operation of rr.

See '6a8a75f32357 ("Revert "perf/core: Drop kernel samples even
though :u is specified"")' for detail.

Now the idea is to use sysctl to control the dropping of leaked
kernel samples.

/sys/devices/cpu/perf_allow_sample_leakage:

0 - default, drop the leaked kernel samples.
1 - don't drop the leaked kernel samples.

For rr it can write 1 to /sys/devices/cpu/perf_allow_sample_leakage.

For example,

root@skl:/tmp# cat /sys/devices/cpu/perf_allow_sample_leakage
0
root@skl:/tmp# perf record -e cycles:u ./div
root@skl:/tmp# perf report --stdio

  ...  .  

47.01%  div  div[.] main
20.74%  div  libc-2.23.so   [.] __random_r
15.59%  div  libc-2.23.so   [.] __random
 8.68%  div  div[.] compute_flag
 4.48%  div  libc-2.23.so   [.] rand
 3.50%  div  div[.] rand@plt
 0.00%  div  ld-2.23.so [.] do_lookup_x
 0.00%  div  ld-2.23.so [.] memcmp
 0.00%  div  ld-2.23.so [.] _dl_start
 0.00%  div  ld-2.23.so [.] _start

There is no kernel symbol reported.

root@skl:/tmp# echo 1 > /sys/devices/cpu/perf_allow_sample_leakage
root@skl:/tmp# cat /sys/devices/cpu/perf_allow_sample_leakage
1
root@skl:/tmp# perf record -e cycles:u ./div
root@skl:/tmp# perf report --stdio

  ...    .

47.53%  div  div   [.] main
20.62%  div  libc-2.23.so  [.] __random_r
15.32%  div  libc-2.23.so  [.] __random
 8.66%  div  div   [.] compute_flag
 4.53%  div  libc-2.23.so  [.] rand
 3.34%  div  div   [.] rand@plt
 0.00%  div  [kernel.vmlinux]  [k] apic_timer_interrupt
 0.00%  div  libc-2.23.so  [.] intel_check_word
 0.00%  div  ld-2.23.so[.] brk
 0.00%  div  [kernel.vmlinux]  [k] page_fault
 0.00%  div  ld-2.23.so[.] _start

We can see the kernel symbols apic_timer_interrupt and page_fault.

Signed-off-by: Jin Yao 
---
 kernel/events/core.c | 58 
 1 file changed, 58 insertions(+)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 80cca2b..7867541 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7721,6 +7721,28 @@ int perf_event_account_interrupt(struct perf_event 
*event)
return __perf_event_account_interrupt(event, 1);
 }
 
+static int perf_allow_sample_leakage __read_mostly;
+
+static bool sample_is_allowed(struct perf_event *event, struct pt_regs *regs)
+{
+   int allow_leakage = READ_ONCE(perf_allow_sample_leakage);
+
+   if (allow_leakage)
+   return true;
+
+   /*
+* Due to interrupt latency (AKA "skid"), we may enter the
+* kernel before taking an overflow, even if the PMU is only
+* counting user events.
+* To avoid leaking information to userspace, we must always
+* reject kernel samples when exclude_kernel is set.
+*/
+   if (event->attr.exclude_kernel && !user_mode(regs))
+   return false;
+
+   return true;
+}
+
 /*
  * Generic event overflow handling, sampling.
  */
@@ -7742,6 +7764,12 @@ static int __perf_event_overflow(struct perf_event 
*event,
ret = __perf_event_account_interrupt(event, throttle);
 
/*
+* For security, drop the skid kernel samples if necessary.
+*/
+   if (!sample_is_allowed(event, regs))
+   return ret;
+
+   /*
 * XXX event_limit might not quite work as expected on inherited
 * events
 */
@@ -9500,9 +9528,39 @@ perf_event_mux_interval_ms_store(struct device *dev,
 }
 static DEVICE_ATTR_RW(perf_event_mux_interval_ms);
 
+static ssize_t
+perf_allow_sample_leakage_show(struct device *dev,
+  struct device_attribute *attr, char *page)
+{
+   int allow_leakage = READ_ONCE(perf_allow_sample_leakage);
+
+   return snprintf(page, PAGE_SIZE-1, "%d\n", allow_leakage);
+}
+
+static ssize_t
+perf_allow_sample_leakage_store(struct device *dev,
+   struct device_attribute *attr,
+   const char *buf, size_t count)
+{
+   int allow_leakage, ret;
+
+   ret = kstrtoint(buf, 0, &allow_leakage);
+   if (ret)
+   

[PATCH v1 0/2] perf: Drop leaked kernel samples

2018-06-14 Thread Jin Yao
On workloads that do a lot of kernel entry/exits we see kernel
samples, even though :u is specified. This is due to skid existing.

This might be a security issue because it can leak kernel addresses even
though kernel sampling support is disabled.

One patch "perf/core: Drop kernel samples even though :u is specified"
was posted last year, but it was reverted because it introduced a
regression issue that broke the rr-project.

Now this patch set uses sysctl to control the dropping of leaked
kernel samples.

/sys/devices/cpu/perf_allow_sample_leakage:

0 - default, drop the leaked kernel samples.
1 - don't drop the leaked kernel samples.

For rr it can write 1 to /sys/devices/cpu/perf_allow_sample_leakage to
keep original system behavior.

Jin Yao (2):
  perf/core: Use sysctl to turn on/off dropping leaked kernel samples
  perf Documentation: Introduce the sysctl perf_allow_sample_leakage

 kernel/events/core.c | 58 
 tools/perf/Documentation/perf-record.txt | 14 
 2 files changed, 72 insertions(+)

-- 
2.7.4



[PATCH v1 1/2] perf/core: Use sysctl to turn on/off dropping leaked kernel samples

2018-06-14 Thread Jin Yao
When doing sampling, for example:

perf record -e cycles:u ...

On workloads that do a lot of kernel entry/exits we see kernel
samples, even though :u is specified. This is due to skid existing.

This might be a security issue because it can leak kernel addresses even
though kernel sampling support is disabled.

One patch "perf/core: Drop kernel samples even though :u is specified"
was posted last year, but it was reverted because it introduced a
regression issue that broke the rr-project, which used sampling
events to receive a signal on overflow. These signals were critical
to the correct operation of rr.

See '6a8a75f32357 ("Revert "perf/core: Drop kernel samples even
though :u is specified"")' for detail.

Now the idea is to use sysctl to control the dropping of leaked
kernel samples.

/sys/devices/cpu/perf_allow_sample_leakage:

0 - default, drop the leaked kernel samples.
1 - don't drop the leaked kernel samples.

For rr it can write 1 to /sys/devices/cpu/perf_allow_sample_leakage.

For example,

root@skl:/tmp# cat /sys/devices/cpu/perf_allow_sample_leakage
0
root@skl:/tmp# perf record -e cycles:u ./div
root@skl:/tmp# perf report --stdio

  ...  .  

47.01%  div  div[.] main
20.74%  div  libc-2.23.so   [.] __random_r
15.59%  div  libc-2.23.so   [.] __random
 8.68%  div  div[.] compute_flag
 4.48%  div  libc-2.23.so   [.] rand
 3.50%  div  div[.] rand@plt
 0.00%  div  ld-2.23.so [.] do_lookup_x
 0.00%  div  ld-2.23.so [.] memcmp
 0.00%  div  ld-2.23.so [.] _dl_start
 0.00%  div  ld-2.23.so [.] _start

There is no kernel symbol reported.

root@skl:/tmp# echo 1 > /sys/devices/cpu/perf_allow_sample_leakage
root@skl:/tmp# cat /sys/devices/cpu/perf_allow_sample_leakage
1
root@skl:/tmp# perf record -e cycles:u ./div
root@skl:/tmp# perf report --stdio

  ...    .

47.53%  div  div   [.] main
20.62%  div  libc-2.23.so  [.] __random_r
15.32%  div  libc-2.23.so  [.] __random
 8.66%  div  div   [.] compute_flag
 4.53%  div  libc-2.23.so  [.] rand
 3.34%  div  div   [.] rand@plt
 0.00%  div  [kernel.vmlinux]  [k] apic_timer_interrupt
 0.00%  div  libc-2.23.so  [.] intel_check_word
 0.00%  div  ld-2.23.so[.] brk
 0.00%  div  [kernel.vmlinux]  [k] page_fault
 0.00%  div  ld-2.23.so[.] _start

We can see the kernel symbols apic_timer_interrupt and page_fault.

Signed-off-by: Jin Yao 
---
 kernel/events/core.c | 58 
 1 file changed, 58 insertions(+)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 80cca2b..7867541 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7721,6 +7721,28 @@ int perf_event_account_interrupt(struct perf_event 
*event)
return __perf_event_account_interrupt(event, 1);
 }
 
+static int perf_allow_sample_leakage __read_mostly;
+
+static bool sample_is_allowed(struct perf_event *event, struct pt_regs *regs)
+{
+   int allow_leakage = READ_ONCE(perf_allow_sample_leakage);
+
+   if (allow_leakage)
+   return true;
+
+   /*
+* Due to interrupt latency (AKA "skid"), we may enter the
+* kernel before taking an overflow, even if the PMU is only
+* counting user events.
+* To avoid leaking information to userspace, we must always
+* reject kernel samples when exclude_kernel is set.
+*/
+   if (event->attr.exclude_kernel && !user_mode(regs))
+   return false;
+
+   return true;
+}
+
 /*
  * Generic event overflow handling, sampling.
  */
@@ -7742,6 +7764,12 @@ static int __perf_event_overflow(struct perf_event 
*event,
ret = __perf_event_account_interrupt(event, throttle);
 
/*
+* For security, drop the skid kernel samples if necessary.
+*/
+   if (!sample_is_allowed(event, regs))
+   return ret;
+
+   /*
 * XXX event_limit might not quite work as expected on inherited
 * events
 */
@@ -9500,9 +9528,39 @@ perf_event_mux_interval_ms_store(struct device *dev,
 }
 static DEVICE_ATTR_RW(perf_event_mux_interval_ms);
 
+static ssize_t
+perf_allow_sample_leakage_show(struct device *dev,
+  struct device_attribute *attr, char *page)
+{
+   int allow_leakage = READ_ONCE(perf_allow_sample_leakage);
+
+   return snprintf(page, PAGE_SIZE-1, "%d\n", allow_leakage);
+}
+
+static ssize_t
+perf_allow_sample_leakage_store(struct device *dev,
+   struct device_attribute *attr,
+   const char *buf, size_t count)
+{
+   int allow_leakage, ret;
+
+   ret = kstrtoint(buf, 0, &allow_leakage);
+   if (ret)
+   

[PATCH v1 2/2] perf Documentation: Introduce the sysctl perf_allow_sample_leakage

2018-06-14 Thread Jin Yao
Introduce a new sysctl /sys/devices/cpu/perf_allow_sample_leakage, which
turns on/off dropping leaked kernel samples.

Signed-off-by: Jin Yao 
---
 tools/perf/Documentation/perf-record.txt | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/tools/perf/Documentation/perf-record.txt 
b/tools/perf/Documentation/perf-record.txt
index 04168da..97fb0f8 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -93,6 +93,20 @@ OPTIONS
  prevent the shell interpretation.  You also need to use --group on
  "perf report" to view group events together.
 
+   Note that if the workload does a lot of kernel entries/exits, we may
+   see kernel samples even if :u is specified. This is caused by skid.
+   This might be a security issue because it can leak kernel addresses
+   even though kernel sampling support is disabled. We have a sysctl to
+   turn on/off the dropping of leaked kernel samples.
+
+   /sys/devices/cpu/perf_allow_sample_leakage
+
+   0 - drop the leaked kernel samples, default option.
+   1 - don't drop the leaked kernel samples.
+
+   For example, write 1 to perf_allow_sample_leakage
+   echo 1 > /sys/devices/cpu/perf_allow_sample_leakage
+
 --filter=::
 Event filter. This option should follow a event selector (-e) which
selects either tracepoint event(s) or a hardware trace PMU
-- 
2.7.4



[PATCH v1 2/2] perf Documentation: Introduce the sysctl perf_allow_sample_leakage

2018-06-14 Thread Jin Yao
Introduce a new sysctl /sys/devices/cpu/perf_allow_sample_leakage, which
turns on/off dropping leaked kernel samples.

Signed-off-by: Jin Yao 
---
 tools/perf/Documentation/perf-record.txt | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/tools/perf/Documentation/perf-record.txt 
b/tools/perf/Documentation/perf-record.txt
index 04168da..97fb0f8 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -93,6 +93,20 @@ OPTIONS
  prevent the shell interpretation.  You also need to use --group on
  "perf report" to view group events together.
 
+   Note that if the workload does a lot of kernel entries/exits, we may
+   see kernel samples even if :u is specified. This is caused by skid.
+   This might be a security issue because it can leak kernel addresses
+   even though kernel sampling support is disabled. We have a sysctl to
+   turn on/off the dropping of leaked kernel samples.
+
+   /sys/devices/cpu/perf_allow_sample_leakage
+
+   0 - drop the leaked kernel samples, default option.
+   1 - don't drop the leaked kernel samples.
+
+   For example, write 1 to perf_allow_sample_leakage
+   echo 1 > /sys/devices/cpu/perf_allow_sample_leakage
+
 --filter=::
 Event filter. This option should follow a event selector (-e) which
selects either tracepoint event(s) or a hardware trace PMU
-- 
2.7.4



[PATCH V2 6/7] mmc: sdhci-sprd: added Spreadtrum's initial host controller

2018-06-14 Thread Chunyan Zhang
From: Chunyan Zhang 

This patch adds initial support for the Secure Digital Host Controller
Interface compliant controller found in some of the latest Spreadtrum
chipsets. It has been tested on the SPRD-R11 version of the controller.

R11 is a variant based on SD v4.0 specification.

With this driver, R11 mmc can be initialized, can be mounted, read and
written.

Original-by: Billows Wu 
Signed-off-by: Chunyan Zhang 
---
 drivers/mmc/host/Kconfig  |  13 ++
 drivers/mmc/host/Makefile |   1 +
 drivers/mmc/host/sdhci-sprd.c | 426 ++
 3 files changed, 440 insertions(+)
 create mode 100644 drivers/mmc/host/sdhci-sprd.c

diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig
index 9589f9c..1b0ee11 100644
--- a/drivers/mmc/host/Kconfig
+++ b/drivers/mmc/host/Kconfig
@@ -584,6 +584,19 @@ config MMC_SDRICOH_CS
  To compile this driver as a module, choose M here: the
  module will be called sdricoh_cs.
 
+config MMC_SDHCI_SPRD
+   tristate "Spreadtrum SDIO host Controller"
+   depends on ARCH_SPRD
+   depends on MMC_SDHCI_PLTFM
+   select MMC_SDHCI_IO_ACCESSORS
+   help
+ This selects the SDIO Host Controller in Spreadtrum
+ SoCs, this driver supports R11(IP version: R11P0).
+
+ If you have a controller with this interface, say Y or M here.
+
+ If unsure, say N.
+
 config MMC_TMIO_CORE
tristate
 
diff --git a/drivers/mmc/host/Makefile b/drivers/mmc/host/Makefile
index 6aead24..5835bc4 100644
--- a/drivers/mmc/host/Makefile
+++ b/drivers/mmc/host/Makefile
@@ -88,6 +88,7 @@ obj-$(CONFIG_MMC_SDHCI_ST)+= sdhci-st.o
 obj-$(CONFIG_MMC_SDHCI_MICROCHIP_PIC32)+= sdhci-pic32.o
 obj-$(CONFIG_MMC_SDHCI_BRCMSTB)+= sdhci-brcmstb.o
 obj-$(CONFIG_MMC_SDHCI_OMAP)   += sdhci-omap.o
+obj-$(CONFIG_MMC_SDHCI_SPRD)   += sdhci-sprd.o
 obj-$(CONFIG_MMC_CQHCI)+= cqhci.o
 
 ifeq ($(CONFIG_CB710_DEBUG),y)
diff --git a/drivers/mmc/host/sdhci-sprd.c b/drivers/mmc/host/sdhci-sprd.c
new file mode 100644
index 000..f1b0f2b
--- /dev/null
+++ b/drivers/mmc/host/sdhci-sprd.c
@@ -0,0 +1,426 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Secure Digital Host Controller
+//
+// Copyright (C) 2018 Spreadtrum, Inc.
+// Author: Chunyan Zhang 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "sdhci-pltfm.h"
+
+#define SDHCI_SPRD_REG_32_DLL_DLY_OFFSET   0x208
+#define  SDHCIBSPRD_IT_WR_DLY_INV  (1 << 5)
+#define  SDHCI_SPRD_BIT_CMD_DLY_INV(1 << 13)
+#define  SDHCI_SPRD_BIT_POSRD_DLY_INV  (1 << 21)
+#define  SDHCI_SPRD_BIT_NEGRD_DLY_INV  (1 << 29)
+
+#define SDHCI_SPRD_REG_32_BUSY_POSI0x250
+#define  SDHCI_SPRD_BIT_OUTR_CLK_AUTO_EN   (1 << 25)
+#define  SDHCI_SPRD_BIT_INNR_CLK_AUTO_EN   (1 << 24)
+
+#define SDHCI_SPRD_REG_DEBOUNCE0x28C
+#define  SDHCI_SPRD_BIT_DLL_BAK(1 << 0)
+#define  SDHCI_SPRD_BIT_DLL_VAL(1 << 1)
+
+#define  SDHCI_SPRD_INT_SIGNAL_MASK0x1B7F410B
+
+/* SDHCI_HOST_CONTROL2 */
+#define  SDHCI_SPRD_CTRL_HS200 0x0005
+#define  SDHCI_SPRD_CTRL_HS400 0x0006
+
+/* SDHCI_SOFTWARE_RESET */
+#define  SDHCI_HW_RESET_CARD   0x8 /* For Spreadtrum's design */
+
+#define SDHCI_SPRD_MAX_CUR 0xFF
+#define SDHCI_SPRD_CLK_MAX_DIV 0x3FF
+
+#define SDHCI_SPRD_CLK_DEF_RATE2600
+
+struct sdhci_sprd_host {
+   u32 version;
+   struct clk *clk_sdio;
+   struct clk *clk_enable;
+   u32 base_rate;
+};
+
+#define TO_SPRD_HOST(host) sdhci_pltfm_priv(sdhci_priv(host))
+
+static void sdhci_sprd_init_config(struct sdhci_host *host)
+{
+   u16 val;
+
+   /* set 64-bit addressing modes */
+   val = sdhci_readw(host, SDHCI_HOST_CONTROL2);
+   val |= SDHCI_CTRL_64BIT_ADDR;
+   sdhci_writew(host, val, SDHCI_HOST_CONTROL2);
+
+   /* set dll backup mode */
+   val = sdhci_readl(host, SDHCI_SPRD_REG_DEBOUNCE);
+   val |= SDHCI_SPRD_BIT_DLL_BAK | SDHCI_SPRD_BIT_DLL_VAL;
+   sdhci_writel(host, val, SDHCI_SPRD_REG_DEBOUNCE);
+}
+
+static inline u32 sdhci_sprd_readl(struct sdhci_host *host, int reg)
+{
+   if (unlikely(reg == SDHCI_MAX_CURRENT))
+   return SDHCI_SPRD_MAX_CUR;
+
+   return readl_relaxed(host->ioaddr + reg);
+}
+
+static inline void sdhci_sprd_writel(struct sdhci_host *host, u32 val, int reg)
+{
+   /* SDHCI_MAX_CURRENT is reserved on Spreadtrum's platform */
+   if (unlikely(reg == SDHCI_MAX_CURRENT))
+   return;
+
+   if (unlikely(reg == SDHCI_SIGNAL_ENABLE || reg == SDHCI_INT_ENABLE))
+   val = val & SDHCI_SPRD_INT_SIGNAL_MASK;
+
+   return writel_relaxed(val, host->ioaddr + reg);
+}
+
+static inline void sdhci_sprd_writeb(struct sdhci_host *host, u8 val, int reg)
+{
+   if 

[PATCH V2 4/7] mmc: sdhci: add 32-bit block count support for v4 mode

2018-06-14 Thread Chunyan Zhang
When Host Version 4 is enabled, SDMA System Address register is
re-defined as 32-bit Block Count, and SDMA uses ADMA System
Address register (05Fh-058h) instead.

Signed-off-by: Chunyan Zhang 
---
 drivers/mmc/host/sdhci.c | 3 ++-
 drivers/mmc/host/sdhci.h | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 5d3b0d8..b8ee124 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -943,7 +943,8 @@ static void sdhci_prepare_data(struct sdhci_host *host, 
struct mmc_command *cmd)
/* Set the DMA boundary value and block size */
sdhci_writew(host, SDHCI_MAKE_BLKSZ(host->sdma_boundary, data->blksz),
 SDHCI_BLOCK_SIZE);
-   sdhci_writew(host, data->blocks, SDHCI_BLOCK_COUNT);
+   reg = host->v4_mode ? SDHCI_32BIT_BLK_CNT : SDHCI_BLOCK_COUNT;
+   sdhci_writew(host, data->blocks, reg);
 }
 
 static inline bool sdhci_auto_cmd12(struct sdhci_host *host,
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index 820a863..1e84539 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -28,6 +28,7 @@
 
 #define SDHCI_DMA_ADDRESS  0x00
 #define SDHCI_ARGUMENT2SDHCI_DMA_ADDRESS
+#define SDHCI_32BIT_BLK_CNTSDHCI_DMA_ADDRESS
 
 #define SDHCI_BLOCK_SIZE   0x04
 #define  SDHCI_MAKE_BLKSZ(dma, blksz) (((dma & 0x7) << 12) | (blksz & 0xFFF))
-- 
2.7.4



[PATCH V2 5/7] mmc: sdhci: add CMD23 support for v4 mode

2018-06-14 Thread Chunyan Zhang
Host Driver Version 4.10 adds a new bit in Host Control 2 Register
for selecting Auto CMD23 or Auto CMD12 for ADMA3 data transfer.

Signed-off-by: Chunyan Zhang 
---
 drivers/mmc/host/sdhci.c | 16 +++-
 drivers/mmc/host/sdhci.h |  1 +
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index b8ee124..3b2af7e 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -954,6 +954,20 @@ static inline bool sdhci_auto_cmd12(struct sdhci_host 
*host,
   !mrq->cap_cmd_during_tfr;
 }
 
+static inline void sdhci_set_auto_cmd23(struct sdhci_host *host,
+   struct mmc_command *cmd)
+{
+   u16 ctrl2;
+
+   if (host->v4_mode) {
+   ctrl2 = sdhci_readw(host, SDHCI_HOST_CONTROL2);
+   ctrl2 |= SDHCI_CMD23_ENABLE;
+   sdhci_writew(host, ctrl2, SDHCI_HOST_CONTROL2);
+   } else {
+   sdhci_writel(host, cmd->mrq->sbc->arg, SDHCI_ARGUMENT2);
+   }
+}
+
 static void sdhci_set_transfer_mode(struct sdhci_host *host,
struct mmc_command *cmd)
 {
@@ -989,7 +1003,7 @@ static void sdhci_set_transfer_mode(struct sdhci_host 
*host,
mode |= SDHCI_TRNS_AUTO_CMD12;
else if (cmd->mrq->sbc && (host->flags & SDHCI_AUTO_CMD23)) {
mode |= SDHCI_TRNS_AUTO_CMD23;
-   sdhci_writel(host, cmd->mrq->sbc->arg, SDHCI_ARGUMENT2);
+   sdhci_set_auto_cmd23(host, cmd);
}
}
 
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index 1e84539..d5e1c10 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -185,6 +185,7 @@
 #define   SDHCI_CTRL_DRV_TYPE_D0x0030
 #define  SDHCI_CTRL_EXEC_TUNING0x0040
 #define  SDHCI_CTRL_TUNED_CLK  0x0080
+#define  SDHCI_CMD23_ENABLE0x0800
 #define  SDHCI_CTRL_V4_MODE0x1000
 #define  SDHCI_CTRL_64BIT_ADDR 0x2000
 #define  SDHCI_CTRL_PRESET_VAL_ENABLE  0x8000
-- 
2.7.4



[PATCH V2 7/7] dt-bindings: sdhci-sprd: Add bindings for the sdhci-sprd controller

2018-06-14 Thread Chunyan Zhang
From: Chunyan Zhang 

This patch adds the device-tree binding documentation for Spreadtrum
SDHCI driver.

Signed-off-by: Chunyan Zhang 
---
 .../devicetree/bindings/mmc/sdhci-sprd.txt | 41 ++
 1 file changed, 41 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/mmc/sdhci-sprd.txt

diff --git a/Documentation/devicetree/bindings/mmc/sdhci-sprd.txt 
b/Documentation/devicetree/bindings/mmc/sdhci-sprd.txt
new file mode 100644
index 000..45c9978
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/sdhci-sprd.txt
@@ -0,0 +1,41 @@
+* Spreadtrum SDHCI controller (sdhci-sprd)
+
+The Secure Digital (SD) Host controller on Spreadtrum SoCs provides an 
interface
+for MMC, SD and SDIO types of cards.
+
+This file documents differences between the core properties in mmc.txt
+and the properties used by the sdhci-sprd driver.
+
+Required properties:
+- compatible: Should contain "sprd,sdhci-r11".
+- reg: physical base address of the controller and length.
+- interrupts: Interrupts used by the SDHCI controller.
+- clocks: Should contain phandle for the clock feeding the SDHCI controller
+- clock-names: Should contain the following:
+   "sdio" - SDIO source clock (required)
+   "enable" - gate clock which used for enabling/disabling the device 
(required)
+
+Optional properties:
+- assigned-clocks: the same with "sdio" clock
+- assigned-clock-parents: the default parent of "sdio" clock
+
+Examples:
+
+sdio0: sdio@2060 {
+   compatible  = "sprd,sdhci-r11";
+   reg = <0 0x2060 0 0x1000>;
+   interrupts = ;
+
+   clock-names = "sdio", "enable";
+   clocks = <_clk CLK_EMMC_2X>,
+<_gate CLK_EMMC_EB>;
+   assigned-clocks = <_clk CLK_EMMC_2X>;
+   assigned-clock-parents = < CLK_RPLL_390M>;
+
+   bus-width = <8>;
+   non-removable;
+   no-sdio;
+   no-sd;
+   cap-mmc-hw-reset;
+   status = "okay";
+};
-- 
2.7.4



[PATCH V2 6/7] mmc: sdhci-sprd: added Spreadtrum's initial host controller

2018-06-14 Thread Chunyan Zhang
From: Chunyan Zhang 

This patch adds initial support for the Secure Digital Host Controller
Interface compliant controller found in some of the latest Spreadtrum
chipsets. It has been tested on the SPRD-R11 version of the controller.

R11 is a variant based on SD v4.0 specification.

With this driver, R11 mmc can be initialized, can be mounted, read and
written.

Original-by: Billows Wu 
Signed-off-by: Chunyan Zhang 
---
 drivers/mmc/host/Kconfig  |  13 ++
 drivers/mmc/host/Makefile |   1 +
 drivers/mmc/host/sdhci-sprd.c | 426 ++
 3 files changed, 440 insertions(+)
 create mode 100644 drivers/mmc/host/sdhci-sprd.c

diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig
index 9589f9c..1b0ee11 100644
--- a/drivers/mmc/host/Kconfig
+++ b/drivers/mmc/host/Kconfig
@@ -584,6 +584,19 @@ config MMC_SDRICOH_CS
  To compile this driver as a module, choose M here: the
  module will be called sdricoh_cs.
 
+config MMC_SDHCI_SPRD
+   tristate "Spreadtrum SDIO host Controller"
+   depends on ARCH_SPRD
+   depends on MMC_SDHCI_PLTFM
+   select MMC_SDHCI_IO_ACCESSORS
+   help
+ This selects the SDIO Host Controller in Spreadtrum
+ SoCs, this driver supports R11(IP version: R11P0).
+
+ If you have a controller with this interface, say Y or M here.
+
+ If unsure, say N.
+
 config MMC_TMIO_CORE
tristate
 
diff --git a/drivers/mmc/host/Makefile b/drivers/mmc/host/Makefile
index 6aead24..5835bc4 100644
--- a/drivers/mmc/host/Makefile
+++ b/drivers/mmc/host/Makefile
@@ -88,6 +88,7 @@ obj-$(CONFIG_MMC_SDHCI_ST)+= sdhci-st.o
 obj-$(CONFIG_MMC_SDHCI_MICROCHIP_PIC32)+= sdhci-pic32.o
 obj-$(CONFIG_MMC_SDHCI_BRCMSTB)+= sdhci-brcmstb.o
 obj-$(CONFIG_MMC_SDHCI_OMAP)   += sdhci-omap.o
+obj-$(CONFIG_MMC_SDHCI_SPRD)   += sdhci-sprd.o
 obj-$(CONFIG_MMC_CQHCI)+= cqhci.o
 
 ifeq ($(CONFIG_CB710_DEBUG),y)
diff --git a/drivers/mmc/host/sdhci-sprd.c b/drivers/mmc/host/sdhci-sprd.c
new file mode 100644
index 000..f1b0f2b
--- /dev/null
+++ b/drivers/mmc/host/sdhci-sprd.c
@@ -0,0 +1,426 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Secure Digital Host Controller
+//
+// Copyright (C) 2018 Spreadtrum, Inc.
+// Author: Chunyan Zhang 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "sdhci-pltfm.h"
+
+#define SDHCI_SPRD_REG_32_DLL_DLY_OFFSET   0x208
+#define  SDHCIBSPRD_IT_WR_DLY_INV  (1 << 5)
+#define  SDHCI_SPRD_BIT_CMD_DLY_INV(1 << 13)
+#define  SDHCI_SPRD_BIT_POSRD_DLY_INV  (1 << 21)
+#define  SDHCI_SPRD_BIT_NEGRD_DLY_INV  (1 << 29)
+
+#define SDHCI_SPRD_REG_32_BUSY_POSI0x250
+#define  SDHCI_SPRD_BIT_OUTR_CLK_AUTO_EN   (1 << 25)
+#define  SDHCI_SPRD_BIT_INNR_CLK_AUTO_EN   (1 << 24)
+
+#define SDHCI_SPRD_REG_DEBOUNCE0x28C
+#define  SDHCI_SPRD_BIT_DLL_BAK(1 << 0)
+#define  SDHCI_SPRD_BIT_DLL_VAL(1 << 1)
+
+#define  SDHCI_SPRD_INT_SIGNAL_MASK0x1B7F410B
+
+/* SDHCI_HOST_CONTROL2 */
+#define  SDHCI_SPRD_CTRL_HS200 0x0005
+#define  SDHCI_SPRD_CTRL_HS400 0x0006
+
+/* SDHCI_SOFTWARE_RESET */
+#define  SDHCI_HW_RESET_CARD   0x8 /* For Spreadtrum's design */
+
+#define SDHCI_SPRD_MAX_CUR 0xFF
+#define SDHCI_SPRD_CLK_MAX_DIV 0x3FF
+
+#define SDHCI_SPRD_CLK_DEF_RATE2600
+
+struct sdhci_sprd_host {
+   u32 version;
+   struct clk *clk_sdio;
+   struct clk *clk_enable;
+   u32 base_rate;
+};
+
+#define TO_SPRD_HOST(host) sdhci_pltfm_priv(sdhci_priv(host))
+
+static void sdhci_sprd_init_config(struct sdhci_host *host)
+{
+   u16 val;
+
+   /* set 64-bit addressing modes */
+   val = sdhci_readw(host, SDHCI_HOST_CONTROL2);
+   val |= SDHCI_CTRL_64BIT_ADDR;
+   sdhci_writew(host, val, SDHCI_HOST_CONTROL2);
+
+   /* set dll backup mode */
+   val = sdhci_readl(host, SDHCI_SPRD_REG_DEBOUNCE);
+   val |= SDHCI_SPRD_BIT_DLL_BAK | SDHCI_SPRD_BIT_DLL_VAL;
+   sdhci_writel(host, val, SDHCI_SPRD_REG_DEBOUNCE);
+}
+
+static inline u32 sdhci_sprd_readl(struct sdhci_host *host, int reg)
+{
+   if (unlikely(reg == SDHCI_MAX_CURRENT))
+   return SDHCI_SPRD_MAX_CUR;
+
+   return readl_relaxed(host->ioaddr + reg);
+}
+
+static inline void sdhci_sprd_writel(struct sdhci_host *host, u32 val, int reg)
+{
+   /* SDHCI_MAX_CURRENT is reserved on Spreadtrum's platform */
+   if (unlikely(reg == SDHCI_MAX_CURRENT))
+   return;
+
+   if (unlikely(reg == SDHCI_SIGNAL_ENABLE || reg == SDHCI_INT_ENABLE))
+   val = val & SDHCI_SPRD_INT_SIGNAL_MASK;
+
+   return writel_relaxed(val, host->ioaddr + reg);
+}
+
+static inline void sdhci_sprd_writeb(struct sdhci_host *host, u8 val, int reg)
+{
+   if 

[PATCH V2 4/7] mmc: sdhci: add 32-bit block count support for v4 mode

2018-06-14 Thread Chunyan Zhang
When Host Version 4 is enabled, SDMA System Address register is
re-defined as 32-bit Block Count, and SDMA uses ADMA System
Address register (05Fh-058h) instead.

Signed-off-by: Chunyan Zhang 
---
 drivers/mmc/host/sdhci.c | 3 ++-
 drivers/mmc/host/sdhci.h | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 5d3b0d8..b8ee124 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -943,7 +943,8 @@ static void sdhci_prepare_data(struct sdhci_host *host, 
struct mmc_command *cmd)
/* Set the DMA boundary value and block size */
sdhci_writew(host, SDHCI_MAKE_BLKSZ(host->sdma_boundary, data->blksz),
 SDHCI_BLOCK_SIZE);
-   sdhci_writew(host, data->blocks, SDHCI_BLOCK_COUNT);
+   reg = host->v4_mode ? SDHCI_32BIT_BLK_CNT : SDHCI_BLOCK_COUNT;
+   sdhci_writew(host, data->blocks, reg);
 }
 
 static inline bool sdhci_auto_cmd12(struct sdhci_host *host,
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index 820a863..1e84539 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -28,6 +28,7 @@
 
 #define SDHCI_DMA_ADDRESS  0x00
 #define SDHCI_ARGUMENT2SDHCI_DMA_ADDRESS
+#define SDHCI_32BIT_BLK_CNTSDHCI_DMA_ADDRESS
 
 #define SDHCI_BLOCK_SIZE   0x04
 #define  SDHCI_MAKE_BLKSZ(dma, blksz) (((dma & 0x7) << 12) | (blksz & 0xFFF))
-- 
2.7.4



[PATCH V2 5/7] mmc: sdhci: add CMD23 support for v4 mode

2018-06-14 Thread Chunyan Zhang
Host Driver Version 4.10 adds a new bit in Host Control 2 Register
for selecting Auto CMD23 or Auto CMD12 for ADMA3 data transfer.

Signed-off-by: Chunyan Zhang 
---
 drivers/mmc/host/sdhci.c | 16 +++-
 drivers/mmc/host/sdhci.h |  1 +
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index b8ee124..3b2af7e 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -954,6 +954,20 @@ static inline bool sdhci_auto_cmd12(struct sdhci_host 
*host,
   !mrq->cap_cmd_during_tfr;
 }
 
+static inline void sdhci_set_auto_cmd23(struct sdhci_host *host,
+   struct mmc_command *cmd)
+{
+   u16 ctrl2;
+
+   if (host->v4_mode) {
+   ctrl2 = sdhci_readw(host, SDHCI_HOST_CONTROL2);
+   ctrl2 |= SDHCI_CMD23_ENABLE;
+   sdhci_writew(host, ctrl2, SDHCI_HOST_CONTROL2);
+   } else {
+   sdhci_writel(host, cmd->mrq->sbc->arg, SDHCI_ARGUMENT2);
+   }
+}
+
 static void sdhci_set_transfer_mode(struct sdhci_host *host,
struct mmc_command *cmd)
 {
@@ -989,7 +1003,7 @@ static void sdhci_set_transfer_mode(struct sdhci_host 
*host,
mode |= SDHCI_TRNS_AUTO_CMD12;
else if (cmd->mrq->sbc && (host->flags & SDHCI_AUTO_CMD23)) {
mode |= SDHCI_TRNS_AUTO_CMD23;
-   sdhci_writel(host, cmd->mrq->sbc->arg, SDHCI_ARGUMENT2);
+   sdhci_set_auto_cmd23(host, cmd);
}
}
 
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index 1e84539..d5e1c10 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -185,6 +185,7 @@
 #define   SDHCI_CTRL_DRV_TYPE_D0x0030
 #define  SDHCI_CTRL_EXEC_TUNING0x0040
 #define  SDHCI_CTRL_TUNED_CLK  0x0080
+#define  SDHCI_CMD23_ENABLE0x0800
 #define  SDHCI_CTRL_V4_MODE0x1000
 #define  SDHCI_CTRL_64BIT_ADDR 0x2000
 #define  SDHCI_CTRL_PRESET_VAL_ENABLE  0x8000
-- 
2.7.4



[PATCH V2 7/7] dt-bindings: sdhci-sprd: Add bindings for the sdhci-sprd controller

2018-06-14 Thread Chunyan Zhang
From: Chunyan Zhang 

This patch adds the device-tree binding documentation for Spreadtrum
SDHCI driver.

Signed-off-by: Chunyan Zhang 
---
 .../devicetree/bindings/mmc/sdhci-sprd.txt | 41 ++
 1 file changed, 41 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/mmc/sdhci-sprd.txt

diff --git a/Documentation/devicetree/bindings/mmc/sdhci-sprd.txt 
b/Documentation/devicetree/bindings/mmc/sdhci-sprd.txt
new file mode 100644
index 000..45c9978
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/sdhci-sprd.txt
@@ -0,0 +1,41 @@
+* Spreadtrum SDHCI controller (sdhci-sprd)
+
+The Secure Digital (SD) Host controller on Spreadtrum SoCs provides an 
interface
+for MMC, SD and SDIO types of cards.
+
+This file documents differences between the core properties in mmc.txt
+and the properties used by the sdhci-sprd driver.
+
+Required properties:
+- compatible: Should contain "sprd,sdhci-r11".
+- reg: physical base address of the controller and length.
+- interrupts: Interrupts used by the SDHCI controller.
+- clocks: Should contain phandle for the clock feeding the SDHCI controller
+- clock-names: Should contain the following:
+   "sdio" - SDIO source clock (required)
+   "enable" - gate clock which used for enabling/disabling the device 
(required)
+
+Optional properties:
+- assigned-clocks: the same with "sdio" clock
+- assigned-clock-parents: the default parent of "sdio" clock
+
+Examples:
+
+sdio0: sdio@2060 {
+   compatible  = "sprd,sdhci-r11";
+   reg = <0 0x2060 0 0x1000>;
+   interrupts = ;
+
+   clock-names = "sdio", "enable";
+   clocks = <_clk CLK_EMMC_2X>,
+<_gate CLK_EMMC_EB>;
+   assigned-clocks = <_clk CLK_EMMC_2X>;
+   assigned-clock-parents = < CLK_RPLL_390M>;
+
+   bus-width = <8>;
+   non-removable;
+   no-sdio;
+   no-sd;
+   cap-mmc-hw-reset;
+   status = "okay";
+};
-- 
2.7.4



[PATCH V2 2/7] mmc: sdhci: made changes for System Address register of SDMA

2018-06-14 Thread Chunyan Zhang
According to the SD host controller specification version 4.10, when
Host Version 4 is enabled, SDMA uses ADMA System Address register
(05Fh-058h) instead of using SDMA System Address register to
support both 32-bit and 64-bit addressing.

Signed-off-by: Chunyan Zhang 
---
 drivers/mmc/host/sdhci.c | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index cf5695f..f57201f 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -805,6 +805,7 @@ static void sdhci_set_timeout(struct sdhci_host *host, 
struct mmc_command *cmd)
 static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_command 
*cmd)
 {
u8 ctrl;
+   u32 reg;
struct mmc_data *data = cmd->data;
 
if (sdhci_data_line_cmd(cmd))
@@ -894,8 +895,10 @@ static void sdhci_prepare_data(struct sdhci_host *host, 
struct mmc_command *cmd)
 SDHCI_ADMA_ADDRESS_HI);
} else {
WARN_ON(sg_cnt != 1);
+   reg = host->v4_mode ? SDHCI_ADMA_ADDRESS :
+   SDHCI_DMA_ADDRESS;
sdhci_writel(host, sdhci_sdma_address(host),
-SDHCI_DMA_ADDRESS);
+reg);
}
}
 
@@ -2721,6 +2724,7 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 
intmask)
 */
if (intmask & SDHCI_INT_DMA_END) {
u32 dmastart, dmanow;
+   u32 reg;
 
dmastart = sdhci_sdma_address(host);
dmanow = dmastart + host->data->bytes_xfered;
@@ -2733,7 +2737,9 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 
intmask)
host->data->bytes_xfered = dmanow - dmastart;
DBG("DMA base 0x%08x, transferred 0x%06x bytes, next 
0x%08x\n",
dmastart, host->data->bytes_xfered, dmanow);
-   sdhci_writel(host, dmanow, SDHCI_DMA_ADDRESS);
+   reg = host->v4_mode ? SDHCI_ADMA_ADDRESS :
+   SDHCI_DMA_ADDRESS;
+   sdhci_writel(host, dmanow, reg);
}
 
if (intmask & SDHCI_INT_DATA_END) {
-- 
2.7.4



[PATCH V2 3/7] mmc: sdhci: add ADMA2 64-bit addressing support for V4 mode

2018-06-14 Thread Chunyan Zhang
ADMA2 64-bit addressing support is divided into V3 mode and V4 mode.
So there are two kinds of descriptors for ADMA2 64-bit addressing
i.e. 96-bit Descriptor for V3 mode, and 128-bit Descriptor for V4
mode. 128-bit Descriptor is aligned to 8-byte.

For V4 mode, ADMA2 64-bit addressing is enabled via Host Control 2
register.

Signed-off-by: Chunyan Zhang 
---
 drivers/mmc/host/sdhci.c | 50 +++-
 drivers/mmc/host/sdhci.h | 23 +-
 2 files changed, 55 insertions(+), 18 deletions(-)

diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index f57201f..5d3b0d8 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -585,6 +585,8 @@ static void sdhci_adma_table_pre(struct sdhci_host *host,
void *desc, *align;
char *buffer;
int len, offset, i;
+   unsigned int adma2_align = SDHCI_ADMA2_ALIGN(host);
+   unsigned int adma2_mask = SDHCI_ADMA2_MASK(host);
 
/*
 * The spec does not specify endianness of descriptor table.
@@ -608,8 +610,8 @@ static void sdhci_adma_table_pre(struct sdhci_host *host,
 * buffer for the (up to three) bytes that screw up the
 * alignment.
 */
-   offset = (SDHCI_ADMA2_ALIGN - (addr & SDHCI_ADMA2_MASK)) &
-SDHCI_ADMA2_MASK;
+   offset = (adma2_align - (addr & adma2_align)) &
+adma2_mask;
if (offset) {
if (data->flags & MMC_DATA_WRITE) {
buffer = sdhci_kmap_atomic(sg, );
@@ -623,8 +625,8 @@ static void sdhci_adma_table_pre(struct sdhci_host *host,
 
BUG_ON(offset > 65536);
 
-   align += SDHCI_ADMA2_ALIGN;
-   align_addr += SDHCI_ADMA2_ALIGN;
+   align += adma2_align;
+   align_addr += adma2_align;
 
desc += host->desc_sz;
 
@@ -668,13 +670,15 @@ static void sdhci_adma_table_post(struct sdhci_host *host,
void *align;
char *buffer;
unsigned long flags;
+   unsigned int adma2_align = SDHCI_ADMA2_ALIGN(host);
+   unsigned int adma2_mask = SDHCI_ADMA2_MASK(host);
 
if (data->flags & MMC_DATA_READ) {
bool has_unaligned = false;
 
/* Do a quick scan of the SG list for any unaligned mappings */
for_each_sg(data->sg, sg, host->sg_count, i)
-   if (sg_dma_address(sg) & SDHCI_ADMA2_MASK) {
+   if (sg_dma_address(sg) & adma2_mask) {
has_unaligned = true;
break;
}
@@ -686,15 +690,15 @@ static void sdhci_adma_table_post(struct sdhci_host *host,
align = host->align_buffer;
 
for_each_sg(data->sg, sg, host->sg_count, i) {
-   if (sg_dma_address(sg) & SDHCI_ADMA2_MASK) {
-   size = SDHCI_ADMA2_ALIGN -
-  (sg_dma_address(sg) & 
SDHCI_ADMA2_MASK);
+   if (sg_dma_address(sg) & adma2_mask) {
+   size = adma2_align -
+  (sg_dma_address(sg) & 
adma2_mask);
 
buffer = sdhci_kmap_atomic(sg, );
memcpy(buffer, align, size);
sdhci_kunmap_atomic(buffer, );
 
-   align += SDHCI_ADMA2_ALIGN;
+   align += adma2_align;
}
}
}
@@ -3400,6 +3404,26 @@ static int sdhci_allocate_bounce_buffer(struct 
sdhci_host *host)
return 0;
 }
 
+static inline bool sdhci_use_64bit_dma(struct sdhci_host *host)
+{
+   u32 addr64bit_en;
+
+   /*
+* According to SD Host Controller spec v4.10, bit[27] added from
+* version 4.10 in Capabilities Register is used as 64-bit System
+* Address support for V4 mode, 64-bit DMA Addressing for V4 mode
+* is enabled only if 64-bit Addressing =1 in the Host Control 2
+* register.
+*/
+   if (host->version == SDHCI_SPEC_410 && host->v4_mode) {
+   addr64bit_en = (sdhci_readw(host, SDHCI_HOST_CONTROL2) &
+   SDHCI_CTRL_64BIT_ADDR);
+   return addr64bit_en && (host->caps & SDHCI_CAN_64BIT_V4);
+   }
+
+   return host->caps & SDHCI_CAN_64BIT;
+}
+
 int sdhci_setup_host(struct sdhci_host *host)
 {
struct mmc_host *mmc;
@@ -3471,7 +3495,7 @@ int sdhci_setup_host(struct sdhci_host *host)
 * SDHCI_QUIRK2_BROKEN_64_BIT_DMA must be left to the drivers to
 * implement.
 */
-   if 

[PATCH V2 2/7] mmc: sdhci: made changes for System Address register of SDMA

2018-06-14 Thread Chunyan Zhang
According to the SD host controller specification version 4.10, when
Host Version 4 is enabled, SDMA uses ADMA System Address register
(05Fh-058h) instead of using SDMA System Address register to
support both 32-bit and 64-bit addressing.

Signed-off-by: Chunyan Zhang 
---
 drivers/mmc/host/sdhci.c | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index cf5695f..f57201f 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -805,6 +805,7 @@ static void sdhci_set_timeout(struct sdhci_host *host, 
struct mmc_command *cmd)
 static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_command 
*cmd)
 {
u8 ctrl;
+   u32 reg;
struct mmc_data *data = cmd->data;
 
if (sdhci_data_line_cmd(cmd))
@@ -894,8 +895,10 @@ static void sdhci_prepare_data(struct sdhci_host *host, 
struct mmc_command *cmd)
 SDHCI_ADMA_ADDRESS_HI);
} else {
WARN_ON(sg_cnt != 1);
+   reg = host->v4_mode ? SDHCI_ADMA_ADDRESS :
+   SDHCI_DMA_ADDRESS;
sdhci_writel(host, sdhci_sdma_address(host),
-SDHCI_DMA_ADDRESS);
+reg);
}
}
 
@@ -2721,6 +2724,7 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 
intmask)
 */
if (intmask & SDHCI_INT_DMA_END) {
u32 dmastart, dmanow;
+   u32 reg;
 
dmastart = sdhci_sdma_address(host);
dmanow = dmastart + host->data->bytes_xfered;
@@ -2733,7 +2737,9 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 
intmask)
host->data->bytes_xfered = dmanow - dmastart;
DBG("DMA base 0x%08x, transferred 0x%06x bytes, next 
0x%08x\n",
dmastart, host->data->bytes_xfered, dmanow);
-   sdhci_writel(host, dmanow, SDHCI_DMA_ADDRESS);
+   reg = host->v4_mode ? SDHCI_ADMA_ADDRESS :
+   SDHCI_DMA_ADDRESS;
+   sdhci_writel(host, dmanow, reg);
}
 
if (intmask & SDHCI_INT_DATA_END) {
-- 
2.7.4



[PATCH V2 3/7] mmc: sdhci: add ADMA2 64-bit addressing support for V4 mode

2018-06-14 Thread Chunyan Zhang
ADMA2 64-bit addressing support is divided into V3 mode and V4 mode.
So there are two kinds of descriptors for ADMA2 64-bit addressing
i.e. 96-bit Descriptor for V3 mode, and 128-bit Descriptor for V4
mode. 128-bit Descriptor is aligned to 8-byte.

For V4 mode, ADMA2 64-bit addressing is enabled via Host Control 2
register.

Signed-off-by: Chunyan Zhang 
---
 drivers/mmc/host/sdhci.c | 50 +++-
 drivers/mmc/host/sdhci.h | 23 +-
 2 files changed, 55 insertions(+), 18 deletions(-)

diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index f57201f..5d3b0d8 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -585,6 +585,8 @@ static void sdhci_adma_table_pre(struct sdhci_host *host,
void *desc, *align;
char *buffer;
int len, offset, i;
+   unsigned int adma2_align = SDHCI_ADMA2_ALIGN(host);
+   unsigned int adma2_mask = SDHCI_ADMA2_MASK(host);
 
/*
 * The spec does not specify endianness of descriptor table.
@@ -608,8 +610,8 @@ static void sdhci_adma_table_pre(struct sdhci_host *host,
 * buffer for the (up to three) bytes that screw up the
 * alignment.
 */
-   offset = (SDHCI_ADMA2_ALIGN - (addr & SDHCI_ADMA2_MASK)) &
-SDHCI_ADMA2_MASK;
+   offset = (adma2_align - (addr & adma2_align)) &
+adma2_mask;
if (offset) {
if (data->flags & MMC_DATA_WRITE) {
buffer = sdhci_kmap_atomic(sg, );
@@ -623,8 +625,8 @@ static void sdhci_adma_table_pre(struct sdhci_host *host,
 
BUG_ON(offset > 65536);
 
-   align += SDHCI_ADMA2_ALIGN;
-   align_addr += SDHCI_ADMA2_ALIGN;
+   align += adma2_align;
+   align_addr += adma2_align;
 
desc += host->desc_sz;
 
@@ -668,13 +670,15 @@ static void sdhci_adma_table_post(struct sdhci_host *host,
void *align;
char *buffer;
unsigned long flags;
+   unsigned int adma2_align = SDHCI_ADMA2_ALIGN(host);
+   unsigned int adma2_mask = SDHCI_ADMA2_MASK(host);
 
if (data->flags & MMC_DATA_READ) {
bool has_unaligned = false;
 
/* Do a quick scan of the SG list for any unaligned mappings */
for_each_sg(data->sg, sg, host->sg_count, i)
-   if (sg_dma_address(sg) & SDHCI_ADMA2_MASK) {
+   if (sg_dma_address(sg) & adma2_mask) {
has_unaligned = true;
break;
}
@@ -686,15 +690,15 @@ static void sdhci_adma_table_post(struct sdhci_host *host,
align = host->align_buffer;
 
for_each_sg(data->sg, sg, host->sg_count, i) {
-   if (sg_dma_address(sg) & SDHCI_ADMA2_MASK) {
-   size = SDHCI_ADMA2_ALIGN -
-  (sg_dma_address(sg) & 
SDHCI_ADMA2_MASK);
+   if (sg_dma_address(sg) & adma2_mask) {
+   size = adma2_align -
+  (sg_dma_address(sg) & 
adma2_mask);
 
buffer = sdhci_kmap_atomic(sg, );
memcpy(buffer, align, size);
sdhci_kunmap_atomic(buffer, );
 
-   align += SDHCI_ADMA2_ALIGN;
+   align += adma2_align;
}
}
}
@@ -3400,6 +3404,26 @@ static int sdhci_allocate_bounce_buffer(struct 
sdhci_host *host)
return 0;
 }
 
+static inline bool sdhci_use_64bit_dma(struct sdhci_host *host)
+{
+   u32 addr64bit_en;
+
+   /*
+* According to SD Host Controller spec v4.10, bit[27] added from
+* version 4.10 in Capabilities Register is used as 64-bit System
+* Address support for V4 mode, 64-bit DMA Addressing for V4 mode
+* is enabled only if 64-bit Addressing =1 in the Host Control 2
+* register.
+*/
+   if (host->version == SDHCI_SPEC_410 && host->v4_mode) {
+   addr64bit_en = (sdhci_readw(host, SDHCI_HOST_CONTROL2) &
+   SDHCI_CTRL_64BIT_ADDR);
+   return addr64bit_en && (host->caps & SDHCI_CAN_64BIT_V4);
+   }
+
+   return host->caps & SDHCI_CAN_64BIT;
+}
+
 int sdhci_setup_host(struct sdhci_host *host)
 {
struct mmc_host *mmc;
@@ -3471,7 +3495,7 @@ int sdhci_setup_host(struct sdhci_host *host)
 * SDHCI_QUIRK2_BROKEN_64_BIT_DMA must be left to the drivers to
 * implement.
 */
-   if 

[PATCH V2 0/7] mmc: add support for sdhci 4.0

2018-06-14 Thread Chunyan Zhang
From: Chunyan Zhang 

From the SD host controller version 4.0 on, SDHCI implementation either
is version 3 compatible or version 4 mode. This patch-set covers those
changes which are common for SDHCI 4.0 version, regardless of whether
they are used with SD or eMMC storage devices.

This patchset also added a new sdhci driver for Spreadtrum's controller
which supports v4.0 mode.

This patchset has been tested on Spreadtrum's mobile phone, emmc can be
initialized, mounted, read and written, with these changes for common
sdhci framework and sdhci-sprd driver.

Changes from v1:
* Addressed comments from Ulf:
 - Add dt-bindings for Spreadtrum sdhci;
 - Use assigned-clocks* DT bindings to set default source of sdio clock;
 - Removed useless print;
 - Removed two functions which are not used;
 - Add back the missing pm_runtime_put_autosuspend() after adding sdhci host.

* Changed Spreadtrum sdhci driver name to sdhci-sprd.

Chunyan Zhang (7):
  mmc: sdhci: add sd host v4 mode
  mmc: sdhci: made changes for System Address register of SDMA
  mmc: sdhci: add ADMA2 64-bit addressing support for V4 mode
  mmc: sdhci: add 32-bit block count support for v4 mode
  mmc: sdhci: add CMD23 support for v4 mode
  mmc: sdhci-sprd: added Spreadtrum's initial host controller
  dt-bindings: sdhci-sprd: Add bindings for the sdhci-sprd controller

 .../devicetree/bindings/mmc/sdhci-sprd.txt |  41 ++
 drivers/mmc/host/Kconfig   |  13 +
 drivers/mmc/host/Makefile  |   1 +
 drivers/mmc/host/sdhci-sprd.c  | 426 +
 drivers/mmc/host/sdhci.c   |  85 +++-
 drivers/mmc/host/sdhci.h   |  31 +-
 6 files changed, 575 insertions(+), 22 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/mmc/sdhci-sprd.txt
 create mode 100644 drivers/mmc/host/sdhci-sprd.c

-- 
2.7.4



[PATCH V2 0/7] mmc: add support for sdhci 4.0

2018-06-14 Thread Chunyan Zhang
From: Chunyan Zhang 

From the SD host controller version 4.0 on, SDHCI implementation either
is version 3 compatible or version 4 mode. This patch-set covers those
changes which are common for SDHCI 4.0 version, regardless of whether
they are used with SD or eMMC storage devices.

This patchset also added a new sdhci driver for Spreadtrum's controller
which supports v4.0 mode.

This patchset has been tested on Spreadtrum's mobile phone, emmc can be
initialized, mounted, read and written, with these changes for common
sdhci framework and sdhci-sprd driver.

Changes from v1:
* Addressed comments from Ulf:
 - Add dt-bindings for Spreadtrum sdhci;
 - Use assigned-clocks* DT bindings to set default source of sdio clock;
 - Removed useless print;
 - Removed two functions which are not used;
 - Add back the missing pm_runtime_put_autosuspend() after adding sdhci host.

* Changed Spreadtrum sdhci driver name to sdhci-sprd.

Chunyan Zhang (7):
  mmc: sdhci: add sd host v4 mode
  mmc: sdhci: made changes for System Address register of SDMA
  mmc: sdhci: add ADMA2 64-bit addressing support for V4 mode
  mmc: sdhci: add 32-bit block count support for v4 mode
  mmc: sdhci: add CMD23 support for v4 mode
  mmc: sdhci-sprd: added Spreadtrum's initial host controller
  dt-bindings: sdhci-sprd: Add bindings for the sdhci-sprd controller

 .../devicetree/bindings/mmc/sdhci-sprd.txt |  41 ++
 drivers/mmc/host/Kconfig   |  13 +
 drivers/mmc/host/Makefile  |   1 +
 drivers/mmc/host/sdhci-sprd.c  | 426 +
 drivers/mmc/host/sdhci.c   |  85 +++-
 drivers/mmc/host/sdhci.h   |  31 +-
 6 files changed, 575 insertions(+), 22 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/mmc/sdhci-sprd.txt
 create mode 100644 drivers/mmc/host/sdhci-sprd.c

-- 
2.7.4



[PATCH V2 1/7] mmc: sdhci: add sd host v4 mode

2018-06-14 Thread Chunyan Zhang
For SD host controller version 4.00 or later ones, there're two
modes of implementation - Version 3.00 compatible mode or
Version 4 mode.  This patch introduces a flag to record this.

Signed-off-by: Chunyan Zhang 
---
 drivers/mmc/host/sdhci.c | 6 ++
 drivers/mmc/host/sdhci.h | 6 ++
 2 files changed, 12 insertions(+)

diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 2ededa7f..cf5695f 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -3302,6 +3302,12 @@ void __sdhci_read_caps(struct sdhci_host *host, u16 
*ver, u32 *caps, u32 *caps1)
v = ver ? *ver : sdhci_readw(host, SDHCI_HOST_VERSION);
host->version = (v & SDHCI_SPEC_VER_MASK) >> SDHCI_SPEC_VER_SHIFT;
 
+   if (host->version >= SDHCI_SPEC_400) {
+   if (sdhci_readw(host, SDHCI_HOST_CONTROL2) &
+   SDHCI_CTRL_V4_MODE)
+   host->v4_mode = true;
+   }
+
if (host->quirks & SDHCI_QUIRK_MISSING_CAPS)
return;
 
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index c95b0a4..128b0ba 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -184,6 +184,7 @@
 #define   SDHCI_CTRL_DRV_TYPE_D0x0030
 #define  SDHCI_CTRL_EXEC_TUNING0x0040
 #define  SDHCI_CTRL_TUNED_CLK  0x0080
+#define  SDHCI_CTRL_V4_MODE0x1000
 #define  SDHCI_CTRL_PRESET_VAL_ENABLE  0x8000
 
 #define SDHCI_CAPABILITIES 0x40
@@ -270,6 +271,8 @@
 #define   SDHCI_SPEC_100   0
 #define   SDHCI_SPEC_200   1
 #define   SDHCI_SPEC_300   2
+#define   SDHCI_SPEC_400   3
+#define   SDHCI_SPEC_410   4
 
 /*
  * End of controller registers.
@@ -551,6 +554,9 @@ struct sdhci_host {
u32 sdma_boundary;
 
unsigned long private[0] cacheline_aligned;
+
+   /* Host Version 4 Enable */
+   boolv4_mode;
 };
 
 struct sdhci_ops {
-- 
2.7.4



[PATCH V2 1/7] mmc: sdhci: add sd host v4 mode

2018-06-14 Thread Chunyan Zhang
For SD host controller version 4.00 or later ones, there're two
modes of implementation - Version 3.00 compatible mode or
Version 4 mode.  This patch introduces a flag to record this.

Signed-off-by: Chunyan Zhang 
---
 drivers/mmc/host/sdhci.c | 6 ++
 drivers/mmc/host/sdhci.h | 6 ++
 2 files changed, 12 insertions(+)

diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 2ededa7f..cf5695f 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -3302,6 +3302,12 @@ void __sdhci_read_caps(struct sdhci_host *host, u16 
*ver, u32 *caps, u32 *caps1)
v = ver ? *ver : sdhci_readw(host, SDHCI_HOST_VERSION);
host->version = (v & SDHCI_SPEC_VER_MASK) >> SDHCI_SPEC_VER_SHIFT;
 
+   if (host->version >= SDHCI_SPEC_400) {
+   if (sdhci_readw(host, SDHCI_HOST_CONTROL2) &
+   SDHCI_CTRL_V4_MODE)
+   host->v4_mode = true;
+   }
+
if (host->quirks & SDHCI_QUIRK_MISSING_CAPS)
return;
 
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index c95b0a4..128b0ba 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -184,6 +184,7 @@
 #define   SDHCI_CTRL_DRV_TYPE_D0x0030
 #define  SDHCI_CTRL_EXEC_TUNING0x0040
 #define  SDHCI_CTRL_TUNED_CLK  0x0080
+#define  SDHCI_CTRL_V4_MODE0x1000
 #define  SDHCI_CTRL_PRESET_VAL_ENABLE  0x8000
 
 #define SDHCI_CAPABILITIES 0x40
@@ -270,6 +271,8 @@
 #define   SDHCI_SPEC_100   0
 #define   SDHCI_SPEC_200   1
 #define   SDHCI_SPEC_300   2
+#define   SDHCI_SPEC_400   3
+#define   SDHCI_SPEC_410   4
 
 /*
  * End of controller registers.
@@ -551,6 +554,9 @@ struct sdhci_host {
u32 sdma_boundary;
 
unsigned long private[0] cacheline_aligned;
+
+   /* Host Version 4 Enable */
+   boolv4_mode;
 };
 
 struct sdhci_ops {
-- 
2.7.4



Re: [PATCH 4.4 128/268] scsi: sd: Keep disk read-only when re-reading partition

2018-06-14 Thread Martin K. Petersen


Hi Ben,

> (The log message about Write Protect status also reports the
> underlying SCSI device flag and not the combined ro flag, but maybe
> that was intentional.)

I'd prefer for the printk in question to reflect the device-reported
state, not the state of the block device.

> I think this commit should be reverted, both in stable and upstream.
> A proper fix would involve splitting the ro flag into two flags—one
> controlled by user-space and one read from the device—with the
> effective read-only status being the logical-or of those two.

I don't have a problem with distinguishing between current state and an
override flag in the block layer. However, I think an incremental patch
to fix that up is fine. SCSI devices don't typically switch write
protection state on a whim.

-- 
Martin K. Petersen  Oracle Linux Engineering


Re: [PATCH 4.4 128/268] scsi: sd: Keep disk read-only when re-reading partition

2018-06-14 Thread Martin K. Petersen


Hi Ben,

> (The log message about Write Protect status also reports the
> underlying SCSI device flag and not the combined ro flag, but maybe
> that was intentional.)

I'd prefer for the printk in question to reflect the device-reported
state, not the state of the block device.

> I think this commit should be reverted, both in stable and upstream.
> A proper fix would involve splitting the ro flag into two flags—one
> controlled by user-space and one read from the device—with the
> effective read-only status being the logical-or of those two.

I don't have a problem with distinguishing between current state and an
override flag in the block layer. However, I think an incremental patch
to fix that up is fine. SCSI devices don't typically switch write
protection state on a whim.

-- 
Martin K. Petersen  Oracle Linux Engineering


[PATCH v3 1/3] mtd: rawnand: denali_dt: add more clocks based on IP datasheet

2018-06-14 Thread Masahiro Yamada
According to the Denali User's Guide, this IP needs three clocks:

 - clk: controller core clock

 - clk_x: bus interface clock

 - ecc_clk: clock at which ECC circuitry is run

Currently, denali_dt.c requires a single anonymous clock and its
frequency.  However, the driver needs to get the frequency of "clk_x"
not "clk".  This is confusing because people tend to assume the
anonymous clock means the core clock.  In fact, I got a report of
SOCFPGA breakage because the timing parameters are calculated based
on a wrong frequency.

Instead of the cheesy implementation, the clocks in the real hardware
should be represented in the driver and the DT-binding.

However, adding new clocks would break the existing platforms.  For the
backward compatibility, the driver still accepts a single clock just as
before.  If clk_x is missing, clk_x_rate is set to a hardcoded value.
This is fine for existing DT of Socionext UniPhier, and also fixes the
issue of Altera (Intel) SOCFPGA because both platforms use 200 MHz for
the bus interface clock.

Fixes: 1bb88666775e ("mtd: nand: denali: handle timing parameters by 
setup_data_interface()")
Cc: linux-stable  #4.14+
Reported-by: Richard Weinberger 
Signed-off-by: Masahiro Yamada 
---

Changes in v3:
  - Change the patch order so that the bug-fix one comes the first

Changes in v2:
  - Split patches into sensible chunks

 .../devicetree/bindings/mtd/denali-nand.txt|  5 +++
 drivers/mtd/nand/raw/denali_dt.c   | 49 --
 2 files changed, 50 insertions(+), 4 deletions(-)

diff --git a/Documentation/devicetree/bindings/mtd/denali-nand.txt 
b/Documentation/devicetree/bindings/mtd/denali-nand.txt
index 0ee8edb..f33da87 100644
--- a/Documentation/devicetree/bindings/mtd/denali-nand.txt
+++ b/Documentation/devicetree/bindings/mtd/denali-nand.txt
@@ -8,6 +8,9 @@ Required properties:
   - reg : should contain registers location and length for data and reg.
   - reg-names: Should contain the reg names "nand_data" and "denali_reg"
   - interrupts : The interrupt number.
+  - clocks: should contain phandle of the controller core clock, the bus
+interface clock, and the ECC circuit clock.
+  - clock-names: should contain "nand", "nand_x", "ecc"
 
 Optional properties:
   - nand-ecc-step-size: see nand.txt for details.  If present, the value must 
be
@@ -31,5 +34,7 @@ nand: nand@ff90 {
compatible = "altr,socfpga-denali-nand";
reg = <0xff90 0x20>, <0xffb8 0x1000>;
reg-names = "nand_data", "denali_reg";
+   clocks = <_clk>, <_x_clk>, <_ecc_clk>;
+   clock-names = "nand", "nand_x", "ecc";
interrupts = <0 144 4>;
 };
diff --git a/drivers/mtd/nand/raw/denali_dt.c b/drivers/mtd/nand/raw/denali_dt.c
index cfd33e6..ce6239d 100644
--- a/drivers/mtd/nand/raw/denali_dt.c
+++ b/drivers/mtd/nand/raw/denali_dt.c
@@ -27,7 +27,9 @@
 
 struct denali_dt {
struct denali_nand_info denali;
-   struct clk  *clk;
+   struct clk *clk;/* core clock */
+   struct clk *clk_x;  /* bus interface clock */
+   struct clk *clk_ecc;/* ECC circuit clock */
 };
 
 struct denali_dt_data {
@@ -114,24 +116,61 @@ static int denali_dt_probe(struct platform_device *pdev)
if (IS_ERR(denali->host))
return PTR_ERR(denali->host);
 
-   dt->clk = devm_clk_get(>dev, NULL);
+   /*
+* A single anonymous clock is supported for the backward compatibility.
+* New platforms should support all the named clocks.
+*/
+   dt->clk = devm_clk_get(>dev, "nand");
+   if (IS_ERR(dt->clk))
+   dt->clk = devm_clk_get(>dev, NULL);
if (IS_ERR(dt->clk)) {
dev_err(>dev, "no clk available\n");
return PTR_ERR(dt->clk);
}
+
+   dt->clk_x = devm_clk_get(>dev, "nand_x");
+   if (IS_ERR(dt->clk_x))
+   dt->clk_x = NULL;
+
+   dt->clk_ecc = devm_clk_get(>dev, "ecc");
+   if (IS_ERR(dt->clk_ecc))
+   dt->clk_ecc = NULL;
+
ret = clk_prepare_enable(dt->clk);
if (ret)
return ret;
 
-   denali->clk_x_rate = clk_get_rate(dt->clk);
+   ret = clk_prepare_enable(dt->clk_x);
+   if (ret)
+   goto out_disable_clk;
+
+   ret = clk_prepare_enable(dt->clk_ecc);
+   if (ret)
+   goto out_disable_clk_x;
+
+   if (dt->clk_x) {
+   denali->clk_x_rate = clk_get_rate(dt->clk_x);
+   } else {
+   /*
+* Hardcode the clock rates for the backward compatibility.
+* This works for both SOCFPGA and UniPhier.
+*/
+   dev_notice(>dev,
+  "necessary clock is missing. default clock rates are 
used.\n");
+   denali->clk_x_rate = 2;
+   }
 
ret = denali_init(denali);
if (ret)
-   goto out_disable_clk;
+   goto out_disable_clk_ecc;
 
   

[PATCH v3 0/3] mtd: rawnand: denali: add new clocks and improve setup_data_interface

2018-06-14 Thread Masahiro Yamada


The ->setup_data_interface() hook needs to know the clock frequency.
In fact, this IP needs three clocks, but "which clock?" is really
confusing.  (It is not described in the DT-binding at all.)

This series adds more clocks.  In the new binding, three clocks
are required: core clock, bus interface clock, ECC engine clock.

This series also takes care of the backward compatibility by
providing hardcoded values in case the new clocks are missing.
So, existing DT should work.


Changes in v3:
  - Change the patch order so that the bug-fix one comes the first

Changes in v2:
  - Split patches into sensible chunks
  - Split patches into sensible chunks

Masahiro Yamada (3):
  mtd: rawnand: denali_dt: add more clocks based on IP datasheet
  mtd: rawnand: denali_dt: use dev as a shorthand of &pdev->dev
  mtd: rawnand: denali: optimize timing parameters for data interface

 .../devicetree/bindings/mtd/denali-nand.txt|  5 ++
 drivers/mtd/nand/raw/denali.c  | 49 
 drivers/mtd/nand/raw/denali.h  |  1 +
 drivers/mtd/nand/raw/denali_dt.c   | 66 ++
 drivers/mtd/nand/raw/denali_pci.c  |  1 +
 5 files changed, 86 insertions(+), 36 deletions(-)

-- 
2.7.4



[PATCH v3 1/3] mtd: rawnand: denali_dt: add more clocks based on IP datasheet

2018-06-14 Thread Masahiro Yamada
According to the Denali User's Guide, this IP needs three clocks:

 - clk: controller core clock

 - clk_x: bus interface clock

 - ecc_clk: clock at which ECC circuitry is run

Currently, denali_dt.c requires a single anonymous clock and its
frequency.  However, the driver needs to get the frequency of "clk_x"
not "clk".  This is confusing because people tend to assume the
anonymous clock means the core clock.  In fact, I got a report of
SOCFPGA breakage because the timing parameters are calculated based
on a wrong frequency.

Instead of the cheesy implementation, the clocks in the real hardware
should be represented in the driver and the DT-binding.

However, adding new clocks would break the existing platforms.  For the
backward compatibility, the driver still accepts a single clock just as
before.  If clk_x is missing, clk_x_rate is set to a hardcoded value.
This is fine for existing DT of Socionext UniPhier, and also fixes the
issue of Altera (Intel) SOCFPGA because both platforms use 200 MHz for
the bus interface clock.

Fixes: 1bb88666775e ("mtd: nand: denali: handle timing parameters by 
setup_data_interface()")
Cc: linux-stable  #4.14+
Reported-by: Richard Weinberger 
Signed-off-by: Masahiro Yamada 
---

Changes in v3:
  - Change the patch order so that the bug-fix one comes the first

Changes in v2:
  - Split patches into sensible chunks

 .../devicetree/bindings/mtd/denali-nand.txt|  5 +++
 drivers/mtd/nand/raw/denali_dt.c   | 49 --
 2 files changed, 50 insertions(+), 4 deletions(-)

diff --git a/Documentation/devicetree/bindings/mtd/denali-nand.txt 
b/Documentation/devicetree/bindings/mtd/denali-nand.txt
index 0ee8edb..f33da87 100644
--- a/Documentation/devicetree/bindings/mtd/denali-nand.txt
+++ b/Documentation/devicetree/bindings/mtd/denali-nand.txt
@@ -8,6 +8,9 @@ Required properties:
   - reg : should contain registers location and length for data and reg.
   - reg-names: Should contain the reg names "nand_data" and "denali_reg"
   - interrupts : The interrupt number.
+  - clocks: should contain phandle of the controller core clock, the bus
+interface clock, and the ECC circuit clock.
+  - clock-names: should contain "nand", "nand_x", "ecc"
 
 Optional properties:
   - nand-ecc-step-size: see nand.txt for details.  If present, the value must 
be
@@ -31,5 +34,7 @@ nand: nand@ff90 {
compatible = "altr,socfpga-denali-nand";
reg = <0xff90 0x20>, <0xffb8 0x1000>;
reg-names = "nand_data", "denali_reg";
+   clocks = <_clk>, <_x_clk>, <_ecc_clk>;
+   clock-names = "nand", "nand_x", "ecc";
interrupts = <0 144 4>;
 };
diff --git a/drivers/mtd/nand/raw/denali_dt.c b/drivers/mtd/nand/raw/denali_dt.c
index cfd33e6..ce6239d 100644
--- a/drivers/mtd/nand/raw/denali_dt.c
+++ b/drivers/mtd/nand/raw/denali_dt.c
@@ -27,7 +27,9 @@
 
 struct denali_dt {
struct denali_nand_info denali;
-   struct clk  *clk;
+   struct clk *clk;/* core clock */
+   struct clk *clk_x;  /* bus interface clock */
+   struct clk *clk_ecc;/* ECC circuit clock */
 };
 
 struct denali_dt_data {
@@ -114,24 +116,61 @@ static int denali_dt_probe(struct platform_device *pdev)
if (IS_ERR(denali->host))
return PTR_ERR(denali->host);
 
-   dt->clk = devm_clk_get(>dev, NULL);
+   /*
+* A single anonymous clock is supported for the backward compatibility.
+* New platforms should support all the named clocks.
+*/
+   dt->clk = devm_clk_get(>dev, "nand");
+   if (IS_ERR(dt->clk))
+   dt->clk = devm_clk_get(>dev, NULL);
if (IS_ERR(dt->clk)) {
dev_err(>dev, "no clk available\n");
return PTR_ERR(dt->clk);
}
+
+   dt->clk_x = devm_clk_get(>dev, "nand_x");
+   if (IS_ERR(dt->clk_x))
+   dt->clk_x = NULL;
+
+   dt->clk_ecc = devm_clk_get(>dev, "ecc");
+   if (IS_ERR(dt->clk_ecc))
+   dt->clk_ecc = NULL;
+
ret = clk_prepare_enable(dt->clk);
if (ret)
return ret;
 
-   denali->clk_x_rate = clk_get_rate(dt->clk);
+   ret = clk_prepare_enable(dt->clk_x);
+   if (ret)
+   goto out_disable_clk;
+
+   ret = clk_prepare_enable(dt->clk_ecc);
+   if (ret)
+   goto out_disable_clk_x;
+
+   if (dt->clk_x) {
+   denali->clk_x_rate = clk_get_rate(dt->clk_x);
+   } else {
+   /*
+* Hardcode the clock rates for the backward compatibility.
+* This works for both SOCFPGA and UniPhier.
+*/
+   dev_notice(>dev,
+  "necessary clock is missing. default clock rates are 
used.\n");
+   denali->clk_x_rate = 2;
+   }
 
ret = denali_init(denali);
if (ret)
-   goto out_disable_clk;
+   goto out_disable_clk_ecc;
 
   

[PATCH v3 0/3] mtd: rawnand: denali: add new clocks and improve setup_data_interface

2018-06-14 Thread Masahiro Yamada


The ->setup_data_interface() hook needs to know the clock frequency.
In fact, this IP needs three clocks, but "which clock?" is really
confusing.  (It is not described in the DT-binding at all.)

This series adds more clocks.  In the new binding, three clocks
are required: core clock, bus interface clock, ECC engine clock.

This series also takes care of the backward compatibility by
providing hardcoded values in case the new clocks are missing.
So, existing DT should work.


Changes in v3:
  - Change the patch order so that the bug-fix one comes the first

Changes in v2:
  - Split patches into sensible chunks

Masahiro Yamada (3):
  mtd: rawnand: denali_dt: add more clocks based on IP datasheet
  mtd: rawnand: denali_dt: use dev as a shorthand of &pdev->dev
  mtd: rawnand: denali: optimize timing parameters for data interface

 .../devicetree/bindings/mtd/denali-nand.txt|  5 ++
 drivers/mtd/nand/raw/denali.c  | 49 
 drivers/mtd/nand/raw/denali.h  |  1 +
 drivers/mtd/nand/raw/denali_dt.c   | 66 ++
 drivers/mtd/nand/raw/denali_pci.c  |  1 +
 5 files changed, 86 insertions(+), 36 deletions(-)

-- 
2.7.4



[PATCH v3 3/3] mtd: rawnand: denali: optimize timing parameters for data interface

2018-06-14 Thread Masahiro Yamada
This commit improves the ->setup_data_interface() hook.

The denali_setup_data_interface() needs the frequency of clk_x
and the ratio of clk_x / clk.

The latter is currently hardcoded in the driver, like this:

  #define DENALI_CLK_X_MULT   6

The IP datasheet requires that clk_x / clk be 4, 5, or 6.  I just
chose 6 because it is the most defensive value, but it is not optimal.
By getting the clock rate of both "clk" and "clk_x", the driver can
compute the timing values more precisely.

To not break the existing platforms, the fallback value, 50 MHz, is
provided.  It is true for all upstreamed platforms.

Signed-off-by: Masahiro Yamada 
---

Changes in v3: None
Changes in v2:
  - Split patches into sensible chunks

 drivers/mtd/nand/raw/denali.c | 49 +++
 drivers/mtd/nand/raw/denali.h |  1 +
 drivers/mtd/nand/raw/denali_dt.c  |  2 ++
 drivers/mtd/nand/raw/denali_pci.c |  1 +
 4 files changed, 28 insertions(+), 25 deletions(-)

diff --git a/drivers/mtd/nand/raw/denali.c b/drivers/mtd/nand/raw/denali.c
index 2a302a1..2de46d4 100644
--- a/drivers/mtd/nand/raw/denali.c
+++ b/drivers/mtd/nand/raw/denali.c
@@ -51,14 +51,6 @@ MODULE_LICENSE("GPL");
 #define DENALI_INVALID_BANK-1
 #define DENALI_NR_BANKS4
 
-/*
- * The bus interface clock, clk_x, is phase aligned with the core clock.  The
- * clk_x is an integral multiple N of the core clk.  The value N is configured
- * at IP delivery time, and its available value is 4, 5, or 6.  We need to 
align
- * to the largest value to make it work with any possible configuration.
- */
-#define DENALI_CLK_X_MULT  6
-
 static inline struct denali_nand_info *mtd_to_denali(struct mtd_info *mtd)
 {
return container_of(mtd_to_nand(mtd), struct denali_nand_info, nand);
@@ -954,7 +946,7 @@ static int denali_setup_data_interface(struct mtd_info 
*mtd, int chipnr,
 {
struct denali_nand_info *denali = mtd_to_denali(mtd);
const struct nand_sdr_timings *timings;
-   unsigned long t_clk;
+   unsigned long t_x, mult_x;
int acc_clks, re_2_we, re_2_re, we_2_re, addr_2_data;
int rdwr_en_lo, rdwr_en_hi, rdwr_en_lo_hi, cs_setup;
int addr_2_data_mask;
@@ -965,15 +957,24 @@ static int denali_setup_data_interface(struct mtd_info 
*mtd, int chipnr,
return PTR_ERR(timings);
 
/* clk_x period in picoseconds */
-   t_clk = DIV_ROUND_DOWN_ULL(1ULL, denali->clk_x_rate);
-   if (!t_clk)
+   t_x = DIV_ROUND_DOWN_ULL(1ULL, denali->clk_x_rate);
+   if (!t_x)
+   return -EINVAL;
+
+   /*
+* The bus interface clock, clk_x, is phase aligned with the core clock.
+* The clk_x is an integral multiple N of the core clk.  The value N is
+* configured at IP delivery time, and its available value is 4, 5, 6.
+*/
+   mult_x = DIV_ROUND_CLOSEST_ULL(denali->clk_x_rate, denali->clk_rate);
+   if (mult_x < 4 || mult_x > 6)
return -EINVAL;
 
if (chipnr == NAND_DATA_IFACE_CHECK_ONLY)
return 0;
 
/* tREA -> ACC_CLKS */
-   acc_clks = DIV_ROUND_UP(timings->tREA_max, t_clk);
+   acc_clks = DIV_ROUND_UP(timings->tREA_max, t_x);
acc_clks = min_t(int, acc_clks, ACC_CLKS__VALUE);
 
tmp = ioread32(denali->reg + ACC_CLKS);
@@ -982,7 +983,7 @@ static int denali_setup_data_interface(struct mtd_info 
*mtd, int chipnr,
iowrite32(tmp, denali->reg + ACC_CLKS);
 
/* tRWH -> RE_2_WE */
-   re_2_we = DIV_ROUND_UP(timings->tRHW_min, t_clk);
+   re_2_we = DIV_ROUND_UP(timings->tRHW_min, t_x);
re_2_we = min_t(int, re_2_we, RE_2_WE__VALUE);
 
tmp = ioread32(denali->reg + RE_2_WE);
@@ -991,7 +992,7 @@ static int denali_setup_data_interface(struct mtd_info 
*mtd, int chipnr,
iowrite32(tmp, denali->reg + RE_2_WE);
 
/* tRHZ -> RE_2_RE */
-   re_2_re = DIV_ROUND_UP(timings->tRHZ_max, t_clk);
+   re_2_re = DIV_ROUND_UP(timings->tRHZ_max, t_x);
re_2_re = min_t(int, re_2_re, RE_2_RE__VALUE);
 
tmp = ioread32(denali->reg + RE_2_RE);
@@ -1005,8 +1006,7 @@ static int denali_setup_data_interface(struct mtd_info 
*mtd, int chipnr,
 * With WE_2_RE properly set, the Denali controller automatically takes
 * care of the delay; the driver need not set NAND_WAIT_TCCS.
 */
-   we_2_re = DIV_ROUND_UP(max(timings->tCCS_min, timings->tWHR_min),
-  t_clk);
+   we_2_re = DIV_ROUND_UP(max(timings->tCCS_min, timings->tWHR_min), t_x);
we_2_re = min_t(int, we_2_re, TWHR2_AND_WE_2_RE__WE_2_RE);
 
tmp = ioread32(denali->reg + TWHR2_AND_WE_2_RE);
@@ -1021,7 +1021,7 @@ static int denali_setup_data_interface(struct mtd_info 
*mtd, int chipnr,
if (denali->revision < 0x0501)
addr_2_data_mask >>= 1;
 
-   addr_2_data = DIV_ROUND_UP(timings->tADL_min, t_clk);
+   addr_2_data = 

[PATCH v3 3/3] mtd: rawnand: denali: optimize timing parameters for data interface

2018-06-14 Thread Masahiro Yamada
This commit improves the ->setup_data_interface() hook.

The denali_setup_data_interface() needs the frequency of clk_x
and the ratio of clk_x / clk.

The latter is currently hardcoded in the driver, like this:

  #define DENALI_CLK_X_MULT   6

The IP datasheet requires that clk_x / clk be 4, 5, or 6.  I just
chose 6 because it is the most defensive value, but it is not optimal.
By getting the clock rate of both "clk" and "clk_x", the driver can
compute the timing values more precisely.

To not break the existing platforms, the fallback value, 50 MHz, is
provided.  It is true for all upstreamed platforms.

Signed-off-by: Masahiro Yamada 
---

Changes in v3: None
Changes in v2:
  - Split patches into sensible chunks

 drivers/mtd/nand/raw/denali.c | 49 +++
 drivers/mtd/nand/raw/denali.h |  1 +
 drivers/mtd/nand/raw/denali_dt.c  |  2 ++
 drivers/mtd/nand/raw/denali_pci.c |  1 +
 4 files changed, 28 insertions(+), 25 deletions(-)

diff --git a/drivers/mtd/nand/raw/denali.c b/drivers/mtd/nand/raw/denali.c
index 2a302a1..2de46d4 100644
--- a/drivers/mtd/nand/raw/denali.c
+++ b/drivers/mtd/nand/raw/denali.c
@@ -51,14 +51,6 @@ MODULE_LICENSE("GPL");
 #define DENALI_INVALID_BANK-1
 #define DENALI_NR_BANKS4
 
-/*
- * The bus interface clock, clk_x, is phase aligned with the core clock.  The
- * clk_x is an integral multiple N of the core clk.  The value N is configured
- * at IP delivery time, and its available value is 4, 5, or 6.  We need to 
align
- * to the largest value to make it work with any possible configuration.
- */
-#define DENALI_CLK_X_MULT  6
-
 static inline struct denali_nand_info *mtd_to_denali(struct mtd_info *mtd)
 {
return container_of(mtd_to_nand(mtd), struct denali_nand_info, nand);
@@ -954,7 +946,7 @@ static int denali_setup_data_interface(struct mtd_info 
*mtd, int chipnr,
 {
struct denali_nand_info *denali = mtd_to_denali(mtd);
const struct nand_sdr_timings *timings;
-   unsigned long t_clk;
+   unsigned long t_x, mult_x;
int acc_clks, re_2_we, re_2_re, we_2_re, addr_2_data;
int rdwr_en_lo, rdwr_en_hi, rdwr_en_lo_hi, cs_setup;
int addr_2_data_mask;
@@ -965,15 +957,24 @@ static int denali_setup_data_interface(struct mtd_info 
*mtd, int chipnr,
return PTR_ERR(timings);
 
/* clk_x period in picoseconds */
-   t_clk = DIV_ROUND_DOWN_ULL(1ULL, denali->clk_x_rate);
-   if (!t_clk)
+   t_x = DIV_ROUND_DOWN_ULL(1ULL, denali->clk_x_rate);
+   if (!t_x)
+   return -EINVAL;
+
+   /*
+* The bus interface clock, clk_x, is phase aligned with the core clock.
+* The clk_x is an integral multiple N of the core clk.  The value N is
+* configured at IP delivery time, and its available value is 4, 5, 6.
+*/
+   mult_x = DIV_ROUND_CLOSEST_ULL(denali->clk_x_rate, denali->clk_rate);
+   if (mult_x < 4 || mult_x > 6)
return -EINVAL;
 
if (chipnr == NAND_DATA_IFACE_CHECK_ONLY)
return 0;
 
/* tREA -> ACC_CLKS */
-   acc_clks = DIV_ROUND_UP(timings->tREA_max, t_clk);
+   acc_clks = DIV_ROUND_UP(timings->tREA_max, t_x);
acc_clks = min_t(int, acc_clks, ACC_CLKS__VALUE);
 
tmp = ioread32(denali->reg + ACC_CLKS);
@@ -982,7 +983,7 @@ static int denali_setup_data_interface(struct mtd_info 
*mtd, int chipnr,
iowrite32(tmp, denali->reg + ACC_CLKS);
 
/* tRWH -> RE_2_WE */
-   re_2_we = DIV_ROUND_UP(timings->tRHW_min, t_clk);
+   re_2_we = DIV_ROUND_UP(timings->tRHW_min, t_x);
re_2_we = min_t(int, re_2_we, RE_2_WE__VALUE);
 
tmp = ioread32(denali->reg + RE_2_WE);
@@ -991,7 +992,7 @@ static int denali_setup_data_interface(struct mtd_info 
*mtd, int chipnr,
iowrite32(tmp, denali->reg + RE_2_WE);
 
/* tRHZ -> RE_2_RE */
-   re_2_re = DIV_ROUND_UP(timings->tRHZ_max, t_clk);
+   re_2_re = DIV_ROUND_UP(timings->tRHZ_max, t_x);
re_2_re = min_t(int, re_2_re, RE_2_RE__VALUE);
 
tmp = ioread32(denali->reg + RE_2_RE);
@@ -1005,8 +1006,7 @@ static int denali_setup_data_interface(struct mtd_info 
*mtd, int chipnr,
 * With WE_2_RE properly set, the Denali controller automatically takes
 * care of the delay; the driver need not set NAND_WAIT_TCCS.
 */
-   we_2_re = DIV_ROUND_UP(max(timings->tCCS_min, timings->tWHR_min),
-  t_clk);
+   we_2_re = DIV_ROUND_UP(max(timings->tCCS_min, timings->tWHR_min), t_x);
we_2_re = min_t(int, we_2_re, TWHR2_AND_WE_2_RE__WE_2_RE);
 
tmp = ioread32(denali->reg + TWHR2_AND_WE_2_RE);
@@ -1021,7 +1021,7 @@ static int denali_setup_data_interface(struct mtd_info 
*mtd, int chipnr,
if (denali->revision < 0x0501)
addr_2_data_mask >>= 1;
 
-   addr_2_data = DIV_ROUND_UP(timings->tADL_min, t_clk);
+   addr_2_data = 

  1   2   3   4   5   6   7   8   9   10   >