[PATCH v2 3/3] mm/memory_hotplug: fix unpaired mem_hotplug_begin/done

2020-07-06 Thread Jia He
When check_memblock_offlined_cb() returns a failure rc (e.g. the memblock is
online at that time), mem_hotplug_begin/done is left unpaired in that case.

This results in the following warning:
 Call Trace:
  percpu_up_write+0x33/0x40
  try_remove_memory+0x66/0x120
  ? _cond_resched+0x19/0x30
  remove_memory+0x2b/0x40
  dev_dax_kmem_remove+0x36/0x72 [kmem]
  device_release_driver_internal+0xf0/0x1c0
  device_release_driver+0x12/0x20
  bus_remove_device+0xe1/0x150
  device_del+0x17b/0x3e0
  unregister_dev_dax+0x29/0x60
  devm_action_release+0x15/0x20
  release_nodes+0x19a/0x1e0
  devres_release_all+0x3f/0x50
  device_release_driver_internal+0x100/0x1c0
  driver_detach+0x4c/0x8f
  bus_remove_driver+0x5c/0xd0
  driver_unregister+0x31/0x50
  dax_pmem_exit+0x10/0xfe0 [dax_pmem]

Fixes: f1037ec0cc8a ("mm/memory_hotplug: fix remove_memory() lockdep splat")
Cc: sta...@vger.kernel.org # v5.6+
Signed-off-by: Jia He 
---
 mm/memory_hotplug.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index da374cd3d45b..76c75a599da3 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1742,7 +1742,7 @@ static int __ref try_remove_memory(int nid, u64 start, 
u64 size)
 */
rc = walk_memory_blocks(start, size, NULL, check_memblock_offlined_cb);
if (rc)
-   goto done;
+   return rc;
 
/* remove memmap entry */
firmware_map_remove(start, start + size, "System RAM");
@@ -1766,9 +1766,8 @@ static int __ref try_remove_memory(int nid, u64 start, 
u64 size)
 
try_offline_node(nid);
 
-done:
mem_hotplug_done();
-   return rc;
+   return 0;
 }
 
 /**
-- 
2.17.1



Re: [PATCH v3 0/6] powerpc: queued spinlocks and rwlocks

2020-07-06 Thread Nicholas Piggin
Excerpts from Waiman Long's message of July 7, 2020 4:39 am:
> On 7/6/20 12:35 AM, Nicholas Piggin wrote:
>> v3 is updated to use __pv_queued_spin_unlock, noticed by Waiman (thank you).
>>
>> Thanks,
>> Nick
>>
>> Nicholas Piggin (6):
>>powerpc/powernv: must include hvcall.h to get PAPR defines
>>powerpc/pseries: move some PAPR paravirt functions to their own file
>>powerpc: move spinlock implementation to simple_spinlock
>>powerpc/64s: implement queued spinlocks and rwlocks
>>powerpc/pseries: implement paravirt qspinlocks for SPLPAR
>>powerpc/qspinlock: optimised atomic_try_cmpxchg_lock that adds the
>>  lock hint
>>
>>   arch/powerpc/Kconfig  |  13 +
>>   arch/powerpc/include/asm/Kbuild   |   2 +
>>   arch/powerpc/include/asm/atomic.h |  28 ++
>>   arch/powerpc/include/asm/paravirt.h   |  89 +
>>   arch/powerpc/include/asm/qspinlock.h  |  91 ++
>>   arch/powerpc/include/asm/qspinlock_paravirt.h |   7 +
>>   arch/powerpc/include/asm/simple_spinlock.h| 292 +
>>   .../include/asm/simple_spinlock_types.h   |  21 ++
>>   arch/powerpc/include/asm/spinlock.h   | 308 +-
>>   arch/powerpc/include/asm/spinlock_types.h |  17 +-
>>   arch/powerpc/lib/Makefile |   3 +
>>   arch/powerpc/lib/locks.c  |  12 +-
>>   arch/powerpc/platforms/powernv/pci-ioda-tce.c |   1 +
>>   arch/powerpc/platforms/pseries/Kconfig|   5 +
>>   arch/powerpc/platforms/pseries/setup.c|   6 +-
>>   include/asm-generic/qspinlock.h   |   4 +
>>   16 files changed, 577 insertions(+), 322 deletions(-)
>>   create mode 100644 arch/powerpc/include/asm/paravirt.h
>>   create mode 100644 arch/powerpc/include/asm/qspinlock.h
>>   create mode 100644 arch/powerpc/include/asm/qspinlock_paravirt.h
>>   create mode 100644 arch/powerpc/include/asm/simple_spinlock.h
>>   create mode 100644 arch/powerpc/include/asm/simple_spinlock_types.h
>>
> This patch looks OK to me.

Thanks for reviewing and testing.

> I had run some microbenchmark on powerpc system with or w/o the patch.
> 
> On a 2-socket 160-thread SMT4 POWER9 system (not virtualized):
> 
> 5.8.0-rc4
> =
> 
> Running locktest with spinlock [runtime = 10s, load = 1]
> Threads = 160, Min/Mean/Max = 77,665/90,153/106,895
> Threads = 160, Total Rate = 1,441,759 op/s; Percpu Rate = 9,011 op/s
> 
> Running locktest with rwlock [runtime = 10s, r% = 50%, load = 1]
> Threads = 160, Min/Mean/Max = 47,879/53,807/63,689
> Threads = 160, Total Rate = 860,192 op/s; Percpu Rate = 5,376 op/s
> 
> Running locktest with spinlock [runtime = 10s, load = 1]
> Threads = 80, Min/Mean/Max = 242,907/319,514/463,161
> Threads = 80, Total Rate = 2,555 kop/s; Percpu Rate = 32 kop/s
> 
> Running locktest with rwlock [runtime = 10s, r% = 50%, load = 1]
> Threads = 80, Min/Mean/Max = 146,161/187,474/259,270
> Threads = 80, Total Rate = 1,498 kop/s; Percpu Rate = 19 kop/s
> 
> Running locktest with spinlock [runtime = 10s, load = 1]
> Threads = 40, Min/Mean/Max = 646,639/1,000,817/1,455,205
> Threads = 40, Total Rate = 4,001 kop/s; Percpu Rate = 100 kop/s
> 
> Running locktest with rwlock [runtime = 10s, r% = 50%, load = 1]
> Threads = 40, Min/Mean/Max = 402,165/597,132/814,555
> Threads = 40, Total Rate = 2,388 kop/s; Percpu Rate = 60 kop/s
> 
> 5.8.0-rc4-qlock+
> 
> 
> Running locktest with spinlock [runtime = 10s, load = 1]
> Threads = 160, Min/Mean/Max = 123,835/124,580/124,587
> Threads = 160, Total Rate = 1,992 kop/s; Percpu Rate = 12 kop/s
> 
> Running locktest with rwlock [runtime = 10s, r% = 50%, load = 1]
> Threads = 160, Min/Mean/Max = 254,210/264,714/276,784
> Threads = 160, Total Rate = 4,231 kop/s; Percpu Rate = 26 kop/s
> 
> Running locktest with spinlock [runtime = 10s, load = 1]
> Threads = 80, Min/Mean/Max = 599,715/603,397/603,450
> Threads = 80, Total Rate = 4,825 kop/s; Percpu Rate = 60 kop/s
> 
> Running locktest with rwlock [runtime = 10s, r% = 50%, load = 1]
> Threads = 80, Min/Mean/Max = 492,687/525,224/567,456
> Threads = 80, Total Rate = 4,199 kop/s; Percpu Rate = 52 kop/s
> 
> Running locktest with spinlock [runtime = 10s, load = 1]
> Threads = 40, Min/Mean/Max = 1,325,623/1,325,628/1,325,636
> Threads = 40, Total Rate = 5,299 kop/s; Percpu Rate = 132 kop/s
> 
> Running locktest with rwlock [runtime = 10s, r% = 50%, load = 1]
> Threads = 40, Min/Mean/Max = 1,249,731/1,292,977/1,342,815
> Threads = 40, Total Rate = 5,168 kop/s; Percpu Rate = 129 kop/s
> 
> On systems with a large number of CPUs, qspinlock is faster and more fair.
> 
> With some tuning, we may be able to squeeze out more performance.

Yes, powerpc could certainly get more performance out of the slow
paths, and then there are a few parameters to tune.

We don't have a good alternate patching for function calls yet, but
that would be something to do for native vs pv.

And then there seem to be one or 

Re: [Ksummit-discuss] [PATCH] CodingStyle: Inclusive Terminology

2020-07-06 Thread Kees Cook
On Mon, Jul 06, 2020 at 09:29:46AM -0700, Andy Lutomirski wrote:
> In most contexts where 'whitelist' or 'blacklist' might be used, a
> descriptive phrase could be used instead.  For example, a seccomp
> filter could have a 'list of allowed syscalls' or a 'list of
> disallowed syscalls', and just lists could be the 'allowed' or
> 'accepted' lists and the 'disallowed', 'rejected', or 'blocked' lists.
> If a single word replacement for 'whitelist' or 'blacklist' is needed,
> 'allowlist', 'blocklist', or 'denylist' could be used.

Yup. See:
https://lore.kernel.org/lkml/202007041703.51F4059CA@keescook/
specifically the terminology for seccomp is already "allow-list" and
"deny-list":
https://github.com/mkerrisk/man-pages/commit/462ce23d491904a0b46252dc97c8cb42391c093e
 (last year)
https://github.com/seccomp/libseccomp/commit/0e762521d604612bb4dca8867d4a428a5e6cae54
 (last month)

> Second, I realize that I grew up thinking that 'whitelist' and
> 'blacklist' are the common terms for lists of things to be accepted
> and rejected and that this biases my perception of what sounds good,
> but writing a seccomp "denylist" or "blocklist" doesn't seem to roll
> off the tongue.  Perhaps this language would be better:

I have struggled with this as well. The parts of speech change, and my
grammar senses go weird. whitelist = adjective noun. allow-list = verb
noun. verbing the adj/noun combo feels okay, but verbing a verb/noun is
weird.

And just using "allowed" and "denied" doesn't impart whether it refers
to a _single_ instance or a _list_ of instances.

But that's all fine. The change is easy to do and is more descriptive
even if I can't find terms that don't collide with my internal grammar
checker. ;)

-- 
Kees Cook


Re: [PATCH] powerpc/pseries: detect secure and trusted boot state of the system.

2020-07-06 Thread Michael Ellerman
Nayna Jain  writes:
> The device-tree property to check secure and trusted boot state is
> different for guests(pseries) compared to baremetal(powernv).
>
> This patch updates the existing is_ppc_secureboot_enabled() and
> is_ppc_trustedboot_enabled() function to add support for pseries.
>
> Signed-off-by: Nayna Jain 
> ---
>  arch/powerpc/kernel/secure_boot.c | 31 +--
>  1 file changed, 25 insertions(+), 6 deletions(-)
>
> diff --git a/arch/powerpc/kernel/secure_boot.c 
> b/arch/powerpc/kernel/secure_boot.c
> index 4b982324d368..43fc6607c7a5 100644
> --- a/arch/powerpc/kernel/secure_boot.c
> +++ b/arch/powerpc/kernel/secure_boot.c
> @@ -6,6 +6,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  static struct device_node *get_ppc_fw_sb_node(void)
>  {
> @@ -23,11 +24,20 @@ bool is_ppc_secureboot_enabled(void)
>  {
>   struct device_node *node;
>   bool enabled = false;
> + const u32 *secureboot;
>  
> - node = get_ppc_fw_sb_node();
> - enabled = of_property_read_bool(node, "os-secureboot-enforcing");
> + if (machine_is(powernv)) {
> + node = get_ppc_fw_sb_node();
> + enabled =
> + of_property_read_bool(node, "os-secureboot-enforcing");
> + of_node_put(node);
> + }

We generally try to avoid adding new machine_is() checks if we can.

In a case like this I think you can just check for both properties
regardless of what platform you're on.
  
> - of_node_put(node);
> + if (machine_is(pseries)) {
> + secureboot = of_get_property(of_root, "ibm,secure-boot", NULL);
> + if (secureboot)
> + enabled = (*secureboot > 1) ? true : false;
> + }

Please don't use of_get_property() in new code. Use one of the properly
typed accessors that handles endian conversion for you.

cheers


[PATCH] [v2] PCI: qcom: Fix runtime PM imbalance on error

2020-07-06 Thread Dinghao Liu
pm_runtime_get_sync() increments the runtime PM usage counter even
when it returns an error code. Thus a pairing decrement is needed on
the error handling path to keep the counter balanced.

Signed-off-by: Dinghao Liu 
---

Changelog:

v2: - Remove redundant brackets.
---
 drivers/pci/controller/dwc/pcie-qcom.c | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/pci/controller/dwc/pcie-qcom.c 
b/drivers/pci/controller/dwc/pcie-qcom.c
index 138e1a2d21cc..12abdfbff5ca 100644
--- a/drivers/pci/controller/dwc/pcie-qcom.c
+++ b/drivers/pci/controller/dwc/pcie-qcom.c
@@ -1339,10 +1339,8 @@ static int qcom_pcie_probe(struct platform_device *pdev)
 
pm_runtime_enable(dev);
ret = pm_runtime_get_sync(dev);
-   if (ret < 0) {
-   pm_runtime_disable(dev);
-   return ret;
-   }
+   if (ret < 0)
+   goto err_pm_runtime_put;
 
pci->dev = dev;
pci->ops = _pcie_ops;
-- 
2.17.1



Re: [Proposal] drm: amd: Convert logging to drm_* functions with drm_device parameter

2020-07-06 Thread Daniel Vetter
On Mon, Jul 06, 2020 at 04:21:38PM +0530, Suraj Upadhyay wrote:
> Hi Maintainers,
>   I recently came across this list of janitorial tasks
> for starters on DRM subsystem [1]. One of the tasks is replacing
> conventional dmesg macros (like dev_info(), dev_warn() and dev_err())
> with DRM dmesg macros [2]. And I need your input whether the
> conversions to DRM dmesg macros are worth it or not.
> I would like to start working on this task if this needs the change.

For any core code I'm happy to merge such patches. If you're changing a
specific driver (all the subdirectories under drivers/gpu/drm/*) then
please ping the specific driver maintainer first. They should be all
listed in the MAINTAINERS file.

Cheers, Daniel

> 
> Thank you,
> Suraj Upadhyay.
> 
> [1] https://dri.freedesktop.org/docs/drm/gpu/todo.html.
> [2] 
> https://dri.freedesktop.org/docs/drm/gpu/todo.html#convert-logging-to-drm-functions-with-drm-device-paramater
> 



-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


Re: mmotm 2020-07-06-18-53 uploaded (sound/soc/amd/renoir/rn-pci-acp3x.c:)

2020-07-06 Thread Randy Dunlap
On 7/6/20 6:53 PM, Andrew Morton wrote:
> The mm-of-the-moment snapshot 2020-07-06-18-53 has been uploaded to
> 
>http://www.ozlabs.org/~akpm/mmotm/
> 
> mmotm-readme.txt says
> 
> README for mm-of-the-moment:
> 
> http://www.ozlabs.org/~akpm/mmotm/
> 
> This is a snapshot of my -mm patch queue.  Uploaded at random hopefully
> more than once a week.
> 
> You will need quilt to apply these patches to the latest Linus release (5.x
> or 5.x-rcY).  The series file is in broken-out.tar.gz and is duplicated in
> http://ozlabs.org/~akpm/mmotm/series
> 

on i386:

when CONFIG_ACPI is not set/enabled:

../sound/soc/amd/renoir/rn-pci-acp3x.c: In function ‘snd_rn_acp_probe’:
../sound/soc/amd/renoir/rn-pci-acp3x.c:222:9: error: implicit declaration of 
function ‘acpi_evaluate_integer’; did you mean ‘acpi_evaluate_object’? 
[-Werror=implicit-function-declaration]
   ret = acpi_evaluate_integer(handle, "_WOV", NULL, _status);
 ^
 acpi_evaluate_object



-- 
~Randy
Reported-by: Randy Dunlap 


Re: [mm] 4e2c82a409: ltp.overcommit_memory01.fail

2020-07-06 Thread Feng Tang
On Tue, Jul 07, 2020 at 12:00:09PM +0800, Huang, Ying wrote:
> Feng Tang  writes:
> 
> > On Mon, Jul 06, 2020 at 06:34:34AM -0700, Andi Kleen wrote:
> >> >  ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
> >> > -if (ret == 0 && write)
> >> > +if (ret == 0 && write) {
> >> > +if (sysctl_overcommit_memory == OVERCOMMIT_NEVER)
> >> > +schedule_on_each_cpu(sync_overcommit_as);
> >> 
> >> The schedule_on_each_cpu is not atomic, so the problem could still happen
> >> in that window.
> >> 
> >> I think it may be ok if it eventually resolves, but certainly needs
> >> a comment explaining it. Can you do some stress testing toggling the
> >> policy all the time on different CPUs and running the test on
> >> other CPUs and see if the test fails?
> >
> > For the raw test case reported by 0day, this patch passed in 200
> > runs. And I will read the ltp code and try stress testing it as you
> > suggested.
> >
> >
> >> The other alternative would be to define some intermediate state
> >> for the sysctl variable and only switch to never once the 
> >> schedule_on_each_cpu
> >> returned. But that's more complexity.
> >
> > One thought I had is to put this schedule_on_each_cpu() before
> > the proc_dointvec_minmax() to do the sync before sysctl_overcommit_memory
> > is really changed. But the window still exists, as the batch is
> > still the larger one. 
> 
> Can we change the batch firstly, then sync the global counter, finally
> change the overcommit policy?

These reorderings are really head scratching :)

I've thought about this before when Qian Cai first reported the warning
message, as kernel had a check: 

VM_WARN_ONCE(percpu_counter_read(_committed_as) <
-(s64)vm_committed_as_batch * num_online_cpus(),
"memory commitment underflow");

If the batch is decreased first, the warning will be easier/earlier to be
triggered, so I didn't bring this up when handling the warning message.

But it might work now, as the warning has been removed.

Thanks,
Feng





[PATCH] Driver-API: Documentation: Replace deprecated :c:func: Usage

2020-07-06 Thread Puranjay Mohan
Replace :c:func: with func() as the previous usage is deprecated.

Signed-off-by: Puranjay Mohan 
---
 Documentation/driver-api/device-io.rst | 22 +++---
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/Documentation/driver-api/device-io.rst 
b/Documentation/driver-api/device-io.rst
index 0e389378f71d..764963876d08 100644
--- a/Documentation/driver-api/device-io.rst
+++ b/Documentation/driver-api/device-io.rst
@@ -36,14 +36,14 @@ are starting with one. Physical addresses are of type 
unsigned long.
 
 This address should not be used directly. Instead, to get an address
 suitable for passing to the accessor functions described below, you
-should call :c:func:`ioremap()`. An address suitable for accessing
+should call ioremap(). An address suitable for accessing
 the device will be returned to you.
 
 After you've finished using the device (say, in your module's exit
-routine), call :c:func:`iounmap()` in order to return the address
+routine), call iounmap() in order to return the address
 space to the kernel. Most architectures allocate new address space each
-time you call :c:func:`ioremap()`, and they can run out unless you
-call :c:func:`iounmap()`.
+time you call ioremap(), and they can run out unless you
+call iounmap().
 
 Accessing the device
 
@@ -60,8 +60,8 @@ readb_relaxed(), readw_relaxed(), readl_relaxed(), 
readq_relaxed(),
 writeb(), writew(), writel() and writeq().
 
 Some devices (such as framebuffers) would like to use larger transfers than
-8 bytes at a time. For these devices, the :c:func:`memcpy_toio()`,
-:c:func:`memcpy_fromio()` and :c:func:`memset_io()` functions are
+8 bytes at a time. For these devices, the memcpy_toio(),
+memcpy_fromio() and memset_io() functions are
 provided. Do not use memset or memcpy on IO addresses; they are not
 guaranteed to copy data in order.
 
@@ -135,15 +135,15 @@ Accessing Port Space
 
 Accesses to this space are provided through a set of functions which
 allow 8-bit, 16-bit and 32-bit accesses; also known as byte, word and
-long. These functions are :c:func:`inb()`, :c:func:`inw()`,
-:c:func:`inl()`, :c:func:`outb()`, :c:func:`outw()` and
-:c:func:`outl()`.
+long. These functions are inb(), inw(),
+inl(), outb(), outw() and
+outl().
 
 Some variants are provided for these functions. Some devices require
 that accesses to their ports are slowed down. This functionality is
 provided by appending a ``_p`` to the end of the function.
-There are also equivalents to memcpy. The :c:func:`ins()` and
-:c:func:`outs()` functions copy bytes, words or longs to the given
+There are also equivalents to memcpy. The ins() and
+outs() functions copy bytes, words or longs to the given
 port.
 
 Public Functions Provided
-- 
2.27.0



Re: [PATCH v12 00/31] Speculative page faults

2020-07-06 Thread Chinwen Chang
On Mon, 2020-07-06 at 14:27 +0200, Laurent Dufour wrote:
> Le 06/07/2020 à 11:25, Chinwen Chang a écrit :
> > On Thu, 2019-06-20 at 16:19 +0800, Haiyan Song wrote:
> >> Hi Laurent,
> >>
> >> I downloaded your script and ran it on an Intel 2s Skylake platform with
> >> the spf-v12 patch
> >> series.
> >>
> >> Here attached the output results of this script.
> >>
> >> The following comparison result is statistics from the script outputs.
> >>
> >> a). Enable THP
> >>  SPF_0  change   
> >> SPF_1
> >> will-it-scale.page_fault2.per_thread_ops2664190.8  -11.7%   
> >> 2353637.6
> >> will-it-scale.page_fault3.per_thread_ops4480027.2  -14.7%   
> >> 3819331.9
> >>
> >>
> >> b). Disable THP
> >>  SPF_0   change  
> >> SPF_1
> >> will-it-scale.page_fault2.per_thread_ops2653260.7   -10%
> >> 2385165.8
> >> will-it-scale.page_fault3.per_thread_ops4436330.1   -12.4%  
> >> 3886734.2
> >>
> >>
> >> Thanks,
> >> Haiyan Song
> >>
> >>
> >> On Fri, Jun 14, 2019 at 10:44:47AM +0200, Laurent Dufour wrote:
> >>> Le 14/06/2019 à 10:37, Laurent Dufour a écrit :
>  Please find attached the script I run to get these numbers.
>  This would be nice if you could give it a try on your victim node and 
>  share the result.
> >>>
> >>> It sounds like the Intel mail filtering system doesn't like the attached
> >>> shell script.
> >>> Please find it there: 
> >>> https://urldefense.com/v3/__https://gist.github.com/ldu4/a5cc1a93f293108ea387d43d5d5e7f44__;!!CTRNKA9wMg0ARbw!0lux2FMCbIFxFEl824CdSuSQqT0IVWsvyUqfDVJNEVb9gTWyRltm7cpPZg70N_XhXmMZ$
> >>>  
> >>>
> >>> Thanks,
> >>> Laurent.
> >>>
> > 
> > Hi Laurent,
> > 
> > We merged SPF v11 and some patches from v12 into our platforms. After
> > several experiments, we observed SPF has obvious improvements on the
> > launch time of applications, especially for those high-TLP ones,
> > 
> > # launch time of applications(s):
> > 
> > package   version  w/ SPF  w/o SPF  improve(%)
> > --
> > Baidu maps10.13.3  0.887   0.98 9.49
> > Taobao8.4.0.35 1.227   1.2935.10
> > Meituan   9.12.401 1.107   1.54328.26
> > WeChat7.0.32.353   2.68 12.20
> > Honor of Kings1.43.1.6 6.636.7131.24
> 
> That's great news, thanks for reporting this!
> 
> > 
> > By the way, we have verified our platforms with those patches and
> > achieved the goal of mass production.
> 
> Another good news!
> For my information, what is your targeted hardware?
> 
> Cheers,
> Laurent.

Hi Laurent,

Our targeted hardware belongs to ARM64 multi-core series.

Thanks.
Chinwen
> 



Re: [PATCH v3 3/3] Force all cpus to exit VMX root operation on crash/panic reliably

2020-07-06 Thread Sean Christopherson
On Sun, Jul 05, 2020 at 01:53:39PM -0700, Andy Lutomirski wrote:
> On Sun, Jul 5, 2020 at 1:00 PM David P. Reed  wrote:
> >
> > On Sunday, July 5, 2020 2:26pm, "Andy Lutomirski"  said:
> > > As a minor caveat, doing cr4_clear_bits() in NMI context is not really
> > > okay, but we're about to reboot, so nothing too awful should happen.
> > > And this has very little to do with your patch.
> >
> > I had wondered why the bit is cleared, too. (I assumed it was OK or
> > desirable, because it was being cleared in NMI context before). Happy to
> > submit a separate patch to eliminate that issue as well, since the point of
> > emergency vmxoff is only to get out of VMX root mode - CR4.VMXE's state is
> > irrelevant. Of course, clearing it prevents any future emergency vmxoff
> > attempts. (there seemed to be some confusion about "enabling" VMX vs. "in
> > VMX operation" in the comments)  Should I?
> 
> I have a vague recollection of some firmwares that got upset if rebooted with
> CR4.VMXE set.  Sean?

Hmm, rebooting with CR4.VMXE=1 shouldn't be a problem.  VMXON does all the
special stuff that causes problems with reboot, e.g. blocks INIT, prevents
disabling paging, etc...

That being said, I think it makes sense to keep the clearing of CR4.VMXE out
of paranoia as BIOS will be BIOS, and there is no real downside.  Not only
is the system about to reboot, but the CPUs that call cr4_clear_bits() from
NMI context are also being put into an infinite loop by crash_nmi_callback(),
i.e. they never leave NMI context.  And we rely on that behavior, otherwise
KVM could set CR4.VMXE and do VMXON after the NMI and the whole thing would
be for naught.
 
> The real issue here is that the percpu CR4 machinery uses IRQ-offness as a
> lock, and NMI breaks this.


RE: [PATCH v1 2/2] scsi: ufs: change ufshcd_comp_devman_upiu() to ufshcd_compose_devman_upiu()

2020-07-06 Thread Avri Altman
 
> 
> From: Bean Huo 
> 
> ufshcd_comp_devman_upiu() always confuses me into thinking that it is a
> request completion function. Change it to ufshcd_compose_devman_upiu().
> 
> Signed-off-by: Bean Huo 
Acked-by: Avri Altman 


drivers/video/fbdev/sstfb.c:337:9: sparse: sparse: incorrect type in argument 1 (different address spaces)

2020-07-06 Thread kernel test robot
tree:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 
master
head:   bfe91da29bfad9941d5d703d45e29f0812a20724
commit: 670d0a4b10704667765f7d18f7592993d02783aa sparse: use identifiers to 
define address spaces
date:   3 weeks ago
config: s390-randconfig-s032-20200707 (attached as .config)
compiler: s390-linux-gcc (GCC) 9.3.0
reproduce:
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# apt-get install sparse
# sparse version: v0.6.2-31-gabbfd661-dirty
git checkout 670d0a4b10704667765f7d18f7592993d02783aa
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross C=1 
CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' ARCH=s390 

If you fix the issue, kindly add the following tag as appropriate
Reported-by: kernel test robot 


sparse warnings: (new ones prefixed by >>)

>> drivers/video/fbdev/sstfb.c:337:9: sparse: sparse: incorrect type in 
>> argument 1 (different address spaces) @@ expected void *s @@ got 
>> char [noderef] __iomem *screen_base @@
   drivers/video/fbdev/sstfb.c:337:9: sparse: expected void *s
   drivers/video/fbdev/sstfb.c:337:9: sparse: got char [noderef] __iomem 
*screen_base
   include/asm-generic/io.h:179:15: sparse: sparse: cast to restricted __le32
   include/asm-generic/io.h:179:15: sparse: sparse: cast to restricted __le32
   include/asm-generic/io.h:179:15: sparse: sparse: cast to restricted __le32
   include/asm-generic/io.h:179:15: sparse: sparse: cast to restricted __le32
   include/asm-generic/io.h:179:15: sparse: sparse: cast to restricted __le32
   include/asm-generic/io.h:179:15: sparse: sparse: cast to restricted __le32
   include/asm-generic/io.h:225:22: sparse: sparse: incorrect type in argument 
1 (different base types) @@ expected unsigned int [usertype] val @@ got 
restricted __le32 [usertype] @@
   include/asm-generic/io.h:225:22: sparse: expected unsigned int 
[usertype] val
   include/asm-generic/io.h:225:22: sparse: got restricted __le32 [usertype]
   include/asm-generic/io.h:179:15: sparse: sparse: cast to restricted __le32
   include/asm-generic/io.h:179:15: sparse: sparse: cast to restricted __le32
   include/asm-generic/io.h:179:15: sparse: sparse: cast to restricted __le32
   include/asm-generic/io.h:179:15: sparse: sparse: cast to restricted __le32
   include/asm-generic/io.h:179:15: sparse: sparse: cast to restricted __le32
   include/asm-generic/io.h:179:15: sparse: sparse: cast to restricted __le32
   include/asm-generic/io.h:225:22: sparse: sparse: incorrect type in argument 
1 (different base types) @@ expected unsigned int [usertype] val @@ got 
restricted __le32 [usertype] @@
   include/asm-generic/io.h:225:22: sparse: expected unsigned int 
[usertype] val
   include/asm-generic/io.h:225:22: sparse: got restricted __le32 [usertype]
   include/asm-generic/io.h:225:22: sparse: sparse: incorrect type in argument 
1 (different base types) @@ expected unsigned int [usertype] val @@ got 
restricted __le32 [usertype] @@
   include/asm-generic/io.h:225:22: sparse: expected unsigned int 
[usertype] val
   include/asm-generic/io.h:225:22: sparse: got restricted __le32 [usertype]
   include/asm-generic/io.h:179:15: sparse: sparse: cast to restricted __le32
   include/asm-generic/io.h:179:15: sparse: sparse: cast to restricted __le32
   include/asm-generic/io.h:179:15: sparse: sparse: cast to restricted __le32
   include/asm-generic/io.h:179:15: sparse: sparse: cast to restricted __le32
   include/asm-generic/io.h:179:15: sparse: sparse: cast to restricted __le32
   include/asm-generic/io.h:179:15: sparse: sparse: cast to restricted __le32
   include/asm-generic/io.h:225:22: sparse: sparse: incorrect type in argument 
1 (different base types) @@ expected unsigned int [usertype] val @@ got 
restricted __le32 [usertype] @@
   include/asm-generic/io.h:225:22: sparse: expected unsigned int 
[usertype] val
   include/asm-generic/io.h:225:22: sparse: got restricted __le32 [usertype]
   include/asm-generic/io.h:179:15: sparse: sparse: cast to restricted __le32
   include/asm-generic/io.h:179:15: sparse: sparse: cast to restricted __le32
   include/asm-generic/io.h:179:15: sparse: sparse: cast to restricted __le32
   include/asm-generic/io.h:179:15: sparse: sparse: cast to restricted __le32
   include/asm-generic/io.h:179:15: sparse: sparse: cast to restricted __le32
   include/asm-generic/io.h:179:15: sparse: sparse: cast to restricted __le32
   include/asm-generic/io.h:225:22: sparse: sparse: incorrect type in argument 
1 (different base types) @@ expected unsigned int [usertype] val @@ got 
restricted __le32 [usertype] @@
   include/asm-generic/io.h:225:22: sparse: expected unsigned int 
[usertype] val
   include/asm-generic/io.h:225:22: sparse: got restricted __le32 [usertype]
 

Re: [PATCH] pinctrl: qcom: ipq8074: route gpio interrupts to APPS

2020-07-06 Thread Bjorn Andersson
On Mon 06 Jul 21:58 PDT 2020, Kathiravan T wrote:

> set target proc as APPS to route the gpio interrupts to APPS
> 
> Signed-off-by: Rajkumar Ayyasamy 
> Signed-off-by: Kathiravan T 

This says "first Rajkumar certified the patch's origin, then you picked
it up and certified its origin". As such I would expect that Rajkumar is
the author of the patch.

If you both came up with the patch add a Co-developed-by: tag.

Regards,
Bjorn

> ---
>  drivers/pinctrl/qcom/pinctrl-ipq8074.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/drivers/pinctrl/qcom/pinctrl-ipq8074.c 
> b/drivers/pinctrl/qcom/pinctrl-ipq8074.c
> index 0edd41c..aec68b1 100644
> --- a/drivers/pinctrl/qcom/pinctrl-ipq8074.c
> +++ b/drivers/pinctrl/qcom/pinctrl-ipq8074.c
> @@ -50,6 +50,7 @@
>   .intr_enable_bit = 0,   \
>   .intr_status_bit = 0,   \
>   .intr_target_bit = 5,   \
> + .intr_target_kpss_val = 3,  \
>   .intr_raw_status_bit = 4,   \
>   .intr_polarity_bit = 1, \
>   .intr_detection_bit = 2,\
> -- 
> QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of 
> Code Aurora Forum, hosted by The Linux Foundation
> 


[PATCH V6 6/6] clk: imx6sl: Fix build warning reported by kernel test robot

2020-07-06 Thread Anson Huang
Use readl_relaxed() instead of __raw_readl(), and use BIT(x)
instead of (1 << x) to fix the build warning below, reported by the kernel
test robot:

drivers/clk/imx/clk-imx6sl.c:149:49: warning: Shifting signed 32-bit
value by 31 bits is undefined behaviour [shiftTooManyBitsSigned]
 while (!(__raw_readl(anatop_base + PLL_ARM) & BM_PLL_ARM_LOCK))

Signed-off-by: Anson Huang 
Reported-by: kernel test robot 
---
New patch.
---
 drivers/clk/imx/clk-imx6sl.c | 15 ---
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/clk/imx/clk-imx6sl.c b/drivers/clk/imx/clk-imx6sl.c
index 0f647d1..e69dba1 100644
--- a/drivers/clk/imx/clk-imx6sl.c
+++ b/drivers/clk/imx/clk-imx6sl.c
@@ -3,6 +3,7 @@
  * Copyright 2013-2014 Freescale Semiconductor, Inc.
  */
 
+#include 
 #include 
 #include 
 #include 
@@ -14,19 +15,19 @@
 #include "clk.h"
 
 #define CCSR   0xc
-#define BM_CCSR_PLL1_SW_CLK_SEL(1 << 2)
+#define BM_CCSR_PLL1_SW_CLK_SELBIT(2)
 #define CACRR  0x10
 #define CDHIPR 0x48
-#define BM_CDHIPR_ARM_PODF_BUSY(1 << 16)
+#define BM_CDHIPR_ARM_PODF_BUSYBIT(16)
 #define ARM_WAIT_DIV_396M  2
 #define ARM_WAIT_DIV_792M  4
 #define ARM_WAIT_DIV_996M  6
 
 #define PLL_ARM0x0
-#define BM_PLL_ARM_DIV_SELECT  (0x7f << 0)
-#define BM_PLL_ARM_POWERDOWN   (1 << 12)
-#define BM_PLL_ARM_ENABLE  (1 << 13)
-#define BM_PLL_ARM_LOCK(1 << 31)
+#define BM_PLL_ARM_DIV_SELECT  0x7f
+#define BM_PLL_ARM_POWERDOWN   BIT(12)
+#define BM_PLL_ARM_ENABLE  BIT(13)
+#define BM_PLL_ARM_LOCKBIT(31)
 #define PLL_ARM_DIV_792M   66
 
 static const char *step_sels[] = { "osc", "pll2_pfd2", };
@@ -145,7 +146,7 @@ static void imx6sl_enable_pll_arm(bool enable)
val |= BM_PLL_ARM_ENABLE;
val &= ~BM_PLL_ARM_POWERDOWN;
writel_relaxed(val, anatop_base + PLL_ARM);
-   while (!(__raw_readl(anatop_base + PLL_ARM) & BM_PLL_ARM_LOCK))
+   while (!(readl_relaxed(anatop_base + PLL_ARM) & 
BM_PLL_ARM_LOCK))
;
} else {
 writel_relaxed(saved_pll_arm, anatop_base + PLL_ARM);
-- 
2.7.4



[PATCH V6 3/6] clk: imx: Add clock configuration for ARMv7 platforms

2020-07-06 Thread Anson Huang
Add CONFIG_CLK_xxx for i.MX ARMv7 platforms, and use it as the build option
instead of CONFIG_SOC_xxx. CONFIG_CLK_xxx will be selected by default
according to CONFIG_SOC_xxx.

Signed-off-by: Anson Huang 
Reviewed-by: Dong Aisheng 
---
Changes since V5:
- make i.MX6/7 platforms clock driver NOT support COMPILE_TEST and
  ONLY support built-in, as they depend on ARCH code/config.
---
 drivers/clk/imx/Kconfig  | 62 +++-
 drivers/clk/imx/Makefile | 30 +++
 2 files changed, 76 insertions(+), 16 deletions(-)

diff --git a/drivers/clk/imx/Kconfig b/drivers/clk/imx/Kconfig
index ee854ac..e96bd38 100644
--- a/drivers/clk/imx/Kconfig
+++ b/drivers/clk/imx/Kconfig
@@ -2,12 +2,72 @@
 # common clock support for NXP i.MX SoC family.
 config MXC_CLK
tristate "IMX clock"
-   depends on ARCH_MXC
+   depends on ARCH_MXC || COMPILE_TEST
 
 config MXC_CLK_SCU
bool
depends on IMX_SCU
 
+config CLK_IMX1
+   def_bool SOC_IMX1
+   select MXC_CLK
+
+config CLK_IMX21
+   def_bool SOC_IMX21
+   select MXC_CLK
+
+config CLK_IMX25
+   def_bool SOC_IMX25
+   select MXC_CLK
+
+config CLK_IMX27
+   def_bool SOC_IMX27
+   select MXC_CLK
+
+config CLK_IMX31
+   def_bool SOC_IMX31
+   select MXC_CLK
+
+config CLK_IMX35
+   def_bool SOC_IMX35
+   select MXC_CLK
+
+config CLK_IMX5
+   def_bool SOC_IMX5
+   select MXC_CLK
+
+config CLK_IMX6Q
+   def_bool SOC_IMX6Q
+   select MXC_CLK
+
+config CLK_IMX6SL
+   def_bool SOC_IMX6SL
+   select MXC_CLK
+
+config CLK_IMX6SLL
+   def_bool SOC_IMX6SLL
+   select MXC_CLK
+
+config CLK_IMX6SX
+   def_bool SOC_IMX6SX
+   select MXC_CLK
+
+config CLK_IMX6UL
+   def_bool SOC_IMX6UL
+   select MXC_CLK
+
+config CLK_IMX7D
+   def_bool SOC_IMX7D
+   select MXC_CLK
+
+config CLK_IMX7ULP
+   def_bool SOC_IMX7ULP
+   select MXC_CLK
+
+config CLK_VF610
+   def_bool SOC_VF610
+   select MXC_CLK
+
 config CLK_IMX8MM
bool "IMX8MM CCM Clock Driver"
depends on ARCH_MXC
diff --git a/drivers/clk/imx/Makefile b/drivers/clk/imx/Makefile
index 687207d..17f5d12 100644
--- a/drivers/clk/imx/Makefile
+++ b/drivers/clk/imx/Makefile
@@ -31,18 +31,18 @@ obj-$(CONFIG_CLK_IMX8MP) += clk-imx8mp.o
 obj-$(CONFIG_CLK_IMX8MQ) += clk-imx8mq.o
 obj-$(CONFIG_CLK_IMX8QXP) += clk-imx8qxp.o clk-imx8qxp-lpcg.o
 
-obj-$(CONFIG_SOC_IMX1)   += clk-imx1.o
-obj-$(CONFIG_SOC_IMX21)  += clk-imx21.o
-obj-$(CONFIG_SOC_IMX25)  += clk-imx25.o
-obj-$(CONFIG_SOC_IMX27)  += clk-imx27.o
-obj-$(CONFIG_SOC_IMX31)  += clk-imx31.o
-obj-$(CONFIG_SOC_IMX35)  += clk-imx35.o
-obj-$(CONFIG_SOC_IMX5)   += clk-imx5.o
-obj-$(CONFIG_SOC_IMX6Q)  += clk-imx6q.o
-obj-$(CONFIG_SOC_IMX6SL) += clk-imx6sl.o
-obj-$(CONFIG_SOC_IMX6SLL) += clk-imx6sll.o
-obj-$(CONFIG_SOC_IMX6SX) += clk-imx6sx.o
-obj-$(CONFIG_SOC_IMX6UL) += clk-imx6ul.o
-obj-$(CONFIG_SOC_IMX7D)  += clk-imx7d.o
-obj-$(CONFIG_SOC_IMX7ULP) += clk-imx7ulp.o
-obj-$(CONFIG_SOC_VF610)  += clk-vf610.o
+obj-$(CONFIG_CLK_IMX1)   += clk-imx1.o
+obj-$(CONFIG_CLK_IMX21)  += clk-imx21.o
+obj-$(CONFIG_CLK_IMX25)  += clk-imx25.o
+obj-$(CONFIG_CLK_IMX27)  += clk-imx27.o
+obj-$(CONFIG_CLK_IMX31)  += clk-imx31.o
+obj-$(CONFIG_CLK_IMX35)  += clk-imx35.o
+obj-$(CONFIG_CLK_IMX5)   += clk-imx5.o
+obj-$(CONFIG_CLK_IMX6Q)  += clk-imx6q.o
+obj-$(CONFIG_CLK_IMX6SL) += clk-imx6sl.o
+obj-$(CONFIG_CLK_IMX6SLL) += clk-imx6sll.o
+obj-$(CONFIG_CLK_IMX6SX) += clk-imx6sx.o
+obj-$(CONFIG_CLK_IMX6UL) += clk-imx6ul.o
+obj-$(CONFIG_CLK_IMX7D)  += clk-imx7d.o
+obj-$(CONFIG_CLK_IMX7ULP) += clk-imx7ulp.o
+obj-$(CONFIG_CLK_VF610)  += clk-vf610.o
-- 
2.7.4



[PATCH V6 2/6] clk: imx: Support building i.MX common clock driver as module

2020-07-06 Thread Anson Huang
There are more and more requirements of building SoC specific drivers
as modules, add support for building i.MX common clock driver as module
to meet the requirement.

Signed-off-by: Anson Huang 
---
No change.
---
 drivers/clk/imx/Kconfig|  8 ++--
 drivers/clk/imx/Makefile   | 40 +++---
 drivers/clk/imx/clk-composite-8m.c |  2 ++
 drivers/clk/imx/clk-cpu.c  |  2 ++
 drivers/clk/imx/clk-frac-pll.c |  2 ++
 drivers/clk/imx/clk-gate2.c|  2 ++
 drivers/clk/imx/clk-pll14xx.c  |  5 +
 drivers/clk/imx/clk-sscg-pll.c |  2 ++
 drivers/clk/imx/clk.c  | 17 
 drivers/clk/imx/clk.h  |  6 ++
 10 files changed, 60 insertions(+), 26 deletions(-)

diff --git a/drivers/clk/imx/Kconfig b/drivers/clk/imx/Kconfig
index db0253f..ee854ac 100644
--- a/drivers/clk/imx/Kconfig
+++ b/drivers/clk/imx/Kconfig
@@ -1,8 +1,8 @@
 # SPDX-License-Identifier: GPL-2.0
 # common clock support for NXP i.MX SoC family.
 config MXC_CLK
-   bool
-   def_bool ARCH_MXC
+   tristate "IMX clock"
+   depends on ARCH_MXC
 
 config MXC_CLK_SCU
bool
@@ -11,24 +11,28 @@ config MXC_CLK_SCU
 config CLK_IMX8MM
bool "IMX8MM CCM Clock Driver"
depends on ARCH_MXC
+   select MXC_CLK
help
Build the driver for i.MX8MM CCM Clock Driver
 
 config CLK_IMX8MN
bool "IMX8MN CCM Clock Driver"
depends on ARCH_MXC
+   select MXC_CLK
help
Build the driver for i.MX8MN CCM Clock Driver
 
 config CLK_IMX8MP
bool "IMX8MP CCM Clock Driver"
depends on ARCH_MXC
+   select MXC_CLK
help
Build the driver for i.MX8MP CCM Clock Driver
 
 config CLK_IMX8MQ
bool "IMX8MQ CCM Clock Driver"
depends on ARCH_MXC
+   select MXC_CLK
help
Build the driver for i.MX8MQ CCM Clock Driver
 
diff --git a/drivers/clk/imx/Makefile b/drivers/clk/imx/Makefile
index 928f874..687207d 100644
--- a/drivers/clk/imx/Makefile
+++ b/drivers/clk/imx/Makefile
@@ -1,25 +1,25 @@
 # SPDX-License-Identifier: GPL-2.0
 
-obj-$(CONFIG_MXC_CLK) += \
-   clk.o \
-   clk-busy.o \
-   clk-composite-8m.o \
-   clk-cpu.o \
-   clk-composite-7ulp.o \
-   clk-divider-gate.o \
-   clk-fixup-div.o \
-   clk-fixup-mux.o \
-   clk-frac-pll.o \
-   clk-gate-exclusive.o \
-   clk-gate2.o \
-   clk-pfd.o \
-   clk-pfdv2.o \
-   clk-pllv1.o \
-   clk-pllv2.o \
-   clk-pllv3.o \
-   clk-pllv4.o \
-   clk-sscg-pll.o \
-   clk-pll14xx.o
+mxc-clk-objs += clk.o
+mxc-clk-objs += clk-busy.o
+mxc-clk-objs += clk-composite-7ulp.o
+mxc-clk-objs += clk-composite-8m.o
+mxc-clk-objs += clk-cpu.o
+mxc-clk-objs += clk-divider-gate.o
+mxc-clk-objs += clk-fixup-div.o
+mxc-clk-objs += clk-fixup-mux.o
+mxc-clk-objs += clk-frac-pll.o
+mxc-clk-objs += clk-gate2.o
+mxc-clk-objs += clk-gate-exclusive.o
+mxc-clk-objs += clk-pfd.o
+mxc-clk-objs += clk-pfdv2.o
+mxc-clk-objs += clk-pllv1.o
+mxc-clk-objs += clk-pllv2.o
+mxc-clk-objs += clk-pllv3.o
+mxc-clk-objs += clk-pllv4.o
+mxc-clk-objs += clk-pll14xx.o
+mxc-clk-objs += clk-sscg-pll.o
+obj-$(CONFIG_MXC_CLK) += mxc-clk.o
 
 obj-$(CONFIG_MXC_CLK_SCU) += \
clk-scu.o \
diff --git a/drivers/clk/imx/clk-composite-8m.c 
b/drivers/clk/imx/clk-composite-8m.c
index d2b5af8..78fb7e5 100644
--- a/drivers/clk/imx/clk-composite-8m.c
+++ b/drivers/clk/imx/clk-composite-8m.c
@@ -5,6 +5,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -243,3 +244,4 @@ struct clk_hw *imx8m_clk_hw_composite_flags(const char 
*name,
kfree(mux);
return ERR_CAST(hw);
 }
+EXPORT_SYMBOL_GPL(imx8m_clk_hw_composite_flags);
diff --git a/drivers/clk/imx/clk-cpu.c b/drivers/clk/imx/clk-cpu.c
index cb182be..cb6ca4c 100644
--- a/drivers/clk/imx/clk-cpu.c
+++ b/drivers/clk/imx/clk-cpu.c
@@ -5,6 +5,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include "clk.h"
 
@@ -104,3 +105,4 @@ struct clk_hw *imx_clk_hw_cpu(const char *name, const char 
*parent_name,
 
return hw;
 }
+EXPORT_SYMBOL_GPL(imx_clk_hw_cpu);
diff --git a/drivers/clk/imx/clk-frac-pll.c b/drivers/clk/imx/clk-frac-pll.c
index 101e0a3..c703056 100644
--- a/drivers/clk/imx/clk-frac-pll.c
+++ b/drivers/clk/imx/clk-frac-pll.c
@@ -10,6 +10,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -233,3 +234,4 @@ struct clk_hw *imx_clk_hw_frac_pll(const char *name,
 
return hw;
 }
+EXPORT_SYMBOL_GPL(imx_clk_hw_frac_pll);
diff --git a/drivers/clk/imx/clk-gate2.c b/drivers/clk/imx/clk-gate2.c
index b87ab3c..512f675 100644
--- a/drivers/clk/imx/clk-gate2.c
+++ b/drivers/clk/imx/clk-gate2.c
@@ -7,6 +7,7 @@
  */
 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -177,3 +178,4 @@ struct clk_hw *clk_hw_register_gate2(struct device *dev, 
const char *name,
 
return hw;
 }
+EXPORT_SYMBOL_GPL(clk_hw_register_gate2);
diff --git 

[PATCH V6 1/6] clk: composite: Export clk_hw_register_composite()

2020-07-06 Thread Anson Huang
Export clk_hw_register_composite() to support user built as module.

ERROR: modpost: "clk_hw_register_composite" [drivers/clk/imx/mxc-clk.ko]
undefined!

Signed-off-by: Anson Huang 
Reviewed-by: Stephen Boyd 
---
No change.
---
 drivers/clk/clk-composite.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/clk/clk-composite.c b/drivers/clk/clk-composite.c
index 7376f57..2ddb54f 100644
--- a/drivers/clk/clk-composite.c
+++ b/drivers/clk/clk-composite.c
@@ -328,6 +328,7 @@ struct clk_hw *clk_hw_register_composite(struct device 
*dev, const char *name,
   rate_hw, rate_ops, gate_hw,
   gate_ops, flags);
 }
+EXPORT_SYMBOL_GPL(clk_hw_register_composite);
 
 struct clk_hw *clk_hw_register_composite_pdata(struct device *dev,
const char *name,
-- 
2.7.4



[PATCH V6 4/6] clk: imx8m: Support module build

2020-07-06 Thread Anson Huang
Change configuration to "tristate", add module author, description
and license to support building i.MX8M SoCs clock driver as module.

Signed-off-by: Anson Huang 
Reviewed-by: Dong Aisheng 
---
No change.
---
 drivers/clk/imx/Kconfig  | 16 
 drivers/clk/imx/clk-imx8mm.c |  4 
 drivers/clk/imx/clk-imx8mn.c |  4 
 drivers/clk/imx/clk-imx8mp.c |  4 
 drivers/clk/imx/clk-imx8mq.c |  4 
 5 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/drivers/clk/imx/Kconfig b/drivers/clk/imx/Kconfig
index e96bd38..3897712 100644
--- a/drivers/clk/imx/Kconfig
+++ b/drivers/clk/imx/Kconfig
@@ -69,29 +69,29 @@ config CLK_VF610
select MXC_CLK
 
 config CLK_IMX8MM
-   bool "IMX8MM CCM Clock Driver"
-   depends on ARCH_MXC
+   tristate "IMX8MM CCM Clock Driver"
+   depends on ARCH_MXC || COMPILE_TEST
select MXC_CLK
help
Build the driver for i.MX8MM CCM Clock Driver
 
 config CLK_IMX8MN
-   bool "IMX8MN CCM Clock Driver"
-   depends on ARCH_MXC
+   tristate "IMX8MN CCM Clock Driver"
+   depends on ARCH_MXC || COMPILE_TEST
select MXC_CLK
help
Build the driver for i.MX8MN CCM Clock Driver
 
 config CLK_IMX8MP
-   bool "IMX8MP CCM Clock Driver"
-   depends on ARCH_MXC
+   tristate "IMX8MP CCM Clock Driver"
+   depends on ARCH_MXC || COMPILE_TEST
select MXC_CLK
help
Build the driver for i.MX8MP CCM Clock Driver
 
 config CLK_IMX8MQ
-   bool "IMX8MQ CCM Clock Driver"
-   depends on ARCH_MXC
+   tristate "IMX8MQ CCM Clock Driver"
+   depends on ARCH_MXC || COMPILE_TEST
select MXC_CLK
help
Build the driver for i.MX8MQ CCM Clock Driver
diff --git a/drivers/clk/imx/clk-imx8mm.c b/drivers/clk/imx/clk-imx8mm.c
index b793264..0de0be0 100644
--- a/drivers/clk/imx/clk-imx8mm.c
+++ b/drivers/clk/imx/clk-imx8mm.c
@@ -657,3 +657,7 @@ static struct platform_driver imx8mm_clk_driver = {
},
 };
 module_platform_driver(imx8mm_clk_driver);
+
+MODULE_AUTHOR("Bai Ping ");
+MODULE_DESCRIPTION("NXP i.MX8MM clock driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/clk/imx/clk-imx8mn.c b/drivers/clk/imx/clk-imx8mn.c
index 213cc37..e984de5 100644
--- a/drivers/clk/imx/clk-imx8mn.c
+++ b/drivers/clk/imx/clk-imx8mn.c
@@ -608,3 +608,7 @@ static struct platform_driver imx8mn_clk_driver = {
},
 };
 module_platform_driver(imx8mn_clk_driver);
+
+MODULE_AUTHOR("Anson Huang ");
+MODULE_DESCRIPTION("NXP i.MX8MN clock driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/clk/imx/clk-imx8mp.c b/drivers/clk/imx/clk-imx8mp.c
index ca74771..f3cedf2 100644
--- a/drivers/clk/imx/clk-imx8mp.c
+++ b/drivers/clk/imx/clk-imx8mp.c
@@ -773,3 +773,7 @@ static struct platform_driver imx8mp_clk_driver = {
},
 };
 module_platform_driver(imx8mp_clk_driver);
+
+MODULE_AUTHOR("Anson Huang ");
+MODULE_DESCRIPTION("NXP i.MX8MP clock driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/clk/imx/clk-imx8mq.c b/drivers/clk/imx/clk-imx8mq.c
index a64aace..a06cc21 100644
--- a/drivers/clk/imx/clk-imx8mq.c
+++ b/drivers/clk/imx/clk-imx8mq.c
@@ -643,3 +643,7 @@ static struct platform_driver imx8mq_clk_driver = {
},
 };
 module_platform_driver(imx8mq_clk_driver);
+
+MODULE_AUTHOR("Abel Vesa ");
+MODULE_DESCRIPTION("NXP i.MX8MQ clock driver");
+MODULE_LICENSE("GPL v2");
-- 
2.7.4



[PATCH V6 5/6] clk: imx8qxp: Support building i.MX8QXP clock driver as module

2020-07-06 Thread Anson Huang
Change configuration to "tristate", add module author, description and
license to support building i.MX8QXP clock drivers as module.

Signed-off-by: Anson Huang 
---
No change.
---
 drivers/clk/imx/Kconfig| 10 ++
 drivers/clk/imx/Makefile   |  9 -
 drivers/clk/imx/clk-imx8qxp-lpcg.c |  4 
 drivers/clk/imx/clk-imx8qxp.c  |  4 
 4 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/drivers/clk/imx/Kconfig b/drivers/clk/imx/Kconfig
index 3897712..e7defba 100644
--- a/drivers/clk/imx/Kconfig
+++ b/drivers/clk/imx/Kconfig
@@ -5,8 +5,9 @@ config MXC_CLK
depends on ARCH_MXC || COMPILE_TEST
 
 config MXC_CLK_SCU
-   bool
-   depends on IMX_SCU
+   tristate "IMX SCU clock"
+   depends on ARCH_MXC || COMPILE_TEST
+   depends on IMX_SCU && HAVE_ARM_SMCCC
 
 config CLK_IMX1
def_bool SOC_IMX1
@@ -97,8 +98,9 @@ config CLK_IMX8MQ
Build the driver for i.MX8MQ CCM Clock Driver
 
 config CLK_IMX8QXP
-   bool "IMX8QXP SCU Clock"
-   depends on ARCH_MXC && IMX_SCU && ARM64
+   tristate "IMX8QXP SCU Clock"
+   depends on (ARCH_MXC && ARM64) || COMPILE_TEST
+   depends on IMX_SCU && HAVE_ARM_SMCCC
select MXC_CLK_SCU
help
  Build the driver for IMX8QXP SCU based clocks.
diff --git a/drivers/clk/imx/Makefile b/drivers/clk/imx/Makefile
index 17f5d12..79e53f2 100644
--- a/drivers/clk/imx/Makefile
+++ b/drivers/clk/imx/Makefile
@@ -21,15 +21,14 @@ mxc-clk-objs += clk-pll14xx.o
 mxc-clk-objs += clk-sscg-pll.o
 obj-$(CONFIG_MXC_CLK) += mxc-clk.o
 
-obj-$(CONFIG_MXC_CLK_SCU) += \
-   clk-scu.o \
-   clk-lpcg-scu.o
-
 obj-$(CONFIG_CLK_IMX8MM) += clk-imx8mm.o
 obj-$(CONFIG_CLK_IMX8MN) += clk-imx8mn.o
 obj-$(CONFIG_CLK_IMX8MP) += clk-imx8mp.o
 obj-$(CONFIG_CLK_IMX8MQ) += clk-imx8mq.o
-obj-$(CONFIG_CLK_IMX8QXP) += clk-imx8qxp.o clk-imx8qxp-lpcg.o
+
+obj-$(CONFIG_MXC_CLK_SCU) += clk-imx-scu.o clk-imx-lpcg-scu.o
+clk-imx-scu-$(CONFIG_CLK_IMX8QXP) += clk-scu.o clk-imx8qxp.o
+clk-imx-lpcg-scu-$(CONFIG_CLK_IMX8QXP) += clk-lpcg-scu.o clk-imx8qxp-lpcg.o
 
 obj-$(CONFIG_CLK_IMX1)   += clk-imx1.o
 obj-$(CONFIG_CLK_IMX21)  += clk-imx21.o
diff --git a/drivers/clk/imx/clk-imx8qxp-lpcg.c 
b/drivers/clk/imx/clk-imx8qxp-lpcg.c
index 04c8ee3..e947a70 100644
--- a/drivers/clk/imx/clk-imx8qxp-lpcg.c
+++ b/drivers/clk/imx/clk-imx8qxp-lpcg.c
@@ -232,3 +232,7 @@ static struct platform_driver imx8qxp_lpcg_clk_driver = {
 };
 
 builtin_platform_driver(imx8qxp_lpcg_clk_driver);
+
+MODULE_AUTHOR("Aisheng Dong ");
+MODULE_DESCRIPTION("NXP i.MX8QXP LPCG clock driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/clk/imx/clk-imx8qxp.c b/drivers/clk/imx/clk-imx8qxp.c
index 5e2903e..d650ca3 100644
--- a/drivers/clk/imx/clk-imx8qxp.c
+++ b/drivers/clk/imx/clk-imx8qxp.c
@@ -152,3 +152,7 @@ static struct platform_driver imx8qxp_clk_driver = {
.probe = imx8qxp_clk_probe,
 };
 builtin_platform_driver(imx8qxp_clk_driver);
+
+MODULE_AUTHOR("Aisheng Dong ");
+MODULE_DESCRIPTION("NXP i.MX8QXP clock driver");
+MODULE_LICENSE("GPL v2");
-- 
2.7.4



[PATCH V6 0/6] Support building i.MX ARMv8 platforms clock driver as module

2020-07-06 Thread Anson Huang
Nowadays, there are more and more requirements to build SoC-specific drivers
as modules, such as for Android GKI (generic kernel image). This patch set supports
building i.MX ARMv8 SoC clock drivers as modules.

The CLK_IMXxxx is introduced for i.MX ARMv7 platforms in order to make the build
options aligned, the reason why i.MX ARMv7 platforms clock driver do NOT support
module build and COMPILE_TEST is because, some drivers like i.MX GPT timer 
driver
depends on clock driver to be ready before it, GPT driver uses 
TIMER_OF_DECLARE(),
while i.MX6/7 clock drivers use CLK_OF_DECLARE(), and GPT driver is critical for
i.MX6/7 platforms kernel boot up, so GPT driver needs to be changed to support
loadable clock driver first, then the i.MX6/7 clock drivers can support loadable
module, this will be done later.

Changes since V5:
- make i.MX ARMv7 platform clock drivers bool and do NOT support COMPILE_TEST,
  since they depend on ARCH_MXC or SOC config, which makes COMPILE_TEST
  not very meaningful; so just skip COMPILE_TEST support for i.MX ARMv7
  platform clock drivers and leave them the same as the original implementation.
- add a patch to fix a build warning reported by the kernel test robot on the
  i.MX6SL clock driver.

Anson Huang (6):
  clk: composite: Export clk_hw_register_composite()
  clk: imx: Support building i.MX common clock driver as module
  clk: imx: Add clock configuration for ARMv7 platforms
  clk: imx8m: Support module build
  clk: imx8qxp: Support building i.MX8QXP clock driver as module
  clk: imx6sl: Fix build warning reported by kernel test robot

 drivers/clk/clk-composite.c|  1 +
 drivers/clk/imx/Kconfig| 94 --
 drivers/clk/imx/Makefile   | 79 
 drivers/clk/imx/clk-composite-8m.c |  2 +
 drivers/clk/imx/clk-cpu.c  |  2 +
 drivers/clk/imx/clk-frac-pll.c |  2 +
 drivers/clk/imx/clk-gate2.c|  2 +
 drivers/clk/imx/clk-imx6sl.c   | 15 +++---
 drivers/clk/imx/clk-imx8mm.c   |  4 ++
 drivers/clk/imx/clk-imx8mn.c   |  4 ++
 drivers/clk/imx/clk-imx8mp.c   |  4 ++
 drivers/clk/imx/clk-imx8mq.c   |  4 ++
 drivers/clk/imx/clk-imx8qxp-lpcg.c |  4 ++
 drivers/clk/imx/clk-imx8qxp.c  |  4 ++
 drivers/clk/imx/clk-pll14xx.c  |  5 ++
 drivers/clk/imx/clk-sscg-pll.c |  2 +
 drivers/clk/imx/clk.c  | 17 +--
 drivers/clk/imx/clk.h  |  6 +++
 18 files changed, 186 insertions(+), 65 deletions(-)

-- 
2.7.4



Re: [PATCH v7 2/4] usb: dwc3: qcom: Add interconnect support in dwc3 driver

2020-07-06 Thread Sandeep Maheswaram (Temp)



On 7/1/2020 4:12 AM, Matthias Kaehlcke wrote:

On Tue, Jun 16, 2020 at 01:38:49PM -0700, Matthias Kaehlcke wrote:

On Tue, Jun 16, 2020 at 10:22:47AM +0530, Sandeep Maheswaram (Temp) wrote:

On 6/16/2020 1:12 AM, Matthias Kaehlcke wrote:

On Thu, Jun 04, 2020 at 04:16:31AM -0700, Stephen Boyd wrote:

Quoting Sandeep Maheswaram (Temp) (2020-06-04 02:43:09)

On 6/3/2020 11:06 PM, Stephen Boyd wrote:

Quoting Sandeep Maheswaram (2020-03-31 22:15:43)

diff --git a/drivers/usb/dwc3/dwc3-qcom.c b/drivers/usb/dwc3/dwc3-qcom.c
index 1dfd024..d33ae86 100644
--- a/drivers/usb/dwc3/dwc3-qcom.c
+++ b/drivers/usb/dwc3/dwc3-qcom.c
@@ -285,6 +307,101 @@ static int dwc3_qcom_resume(struct dwc3_qcom *qcom)
   return 0;
}
+
+/**
+ * dwc3_qcom_interconnect_init() - Get interconnect path handles
+ * @qcom:  Pointer to the concerned usb core.
+ *
+ */
+static int dwc3_qcom_interconnect_init(struct dwc3_qcom *qcom)
+{
+   struct device *dev = qcom->dev;
+   int ret;
+
+   if (!device_is_bound(&qcom->dwc3->dev))
+   return -EPROBE_DEFER;

How is this supposed to work? I see that this was added in an earlier
revision of this patch series but there isn't any mention of why
device_is_bound() is used here. It would be great if there was a comment
detailing why this is necessary. It sounds like maximum_speed is
important?

Furthermore, dwc3_qcom_interconnect_init() is called by
dwc3_qcom_probe() which is the function that registers the device for
qcom->dwc3->dev. If that device doesn't probe between the time it is
registered by dwc3_qcom_probe() and this function is called then we'll
fail dwc3_qcom_probe() with -EPROBE_DEFER. And that will remove the
qcom->dwc3->dev device from the platform bus because we call
of_platform_depopulate() on the error path of dwc3_qcom_probe().

So isn't this whole thing racy and can potentially lead us to a driver
probe loop where the wrapper (dwc3_qcom) and the core (dwc3) are probing
and we're trying to time it just right so that driver for dwc3 binds
before we setup interconnects? I don't know if dwc3 can communicate to
the wrapper but that would be more of a direct way to do this. Or maybe
the wrapper should try to read the DT property for maximum speed and
fallback to a worst case high bandwidth value if it can't figure it out
itself without help from dwc3 core.


This was added in V4 to address comments from Matthias in V3

https://patchwork.kernel.org/patch/11148587/


Yes, that why I said:

"I see that this was added in an earlier
   revision of this patch series but there isn't any mention of why
   device_is_bound() is used here. It would be great if there was a comment
   detailing why this is necessary. It sounds like maximum_speed is
   important?"

Can you please respond to the rest of my email?

I agree with Stephen that using device_is_bound() isn't a good option
in this case, when I suggested it I wasn't looking at the big picture
of how probing the core driver is triggered, sorry about that.

Reading the speed from the DT with usb_get_maximum_speed() as Stephen
suggests would be an option; the drawback is that we then
essentially require the property to be defined, while the core driver
gets a suitable value from hardware registers. Not sure if the wrapper
driver could read from the same registers.

One option could be to poll device_is_bound() for 100 ms (or so), with
sleeps between polls. It's not elegant but would probably work if we
don't find a better solution.

if (np)
         ret = dwc3_qcom_of_register_core(pdev);
     else
         ret = dwc3_qcom_acpi_register_core(pdev);

     if (ret) {
         dev_err(dev, "failed to register DWC3 Core, err=%d\n", ret);
         goto depopulate;
     }

     ret = dwc3_qcom_interconnect_init(qcom);
     if (ret)
         goto depopulate;

     qcom->mode = usb_get_dr_mode(&qcom->dwc3->dev);

Before calling dwc3_qcom_interconnect_init we are checking

     if (ret) {
         dev_err(dev, "failed to register DWC3 Core, err=%d\n", ret);
         goto depopulate;
     }

Doesn't  this condition confirm the core driver is probed?

Not really:

// called under the hood by of_platform_populate()
static int really_probe(struct device *dev, struct device_driver *drv)
{
...

if (dev->bus->probe) {
ret = dev->bus->probe(dev);
if (ret)
goto probe_failed;
} else if (drv->probe) {
ret = drv->probe(dev);
if (ret)
goto probe_failed;
 }

...

probe_failed:
...

/*
  * Ignore errors returned by ->probe so that the next driver can try
  * its luck.
  */
 ret = 0;

...

return ret;
}

As a result of_platform_populate() in dwc3_qcom_of_register_core()
returns 0 even when probing the device failed:

[0.244339] dwc3-qcom a6f8800.usb: DBG: populate
[0.244772] dwc3 a60.dwc3: DBG: 

Re: [PATCH v3 2/3] Fix undefined operation fault that can hang a cpu on crash or panic

2020-07-06 Thread Sean Christopherson
On Sat, Jul 04, 2020 at 04:38:08PM -0400, David P. Reed wrote:
> Fix: Mask undefined operation fault during emergency VMXOFF that must be
> attempted to force cpu exit from VMX root operation.
> Explanation: When a cpu may be in VMX root operation (only possible when
> CR4.VMXE is set), crash or panic reboot tries to exit VMX root operation
> using VMXOFF. This is necessary, because any INIT will be masked while cpu
> is in VMX root operation, but that state cannot be reliably
> discerned by the state of the cpu.
> VMXOFF faults if the cpu is not actually in VMX root operation, signalling
> undefined operation.
> Discovered while debugging an out-of-tree x-visor with a race. Can happen
> due to certain kinds of bugs in KVM.
> 
> Fixes: 208067 
> Reported-by: David P. Reed 
> Suggested-by: Thomas Gleixner 
> Suggested-by: Sean Christopherson 
> Suggested-by: Andy Lutomirski 
> Signed-off-by: David P. Reed 
> ---
>  arch/x86/include/asm/virtext.h | 20 ++--
>  1 file changed, 14 insertions(+), 6 deletions(-)
> 
> diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h
> index 0ede8d04535a..0e0900eacb9c 100644
> --- a/arch/x86/include/asm/virtext.h
> +++ b/arch/x86/include/asm/virtext.h
> @@ -30,11 +30,11 @@ static inline int cpu_has_vmx(void)
>  }
>  
>  
> -/* Disable VMX on the current CPU
> +/* Exit VMX root mode and disable VMX on the current CPU.
>   *
>   * vmxoff causes a undefined-opcode exception if vmxon was not run
> - * on the CPU previously. Only call this function if you know VMX
> - * is enabled.
> + * on the CPU previously. Only call this function if you know cpu
> + * is in VMX root mode.
>   */
>  static inline void cpu_vmxoff(void)
>  {
> @@ -47,14 +47,22 @@ static inline int cpu_vmx_enabled(void)
>   return __read_cr4() & X86_CR4_VMXE;
>  }
>  
> -/* Disable VMX if it is enabled on the current CPU
> +/* Safely exit VMX root mode and disable VMX if VMX enabled
> + * on the current CPU. Handle undefined-opcode fault
> + * that can occur if cpu is not in VMX root mode, due
> + * to a race.
>   *
>   * You shouldn't call this if cpu_has_vmx() returns 0.
>   */
>  static inline void __cpu_emergency_vmxoff(void)
>  {
> - if (cpu_vmx_enabled())
> - cpu_vmxoff();
> + if (!cpu_vmx_enabled())
> + return;
> + asm volatile ("1:vmxoff\n\t"
> +   "2:\n\t"
> +   _ASM_EXTABLE(1b, 2b)
> +   ::: "cc", "memory");
> + cr4_clear_bits(X86_CR4_VMXE);

Open coding vmxoff doesn't make sense, and IMO is flat out wrong as it fixes
flows that use __cpu_emergency_vmxoff() but leaves the same bug hanging
around in emergency_vmx_disable_all() until the next patch.

The reason I say it doesn't make sense is that there is no sane scenario
where the generic vmxoff helper should _not_ eat the fault.  All other VMXOFF
faults are mode related, i.e. any fault is guaranteed to be due to the
!post-VMXON check unless we're magically in RM, VM86, compat mode, or at
CPL>0.  Given that the whole point of this series is that it's impossible to
determine whether or not the CPU is post-VMXON if CR4.VMXE=1 without taking a
fault of some form, there's simply no way that anything except the hypervisor
(in normal operation) can know the state of VMX.  And given that the only
in-tree hypervisor (KVM) has its own version of vmxoff, that means there is
no scenario in which cpu_vmxoff() can safely be used.  Case in point, after
the next patch there are no users of cpu_vmxoff().

TL;DR: Just do fixup on cpu_vmxoff().

>  }
>  
>  /* Disable VMX if it is supported and enabled on the current CPU
> -- 
> 2.26.2
> 


Re: [PATCH] phy: qcom: remove ufs qmp phy driver

2020-07-06 Thread Bjorn Andersson
On Mon 29 Jun 21:54 PDT 2020, Vinod Koul wrote:

> Hi Bjorn,
> 
> On 29-06-20, 12:24, Bjorn Andersson wrote:
> > On Mon 29 Jun 07:54 PDT 2020, Vinod Koul wrote:
> > 
> > > UFS QMP phy drivers are duplicate as we are supposed to use common QMP
> > > phy driver which is working fine on various platforms. So remove the
> > > unused driver
> > > 
> > 
> > This describes the current state, but the UFS QMP driver had a purpose
> > not that long ago and I would like the commit message to describe what
> > changed and why it's now fine to remove the driver.
> 
> Would below look better, also feel free to suggest as you have the
> more history on this :)
> 
> "UFS QMP driver is dedicated driver for QMP phy for UFS variant. We
> also have a common QMP phy driver which works not only for UFS but
> USB and PCIe as well, so retire this driver in favour of the common
> driver"
> 

How about:

"The UFS specific QMP PHY driver started off supporting the 14nm and
20nm hardware. With the 20nm support marked broken for a long time and
the 14nm support added to the common QMP PHY, this driver has not been
used in a while. So delete it."

Regards,
Bjorn

> > 
> > I'm happy with the patch itself (i.e. the removal of the driver) though.
> 
> Thanks
> -- 
> ~Vinod


Re: [RFC PATCH v2 1/5] mm: make HPAGE_PxD_{SHIFT,MASK,SIZE} always available

2020-07-06 Thread Hugh Dickins
On Mon, 6 Jul 2020, Mike Rapoport wrote:
> From: Mike Rapoport 
> 
> The definitions of shift, mask and size for the second and the third level
> of the leaf pages are available only when CONFIG_TRANSPARENT_HUGEPAGE is
> set. Otherwise they evaluate to BUILD_BUG().
> 
> There is no explanation, either in the code or in the changelog, of why the
> usage of, e.g. HPAGE_PMD_SIZE should be only allowed with THP and forbidden
> otherwise while the definitions of HPAGE_PMD_SIZE and HPAGE_PUD_SIZE
> express the sizes better than ambiguous HPAGE_SIZE.
> 
> Make HPAGE_PxD_{SHIFT,MASK,SIZE} definitions available unconditionally.

Adding Andrea to Cc, he's the one who structured it that way,
and should be consulted.

I'm ambivalent myself.  Many's the time I've been irritated by the
BUILD_BUG() in HPAGE_etc, and it's responsible for very many #ifdef
CONFIG_TRANSPARENT_HUGEPAGEs or IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)s
that you find uglily scattered around the source.

But that's the point of it: it's warning when you write code peculiar
to THP, that is going to bloat the build of kernels without any THP.

So although I've often been tempted to do as you suggest, I've always
ended up respecting Andrea's intention, and worked around it instead
(sometimes with #ifdef or IS_ENABLED(), sometimes with
PMD_{SHIFT,MASK,SIZE}, sometimes with a local definition).

Hugh

> 
> Signed-off-by: Mike Rapoport 
> ---
>  include/linux/huge_mm.h | 10 ++
>  1 file changed, 2 insertions(+), 8 deletions(-)
> 
> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
> index 71f20776b06c..1f4b44a76e31 100644
> --- a/include/linux/huge_mm.h
> +++ b/include/linux/huge_mm.h
> @@ -115,7 +115,6 @@ extern struct kobj_attribute shmem_enabled_attr;
>  #define HPAGE_PMD_ORDER (HPAGE_PMD_SHIFT-PAGE_SHIFT)
>  #define HPAGE_PMD_NR (1<<HPAGE_PMD_ORDER)
>  
> -#ifdef CONFIG_TRANSPARENT_HUGEPAGE
>  #define HPAGE_PMD_SHIFT PMD_SHIFT
>  #define HPAGE_PMD_SIZE   ((1UL) << HPAGE_PMD_SHIFT)
>  #define HPAGE_PMD_MASK   (~(HPAGE_PMD_SIZE - 1))
> @@ -124,6 +123,8 @@ extern struct kobj_attribute shmem_enabled_attr;
>  #define HPAGE_PUD_SIZE   ((1UL) << HPAGE_PUD_SHIFT)
>  #define HPAGE_PUD_MASK   (~(HPAGE_PUD_SIZE - 1))
>  
> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
> +
>  extern unsigned long transparent_hugepage_flags;
>  
>  /*
> @@ -316,13 +317,6 @@ static inline struct list_head 
> *page_deferred_list(struct page *page)
>  }
>  
>  #else /* CONFIG_TRANSPARENT_HUGEPAGE */
> -#define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; })
> -#define HPAGE_PMD_MASK ({ BUILD_BUG(); 0; })
> -#define HPAGE_PMD_SIZE ({ BUILD_BUG(); 0; })
> -
> -#define HPAGE_PUD_SHIFT ({ BUILD_BUG(); 0; })
> -#define HPAGE_PUD_MASK ({ BUILD_BUG(); 0; })
> -#define HPAGE_PUD_SIZE ({ BUILD_BUG(); 0; })
>  
>  static inline int hpage_nr_pages(struct page *page)
>  {
> -- 
> 2.26.2


[PATCH] pinctrl: qcom: ipq8074: route gpio interrupts to APPS

2020-07-06 Thread Kathiravan T
Set the target processor to APPS to route the GPIO interrupts to APPS.

Signed-off-by: Rajkumar Ayyasamy 
Signed-off-by: Kathiravan T 
---
 drivers/pinctrl/qcom/pinctrl-ipq8074.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/pinctrl/qcom/pinctrl-ipq8074.c 
b/drivers/pinctrl/qcom/pinctrl-ipq8074.c
index 0edd41c..aec68b1 100644
--- a/drivers/pinctrl/qcom/pinctrl-ipq8074.c
+++ b/drivers/pinctrl/qcom/pinctrl-ipq8074.c
@@ -50,6 +50,7 @@
.intr_enable_bit = 0,   \
.intr_status_bit = 0,   \
.intr_target_bit = 5,   \
+   .intr_target_kpss_val = 3,  \
.intr_raw_status_bit = 4,   \
.intr_polarity_bit = 1, \
.intr_detection_bit = 2,\
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of 
Code Aurora Forum, hosted by The Linux Foundation



[PATCH v2] MAINTAINERS: Add Shengjiu to reviewer list of sound/soc/fsl

2020-07-06 Thread Nicolin Chen
Add Shengjiu who's actively working on the latest fsl/nxp audio drivers.

Signed-off-by: Nicolin Chen 
Acked-by: Shengjiu Wang 
Reviewed-by: Fabio Estevam 
---
Changelog
v1->v2:
 * Replaced Shengjiu's email address with his gmail one
 * Added Acked-by from Shengjiu and Reviewed-by from Fabio

 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 496fd4eafb68..ff97b8cefaea 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6956,6 +6956,7 @@ M:Timur Tabi 
 M: Nicolin Chen 
 M: Xiubo Li 
 R: Fabio Estevam 
+R: Shengjiu Wang 
 L: alsa-de...@alsa-project.org (moderated for non-subscribers)
 L: linuxppc-...@lists.ozlabs.org
 S: Maintained
-- 
2.17.1



linux-next: manual merge of the seccomp tree with the kselftest tree

2020-07-06 Thread Stephen Rothwell
Hi all,

Today's linux-next merge of the seccomp tree got a conflict in:

  tools/testing/selftests/seccomp/seccomp_bpf.c

between commit:

  9847d24af95c ("selftests/harness: Refactor XFAIL into SKIP")

from the kselftest tree and commits:

  aae7d264d68b ("selftests/seccomp: Check for EPOLLHUP for user_notif")
  11b4beaa0d31 ("selftests/seccomp: Make kcmp() less required")
  ef332c970dfa ("selftests/seccomp: Rename user_trap_syscall() to 
user_notif_syscall()")

from the seccomp tree.

I fixed it up (see below) and can carry the fix as necessary. This
is now fixed as far as linux-next is concerned, but any non trivial
conflicts should be mentioned to your upstream maintainer when your tree
is submitted for merging.  You may also want to consider cooperating
with the maintainer of the conflicting tree to minimise any particularly
complex conflicts.

-- 
Cheers,
Stephen Rothwell

diff --cc tools/testing/selftests/seccomp/seccomp_bpf.c
index b878e8379966,b854a6c5bf49..
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@@ -3079,10 -3043,8 +3055,10 @@@ TEST(get_metadata
long ret;
  
/* Only real root can get metadata. */
 -  if (geteuid())
 -  XFAIL(return, "get_metadata test requires real root");
 +  if (geteuid()) {
-   SKIP(return, "get_metadata requires real root");
++  SKIP(return, "get_metadata test requires real root");
 +  return;
 +  }
  
ASSERT_EQ(0, pipe(pipefd));
  


pgpyhv7e1RTH7.pgp
Description: OpenPGP digital signature


[PATCH v3 4/4] PCI/ACS: Enable PCI_ACS_TB for untrusted/external-facing devices

2020-07-06 Thread Rajat Jain
When enabling ACS, enable translation blocking for external facing ports
and untrusted devices.

Signed-off-by: Rajat Jain 
---
v3: print warning if ACS_TB not supported on external-facing/untrusted ports.
Minor code comments fixes.
v2: Commit log change

 drivers/pci/pci.c|  7 +++
 drivers/pci/quirks.c | 14 ++
 2 files changed, 21 insertions(+)

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 73a8627822140..497ac05bf36e8 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -876,6 +876,13 @@ static void pci_std_enable_acs(struct pci_dev *dev)
/* Upstream Forwarding */
ctrl |= (cap & PCI_ACS_UF);
 
+   /* Enable Translation Blocking for external devices */
+   if (dev->external_facing || dev->untrusted)
+   if (cap & PCI_ACS_TB)
+   ctrl |= PCI_ACS_TB;
+   else
+   pci_warn(dev, "ACS: No Trans Blocking on ext dev\n");
+
pci_write_config_word(dev, pos + PCI_ACS_CTRL, ctrl);
 }
 
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index b341628e47527..9cc8c1dc215ee 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -4934,6 +4934,13 @@ static void pci_quirk_enable_intel_rp_mpc_acs(struct 
pci_dev *dev)
}
 }
 
+/*
+ * Currently this quirk does the equivalent of
+ * PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF
+ *
+ * TODO: This quirk also needs to do equivalent of PCI_ACS_TB,
+ * if dev->external_facing || dev->untrusted
+ */
 static int pci_quirk_enable_intel_pch_acs(struct pci_dev *dev)
 {
if (!pci_quirk_intel_pch_acs_match(dev))
@@ -4973,6 +4980,13 @@ static int pci_quirk_enable_intel_spt_pch_acs(struct 
pci_dev *dev)
ctrl |= (cap & PCI_ACS_CR);
ctrl |= (cap & PCI_ACS_UF);
 
+   /* Enable Translation Blocking for external devices */
+   if (dev->external_facing || dev->untrusted)
+   if (cap & PCI_ACS_TB)
+   ctrl |= PCI_ACS_TB;
+   else
+   pci_warn(dev, "ACS: No Trans Blocking on ext dev\n");
+
pci_write_config_dword(dev, pos + INTEL_SPT_ACS_CTRL, ctrl);
 
pci_info(dev, "Intel SPT PCH root port ACS workaround enabled\n");
-- 
2.27.0.212.ge8ba1cc988-goog



[PATCH v3 1/4] PCI: Move pci_enable_acs() and its dependencies up in pci.c

2020-07-06 Thread Rajat Jain
Move pci_enable_acs() and the functions it depends on, further up in the
source code to avoid having to forward declare it when we make it static
in near future (next patch).

No functional changes intended.

Signed-off-by: Rajat Jain 
---
v3: Initial version of the patch, created per Bjorn's suggestion

 drivers/pci/pci.c | 254 +++---
 1 file changed, 127 insertions(+), 127 deletions(-)

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index ce096272f52b1..eec625f0e594e 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -777,6 +777,133 @@ int pci_wait_for_pending(struct pci_dev *dev, int pos, 
u16 mask)
return 0;
 }
 
+static int pci_acs_enable;
+
+/**
+ * pci_request_acs - ask for ACS to be enabled if supported
+ */
+void pci_request_acs(void)
+{
+   pci_acs_enable = 1;
+}
+
+static const char *disable_acs_redir_param;
+
+/**
+ * pci_disable_acs_redir - disable ACS redirect capabilities
+ * @dev: the PCI device
+ *
+ * For only devices specified in the disable_acs_redir parameter.
+ */
+static void pci_disable_acs_redir(struct pci_dev *dev)
+{
+   int ret = 0;
+   const char *p;
+   int pos;
+   u16 ctrl;
+
+   if (!disable_acs_redir_param)
+   return;
+
+   p = disable_acs_redir_param;
+   while (*p) {
+   ret = pci_dev_str_match(dev, p, );
+   if (ret < 0) {
+   pr_info_once("PCI: Can't parse disable_acs_redir 
parameter: %s\n",
+disable_acs_redir_param);
+
+   break;
+   } else if (ret == 1) {
+   /* Found a match */
+   break;
+   }
+
+   if (*p != ';' && *p != ',') {
+   /* End of param or invalid format */
+   break;
+   }
+   p++;
+   }
+
+   if (ret != 1)
+   return;
+
+   if (!pci_dev_specific_disable_acs_redir(dev))
+   return;
+
+   pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ACS);
+   if (!pos) {
+   pci_warn(dev, "cannot disable ACS redirect for this hardware as 
it does not have ACS capabilities\n");
+   return;
+   }
+
+   pci_read_config_word(dev, pos + PCI_ACS_CTRL, );
+
+   /* P2P Request & Completion Redirect */
+   ctrl &= ~(PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_EC);
+
+   pci_write_config_word(dev, pos + PCI_ACS_CTRL, ctrl);
+
+   pci_info(dev, "disabled ACS redirect\n");
+}
+
+/**
+ * pci_std_enable_acs - enable ACS on devices using standard ACS capabilities
+ * @dev: the PCI device
+ */
+static void pci_std_enable_acs(struct pci_dev *dev)
+{
+   int pos;
+   u16 cap;
+   u16 ctrl;
+
+   pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ACS);
+   if (!pos)
+   return;
+
+   pci_read_config_word(dev, pos + PCI_ACS_CAP, );
+   pci_read_config_word(dev, pos + PCI_ACS_CTRL, );
+
+   /* Source Validation */
+   ctrl |= (cap & PCI_ACS_SV);
+
+   /* P2P Request Redirect */
+   ctrl |= (cap & PCI_ACS_RR);
+
+   /* P2P Completion Redirect */
+   ctrl |= (cap & PCI_ACS_CR);
+
+   /* Upstream Forwarding */
+   ctrl |= (cap & PCI_ACS_UF);
+
+   pci_write_config_word(dev, pos + PCI_ACS_CTRL, ctrl);
+}
+
+/**
+ * pci_enable_acs - enable ACS if hardware support it
+ * @dev: the PCI device
+ */
+void pci_enable_acs(struct pci_dev *dev)
+{
+   if (!pci_acs_enable)
+   goto disable_acs_redir;
+
+   if (!pci_dev_specific_enable_acs(dev))
+   goto disable_acs_redir;
+
+   pci_std_enable_acs(dev);
+
+disable_acs_redir:
+   /*
+* Note: pci_disable_acs_redir() must be called even if ACS was not
+* enabled by the kernel because it may have been enabled by
+* platform firmware.  So if we are told to disable it, we should
+* always disable it after setting the kernel's default
+* preferences.
+*/
+   pci_disable_acs_redir(dev);
+}
+
 /**
  * pci_restore_bars - restore a device's BAR values (e.g. after wake-up)
  * @dev: PCI device to have its BARs restored
@@ -3230,133 +3357,6 @@ void pci_configure_ari(struct pci_dev *dev)
}
 }
 
-static int pci_acs_enable;
-
-/**
- * pci_request_acs - ask for ACS to be enabled if supported
- */
-void pci_request_acs(void)
-{
-   pci_acs_enable = 1;
-}
-
-static const char *disable_acs_redir_param;
-
-/**
- * pci_disable_acs_redir - disable ACS redirect capabilities
- * @dev: the PCI device
- *
- * For only devices specified in the disable_acs_redir parameter.
- */
-static void pci_disable_acs_redir(struct pci_dev *dev)
-{
-   int ret = 0;
-   const char *p;
-   int pos;
-   u16 ctrl;
-
-   if (!disable_acs_redir_param)
-   return;
-
-   p = disable_acs_redir_param;
-   while (*p) {
-   ret = pci_dev_str_match(dev, p, 

[PATCH v3 3/4] PCI: Treat "external-facing" devices as internal

2020-07-06 Thread Rajat Jain
The "ExternalFacingPort" devices (root ports) are internal devices that
sit on the internal system fabric. Ref:
https://docs.microsoft.com/en-us/windows-hardware/drivers/pci/dsd-for-pcie-root-ports

Currently they are treated (marked as untrusted) on par with other
external devices downstream of those external-facing root ports.

Use the platform flag to identify the external facing devices and then
treat them at par with internal devices (don't mark them untrusted).
Any devices downstream continue to be marked as "untrusted". This was
discussed here:
https://lore.kernel.org/linux-pci/20200610230906.GA1528594@bjorn-Precision-5520/

Signed-off-by: Rajat Jain 
---
v3: * fix commit log and minor code comment
* Don't check for "ExternalFacingPort" on PCI_EXP_TYPE_DOWNSTREAM
* Check only for pdev->external_facing in iommu.c
v2: cosmetic changes in commit log

 drivers/iommu/intel/iommu.c |  6 +++---
 drivers/pci/of.c|  2 +-
 drivers/pci/pci-acpi.c  | 10 +-
 drivers/pci/probe.c |  2 +-
 include/linux/pci.h |  8 
 5 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index d759e7234e982..4f0f6ee2d4aaa 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -4738,12 +4738,12 @@ const struct attribute_group *intel_iommu_groups[] = {
NULL,
 };
 
-static inline bool has_untrusted_dev(void)
+static inline bool has_external_pci(void)
 {
struct pci_dev *pdev = NULL;
 
for_each_pci_dev(pdev)
-   if (pdev->untrusted)
+   if (pdev->external_facing)
return true;
 
return false;
@@ -4751,7 +4751,7 @@ static inline bool has_untrusted_dev(void)
 
 static int __init platform_optin_force_iommu(void)
 {
-   if (!dmar_platform_optin() || no_platform_optin || !has_untrusted_dev())
+   if (!dmar_platform_optin() || no_platform_optin || !has_external_pci())
return 0;
 
if (no_iommu || dmar_disabled)
diff --git a/drivers/pci/of.c b/drivers/pci/of.c
index 27839cd2459f6..22727fc9558df 100644
--- a/drivers/pci/of.c
+++ b/drivers/pci/of.c
@@ -42,7 +42,7 @@ void pci_set_bus_of_node(struct pci_bus *bus)
} else {
node = of_node_get(bus->self->dev.of_node);
if (node && of_property_read_bool(node, "external-facing"))
-   bus->self->untrusted = true;
+   bus->self->external_facing = true;
}
 
bus->dev.of_node = node;
diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index 7224b1e5f2a83..43a5158b2b662 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -1213,7 +1213,7 @@ static void pci_acpi_optimize_delay(struct pci_dev *pdev,
ACPI_FREE(obj);
 }
 
-static void pci_acpi_set_untrusted(struct pci_dev *dev)
+static void pci_acpi_set_external_facing(struct pci_dev *dev)
 {
u8 val;
 
@@ -1223,12 +1223,12 @@ static void pci_acpi_set_untrusted(struct pci_dev *dev)
return;
 
/*
-* These root ports expose PCIe (including DMA) outside of the
-* system so make sure we treat them and everything behind as
+* These root/down ports expose PCIe (including DMA) outside of the
+* system so make sure we treat everything behind them as
 * untrusted.
 */
if (val)
-   dev->untrusted = 1;
+   dev->external_facing = 1;
 }
 
 static void pci_acpi_setup(struct device *dev)
@@ -1240,7 +1240,7 @@ static void pci_acpi_setup(struct device *dev)
return;
 
pci_acpi_optimize_delay(pci_dev, adev->handle);
-   pci_acpi_set_untrusted(pci_dev);
+   pci_acpi_set_external_facing(pci_dev);
pci_acpi_add_edr_notifier(pci_dev);
 
pci_acpi_add_pm_notifier(adev, pci_dev);
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 6d87066a5ecc5..8c40c00413e74 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1552,7 +1552,7 @@ static void set_pcie_untrusted(struct pci_dev *dev)
 * untrusted as well.
 */
parent = pci_upstream_bridge(dev);
-   if (parent && parent->untrusted)
+   if (parent && (parent->untrusted || parent->external_facing))
dev->untrusted = true;
 }
 
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 0ca39042507ce..281be857d2430 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -432,6 +432,14 @@ struct pci_dev {
 * mappings to make sure they cannot access arbitrary memory.
 */
unsigned intuntrusted:1;
+   /*
+* Devices are marked as external-facing using info from platform
+* (ACPI / devicetree). An external-facing device is still an internal
+* trusted device, but it faces external untrusted devices. Thus any
+* device enumerated downstream an external-facing device, is marked
+* as 

[PATCH v3 2/4] PCI: Keep the ACS capability offset in device

2020-07-06 Thread Rajat Jain
Currently ACS capability is being looked up at a number of places. Read and
store it once at bootup so that it can be used by all later.

Signed-off-by: Rajat Jain 
---
v3: fix commit log, remove forward declaration of static function
v2: Commit log cosmetic changes

 drivers/pci/p2pdma.c |  2 +-
 drivers/pci/pci.c| 20 
 drivers/pci/pci.h|  2 +-
 drivers/pci/probe.c  |  2 +-
 drivers/pci/quirks.c |  8 
 include/linux/pci.h  |  1 +
 6 files changed, 24 insertions(+), 11 deletions(-)

diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c
index e8e444eeb1cd2..f29a48f8fa594 100644
--- a/drivers/pci/p2pdma.c
+++ b/drivers/pci/p2pdma.c
@@ -253,7 +253,7 @@ static int pci_bridge_has_acs_redir(struct pci_dev *pdev)
int pos;
u16 ctrl;
 
-   pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ACS);
+   pos = pdev->acs_cap;
if (!pos)
return 0;
 
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index eec625f0e594e..73a8627822140 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -831,7 +831,7 @@ static void pci_disable_acs_redir(struct pci_dev *dev)
if (!pci_dev_specific_disable_acs_redir(dev))
return;
 
-   pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ACS);
+   pos = dev->acs_cap;
if (!pos) {
pci_warn(dev, "cannot disable ACS redirect for this hardware as 
it does not have ACS capabilities\n");
return;
@@ -857,7 +857,7 @@ static void pci_std_enable_acs(struct pci_dev *dev)
u16 cap;
u16 ctrl;
 
-   pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ACS);
+   pos = dev->acs_cap;
if (!pos)
return;
 
@@ -883,7 +883,7 @@ static void pci_std_enable_acs(struct pci_dev *dev)
  * pci_enable_acs - enable ACS if hardware support it
  * @dev: the PCI device
  */
-void pci_enable_acs(struct pci_dev *dev)
+static void pci_enable_acs(struct pci_dev *dev)
 {
if (!pci_acs_enable)
goto disable_acs_redir;
@@ -3362,7 +3362,7 @@ static bool pci_acs_flags_enabled(struct pci_dev *pdev, 
u16 acs_flags)
int pos;
u16 cap, ctrl;
 
-   pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ACS);
+   pos = pdev->acs_cap;
if (!pos)
return false;
 
@@ -3487,6 +3487,18 @@ bool pci_acs_path_enabled(struct pci_dev *start,
return true;
 }
 
+/**
+ * pci_acs_init - Initialize ACS if hardware supports it
+ * @dev: the PCI device
+ */
+void pci_acs_init(struct pci_dev *dev)
+{
+   dev->acs_cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ACS);
+
+   if (dev->acs_cap)
+   pci_enable_acs(dev);
+}
+
 /**
  * pci_rebar_find_pos - find position of resize ctrl reg for BAR
  * @pdev: PCI device
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 6d3f758671064..12fb79fbe29d3 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -532,7 +532,7 @@ static inline resource_size_t pci_resource_alignment(struct 
pci_dev *dev,
return resource_alignment(res);
 }
 
-void pci_enable_acs(struct pci_dev *dev);
+void pci_acs_init(struct pci_dev *dev);
 #ifdef CONFIG_PCI_QUIRKS
 int pci_dev_specific_acs_enabled(struct pci_dev *dev, u16 acs_flags);
 int pci_dev_specific_enable_acs(struct pci_dev *dev);
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 2f66988cea257..6d87066a5ecc5 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -2390,7 +2390,7 @@ static void pci_init_capabilities(struct pci_dev *dev)
pci_ats_init(dev);  /* Address Translation Services */
pci_pri_init(dev);  /* Page Request Interface */
pci_pasid_init(dev);/* Process Address Space ID */
-   pci_enable_acs(dev);/* Enable ACS P2P upstream forwarding */
+   pci_acs_init(dev);  /* Access Control Services */
pci_ptm_init(dev);  /* Precision Time Measurement */
pci_aer_init(dev);  /* Advanced Error Reporting */
pci_dpc_init(dev);  /* Downstream Port Containment */
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 812bfc32ecb82..b341628e47527 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -4653,7 +4653,7 @@ static int pci_quirk_intel_spt_pch_acs(struct pci_dev 
*dev, u16 acs_flags)
if (!pci_quirk_intel_spt_pch_acs_match(dev))
return -ENOTTY;
 
-   pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ACS);
+   pos = dev->acs_cap;
if (!pos)
return -ENOTTY;
 
@@ -4961,7 +4961,7 @@ static int pci_quirk_enable_intel_spt_pch_acs(struct 
pci_dev *dev)
if (!pci_quirk_intel_spt_pch_acs_match(dev))
return -ENOTTY;
 
-   pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ACS);
+   pos = dev->acs_cap;
if (!pos)
return -ENOTTY;
 
@@ -4988,7 +4988,7 @@ static int 

Re: [PATCH v2] pinctrl: qcom: sc7180: Make gpio28 non wakeup capable for google,lazor

2020-07-06 Thread Rajendra Nayak



[]..


@@ -1151,6 +1168,10 @@ static const struct msm_pinctrl_soc_data sc7180_pinctrl 
= {

  static int sc7180_pinctrl_probe(struct platform_device *pdev)
  {
+   if (of_machine_is_compatible("google,lazor")) {
+   sc7180_pinctrl.wakeirq_map = sc7180_lazor_pdc_map;
+   sc7180_pinctrl.nwakeirq_map = ARRAY_SIZE(sc7180_lazor_pdc_map);
+   }


As much as I want patches landed and things working, the above just
doesn't feel like a viable solution.  I guess it could work as a short
term hack but it's going to become untenable pretty quickly.


I second that.


As we
have more variants of this we're going to have to just keep piling
more machines in here, right?  ...this is also already broken for us
because not all boards will have the "google,lazor" compatible.  From
the current Chrome OS here are the compatibles for various revs/SKUs

compatible = "google,lazor-rev0", "qcom,sc7180";
compatible = "google,lazor-rev0-sku0", "qcom,sc7180";
compatible = "google,lazor", "qcom,sc7180";
compatible = "google,lazor-sku0", "qcom,sc7180";
compatible = "google,lazor-rev2", "qcom,sc7180";

...so of the 5 boards you'll only match one of them.


Maybe I'm jumping into a situation again where I'm ignorant since I
haven't followed all the prior conversation, but is it really that
hard to just add dual edge support to the PDC irqchip driver?  ...or


FWIK, this is really a PDC hardware issue (with the specific IP rev that exists
on sc7180) so working it around in SW could get ugly.


maybe it's just easier to change the pinctrl driver to emulate dual
edge itself and that can work around the problem in the PDC?  There
seem to be a few samples you could copy from:

$ git log --oneline --no-merges --grep=emulate drivers/pinctrl/
3221f40b7631 pinctrl: mediatek: emulate GPIO interrupt on both-edges
5a92750133ff pinctrl: rockchip: emulate both edge triggered interrupts



pinctrl-msm already supports emulating dual edge, but my understanding
was that the problem lies in that somehow this emulation would have to
be tied to or affect the PDC driver?


yes, that's correct, pinctrl-msm already supports it, the problem lies
in the fact that PDC does not. This patch, in fact, was trying to fix the
issue by removing all PDC involvement for gpio28 and making pinctrl-msm
in charge of it.

--
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation


Re: [PATCH v14 07/20] mm/thp: narrow lru locking

2020-07-06 Thread Hugh Dickins
On Mon, 6 Jul 2020, Matthew Wilcox wrote:
> On Mon, Jul 06, 2020 at 05:15:09PM +0800, Alex Shi wrote:
> > Hi Kirill & Johannes & Matthew,

Adding Kirill, who was in patch's Cc list but not mail's Cc list.

I asked Alex to direct this one particularly to Kirill and Johannes
and Matthew because (and I regret that the commit message still does
not make this at all clear) this patch changes the lock ordering:
which for years has been lru_lock outside memcg move_lock outside
i_pages lock, but here inverted to lru_lock inside i_pages lock.

I don't see a strong reason to have them one way round or the other,
and think Alex is right that they can safely be reversed here: but
he doesn't actually give any reason for doing so (if cleanup, then
I think the cleanup should have been taken further), and no reason
for doing so as part of this series.

I had more need to know which way round they should go, when adding
lru_lock into mem_cgroup_move_account (inside or outside move_lock?):
but Alex's use of TestClearPageLRU appears to have successfully
eliminated the need for that; so I only need to know for the final
Doc patch in the series (credited to my name), where mm/rmap.c
documents the lock ordering.

I'm okay with leaving this patch in the series (and the final patch
currently documents this new order); but wondered if someone else
(especially Kirill or Johannes or Matthew) sees a reason against it?

And I have to admit that, in researching this, I discovered that
actually we unconsciously departed from the supposed lock ordering
years ago: back in 3.18's 8186eb6a799e, Johannes did a cleanup which
moved a clear_page_mlock() call to inside memcg move_lock, and in
principle clear_page_mlock() can take lru_lock. But we have never
seen a lockdep complaint about this, so I suspect that the page is
(almost?) always already isolated from lru when that is called,
and the issue therefore hypothetical.

My vote, for dispatch of the series, is to leave this patch in;
but cannot object if consensus were that it should be taken out.

Hugh

> > 
> > Would you like to give some comments or share your concerns about this patchset,
> > especially for the THP part? 
> 
> I don't have the brain space to understand this patch set fully at
> the moment.  I'll note that the realtime folks are doing their best to
> stamp out users of local_irq_disable(), so they won't be pleased to see
> you adding a new one.  Also, you removed the comment explaining why the
> lock needed to be taken.
> 
> > Many Thanks
> > Alex
> > 
> > 在 2020/7/3 下午1:07, Alex Shi 写道:
> > > lru_lock and page cache xa_lock have no reason with current sequence,
> > > put them together isn't necessary. let's narrow the lru locking, but
> > > left the local_irq_disable to block interrupt re-entry and statistic 
> > > update.
> > > 
> > > Hugh Dickins point: split_huge_page_to_list() was already silly,to be
> > > using the _irqsave variant: it's just been taking sleeping locks, so
> > > would already be broken if entered with interrupts enabled.
> > > so we can save passing flags argument down to __split_huge_page().
> > > 
> > > Signed-off-by: Alex Shi 
> > > Signed-off-by: Wei Yang 
> > > Cc: Hugh Dickins 
> > > Cc: Kirill A. Shutemov 
> > > Cc: Andrea Arcangeli 
> > > Cc: Johannes Weiner 
> > > Cc: Matthew Wilcox 
> > > Cc: Andrew Morton 
> > > Cc: linux...@kvack.org
> > > Cc: linux-kernel@vger.kernel.org
> > > ---
> > >  mm/huge_memory.c | 24 
> > >  1 file changed, 12 insertions(+), 12 deletions(-)
> > > 
> > > diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> > > index b18f21da4dac..607869330329 100644
> > > --- a/mm/huge_memory.c
> > > +++ b/mm/huge_memory.c
> > > @@ -2433,7 +2433,7 @@ static void __split_huge_page_tail(struct page 
> > > *head, int tail,
> > >  }
> > >  
> > >  static void __split_huge_page(struct page *page, struct list_head *list,
> > > - pgoff_t end, unsigned long flags)
> > > +   pgoff_t end)
> > >  {
> > >   struct page *head = compound_head(page);
> > >   pg_data_t *pgdat = page_pgdat(head);
> > > @@ -2442,8 +2442,6 @@ static void __split_huge_page(struct page *page, 
> > > struct list_head *list,
> > >   unsigned long offset = 0;
> > >   int i;
> > >  
> > > - lruvec = mem_cgroup_page_lruvec(head, pgdat);
> > > -
> > >   /* complete memcg works before add pages to LRU */
> > >   mem_cgroup_split_huge_fixup(head);
> > >  
> > > @@ -2455,6 +2453,11 @@ static void __split_huge_page(struct page *page, 
> > > struct list_head *list,
> > >   xa_lock(_cache->i_pages);
> > >   }
> > >  
> > > + /* lock lru list/PageCompound, ref freezed by page_ref_freeze */
> > > + spin_lock(>lru_lock);
> > > +
> > > + lruvec = mem_cgroup_page_lruvec(head, pgdat);
> > > +
> > >   for (i = HPAGE_PMD_NR - 1; i >= 1; i--) {
> > >   __split_huge_page_tail(head, i, lruvec, list);
> > >   /* Some pages can be beyond i_size: drop them from page cache */
> > > @@ -2474,6 +2477,8 @@ static void 

Re: [GIT PULL 1/1] bcm2835-dt-next-2020-07-06

2020-07-06 Thread Florian Fainelli



On 7/6/2020 10:11 AM, Nicolas Saenz Julienne wrote:
> Hi Florian,
> 
> The following changes since commit b3a9e3b9622ae10064826dccb4f7a52bd88c7407:
> 
>   Linux 5.8-rc1 (2020-06-14 12:45:04 -0700)
> 
> are available in the Git repository at:
> 
>   https://git.kernel.org/pub/scm/linux/kernel/git/nsaenz/linux-rpi.git 
> tags/bcm2835-dt-next-2020-07-06
> 
> for you to fetch changes up to 25c6f3960764b266a609281341f889a06d045039:
> 
>   ARM: dts: bcm2711: Add HDMI DVP (2020-07-06 18:52:01 +0200)
> 
> 
> Maxime Ripard introduces two new clock providers into RPi4's device-tree:
> 
> - The first one based on the enhancements made to clk-raspberrypi, which
>   is now registered through DT and provides control over the whole range
>   of firmware based clocks.
> 
> - The second one based on the new clk-bcm2711-dvp driver, which gates
>   the clocks and reset signals that feed into RPi4's HDMI0/1 blocks.
> 
> 

Merged into devicetree/next, thanks Nicolas.
-- 
Florian


Re: [PATCH v34 11/24] x86/sgx: Add SGX enclave driver

2020-07-06 Thread Sean Christopherson
On Tue, Jul 07, 2020 at 05:39:04AM +0100, Matthew Wilcox wrote:
> although I think you have a simpler task.
> 
>   XA_STATE(xas, ..., start_index);
> 
>   for (;;) {
>   struct page *page = xas_next();
> 
>   if (!page || (~page->vm_max_prot_bits & vm_prot_bits))
>   return -EACCES;
>   }
> 
>   return 0;
> 
> should do the trick, I think.

Ah, neato.  Thanks!


Re: [PATCH v2 16/18] media: mtk-vcodec: venc: make S_PARM return -ENOTTY for CAPTURE queue

2020-07-06 Thread Alexandre Courbot
On Mon, Jul 6, 2020 at 9:43 PM Hans Verkuil  wrote:
>
> On 04/07/2020 14:36, Alexandre Courbot wrote:
> > On Fri, Jul 3, 2020 at 5:30 PM Hans Verkuil  wrote:
> >>
> >> On 26/06/2020 10:04, Alexandre Courbot wrote:
> >>> v4l2-compliance expects ENOTTY to be returned when a given queue does
> >>> not support S_PARM.
> >>>
> >>> Signed-off-by: Alexandre Courbot 
> >>> ---
> >>>  drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c | 2 +-
> >>>  1 file changed, 1 insertion(+), 1 deletion(-)
> >>>
> >>> diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c 
> >>> b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c
> >>> index aae610e6d4e8..346a33c6869d 100644
> >>> --- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c
> >>> +++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c
> >>> @@ -200,7 +200,7 @@ static int vidioc_venc_s_parm(struct file *file, void 
> >>> *priv,
> >>>   struct mtk_vcodec_ctx *ctx = fh_to_ctx(priv);
> >>>
> >>>   if (a->type != V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE)
> >>> - return -EINVAL;
> >>> + return -ENOTTY;
> >>
> >> This doesn't look right: S_PARM *is* supported, just not for this buffer 
> >> type.
> >> So -EINVAL is the correct error code.
> >>
> >> What is the exact v4l2-compliance failure? It might be a bug in the test.
> >
> > The error is as follows:
> >
> > Format ioctls:
> > test VIDIOC_ENUM_FMT/FRAMESIZES/FRAMEINTERVALS: OK
> > fail: v4l2-test-formats.cpp(1336): got error 22 when
> > setting parms for buftype 9
> > test VIDIOC_G/S_PARM: FAIL
>
> This is due to bugs in the compliance test that do not take into account the
> stateful encoder spec.
>
> The compliance test should check for the following:
>
> If ENUM_FRAMEINTERVALS is implemented, then G/S_PARM shall be supported for 
> OUTPUT.
>
> If ENUM_FRAMEINTERVALS is not implemented, then G/S_PARM is optional for 
> OUTPUT. But if
> G_PARM is implemented, then S_PARM shall also be implemented. In this case the
> frame interval range is determined by the codec standard.
>
> If V4L2_FMT_FLAG_ENC_CAP_FRAME_INTERVAL is set for one or more pixel formats,
> then G/S_PARM shall be implemented for both CAPTURE and OUTPUT.
>
> For backwards compatibility: if G/S_PARM is supported for OUTPUT, then it may 
> be
> supported for CAPTURE as well.
>
> Can you try with the following patch?
>
> With this v4l2-compliance patch you should be able to drop patches 15/18 and 
> 16/18
> of your series.

Indeed, with this patch v4l2-compliance passes on the encoder device
without 15/18 and 16/18. Thanks, I'll remove them from the next
iteration.

>
> Regards,
>
> Hans
>
> -- cut here --
> diff --git a/utils/v4l2-compliance/v4l2-compliance.cpp 
> b/utils/v4l2-compliance/v4l2-compliance.cpp
> index b5bde2f4..d0441651 100644
> --- a/utils/v4l2-compliance/v4l2-compliance.cpp
> +++ b/utils/v4l2-compliance/v4l2-compliance.cpp
> @@ -1250,6 +1250,7 @@ void testNode(struct node , struct node 
> _m2m_cap, struct node _
> node.frmsizes.clear();
> node.frmsizes_count.clear();
> node.has_frmintervals = false;
> +   node.has_enc_cap_frame_interval = false;
> node.valid_buftypes = 0;
> node.valid_memorytype = 0;
> node.buf_caps = 0;
> diff --git a/utils/v4l2-compliance/v4l2-compliance.h 
> b/utils/v4l2-compliance/v4l2-compliance.h
> index 21e31872..ae10bdf9 100644
> --- a/utils/v4l2-compliance/v4l2-compliance.h
> +++ b/utils/v4l2-compliance/v4l2-compliance.h
> @@ -141,6 +141,7 @@ struct base_node {
> frmsizes_set frmsizes;
> frmsizes_count_map frmsizes_count;
> bool has_frmintervals;
> +   bool has_enc_cap_frame_interval;
> __u32 valid_buftypes;
> __u32 valid_buftype;
> __u32 valid_memorytype;
> diff --git a/utils/v4l2-compliance/v4l2-test-formats.cpp 
> b/utils/v4l2-compliance/v4l2-test-formats.cpp
> index 3dfc593e..edf1536e 100644
> --- a/utils/v4l2-compliance/v4l2-test-formats.cpp
> +++ b/utils/v4l2-compliance/v4l2-test-formats.cpp
> @@ -71,6 +71,8 @@ static int testEnumFrameIntervals(struct node *node, __u32 
> pixfmt,
> ret = doioctl(node, VIDIOC_ENUM_FRAMEINTERVALS, );
> if (ret == ENOTTY)
> return ret;
> +   // M2M devices don't support this, except for stateful 
> encoders
> +   fail_on_test(node->is_m2m && !(node->codec_mask & 
> STATEFUL_ENCODER));
> if (f == 0 && ret == EINVAL) {
> if (type == V4L2_FRMSIZE_TYPE_DISCRETE)
> warn("found framesize %dx%d, but no frame 
> intervals\n", w, h);
> @@ -264,16 +266,22 @@ static int testEnumFormatsType(struct node *node, 
> unsigned type)
> return fail("drivers must never set the emulated 
> flag\n");
> if (fmtdesc.flags & 

Lieber Freund (Assalamu Alaikum),?

2020-07-06 Thread AISHA GADDAFI
-- 
Lieber Freund (Assalamu Alaikum),

Ich bin vor einer privaten Suche auf Ihren E-Mail-Kontakt gestoßen
Ihre Hilfe. Mein Name ist Aisha Al-Qaddafi, eine alleinerziehende
Mutter und eine Witwe
mit drei Kindern. Ich bin die einzige leibliche Tochter des Spätlibyschen
Präsident (verstorbener Oberst Muammar Gaddafi).

Ich habe Investmentfonds im Wert von siebenundzwanzig Millionen
fünfhunderttausend
United State Dollar ($ 27.500.000.00) und ich brauche eine
vertrauenswürdige Investition
Manager / Partner aufgrund meines aktuellen Flüchtlingsstatus bin ich jedoch
Möglicherweise interessieren Sie sich für die Unterstützung von
Investitionsprojekten in Ihrem Land
Von dort aus können wir in naher Zukunft Geschäftsbeziehungen aufbauen.

Ich bin bereit, mit Ihnen über das Verhältnis zwischen Investition und
Unternehmensgewinn zu verhandeln
Basis für die zukünftige Investition Gewinne zu erzielen.

Wenn Sie bereit sind, dieses Projekt in meinem Namen zu bearbeiten,
antworten Sie bitte dringend
Damit ich Ihnen mehr Informationen über die Investmentfonds geben kann.

Ihre dringende Antwort wird geschätzt. schreibe mir an diese email adresse (
ayishagdda...@mail.ru ) zur weiteren Diskussion.

Freundliche Grüße
Frau Aisha Al-Qaddafi


Re: [PATCH v7 7/7] Documentation: fpga: dfl: add descriptions for interrupt related interfaces.

2020-07-06 Thread Moritz Fischer
On Tue, Jun 16, 2020 at 12:08:48PM +0800, Xu Yilun wrote:
> This patch adds introductions of interrupt related interfaces for FME
> error reporting, port error reporting and AFU user interrupts features.
> 
> Signed-off-by: Luwei Kang 
> Signed-off-by: Wu Hao 
> Signed-off-by: Xu Yilun 
> Reviewed-by: Marcelo Tosatti 
> Acked-by: Wu Hao 
> ---
> v2: Update Documents cause change of irq ioctl interfaces.
> v3: No change
> v4: Update interrupt support part.
> v5: No change
> v6: No change
> v7: No change
> ---
>  Documentation/fpga/dfl.rst | 19 +++
>  1 file changed, 19 insertions(+)
> 
> diff --git a/Documentation/fpga/dfl.rst b/Documentation/fpga/dfl.rst
> index 978c4af..2df9a0a 100644
> --- a/Documentation/fpga/dfl.rst
> +++ b/Documentation/fpga/dfl.rst
> @@ -89,6 +89,8 @@ The following functions are exposed through ioctls:
>  - Program bitstream (DFL_FPGA_FME_PORT_PR)
>  - Assign port to PF (DFL_FPGA_FME_PORT_ASSIGN)
>  - Release port from PF (DFL_FPGA_FME_PORT_RELEASE)
> +- Get number of irqs of FME global error (DFL_FPGA_FME_ERR_GET_IRQ_NUM)
> +- Set interrupt trigger for FME error (DFL_FPGA_FME_ERR_SET_IRQ)
>  
>  More functions are exposed through sysfs
>  (/sys/class/fpga_region/regionX/dfl-fme.n/):
> @@ -149,6 +151,10 @@ The following functions are exposed through ioctls:
>  - Map DMA buffer (DFL_FPGA_PORT_DMA_MAP)
>  - Unmap DMA buffer (DFL_FPGA_PORT_DMA_UNMAP)
>  - Reset AFU (DFL_FPGA_PORT_RESET)
> +- Get number of irqs of port error (DFL_FPGA_PORT_ERR_GET_IRQ_NUM)
> +- Set interrupt trigger for port error (DFL_FPGA_PORT_ERR_SET_IRQ)
> +- Get number of irqs of UINT (DFL_FPGA_PORT_UINT_GET_IRQ_NUM)
> +- Set interrupt trigger for UINT (DFL_FPGA_PORT_UINT_SET_IRQ)
>  
>  DFL_FPGA_PORT_RESET:
>reset the FPGA Port and its AFU. Userspace can do Port
> @@ -462,6 +468,19 @@ since they are system-wide counters on FPGA device.
>  The current driver does not support sampling. So "perf record" is 
> unsupported.
>  
>  
> +Interrupt support
> +=================
> +Some FME and AFU private features are able to generate interrupts. As 
> mentioned
> +above, users could call ioctl (DFL_FPGA_*_GET_IRQ_NUM) to know whether or how
> +many interrupts are supported for this private feature. Drivers also 
> implement
> +an eventfd based interrupt handling mechanism for users to get notified when
> +interrupt happens. Users could set eventfds to driver via
> +ioctl (DFL_FPGA_*_SET_IRQ), and then poll/select on these eventfds waiting 
> for
> +notification.
> +In Current DFL, 3 sub features (Port error, FME global error and AFU 
> interrupt)
> +support interrupts.
> +
> +
>  Add new FIUs support
>  
>  It's possible that developers made some new function blocks (FIUs) under this
> -- 
> 2.7.4
> 

Applied to for-next,

Thanks!


Re: [PATCH v7 5/7] fpga: dfl: fme: add interrupt support for global error reporting

2020-07-06 Thread Moritz Fischer
On Tue, Jun 16, 2020 at 12:08:46PM +0800, Xu Yilun wrote:
> Error reporting interrupt is very useful to notify users that some
> errors are detected by the hardware. Once users are notified, they
> could query hardware logged error states, no need to continuously
> poll on these states.
> 
> This patch adds interrupt support for fme global error reporting sub
> feature. It follows the common DFL interrupt notification and handling
> mechanism. And it implements two ioctls below for user to query
> number of irqs supported, and set/unset interrupt triggers.
> 
>  Ioctls:
>  * DFL_FPGA_FME_ERR_GET_IRQ_NUM
>get the number of irqs, which is used to determine whether/how many
>interrupts fme error reporting feature supports.
> 
>  * DFL_FPGA_FME_ERR_SET_IRQ
>set/unset given eventfds as fme error reporting interrupt triggers.
> 
> Signed-off-by: Luwei Kang 
> Signed-off-by: Wu Hao 
> Signed-off-by: Xu Yilun 
> Reviewed-by: Marcelo Tosatti 
> Acked-by: Wu Hao 
> ---
> v2: use DFL_FPGA_FME_ERR_GET_IRQ_NUM instead of
> DFL_FPGA_FME_ERR_GET_INFO
> Delete flags field for DFL_FPGA_FME_ERR_SET_IRQ
> v3: put_user() instead of copy_to_user()
> improves comments
> v4: use common functions to handle irq ioctls
> v5: Minor fixes for Hao's comments
> v6: No change
> v7: No change
> ---
>  drivers/fpga/dfl-fme-error.c  | 18 ++
>  drivers/fpga/dfl-fme-main.c   |  6 ++
>  include/uapi/linux/fpga-dfl.h | 23 +++
>  3 files changed, 47 insertions(+)
> 
> diff --git a/drivers/fpga/dfl-fme-error.c b/drivers/fpga/dfl-fme-error.c
> index f897d41..51c2892 100644
> --- a/drivers/fpga/dfl-fme-error.c
> +++ b/drivers/fpga/dfl-fme-error.c
> @@ -15,6 +15,7 @@
>   *   Mitchel, Henry 
>   */
>  
> +#include <linux/fpga-dfl.h>
>  #include <linux/uaccess.h>
>  
>  #include "dfl.h"
> @@ -348,6 +349,22 @@ static void fme_global_err_uinit(struct platform_device 
> *pdev,
>   fme_err_mask(&pdev->dev, true);
>  }
>  
> +static long
> +fme_global_error_ioctl(struct platform_device *pdev,
> +struct dfl_feature *feature,
> +unsigned int cmd, unsigned long arg)
> +{
> + switch (cmd) {
> + case DFL_FPGA_FME_ERR_GET_IRQ_NUM:
> + return dfl_feature_ioctl_get_num_irqs(pdev, feature, arg);
> + case DFL_FPGA_FME_ERR_SET_IRQ:
> + return dfl_feature_ioctl_set_irq(pdev, feature, arg);
> + default:
> + dev_dbg(&pdev->dev, "%x cmd not handled", cmd);
> + return -ENODEV;
> + }
> +}
> +
>  const struct dfl_feature_id fme_global_err_id_table[] = {
>   {.id = FME_FEATURE_ID_GLOBAL_ERR,},
>   {0,}
> @@ -356,4 +373,5 @@ const struct dfl_feature_id fme_global_err_id_table[] = {
>  const struct dfl_feature_ops fme_global_err_ops = {
>   .init = fme_global_err_init,
>   .uinit = fme_global_err_uinit,
> + .ioctl = fme_global_error_ioctl,
>  };
> diff --git a/drivers/fpga/dfl-fme-main.c b/drivers/fpga/dfl-fme-main.c
> index fc210d4..77ea04d 100644
> --- a/drivers/fpga/dfl-fme-main.c
> +++ b/drivers/fpga/dfl-fme-main.c
> @@ -620,11 +620,17 @@ static int fme_release(struct inode *inode, struct file 
> *filp)
>  {
>   struct dfl_feature_platform_data *pdata = filp->private_data;
>   struct platform_device *pdev = pdata->dev;
> + struct dfl_feature *feature;
>  
>   dev_dbg(&pdev->dev, "Device File Release\n");
>  
>   mutex_lock(&pdata->lock);
>   dfl_feature_dev_use_end(pdata);
> +
> + if (!dfl_feature_dev_use_count(pdata))
> + dfl_fpga_dev_for_each_feature(pdata, feature)
> + dfl_fpga_set_irq_triggers(feature, 0,
> +   feature->nr_irqs, NULL);
>   mutex_unlock(&pdata->lock);
>  
>   return 0;
> diff --git a/include/uapi/linux/fpga-dfl.h b/include/uapi/linux/fpga-dfl.h
> index 6c71c9d..b6495ea 100644
> --- a/include/uapi/linux/fpga-dfl.h
> +++ b/include/uapi/linux/fpga-dfl.h
> @@ -230,4 +230,27 @@ struct dfl_fpga_fme_port_pr {
>   */
>  #define DFL_FPGA_FME_PORT_ASSIGN _IOW(DFL_FPGA_MAGIC, DFL_FME_BASE + 2, 
> int)
>  
> +/**
> + * DFL_FPGA_FME_ERR_GET_IRQ_NUM - _IOR(DFL_FPGA_MAGIC, DFL_FME_BASE + 3,
> + *   __u32 num_irqs)
> + *
> + * Get the number of irqs supported by the fpga fme error reporting private
> + * feature. Currently hardware supports up to 1 irq.
> + * Return: 0 on success, -errno on failure.
> + */
> +#define DFL_FPGA_FME_ERR_GET_IRQ_NUM _IOR(DFL_FPGA_MAGIC,\
> +  DFL_FME_BASE + 3, __u32)
> +
> +/**
> + * DFL_FPGA_FME_ERR_SET_IRQ - _IOW(DFL_FPGA_MAGIC, DFL_FME_BASE + 4,
> + *   struct dfl_fpga_irq_set)
> + *
> + * Set fpga fme error reporting interrupt trigger if evtfds[n] is valid.
> + * Unset related interrupt trigger if evtfds[n] is a negative value.
> + * Return: 0 on success, -errno on failure.
> + */
> +#define DFL_FPGA_FME_ERR_SET_IRQ _IOW(DFL_FPGA_MAGIC,\
> +

Re: [PATCH v7 6/7] fpga: dfl: afu: add AFU interrupt support

2020-07-06 Thread Moritz Fischer
On Tue, Jun 16, 2020 at 12:08:47PM +0800, Xu Yilun wrote:
> AFU (Accelerated Function Unit) is dynamic region of the DFL based FPGA,
> and always defined by users. Some DFL based FPGA cards allow users to
> implement their own interrupts in AFU. In order to support this,
> hardware implements a new UINT (AFU Interrupt) private feature with
> related capability register which describes the number of supported
> AFU interrupts as well as the local index of the interrupts for
> software enumeration, and from software side, driver follows the common
> DFL interrupt notification and handling mechanism, and it implements
> two ioctls below for user to query number of irqs supported and set/unset
> interrupt triggers.
> 
>  Ioctls:
>  * DFL_FPGA_PORT_UINT_GET_IRQ_NUM
>get the number of irqs, which is used to determine how many interrupts
>UINT feature supports.
> 
>  * DFL_FPGA_PORT_UINT_SET_IRQ
>set/unset eventfds as AFU interrupt triggers.
> 
> Signed-off-by: Luwei Kang 
> Signed-off-by: Wu Hao 
> Signed-off-by: Xu Yilun 
> Reviewed-by: Marcelo Tosatti 
> Acked-by: Wu Hao 
> ---
> v2: use DFL_FPGA_PORT_UINT_GET_IRQ_NUM instead of
> DFL_FPGA_PORT_UINT_GET_INFO
> Delete flags field for DFL_FPGA_PORT_UINT_SET_IRQ
> v3: put_user() instead of copy_to_user()
> improves comments
> v4: use common functions to handle irq ioctls
> v5: Minor fixes for Hao's comments
> v6: No change
> v7: No change
> ---
>  drivers/fpga/dfl-afu-main.c   | 28 
>  include/uapi/linux/fpga-dfl.h | 23 +++
>  2 files changed, 51 insertions(+)
> 
> diff --git a/drivers/fpga/dfl-afu-main.c b/drivers/fpga/dfl-afu-main.c
> index 357cd5d..7c84fee 100644
> --- a/drivers/fpga/dfl-afu-main.c
> +++ b/drivers/fpga/dfl-afu-main.c
> @@ -529,6 +529,30 @@ static const struct dfl_feature_ops port_stp_ops = {
>   .init = port_stp_init,
>  };
>  
> +static long
> +port_uint_ioctl(struct platform_device *pdev, struct dfl_feature *feature,
> + unsigned int cmd, unsigned long arg)
> +{
> + switch (cmd) {
> + case DFL_FPGA_PORT_UINT_GET_IRQ_NUM:
> + return dfl_feature_ioctl_get_num_irqs(pdev, feature, arg);
> + case DFL_FPGA_PORT_UINT_SET_IRQ:
> + return dfl_feature_ioctl_set_irq(pdev, feature, arg);
> + default:
> + dev_dbg(&pdev->dev, "%x cmd not handled", cmd);
> + return -ENODEV;
> + }
> +}
> +
> +static const struct dfl_feature_id port_uint_id_table[] = {
> + {.id = PORT_FEATURE_ID_UINT,},
> + {0,}
> +};
> +
> +static const struct dfl_feature_ops port_uint_ops = {
> + .ioctl = port_uint_ioctl,
> +};
> +
>  static struct dfl_feature_driver port_feature_drvs[] = {
>   {
>   .id_table = port_hdr_id_table,
> @@ -547,6 +571,10 @@ static struct dfl_feature_driver port_feature_drvs[] = {
>   .ops = &port_stp_ops,
>   },
>   {
> + .id_table = port_uint_id_table,
> + .ops = &port_uint_ops,
> + },
> + {
>   .ops = NULL,
>   }
>  };
> diff --git a/include/uapi/linux/fpga-dfl.h b/include/uapi/linux/fpga-dfl.h
> index b6495ea..1621b07 100644
> --- a/include/uapi/linux/fpga-dfl.h
> +++ b/include/uapi/linux/fpga-dfl.h
> @@ -187,6 +187,29 @@ struct dfl_fpga_irq_set {
>DFL_PORT_BASE + 6, \
>struct dfl_fpga_irq_set)
>  
> +/**
> + * DFL_FPGA_PORT_UINT_GET_IRQ_NUM - _IOR(DFL_FPGA_MAGIC, DFL_PORT_BASE + 7,
> + *   __u32 num_irqs)
> + *
> + * Get the number of irqs supported by the fpga AFU interrupt private
> + * feature.
> + * Return: 0 on success, -errno on failure.
> + */
> +#define DFL_FPGA_PORT_UINT_GET_IRQ_NUM   _IOR(DFL_FPGA_MAGIC,\
> +  DFL_PORT_BASE + 7, __u32)
> +
> +/**
> + * DFL_FPGA_PORT_UINT_SET_IRQ - _IOW(DFL_FPGA_MAGIC, DFL_PORT_BASE + 8,
> + *   struct dfl_fpga_irq_set)
> + *
> + * Set fpga AFU interrupt trigger if evtfds[n] is valid.
> + * Unset related interrupt trigger if evtfds[n] is a negative value.
> + * Return: 0 on success, -errno on failure.
> + */
> +#define DFL_FPGA_PORT_UINT_SET_IRQ   _IOW(DFL_FPGA_MAGIC,\
> +  DFL_PORT_BASE + 8, \
> +  struct dfl_fpga_irq_set)
> +
>  /* IOCTLs for FME file descriptor */
>  
>  /**
> -- 
> 2.7.4
> 

Applied to for-next,

Thanks!


Re: [PATCH v7 4/7] fpga: dfl: afu: add interrupt support for port error reporting

2020-07-06 Thread Moritz Fischer
On Tue, Jun 16, 2020 at 12:08:45PM +0800, Xu Yilun wrote:
> Error reporting interrupt is very useful to notify users that some
> errors are detected by the hardware. Once users are notified, they
> could query hardware logged error states, no need to continuously
> poll on these states.
> 
> This patch adds interrupt support for port error reporting sub feature.
> It follows the common DFL interrupt notification and handling mechanism,
> implements two ioctl commands below for user to query number of irqs
> supported, and set/unset interrupt triggers.
> 
>  Ioctls:
>  * DFL_FPGA_PORT_ERR_GET_IRQ_NUM
>get the number of irqs, which is used to determine whether/how many
>interrupts error reporting feature supports.
> 
>  * DFL_FPGA_PORT_ERR_SET_IRQ
>set/unset given eventfds as error interrupt triggers.
> 
> Signed-off-by: Luwei Kang 
> Signed-off-by: Wu Hao 
> Signed-off-by: Xu Yilun 
> Reviewed-by: Marcelo Tosatti 
> Acked-by: Wu Hao 
> ---
> v2: use DFL_FPGA_PORT_ERR_GET_IRQ_NUM instead of
> DFL_FPGA_PORT_ERR_GET_INFO
> Delete flag field for DFL_FPGA_PORT_ERR_SET_IRQ param
> v3: put_user() instead of copy_to_user()
> improves comments
> v4: use common functions to handle irq ioctls
> v5: minor fixes for Hao's comments
> v6: No change
> v7: No change
> ---
>  drivers/fpga/dfl-afu-error.c  | 17 +
>  drivers/fpga/dfl-afu-main.c   |  4 
>  include/uapi/linux/fpga-dfl.h | 23 +++
>  3 files changed, 44 insertions(+)
> 
> diff --git a/drivers/fpga/dfl-afu-error.c b/drivers/fpga/dfl-afu-error.c
> index c1467ae..c469118 100644
> --- a/drivers/fpga/dfl-afu-error.c
> +++ b/drivers/fpga/dfl-afu-error.c
> @@ -14,6 +14,7 @@
>   *   Mitchel Henry 
>   */
>  
> +#include <linux/fpga-dfl.h>
>  #include <linux/uaccess.h>
>  
>  #include "dfl-afu.h"
> @@ -219,6 +220,21 @@ static void port_err_uinit(struct platform_device *pdev,
>   afu_port_err_mask(&pdev->dev, true);
>  }
>  
> +static long
> +port_err_ioctl(struct platform_device *pdev, struct dfl_feature *feature,
> +unsigned int cmd, unsigned long arg)
> +{
> + switch (cmd) {
> + case DFL_FPGA_PORT_ERR_GET_IRQ_NUM:
> + return dfl_feature_ioctl_get_num_irqs(pdev, feature, arg);
> + case DFL_FPGA_PORT_ERR_SET_IRQ:
> + return dfl_feature_ioctl_set_irq(pdev, feature, arg);
> + default:
> + dev_dbg(&pdev->dev, "%x cmd not handled", cmd);
> + return -ENODEV;
> + }
> +}
> +
>  const struct dfl_feature_id port_err_id_table[] = {
>   {.id = PORT_FEATURE_ID_ERROR,},
>   {0,}
> @@ -227,4 +243,5 @@ const struct dfl_feature_id port_err_id_table[] = {
>  const struct dfl_feature_ops port_err_ops = {
>   .init = port_err_init,
>   .uinit = port_err_uinit,
> + .ioctl = port_err_ioctl,
>  };
> diff --git a/drivers/fpga/dfl-afu-main.c b/drivers/fpga/dfl-afu-main.c
> index b0c3178..357cd5d 100644
> --- a/drivers/fpga/dfl-afu-main.c
> +++ b/drivers/fpga/dfl-afu-main.c
> @@ -577,6 +577,7 @@ static int afu_release(struct inode *inode, struct file 
> *filp)
>  {
>   struct platform_device *pdev = filp->private_data;
>   struct dfl_feature_platform_data *pdata;
> + struct dfl_feature *feature;
>  
>   dev_dbg(&pdev->dev, "Device File Release\n");
>  
> @@ -586,6 +587,9 @@ static int afu_release(struct inode *inode, struct file 
> *filp)
>   dfl_feature_dev_use_end(pdata);
>  
>   if (!dfl_feature_dev_use_count(pdata)) {
> + dfl_fpga_dev_for_each_feature(pdata, feature)
> + dfl_fpga_set_irq_triggers(feature, 0,
> +   feature->nr_irqs, NULL);
>   __port_reset(pdev);
>   afu_dma_region_destroy(pdata);
>   }
> diff --git a/include/uapi/linux/fpga-dfl.h b/include/uapi/linux/fpga-dfl.h
> index 7331350..6c71c9d 100644
> --- a/include/uapi/linux/fpga-dfl.h
> +++ b/include/uapi/linux/fpga-dfl.h
> @@ -164,6 +164,29 @@ struct dfl_fpga_irq_set {
>   __s32 evtfds[];
>  };
>  
> +/**
> + * DFL_FPGA_PORT_ERR_GET_IRQ_NUM - _IOR(DFL_FPGA_MAGIC, DFL_PORT_BASE + 5,
> + *   __u32 num_irqs)
> + *
> + * Get the number of irqs supported by the fpga port error reporting private
> + * feature. Currently hardware supports up to 1 irq.
> + * Return: 0 on success, -errno on failure.
> + */
> +#define DFL_FPGA_PORT_ERR_GET_IRQ_NUM _IOR(DFL_FPGA_MAGIC,\
> +  DFL_PORT_BASE + 5, __u32)
> +
> +/**
> + * DFL_FPGA_PORT_ERR_SET_IRQ - _IOW(DFL_FPGA_MAGIC, DFL_PORT_BASE + 6,
> + *   struct dfl_fpga_irq_set)
> + *
> + * Set fpga port error reporting interrupt trigger if evtfds[n] is valid.
> + * Unset related interrupt trigger if evtfds[n] is a negative value.
> + * Return: 0 on success, -errno on failure.
> + */
> +#define DFL_FPGA_PORT_ERR_SET_IRQ _IOW(DFL_FPGA_MAGIC,\
> +  DFL_PORT_BASE + 6, \
> + 

Re: [PATCH v34 11/24] x86/sgx: Add SGX enclave driver

2020-07-06 Thread Matthew Wilcox
On Mon, Jul 06, 2020 at 09:29:04PM -0700, Sean Christopherson wrote:
> > > > +   idx_start = PFN_DOWN(start);
> > > > +   idx_end = PFN_DOWN(end - 1);
> > > > +
> > > > +   for (idx = idx_start; idx <= idx_end; ++idx) {
> > > > +   mutex_lock(&encl->lock);
> > > > +   page = radix_tree_lookup(&encl->page_tree, idx);
> > > > +   mutex_unlock(&encl->lock);
> > > > +
> > > > +   if (!page || (~page->vm_max_prot_bits & vm_prot_bits))
> > > > +   return -EACCES;
> > > 
> > > You should really use an iterator here instead of repeated lookups.
> > > xas_for_each() will probably be what you want.
> > 
> > Thank you for your remarks. I'll look into using xarray for this.
> 
> Question for Matthew:
> 
> To enforce the "page must be populated" rule, is there a clean way to retrieve
> the index of the current entry?  Our entries/pages don't have information
> about their index.  Or should we just count the number of entries and check
> 'em at the end? E.g.
> 
> xas_for_each(...) {
> if (~page->vm_max_prot_bits & vm_prot_bits)
> return -EACCES;
> nr_entries++;
> }
> 
> if (nr_entries != (end_index - start_index))
> return -EACCES;

Probably best just to steal the implementation from here:

pgoff_t page_cache_next_miss(struct address_space *mapping,
 pgoff_t index, unsigned long max_scan)
{
XA_STATE(xas, &mapping->i_pages, index);

while (max_scan--) {
void *entry = xas_next(&xas);
if (!entry || xa_is_value(entry))
break;
if (xas.xa_index == 0)
break;
}

return xas.xa_index;
}

although I think you have a simpler task.

XA_STATE(xas, ..., start_index);

for (;;) {
struct page *page = xas_next(&xas);

if (!page || (~page->vm_max_prot_bits & vm_prot_bits))
return -EACCES;
}

return 0;

should do the trick, I think.


include/linux/atomic-fallback.h:439:9: sparse: sparse: context imbalance in 'btrfs_set_lock_blocking_read' - unexpected unlock

2020-07-06 Thread kernel test robot
tree:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 
master
head:   bfe91da29bfad9941d5d703d45e29f0812a20724
commit: d6156218bec93965b6a43ba2686ad962ce77c854 btrfs: make locking assertion 
helpers static inline
date:   8 months ago
config: ia64-randconfig-s031-20200707 (attached as .config)
compiler: ia64-linux-gcc (GCC) 9.3.0
reproduce:
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# apt-get install sparse
# sparse version: v0.6.2-31-gabbfd661-dirty
git checkout d6156218bec93965b6a43ba2686ad962ce77c854
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross C=1 
CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' ARCH=ia64 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 


sparse warnings: (new ones prefixed by >>)

>> include/linux/atomic-fallback.h:439:9: sparse: sparse: context imbalance in 
>> 'btrfs_set_lock_blocking_read' - unexpected unlock
   fs/btrfs/locking.c:25:9: sparse: sparse: context imbalance in 
'btrfs_set_lock_blocking_write' - unexpected unlock
   fs/btrfs/locking.c:127:6: sparse: sparse: context imbalance in 
'btrfs_tree_read_lock' - different lock contexts for basic block
   fs/btrfs/locking.c:166:5: sparse: sparse: context imbalance in 
'btrfs_tree_read_lock_atomic' - different lock contexts for basic block
   fs/btrfs/locking.c:186:5: sparse: sparse: context imbalance in 
'btrfs_try_tree_read_lock' - different lock contexts for basic block
   fs/btrfs/locking.c:208:5: sparse: sparse: context imbalance in 
'btrfs_try_tree_write_lock' - different lock contexts for basic block
>> include/linux/atomic-fallback.h:439:9: sparse: sparse: context imbalance in 
>> 'btrfs_tree_read_unlock' - unexpected unlock
   fs/btrfs/locking.c:19:9: sparse: sparse: context imbalance in 
'btrfs_tree_lock' - wrong count at exit
   fs/btrfs/locking.c:25:9: sparse: sparse: context imbalance in 
'btrfs_tree_unlock' - unexpected unlock

vim +/btrfs_set_lock_blocking_read +439 include/linux/atomic-fallback.h

9fa45070a2e59a Mark Rutland 2018-09-04  434  
9fa45070a2e59a Mark Rutland 2018-09-04  435  #ifndef atomic_dec
9fa45070a2e59a Mark Rutland 2018-09-04  436  static inline void
9fa45070a2e59a Mark Rutland 2018-09-04  437  atomic_dec(atomic_t *v)
9fa45070a2e59a Mark Rutland 2018-09-04  438  {
9fa45070a2e59a Mark Rutland 2018-09-04 @439 atomic_sub(1, v);
9fa45070a2e59a Mark Rutland 2018-09-04  440  }
9fa45070a2e59a Mark Rutland 2018-09-04  441  #define atomic_dec atomic_dec
9fa45070a2e59a Mark Rutland 2018-09-04  442  #endif
9fa45070a2e59a Mark Rutland 2018-09-04  443  

:: The code at line 439 was first introduced by commit
:: 9fa45070a2e59a871e1cd3370173369f3a4f61e2 locking/atomics: Switch to 
generated fallbacks

:: TO: Mark Rutland 
:: CC: Ingo Molnar 

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org


.config.gz
Description: application/gzip


BE MY PARTNER IN THIS BUSINESS

2020-07-06 Thread Mr Suleman Bello
Dear Friend,

Please i want you to read this letter very carefully and i must
apologize for berging this message into your mail box without any
formal introduction due to the urgency and confidential of this issue
and i know that this message will come to you as a surprise, Please
this is not a joke and i will not like you to joke with it.I am
Mr.Suleman Bello, a staff in African Development Bank (A.D.B)
Ouagadougou, Burkina faso West Africa.I discovered existing dormant
account for years. When I discovered that there had been neither
continuation nor withdrawals from this account for this long period
and according to the laws and constitution guiding this banking
institution, any unserviceable account for more than (7) seven years,
that fund will be transferred to national treasury as unclaimed fund.

I Hoped that you will not expose or betray this trust and confident
that i am about to extablish with you for the mutual benefit of you
and i,I need your urgent assistance in transferring the sum of $10.5
)million usd into your account within 7 banking days. This money has
been dormant for years in our Bank, and The request of foreigner in
this transaction is necessary because our late customer was a
foreigner and a burkinabe cannot stand as next of kin to a
foreigner.Because of the static of this transaction I want you to
stand as the next of kin so that our bank will accord you the
recognition and have the fund transferred to your account.

Upon your response, I shall then provide you with further information
and more deities that will help you understand the transaction. I am
expecting your urgent response to enable me inform you on how the
business will be executed. Please I would like you to keep this
transaction confidential and as a top secret or delete if you are not
interested.

Thanks
Mr.Suleman Bello.


[PATCH] Documentation: kunit: Remove references to --defconfig

2020-07-06 Thread David Gow
The --defconfig option in kunit_tool was removed in [1], but the getting
started and kunit_tool documentation still encouraged its use. Update
those documents to reflect that it's no longer required, and is the
default behaviour if no .kunitconfig is found.

Also update a couple of places where .kunitconfig is still referred to
as kunitconfig (this was changed in [2]).

[1]:
https://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git/commit/?h=kunit-fixes=9bdf64b35117cc10813d24e1842cd8ee40ecbf19
[2]:
https://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git/commit/?h=kunit-fixes=14ee5cfd4512ee3d1e0047d8751450dcc6544070

Signed-off-by: David Gow 
---
 Documentation/dev-tools/kunit/kunit-tool.rst | 17 +
 Documentation/dev-tools/kunit/start.rst  |  2 +-
 2 files changed, 6 insertions(+), 13 deletions(-)

diff --git a/Documentation/dev-tools/kunit/kunit-tool.rst 
b/Documentation/dev-tools/kunit/kunit-tool.rst
index 949af2da81e5..29ae2fee8123 100644
--- a/Documentation/dev-tools/kunit/kunit-tool.rst
+++ b/Documentation/dev-tools/kunit/kunit-tool.rst
@@ -19,13 +19,13 @@ compiles the kernel as a standalone Linux executable that 
can be run like any
 other program directly inside of a host operating system. To be clear, it does
 not require any virtualization support: it is just a regular program.
 
-What is a kunitconfig?
-======================
+What is a .kunitconfig?
+=======================
 
 It's just a defconfig that kunit_tool looks for in the base directory.
 kunit_tool uses it to generate a .config as you might expect. In addition, it
 verifies that the generated .config contains the CONFIG options in the
-kunitconfig; the reason it does this is so that it is easy to be sure that a
+.kunitconfig; the reason it does this is so that it is easy to be sure that a
 CONFIG that enables a test actually ends up in the .config.
 
 How do I use kunit_tool?
@@ -46,16 +46,9 @@ However, you most likely want to use it with the following 
options:
 - ``--timeout`` sets a maximum amount of time to allow tests to run.
 - ``--jobs`` sets the number of threads to use to build the kernel.
 
-If you just want to use the defconfig that ships with the kernel, you can
-append the ``--defconfig`` flag as well:
-
-.. code-block:: bash
-
-   ./tools/testing/kunit/kunit.py run --timeout=30 --jobs=`nproc --all` 
--defconfig
-
 .. note::
-   This command is particularly helpful for getting started because it
-   just works. No kunitconfig needs to be present.
+   This command will work even without a .kunitconfig file: if no
+.kunitconfig is present, a default one will be used instead.
 
 For a list of all the flags supported by kunit_tool, you can run:
 
diff --git a/Documentation/dev-tools/kunit/start.rst 
b/Documentation/dev-tools/kunit/start.rst
index bb112cf70624..d23385e3e159 100644
--- a/Documentation/dev-tools/kunit/start.rst
+++ b/Documentation/dev-tools/kunit/start.rst
@@ -18,7 +18,7 @@ The wrapper can be run with:
 
 .. code-block:: bash
 
-   ./tools/testing/kunit/kunit.py run --defconfig
+   ./tools/testing/kunit/kunit.py run
 
 For more information on this wrapper (also called kunit_tool) check out the
 :doc:`kunit-tool` page.
-- 
2.27.0.212.ge8ba1cc988-goog



Re: [PATCH] CodingStyle: Inclusive Terminology

2020-07-06 Thread Dan Williams
On Sat, Jul 4, 2020 at 5:41 PM Kees Cook  wrote:
>
> On Sat, Jul 04, 2020 at 01:02:51PM -0700, Dan Williams wrote:
> > Recent events have prompted a Linux position statement on inclusive
> > terminology. Given that Linux maintains a coding-style and its own
> > idiomatic set of terminology here is a proposal to answer the call to
> > replace non-inclusive terminology.
> >
> > Cc: Jonathan Corbet 
> > Cc: Kees Cook 
> > Signed-off-by: Chris Mason 
> > Signed-off-by: Greg Kroah-Hartman 
> > Signed-off-by: Dan Williams 
>
> (nit: isn't this a Co-developed-by chain, not a SoB chain?)
>
> Acked-by: Kees Cook 
>
> Comments below...
>
> > ---
> >  Documentation/process/coding-style.rst  |   12 
> >  Documentation/process/inclusive-terminology.rst |   64 
> > +++
> >  Documentation/process/index.rst |1
> >  3 files changed, 77 insertions(+)
> >  create mode 100644 Documentation/process/inclusive-terminology.rst
> >
> > diff --git a/Documentation/process/coding-style.rst 
> > b/Documentation/process/coding-style.rst
> > index 2657a55c6f12..4b15ab671089 100644
> > --- a/Documentation/process/coding-style.rst
> > +++ b/Documentation/process/coding-style.rst
> > @@ -319,6 +319,18 @@ If you are afraid to mix up your local variable names, 
> > you have another
> >  problem, which is called the function-growth-hormone-imbalance syndrome.
> >  See chapter 6 (Functions).
> >
> > +For symbol names, avoid introducing new usage of the words 'slave' and
> > +'blacklist'. Recommended replacements for 'slave' are: 'secondary',
> > +'subordinate', 'replica', 'responder', 'follower', 'proxy', or
> > +'performer'.  Recommended replacements for blacklist are: 'blocklist' or
> > +'denylist'.
>
> Keeping "master" in a "master/slave" pairing (i.e. replacing only
> "slave") seems incomplete to me. If "master" is paired with "slave", it
> should be replaced too. Potential examples: 'primary', 'leader', 'principle',
> 'controller', 'sender', 'initial'.

Yes, this matches Andy's feedback, will add.

> Similarly, for "whitelist/blacklist", "whitelist" needs to be replaced when
> "blacklist" has been. For example, seccomp documentation[1] uses
> "allow-list" and "deny-list".
>
> [1] https://man7.org/linux/man-pages/man2/seccomp.2.html

Oh, good to know will make that change.

> > +Exceptions for introducing new usage is to maintain a userspace ABI, or
>
> and API?

True, yes, the intent was "don't break userspace" for terminology replacement.

>
> > +when updating code for an existing (as of 2020) hardware or protocol
> > +specification that mandates those terms. For new specifications consider
> > +translating specification usage of the terminology to the kernel coding
> > +standard where possible. See :ref:`process/inclusive-terminology.rst
> > +` for details.
>
> Let's add:
>
>  Where possible, old instances of this language should be replaced when
>  it is not tied to external specifications nor userspace ABI/API.

Sounds good to me.

>
> >
> >  5) Typedefs
> >  ---
> > diff --git a/Documentation/process/inclusive-terminology.rst 
> > b/Documentation/process/inclusive-terminology.rst
> > new file mode 100644
> > index ..a8eb26690eb4
> > --- /dev/null
> > +++ b/Documentation/process/inclusive-terminology.rst
> > @@ -0,0 +1,64 @@
> > +.. _inclusiveterminology:
> > +
> > +Linux kernel inclusive terminology
> > +==
> > +
> > +The Linux kernel is a global software project, and in 2020 there was a
> > +global reckoning on race relations that caused many organizations to
> > +re-evaluate their policies and practices relative to the inclusion of
> > +people of African descent. This document describes why the 'Naming'
> > +section in :ref:`process/coding-style.rst ` recommends
> > +avoiding usage of 'slave' and 'blacklist' in new additions to the Linux
> > +kernel.
>
> ... usage of 'master/slave', 'slave', 'whitelist/blacklist', and
> 'blacklist' in the Linux kernel.

Yes, but as I'm reading this thread backwards I've already agreed to
just push the coding-style change in isolation.

>
> > +
> > +On the triviality of replacing words
> > +
> > +
> > +The African slave trade was a brutal system of human misery deployed at
> > +global scale. Some word choice decisions in a modern software project
> > +does next to nothing to compensate for that legacy. So why put any
> > +effort into something so trivial in comparison? Because the goal is not
> > +to repair, or erase the past. The goal is to maximize availability and
> > +efficiency of the global developer community to participate in the Linux
> > +kernel development process.
> > +
> > +Word choice and developer efficiency
> > +
> > +
> > +Why does any software project go through the trouble of developing a
> > +document like :ref:`process/coding-style.rst `? It does so
> > +because a common coding style maximizes the efficiency 

Re: [PATCH v34 11/24] x86/sgx: Add SGX enclave driver

2020-07-06 Thread Sean Christopherson
Man, I really need to type faster.

On Tue, Jul 07, 2020 at 07:11:51AM +0300, Jarkko Sakkinen wrote:
> On Tue, Jul 07, 2020 at 04:36:17AM +0100, Matthew Wilcox wrote:
> > What's a leaf function?  Is it like a CPU instruction?
> 
> Yeah, the opcode is ENCLS for ring-0 (enclave management and
> construction) and ENCLU for ring-3 (entrance to the enclave etc).
> The leaf function number goes to EAX.

To add to Jarkko's comments, for all intents and purposes they are individual
instructions, e.g. all have their own entries in the SDM, but are buried behind
a single opcode that switches on EAX, e.g. ECREATE is EAX=0,  EADD is EAX=1,
EINIT is EAX=2.  It's purely a way to save opcode space when the extra
overhead is a non-issue, e.g. SMX/TXT's GETSEC does the same shenanigans.

> > > + atomic_set(&encl->flags, 0);
> > > + kref_init(&encl->refcount);
> > > + INIT_RADIX_TREE(&encl->page_tree, GFP_KERNEL);
> > 
> > Why are you using a radix tree instead of an xarray?
> 
> Because xarray did not exist in 2017 and nobody has pointed out to use
> it. Now I know it exists (yet do not know what it is).

I've followed xarrays a little, but obviously not closely enough to
understand their advantages over radix trees.  At a glance, range-based
iteration alone is probably justification enough to switch.

> > > +int sgx_encl_may_map(struct sgx_encl *encl, unsigned long start,
> > > +  unsigned long end, unsigned long vm_prot_bits)
> > > +{
> > > + unsigned long idx, idx_start, idx_end;
> > > + struct sgx_encl_page *page;
> > > +
> > > + /*
> > > +  * Disallow RIE tasks as their VMA permissions might conflict with the
> > > +  * enclave page permissions.
> > > +  */
> > > + if (!!(current->personality & READ_IMPLIES_EXEC))
> > > + return -EACCES;
> > > +
> > > + idx_start = PFN_DOWN(start);
> > > + idx_end = PFN_DOWN(end - 1);
> > > +
> > > + for (idx = idx_start; idx <= idx_end; ++idx) {
> > > + mutex_lock(&encl->lock);
> > > + page = radix_tree_lookup(&encl->page_tree, idx);
> > > + mutex_unlock(&encl->lock);
> > > +
> > > + if (!page || (~page->vm_max_prot_bits & vm_prot_bits))
> > > + return -EACCES;
> > 
> > You should really use an iterator here instead of repeated lookups.
> > xas_for_each() will probably be what you want.
> 
> Thank you for your remarks. I'll look into using xarray for this.

Question for Matthew:

To enforce the "page must be populated" rule, is there a clean way to retrieve
the index of the current entry?  Our entries/pages don't have information
about their index.  Or should we just count the number of entries and check
'em at the end? E.g.

xas_for_each(...) {
if (~page->vm_max_prot_bits & vm_prot_bits)
return -EACCES;
nr_entries++;
}

if (nr_entries != (end_index - start_index))
return -EACCES;


Re: [PATCH 1/3] spi: spi-qcom-qspi: Use OPP API to set clk/perf state

2020-07-06 Thread Rajendra Nayak



On 7/3/2020 10:31 PM, Mark Brown wrote:

On Fri, Jul 03, 2020 at 03:11:31PM +0530, Rajendra Nayak wrote:

QSPI needs to vote on a performance state of a power domain depending on
the clock rate. Add support for it by specifying the perf state/clock rate
as an OPP table in device tree.


This doesn't apply against current code, please check and resend.


Hey Mark, as mentioned in the cover letter I wanted this to land via the
qcom tree, since Bjorn already has a patch in his tree which would otherwise
conflict with this change, if you were to pull this.
Hence I had this rebased on qcom for-next and requested Bjorn to pull it in,
with your ACK. Hope that's fine with you.

thanks,
Rajendra

--
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation


[GIT PULL] soundwire: fixes for 5.8

2020-07-06 Thread Vinod Koul
Hi Greg,

Please pull to receive a single fix for Intel driver

The following changes since commit b3a9e3b9622ae10064826dccb4f7a52bd88c7407:

  Linux 5.8-rc1 (2020-06-14 12:45:04 -0700)

are available in the Git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/vkoul/soundwire.git 
tags/soundwire-5.8-fixes

for you to fetch changes up to bf6d6e68d2028a2d82f4c106f50ec75cc1e6ef89:

  soundwire: intel: fix memory leak with devm_kasprintf (2020-06-22 17:15:20 
+0530)


soundwire fixes for v5.8

-  Intel driver memory leak fix


Pierre-Louis Bossart (1):
  soundwire: intel: fix memory leak with devm_kasprintf

 drivers/soundwire/intel.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)
-- 
~Vinod


signature.asc
Description: PGP signature


Re: [PATCH v2 2/4] drm/msm: dsi: Use OPP API to set clk/perf state

2020-07-06 Thread Rajendra Nayak




On 7/6/2020 9:40 PM, Matthias Kaehlcke wrote:

On Thu, Jul 02, 2020 at 04:39:09PM +0530, Rajendra Nayak wrote:

On SDM845 and SC7180 DSI needs to express a performance state
requirement on a power domain depending on the clock rates.
Use OPP table from DT to register with OPP framework and use
dev_pm_opp_set_rate() to set the clk/perf state.

dev_pm_opp_set_rate() is designed to be equivalent to clk_set_rate()
for devices without an OPP table, hence the change works fine
on devices/platforms which only need to set a clock rate.

Signed-off-by: Rajendra Nayak 
---
  drivers/gpu/drm/msm/dsi/dsi_host.c | 26 --
  1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c 
b/drivers/gpu/drm/msm/dsi/dsi_host.c
index 11ae5b8..09e16b8 100644
--- a/drivers/gpu/drm/msm/dsi/dsi_host.c
+++ b/drivers/gpu/drm/msm/dsi/dsi_host.c
@@ -14,6 +14,7 @@
  #include 
  #include 
  #include 
+#include 
  #include 
  #include 
  #include 
@@ -111,6 +112,9 @@ struct msm_dsi_host {
struct clk *pixel_clk_src;
struct clk *byte_intf_clk;
  
+	struct opp_table *opp_table;

+   bool has_opp_table;
+
u32 byte_clk_rate;
u32 pixel_clk_rate;
u32 esc_clk_rate;
@@ -512,9 +516,10 @@ int dsi_link_clk_set_rate_6g(struct msm_dsi_host *msm_host)
DBG("Set clk rates: pclk=%d, byteclk=%d",
msm_host->mode->clock, msm_host->byte_clk_rate);
  
-	ret = clk_set_rate(msm_host->byte_clk, msm_host->byte_clk_rate);

+   ret = dev_pm_opp_set_rate(&msm_host->pdev->dev,
+ msm_host->byte_clk_rate);
if (ret) {
-   pr_err("%s: Failed to set rate byte clk, %d\n", __func__, ret);
+   pr_err("%s: dev_pm_opp_set_rate failed %d\n", __func__, ret);
return ret;
}
  
@@ -658,6 +663,8 @@ int dsi_link_clk_enable_v2(struct msm_dsi_host *msm_host)
  
  void dsi_link_clk_disable_6g(struct msm_dsi_host *msm_host)

  {
+   /* Drop the performance state vote */
+   dev_pm_opp_set_rate(&msm_host->pdev->dev, 0);
clk_disable_unprepare(msm_host->esc_clk);
clk_disable_unprepare(msm_host->pixel_clk);
if (msm_host->byte_intf_clk)
@@ -1879,6 +1886,18 @@ int msm_dsi_host_init(struct msm_dsi *msm_dsi)
goto fail;
}
  
+	msm_host->opp_table = dev_pm_opp_set_clkname(&pdev->dev, "byte");

+   if (IS_ERR(msm_host->opp_table))
+   return PTR_ERR(msm_host->opp_table);
+   /* OPP table is optional */
+   ret = dev_pm_opp_of_add_table(&pdev->dev);
+   if (!ret) {
+   msm_host->has_opp_table = true;
+   } else if (ret != -ENODEV) {
+   dev_err(&pdev->dev, "invalid OPP table in device tree\n");


dev_pm_opp_put_clkname(msm_host->opp_table);


+   return ret;
+   }


With the missing _put_clkname() fixed:


Thanks, I'll fix and resend.



Reviewed-by: Matthias Kaehlcke 



--
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation


[GIT PULL] PHY: fixes for 5.8

2020-07-06 Thread Vinod Koul
Hi Greg,

Please pull to receive fixes for phy for 5.8. Kishon is a bit busy and has
asked me to send the PR for phy this time around.

The following changes since commit b3a9e3b9622ae10064826dccb4f7a52bd88c7407:

  Linux 5.8-rc1 (2020-06-14 12:45:04 -0700)

are available in the Git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/phy/linux-phy.git 
tags/phy-fixes-5.8

for you to fetch changes up to 38b1927e5bf9bcad4a2e33189ef1c5569f9599ba:

  phy: sun4i-usb: fix dereference of pointer phy0 before it is null checked 
(2020-06-25 18:25:37 +0530)


phy: fixes for 5.8

*) Fix for intel combo driver for warns or errors
*) Constify symbols for am654-serdes & j721e-wiz
*) Return value fix for rockchip driver
*) Null pointer dereference fix for sun4i-usb


Arnd Bergmann (1):
  phy: intel: fix enum type mismatch warning

Colin Ian King (1):
  phy: sun4i-usb: fix dereference of pointer phy0 before it is null checked

Dilip Kota (1):
  phy: intel: Fix compilation error on FIELD_PREP usage

Rikard Falkeborn (2):
  phy: ti: am654-serdes: Constify regmap_config
  phy: ti: j721e-wiz: Constify structs

Tiezhu Yang (1):
  phy: rockchip: Fix return value of inno_dsidphy_probe()

 drivers/phy/allwinner/phy-sun4i-usb.c|  5 +++--
 drivers/phy/intel/phy-intel-combo.c  | 14 --
 drivers/phy/rockchip/phy-rockchip-inno-dsidphy.c |  4 ++--
 drivers/phy/ti/phy-am654-serdes.c|  2 +-
 drivers/phy/ti/phy-j721e-wiz.c   | 10 +-
 5 files changed, 19 insertions(+), 16 deletions(-)

Thanks
-- 
~Vinod


signature.asc
Description: PGP signature


Re: [Ksummit-discuss] [PATCH] CodingStyle: Inclusive Terminology

2020-07-06 Thread Dan Williams
On Mon, Jul 6, 2020 at 9:07 AM Mike Rapoport  wrote:
>
> Hi Chris,
>
> On Mon, Jul 06, 2020 at 12:45:34PM +0000, Chris Mason via Ksummit-discuss 
> wrote:
> > On 5 Jul 2020, at 0:55, Willy Tarreau wrote:
> >
> > > On Sat, Jul 04, 2020 at 01:02:51PM -0700, Dan Williams wrote:
> > >> +Non-inclusive terminology has that same distracting effect which is
> > >> why
> > >> +it is a style issue for Linux, it injures developer efficiency.
> > >
> > > I'm personally thinking that for a non-native speaker it's already
> > > difficult to find the best term to describe something, but having to
> > > apply an extra level of filtering on the found words to figure whether
> > > they are allowed by the language police is even more difficult.
> >
> > Since our discussions are public, we’ve always had to deal with
> > comments from people outside the community on a range of topics.  But
> > inside the kernel, it’s just a group of developers trying to help each
> > other produce the best quality of code.  We’ve got a long history
> > together and in general I think we’re pretty good at assuming good
> > intent.
>
> I don't think anybody doubts your intentions. But they say, the road to
> hell is paved with good intentions.
>
> I had a "privilege" to live in the USSR and back there Newspeak was not a
> fiction but a reality.
>
> And despite the good intent, I have a really strong feeling that this
> could be a step in a wrong direction...

I've experienced some professional training classes for visiting other
countries and they tell you helpful things like "avoid making jokes
about X" or "Y topic is sensitive". It's not about censoring it's more
about how to keep discussion focused on the job at hand. So I'm hoping
this is more of the mundane advice of "what's the best way to
communicate my point efficiently to the widest possible audience" and
not a "step in a wrong direction"... time will tell.


Re: [PATCH v6 2/4] driver core: add deferring probe reason to devices_deferred property

2020-07-06 Thread Dmitry Torokhov
On Thu, Jul 02, 2020 at 08:57:55AM +0200, Andrzej Hajda wrote:
> 
> On 30.06.2020 20:00, Dmitry Torokhov wrote:
> > On Tue, Jun 30, 2020 at 8:42 AM Andrzej Hajda  wrote:
> >>
> >> On 30.06.2020 10:59, Grygorii Strashko wrote:
> >>> Hi
> >>>
> >>> On 29/06/2020 14:28, Andrzej Hajda wrote:
>  Hi Grygorii,
> 
>  (...)
> 
> >> /*
> >>  * deferred_devs_show() - Show the devices in the deferred probe
> >> pending list.
> >>  */
> >> @@ -221,7 +241,8 @@ static int deferred_devs_show(struct seq_file *s,
> >> void *data)
> >> mutex_lock(&deferred_probe_mutex);
> >>   list_for_each_entry(curr, &deferred_probe_pending_list,
> >> deferred_probe)
> >> -seq_printf(s, "%s\n", dev_name(curr->device));
> >> +seq_printf(s, "%s\t%s", dev_name(curr->device),
> >> + curr->device->p->deferred_probe_reason ?: "\n");
> >>   mutex_unlock(&deferred_probe_mutex);
> >>
> > Sry, may be i missing smth, but shouldn't it be optional
> > (CONFIG_DEBUG_FS is probably too generic).
> >
>  I am not sure what exactly are you referring to, but this patch does not
>  add new property, it just extends functionality of existing one.
> >>> Sry, needed to be more specific.
> >>>
> >>> You've added  device_set_deferred_probe_reson(dev, &vaf);
> >>> which expected to be used on every EPROBE_DEFER in dev_err_probe() in
> >>> combination with
> >>>
> >>> +   } else {
> >>> +   device_set_deferred_probe_reson(dev, &vaf);
> >>>  dev_dbg(dev, "error %d: %pV", err, &vaf);
> >>>
> >>> ^^ dev_dbg() does not add any runtime overhead during boot unless enabled
> >>> +   }
> >>>
> >>> But:
> >>>
> >>> +void device_set_deferred_probe_reson(const struct device *dev, struct
> >>> va_format *vaf)
> >>> +{
> >>> +   const char *drv = dev_driver_string(dev);
> >>> +
> >>> +   mutex_lock(&deferred_probe_mutex);
> >>> +
> >>> +   kfree(dev->p->deferred_probe_reason);
> >>> +   dev->p->deferred_probe_reason = kasprintf(GFP_KERNEL, "%s:
> >>> %pV", drv, vaf);
> >>> +
> >>> +   mutex_unlock(&deferred_probe_mutex);
> >>> +}
> >>>
> >>> ^^ Adds locking, kfree() and kasprintf() for every deferred probe
> >>> during boot and can't be disabled.
> >>>
> >>> Right?
> >>
> >> Right, but usually the burden should be insignificant in comparison to
> >> probe time, so I do not think it is worth optimizing.
> > I do not think this is going to take. You are suggesting that we
> > modify pretty much every driver to supply this deferral reason, and I
> > doubt it will happen. Can we put this burden on providers that raise
> > the deferral?
> 
> 
> I wouldn't say they raise the deferral, they just inform resource is not 
> yet available. Only device driver, and only in its probe function can 
> "raise the deferral".

Well, this is a matter of perspective. If devm_gpiod_get() returns
-EBUSY and this is returned to driver core, is it GPIO line signals that
line is busy, or is it the driver applies its knowledge. I say that in
majority of cases driver does not really get a say in this and simply
has to pass whatever error condition that is signalled by providers up
the stack.

I would consider whenever a driver does not propagate -EPROBE_DEFER to
the driver code a bug that needs fixing, because it should not degrade
functionality and/or performance just because we have not figured out
how to order probing properly and have to rely on deferrals.

> 
> 
> >   I.e. majority of code are using devm API now, so we most
> > likely know the device for which deferral is being raised. We can have
> > a list of deferral reasons and their devices and when in device code
> > once probe is done we could try reconciling it with the deferred
> > devicelist, and this would mean you only need to implement this in
> > gpiolib, regulator core, clocks core, etc.
> 
> 
> This patchset tries to solve simple issue - replace multiple lines of 
> code present in multiple probe functions (additionally fixing lot of 
> them) with single call and then enhance it little bit, nothing more.
> 
> What you are proposing is blurry at the moment for me, provider does not 
> know if consumer want to defer,

This is my point - the consumer does not get to decide. If deferral is
raised, it must be honored.

> or will continue working without missing resource,

Deferral does not mean resource does not exist and the driver has to get
by if it can. It means the resource is not ready, and even if the system
can work without it, it will not be working optimally.

> moreover some consumers can acquire resources after probe - again no
> probe deferral.

In this case we should not signal deferral either.

> Even if it will be done (it can be, for 
> example by creating probe version of all resource get functions), it 
> will require much more changes but finally it will look like:
> 
> res = devm_get_resource_from_probe()
> 
> if (IS_ERR(res))
> 
>      return PTR_ERR(res);
> 
> vs:
> 
> res 

Re: [PATCH v34 11/24] x86/sgx: Add SGX enclave driver

2020-07-06 Thread Jarkko Sakkinen
On Tue, Jul 07, 2020 at 04:36:17AM +0100, Matthew Wilcox wrote:
> On Tue, Jul 07, 2020 at 06:01:51AM +0300, Jarkko Sakkinen wrote:
> > Intel Software Guard eXtensions (SGX) is a set of CPU instructions that
> > can be used by applications to set aside private regions of code and
> > data. The code outside the SGX hosted software entity is disallowed to
> 
> s/disallowed to/prevented from/
> 
> > access the memory inside the enclave enforced by the CPU. We call these
> 
> s/enforced//
> 
> > entities enclaves.
> > 
> > Add a driver that provides an ioctl API to construct and run enclaves.
> > Enclaves are constructed from pages residing in reserved physical memory
> > areas. The contents of these pages can only be accessed when they are
> > mapped as part of an enclave, by a hardware thread running inside the
> > enclave.
> > 
> > The starting state of an enclave consists of a fixed measured set of
> > pages that are copied to the EPC during the construction process by
> > using ENCLS leaf functions and Software Enclave Control Structure (SECS)
> > that defines the enclave properties.
> > 
> > Enclaves are constructed by using ENCLS leaf functions ECREATE, EADD and
> > EINIT. ECREATE initializes SECS, EADD copies pages from system memory to
> > the EPC and EINIT checks a given signed measurement and moves the enclave
> > into a state ready for execution.
> 
> What's a leaf function?  Is it like a CPU instruction?

Yeah, the opcode is ENCLS for ring-0 (enclave management and
construction) and ENCLU for ring-3 (entrance to the enclave etc).
The leaf function number goes to EAX.

> 
> > The mmap() permissions are capped by the contained enclave page
> > permissions. The mapped areas must also be opaque, i.e. each page address
> > must contain a page. This logic is implemented in sgx_encl_may_map().
> 
> do you mean "populated" instead of "opaque"?

Yes, that would be a better word to use. I'll change this.

> 
> > +   atomic_set(&encl->flags, 0);
> > +   kref_init(&encl->refcount);
> > +   INIT_RADIX_TREE(&encl->page_tree, GFP_KERNEL);
> 
> Why are you using a radix tree instead of an xarray?

Because xarray did not exist in 2017 and nobody has pointed out to use
it. Now I know it exists (yet do not know what it is).

> 
> > +int sgx_encl_may_map(struct sgx_encl *encl, unsigned long start,
> > +unsigned long end, unsigned long vm_prot_bits)
> > +{
> > +   unsigned long idx, idx_start, idx_end;
> > +   struct sgx_encl_page *page;
> > +
> > +   /*
> > +* Disallow RIE tasks as their VMA permissions might conflict with the
> > +* enclave page permissions.
> > +*/
> > +   if (!!(current->personality & READ_IMPLIES_EXEC))
> > +   return -EACCES;
> > +
> > +   idx_start = PFN_DOWN(start);
> > +   idx_end = PFN_DOWN(end - 1);
> > +
> > +   for (idx = idx_start; idx <= idx_end; ++idx) {
> > +   mutex_lock(&encl->lock);
> > +   page = radix_tree_lookup(&encl->page_tree, idx);
> > +   mutex_unlock(&encl->lock);
> > +
> > +   if (!page || (~page->vm_max_prot_bits & vm_prot_bits))
> > +   return -EACCES;
> 
> You should really use an iterator here instead of repeated lookups.
> xas_for_each() will probably be what you want.

Thank you for your remarks. I'll look into using xarray for this.

/Jarkko


Re: [PATCH v34 10/24] mm: Add vm_ops->mprotect()

2020-07-06 Thread Matthew Wilcox
On Tue, Jul 07, 2020 at 07:01:51AM +0300, Jarkko Sakkinen wrote:
> On Tue, Jul 07, 2020 at 04:24:08AM +0100, Matthew Wilcox wrote:
> > On Mon, Jul 06, 2020 at 08:22:54PM -0700, Sean Christopherson wrote:
> > > On Tue, Jul 07, 2020 at 04:14:24AM +0100, Matthew Wilcox wrote:
> > > > > + if (vma->vm_ops && vma->vm_ops->mprotect) {
> > > > > + error = vma->vm_ops->mprotect(vma, nstart, tmp, 
> > > > > prot);
> > > > > + if (error)
> > > > > + goto out;
> > > > > + }
> > > 
> > > Based on "... and then the vma owner can do whatever it needs to before
> > > calling mprotect_fixup(), which is already not static", my interpretation
> > > is that Matthew's intent was to do:
> > > 
> > >   if (vma->vm_ops && vma->vm_ops->mprotect)
> > > >   error = vma->vm_ops->mprotect(vma, nstart, tmp, 
> > > > prot);
> > > >   else
> > > >   error = mprotect_fixup(vma, &prev, nstart, tmp, 
> > > > newflags);
> > >   if (error)
> > >   goto out;
> > > 
> > > i.e. make .mprotect() a full replacement as opposed to a prereq hook.
> > 
> > Yes, it was.  I was just looking at the next patch to be sure this was
> > how I'd been misunderstood.
> 
> I don't get this part. If mprotect_fixup is called in the tail of the
> callback, why it has to be called inside the callback and not be called
> after the callback?

Because that's how every other VM operation works.  Look at your
implementation of get_unmapped_area() for example.



Re: [PATCH v9 2/2] tpm: Add support for event log pointer found in TPM2 ACPI table

2020-07-06 Thread Stefan Berger

On 7/7/20 12:03 AM, Jarkko Sakkinen wrote:

On Mon, Jul 06, 2020 at 11:08:12PM -0400, Stefan Berger wrote:

On 7/6/20 10:24 PM, Jarkko Sakkinen wrote:

On Mon, Jul 06, 2020 at 07:55:26PM -0400, Stefan Berger wrote:

On 7/6/20 7:09 PM, Jarkko Sakkinen wrote:

On Mon, Jul 06, 2020 at 02:19:53PM -0400, Stefan Berger wrote:

From: Stefan Berger 

In case a TPM2 is attached, search for a TPM2 ACPI table when trying
to get the event log from ACPI. If one is found, use it to get the
start and length of the log area. This allows non-UEFI systems, such
as SeaBIOS, to pass an event log when using a TPM2.

Signed-off-by: Stefan Berger 

Do you think that QEMU with TPM 1.2 emulator turned on would be a viable
way to test this?

Yes.

Is the emulator bundled with QEMU or does it have to be installed
separately?

It has to be installed separately. On Fedora 31 it would just be a `sudo dnf
-y install swtpm-tools` and you should be good to go with libvirt /
virt-manager.

Is there some packaging for Debian/Ubuntu available?



So far may not be available yet. I had *experimented* with a PPA once: 
https://launchpad.net/~stefanberger/+archive/ubuntu/swtpm-focal





/Jarkko





Re: [Tech-board-discuss] [PATCH] CodingStyle: Inclusive Terminology

2020-07-06 Thread Dan Williams
On Mon, Jul 6, 2020 at 12:16 PM Mark Brown  wrote:
>
> On Sat, Jul 04, 2020 at 01:02:51PM -0700, Dan Williams wrote:
>
> > +'blacklist'. Recommended replacements for 'slave' are: 'secondary',
> > +'subordinate', 'replica', 'responder', 'follower', 'proxy', or
>
> I'd second the suggestion of device as an option here.

Sure, will do. I'm assuming you're thinking of cases where 'slave' is
used in isolation without a paired relative term? If not, please
clarify.

>
> > +Of course it is around this point someone jumps in with an etymological
> > +argument about why people should not be offended. Etymological arguments
> > +do not scale. The scope and pace of Linux to reach new developers
> > +exceeds the ability of historical terminology defenders to describe "no,
>
> More generally etymological arguments are just not super relevant here
> anyway, the issues people have are around current perceptions rather
> than where things came from.
>
> > +not that connotation". The revelation of 2020 was that black voices were
> > +heard on a global scale and the Linux kernel project has done its small
> > +part to answer that call as it wants black voices, among all voices, in
> > +its developer community.
>
> This, especially the bit about "revelation of 2020", sounds a little
> off to me - I think it's that it's worryingly close to the frequently
> derided pattern where people recognise a problem that other people have
> been talking about for a while and treat it as something new.  Perhaps a
> more neutrally worded reference to current events and/or our desire to
> improve instead?

I'd just as soon let this commentary live in the archives if people
need some more background. It's not like we have companion essays on
the other recommendations in coding-style, and we seem to be
converging on just amending coding-style.


RE: [EXT] [PATCH 4/5] net: fec: get rid of redundant code in fec_ptp_set()

2020-07-06 Thread Andy Duan
From: Sergey Organov  Sent: Monday, July 6, 2020 10:26 PM
> Code of the form "if(x) x = 0" replaced with "x = 0".
> 
> Code of the form "if(x == a) x = a" removed.
> 
> Signed-off-by: Sergey Organov 
> ---
>  drivers/net/ethernet/freescale/fec_ptp.c | 4 +---
>  1 file changed, 1 insertion(+), 3 deletions(-)
> 
> diff --git a/drivers/net/ethernet/freescale/fec_ptp.c
> b/drivers/net/ethernet/freescale/fec_ptp.c
> index e455343..4152cae 100644
> --- a/drivers/net/ethernet/freescale/fec_ptp.c
> +++ b/drivers/net/ethernet/freescale/fec_ptp.c
> @@ -485,9 +485,7 @@ int fec_ptp_set(struct net_device *ndev, struct ifreq
> *ifr)
> 
> switch (config.rx_filter) {
> case HWTSTAMP_FILTER_NONE:
> -   if (fep->hwts_rx_en)
> -   fep->hwts_rx_en = 0;
> -   config.rx_filter = HWTSTAMP_FILTER_NONE;
The line should be kept according to your commit log.

> +   fep->hwts_rx_en = 0;
> break;
> 
> default:
> --
> 2.10.0.1.g57b01a3



Re: [PATCH v35 23/24] docs: x86/sgx: Document SGX micro architecture and kernel internals

2020-07-06 Thread Matthew Wilcox
On Tue, Jul 07, 2020 at 06:37:46AM +0300, Jarkko Sakkinen wrote:
> +*Software Guard eXtensions (SGX)* is a set of instructions that enable ring-3

I can never remember which way up intel numbers their rings.
Is that user mode or kernel mode?

> +applications to set aside private regions of code and data. These regions are
> +called enclaves. An enclave can be entered to a fixed set of entry points. 
> Only

entered through?

> +a CPU running inside the enclave can access its code and data.
> +
> +The support can be determined by
> +
> + ``grep sgx /proc/cpuinfo``
> +
> +Enclave Page Cache
> +==
> +
> +SGX utilizes an *Enclave Page Cache (EPC)* to store pages that are associated
> +with an enclave. It is contained in a BIOS reserved region of physical 
> memory.
> +Unlike pages used for regular memory, pages can only be accessed outside the
> +enclave for different purposes with the instructions **ENCLS**, **ENCLV** and
> +**ENCLU**.
> +
> +Direct memory accesses to an enclave can be only done by a CPU executing 
> inside
> +the enclave. An enclave can be entered with **ENCLU[EENTER]** to a fixed set 
> of
> +entry points. However, a CPU executing inside the enclave can do outside 
> memory
> +accesses.

This is rather tortured.  I think what you're trying to say here is:

Only a CPU executing inside an enclave can access memory belonging to the
enclave.  The CPU may access memory outside the enclave as long as it does
not attempt to access memory which is inside a different enclave.

> +Enclave Page Cache Map
> +--
> +
> +The processor tracks EPC pages via the *Enclave Page Cache Map (EPCM)*.  EPCM
> +contains an entry for each EPC page, which describes the owning enclave, 
> access
> +rights and page type among the other things.
> +
> +The permissions from EPCM are consulted if and only if walking the kernel page
> +tables succeeds. The total permissions are thus a conjunction between page 
> table
> +and EPCM permissions.
> +
> +For all intents and purposes the SGX architecture allows the processor to
> +invalidate all EPCM entries at will, i.e. requires that software be prepared 
> to
> +handle an EPCM fault at any time. The contents of EPC are encrypted with an
> +ephemeral key, which is lost on power transitions.

The SGX architecture allows the processor to invalidate any EPCM entry
at any time.  Software must be prepared to handle the resulting EPCM fault.
The contents of EPC are encrypted with an ephemeral key, which is lost on
power transitions.

(can you be a bit more specific about power transitions?  do you mean
suspend/resume?  hibernate/wake?  poweroff/poweron?  what about reset?
surely you don't mean S1?  or do you?)



Re: [PATCH v34 12/24] x86/sgx: Add SGX_IOC_ENCLAVE_CREATE

2020-07-06 Thread Jarkko Sakkinen
On Mon, Jul 06, 2020 at 08:29:15PM -0700, Sean Christopherson wrote:
> On Tue, Jul 07, 2020 at 06:01:52AM +0300, Jarkko Sakkinen wrote:
> > +long sgx_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
> > +{
> > +   struct sgx_encl *encl = filep->private_data;
> > +   int ret, encl_flags;
> > +
> > +   encl_flags = atomic_fetch_or(SGX_ENCL_IOCTL, &encl->flags);
> > +   if (encl_flags & SGX_ENCL_IOCTL)
> > +   return -EBUSY;
> 
> As called out in my belated feedback, SGX_ENCL_DEAD needs to be checked
> here to prevent invoking ENCLS operations on a dead enclave.  If you're
> splitting hairs, the check could technically be deferred until the next
> patch, "Add SGX_IOC_ENCLAVE_ADD_PAGES", which is the first usage of
> sgx_encl_destroy() from an ioctl(), but that seems a bit gratuitous.

This is already fixed in v35.

/Jarkko


RE: [EXT] [PATCH 2/5] net: fec: enable to use PPS feature without time stamping

2020-07-06 Thread Andy Duan
From: Sergey Organov  Sent: Monday, July 6, 2020 10:26 PM
> PPS feature could be useful even when hardware time stamping of network
> packets is not in use, so remove offending check for this condition from
> fec_ptp_enable_pps().

If hardware time stamping of network packets is not in use, PPS is based on 
local
clock, what is the use case ?

> 
> Signed-off-by: Sergey Organov 
> ---
>  drivers/net/ethernet/freescale/fec_ptp.c | 5 -
>  1 file changed, 5 deletions(-)
> 
> diff --git a/drivers/net/ethernet/freescale/fec_ptp.c
> b/drivers/net/ethernet/freescale/fec_ptp.c
> index f8a592c..4a12086 100644
> --- a/drivers/net/ethernet/freescale/fec_ptp.c
> +++ b/drivers/net/ethernet/freescale/fec_ptp.c
> @@ -103,11 +103,6 @@ static int fec_ptp_enable_pps(struct
> fec_enet_private *fep, uint enable)
> u64 ns;
> val = 0;
> 
> -   if (!(fep->hwts_tx_en || fep->hwts_rx_en)) {
> -   dev_err(&fep->pdev->dev, "No ptp stack is running\n");
> -   return -EINVAL;
> -   }
> -
> if (fep->pps_enable == enable)
> return 0;
> 
> --
> 2.10.0.1.g57b01a3



Re: [Tech-board-discuss] [PATCH] CodingStyle: Inclusive Terminology

2020-07-06 Thread Dan Williams
On Mon, Jul 6, 2020 at 11:30 AM Shuah Khan  wrote:
>
> On 7/4/20 2:02 PM, Dan Williams wrote:
> > Recent events have prompted a Linux position statement on inclusive
> > terminology. Given that Linux maintains a coding-style and its own
> > idiomatic set of terminology here is a proposal to answer the call to
> > replace non-inclusive terminology.
> >
>
> Hi Dan,
>
> Thanks for taking the time to work on this patch and updating the
> coding-style.rst with the inclusive terminology guidelines and
> adding a new document outlining the scope.
>
> The suggestions you made will help us adapt inclusive terminology
> for the current times, and also help us move toward terms that are
> intuitive and easier to understand keeping our global developer
> community in mind.
>
> Allowlist/denylist terms are intuitive and action based which have a
> globally uniform meaning.
>
> Terms such as "whitelist" etc are contextual, hence assume contextual
> knowledge on the part of the reader.
>
> A couple comments below:
>
> > Cc: Jonathan Corbet 
> > Cc: Kees Cook 
> > Signed-off-by: Chris Mason 
> > Signed-off-by: Greg Kroah-Hartman 
> > Signed-off-by: Dan Williams 
> > ---
> >   Documentation/process/coding-style.rst  |   12 
> >   Documentation/process/inclusive-terminology.rst |   64 
> > +++
> >   Documentation/process/index.rst |1
> >   3 files changed, 77 insertions(+)
> >   create mode 100644 Documentation/process/inclusive-terminology.rst
> >
> > diff --git a/Documentation/process/coding-style.rst 
> > b/Documentation/process/coding-style.rst
> > index 2657a55c6f12..4b15ab671089 100644
> > --- a/Documentation/process/coding-style.rst
> > +++ b/Documentation/process/coding-style.rst
> > @@ -319,6 +319,18 @@ If you are afraid to mix up your local variable names, 
> > you have another
> >   problem, which is called the function-growth-hormone-imbalance syndrome.
> >   See chapter 6 (Functions).
> >
> > +For symbol names, avoid introducing new usage of the words 'slave' and
> > +'blacklist'. Recommended replacements for 'slave' are: 'secondary',
> > +'subordinate', 'replica', 'responder', 'follower', 'proxy', or
> > +'performer'.  Recommended replacements for blacklist are: 'blocklist' or
> > +'denylist'.
>
> allowlist and blocklist or denylist are lot more intuitive than
> white/black in any case.

Yes, that was interesting to me when I first grappled with this. The
replacements are more direct.

I was going to go with blocklist/passlist as the common shorthand
recommendation, but if a subsystem picks allowlist/denylist as a local
custom that's fine too.

[..]
> Please add my Signed-off-by: Shuah Khan 
> or Acked-by: Shuah Khan 

Thanks Shuah.


[PATCH v3] mm/hugetlb: avoid hardcoding while checking if cma is enable

2020-07-06 Thread Barry Song
hugetlb_cma[0] can be NULL due to various reasons, for example, node0 has
no memory. so NULL hugetlb_cma[0] doesn't necessarily mean cma is not
enabled. gigantic pages might have been reserved on other nodes.

Fixes: cf11e85fc08c ("mm: hugetlb: optionally allocate gigantic hugepages using 
cma")
Cc: Mike Kravetz 
Cc: Jonathan Cameron 
Acked-by: Roman Gushchin 
Signed-off-by: Barry Song 
---
 -v3: add acked-by; make code more canonical 

 mm/hugetlb.c | 16 +++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 57ece74e3aae..d293c823121e 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2546,6 +2546,20 @@ static void __init gather_bootmem_prealloc(void)
}
 }
 
+bool __init hugetlb_cma_enabled(void)
+{
+#ifdef CONFIG_CMA
+   int node;
+
+   for_each_online_node(node) {
+   if (hugetlb_cma[node])
+   return true;
+   }
+#endif
+
+   return false;
+}
+
 static void __init hugetlb_hstate_alloc_pages(struct hstate *h)
 {
unsigned long i;
@@ -2571,7 +2585,7 @@ static void __init hugetlb_hstate_alloc_pages(struct 
hstate *h)
 
for (i = 0; i < h->max_huge_pages; ++i) {
if (hstate_is_gigantic(h)) {
-   if (IS_ENABLED(CONFIG_CMA) && hugetlb_cma[0]) {
+   if (hugetlb_cma_enabled()) {
pr_warn_once("HugeTLB: hugetlb_cma is enabled, 
skip boot time allocation\n");
break;
}
-- 
2.27.0




Re: [PATCH v34 10/24] mm: Add vm_ops->mprotect()

2020-07-06 Thread Jarkko Sakkinen
On Tue, Jul 07, 2020 at 04:14:24AM +0100, Matthew Wilcox wrote:
> On Tue, Jul 07, 2020 at 06:01:50AM +0300, Jarkko Sakkinen wrote:
> > +++ b/mm/mprotect.c
> > @@ -603,13 +603,20 @@ static int do_mprotect_pkey(unsigned long start, 
> > size_t len,
> > goto out;
> > }
> >  
> > +   tmp = vma->vm_end;
> > +   if (tmp > end)
> > +   tmp = end;
> > +
> > error = security_file_mprotect(vma, reqprot, prot);
> > if (error)
> > goto out;
> >  
> > -   tmp = vma->vm_end;
> > -   if (tmp > end)
> > -   tmp = end;
> 
> You don't need to move this any more, right?

My bad.

/Jarkko


Re: [PATCH v9 2/2] tpm: Add support for event log pointer found in TPM2 ACPI table

2020-07-06 Thread Jarkko Sakkinen
On Mon, Jul 06, 2020 at 11:08:12PM -0400, Stefan Berger wrote:
> On 7/6/20 10:24 PM, Jarkko Sakkinen wrote:
> > On Mon, Jul 06, 2020 at 07:55:26PM -0400, Stefan Berger wrote:
> > > On 7/6/20 7:09 PM, Jarkko Sakkinen wrote:
> > > > On Mon, Jul 06, 2020 at 02:19:53PM -0400, Stefan Berger wrote:
> > > > > From: Stefan Berger 
> > > > > 
> > > > > In case a TPM2 is attached, search for a TPM2 ACPI table when trying
> > > > > to get the event log from ACPI. If one is found, use it to get the
> > > > > start and length of the log area. This allows non-UEFI systems, such
> > > > > as SeaBIOS, to pass an event log when using a TPM2.
> > > > > 
> > > > > Signed-off-by: Stefan Berger 
> > > > Do you think that QEMU with TPM 1.2 emulator turned on would be a viable
> > > > way to test this?
> > > 
> > > Yes.
> > Is the emulator bundled with QEMU or does it have to be installed
> > separately?
> 
> It has to be installed separately. On Fedora 31 it would just be a `sudo dnf
> -y install swtpm-tools` and you should be good to go with libvirt /
> virt-manager.

Is there some packaging for Debian/Ubuntu available?

/Jarkko


Re: [GIT PULL][PATCH v6 0/8] Add support for ZSTD-compressed kernel and initramfs

2020-07-06 Thread Sedat Dilek
On Tue, Jul 7, 2020 at 5:50 AM Nick Terrell  wrote:
>
> From: Nick Terrell 
>
> Please pull from
>
>   g...@github.com:terrelln/linux.git tags/v6-zstd
>
> to get these changes. Alternatively the patchset is included.
>

Hi Nick,

cool, I just pulled from there.
Yesterday, I switched over from Linux v5.7.y to v5.8-rc4.
What a good timing :-).

I will report later.

Regards,
- Sedat -

> Hi all,
>
> This patch set adds support for a ZSTD-compressed kernel, ramdisk, and
> initramfs in the kernel boot process. ZSTD-compressed ramdisk and initramfs
> are supported on all architectures. The ZSTD-compressed kernel is only
> hooked up to x86 in this patch set.
>
> Zstandard requires slightly more memory during the kernel decompression
> on x86 (192 KB vs 64 KB), and the memory usage is independent of the
> window size.
>
> Zstandard requires memory proportional to the window size used during
> compression for decompressing the ramdisk image, since streaming mode is
> used. Newer versions of zstd (1.3.2+) list the window size of a file
> with `zstd -lv <file>'. The absolute maximum amount of memory required
> is just over 8 MB, but it can be controlled at compression time.
>
> This patch set has been boot tested with buildroot and QEMU based off
> of linux-5.8-rc4.
>
> On i386 and x86_64 I have tested the following configurations:
> * zstd compressed kernel and a separate zstd compressed initramfs
> * zstd compressed kernel and a built-in zstd compressed initramfs
> * gzip compressed kernel and a separate gzip compressed initramfs
> * gzip compressed kernel and a built-in gzip compressed initramfs
>
> On arm and aarch64 I tested the same configurations, except that the kernel is
> always gzip compressed.
>
> Facebook has been using v1 of these patches on x86_64 devices for more than 6
> months. When we switched from a xz compressed initramfs to a zstd compressed
> initramfs decompression time shrunk from 12 seconds to 3 seconds. When we
> switched from a xz compressed kernel to a zstd compressed kernel we saved 2
> seconds of boot time.
>
> Facebook has been using v2 of these patches on aarch64 devices for a few 
> weeks.
> When we switched from an lzma compressed initramfs to a zstd compressed 
> initramfs
> decompression time shrunk from 27 seconds to 8 seconds.
>
> The zstd compressed kernel is smaller than the gzip compressed kernel but 
> larger
> than the xz or lzma compressed kernels, and it decompresses faster than
> everything except lz4. See the table below for the measurement of an x86_64
> kernel ordered by compressed size:
>
> algo        size
> xz     6,509,792
> lzma   6,856,576
> zstd   7,399,157
> gzip   8,522,527
> bzip   8,629,603
> lzo    9,808,035
> lz4   10,705,570
> none  32,565,672
>
> Alex Xu ran benchmarks in https://lkml.org/lkml/2020/7/1/722.
>
> v1 -> v2:
> - Rebase
>   - usr/Makefile and init/Kconfig were changed so the patches were updated
> - No functional changes except to rebase
> - Split the patches up into smaller chunks
>
> v2 -> v3:
> - Add *.zst to the .gitignore in patch 8
> - Style nits in patch 3
> - Rename the PREBOOT macro to ZSTD_PREBOOT and XXH_PREBOOT in patches
>   1 through 3
>
> v3 -> v4:
> - Increase the ZSTD_IOBUF_SIZE from 4KB to 128KB to improve performance.
>   With this change I switch from malloc() to large_malloc() for the
>   buffers.
> - Increase the maximum allowed window size from 8 MB to 128 MB, which is
>   the max that zstd in the kernel supports.
>
> v4 -> v5:
> - Update commit message for patch 6 in response to comments
> - Rebase onto next-20200408
>
> v5 -> v6:
> - Rebase onto v5.8-rc4
>
> Best,
> Nick Terrell
>
> Adam Borowski (1):
>   .gitignore: add ZSTD-compressed files
>
> Nick Terrell (7):
>   lib: prepare zstd for preboot environment
>   lib: prepare xxhash for preboot environment
>   lib: add zstd support to decompress
>   init: add support for zstd compressed kernel
>   usr: add support for zstd compressed initramfs
>   x86: bump ZO_z_extra_bytes margin for zstd
>   x86: Add support for ZSTD compressed kernel
>
>  .gitignore|   1 +
>  Documentation/x86/boot.rst|   6 +-
>  arch/x86/Kconfig  |   1 +
>  arch/x86/boot/compressed/Makefile |   5 +-
>  arch/x86/boot/compressed/misc.c   |   4 +
>  arch/x86/boot/header.S|   8 +-
>  arch/x86/include/asm/boot.h   |   6 +-
>  include/linux/decompress/unzstd.h |  11 +
>  init/Kconfig  |  15 +-
>  lib/Kconfig   |   4 +
>  lib/Makefile  |   1 +
>  lib/decompress.c  |   5 +
>  lib/decompress_unzstd.c   | 342 ++
>  lib/xxhash.c  |  21 +-
>  lib/zstd/decompress.c |   2 +
>  lib/zstd/fse_decompress.c |   9 +-
>  lib/zstd/zstd_internal.h  |  14 +-
>  scripts/Makefile.lib  |  15 ++
>  usr/Kconfig   |  20 ++
>  usr/Makefile  |   1 

Re: [PATCH v34 10/24] mm: Add vm_ops->mprotect()

2020-07-06 Thread Jarkko Sakkinen
On Tue, Jul 07, 2020 at 04:24:08AM +0100, Matthew Wilcox wrote:
> On Mon, Jul 06, 2020 at 08:22:54PM -0700, Sean Christopherson wrote:
> > On Tue, Jul 07, 2020 at 04:14:24AM +0100, Matthew Wilcox wrote:
> > > > +   if (vma->vm_ops && vma->vm_ops->mprotect) {
> > > > +   error = vma->vm_ops->mprotect(vma, nstart, tmp, 
> > > > prot);
> > > > +   if (error)
> > > > +   goto out;
> > > > +   }
> > 
> > Based on "... and then the vma owner can do whatever it needs to before
> > calling mprotect_fixup(), which is already not static", my interpretation
> > is that Matthew's intent was to do:
> > 
> > if (vma->vm_ops && vma->vm_ops->mprotect)
> > error = vma->vm_ops->mprotect(vma, nstart, tmp, 
> > prot);
> > else
> > error = mprotect_fixup(vma, &prev, nstart, tmp, 
> > newflags);
> > if (error)
> > goto out;
> > 
> > i.e. make .mprotect() a full replacement as opposed to a prereq hook.
> 
> Yes, it was.  I was just looking at the next patch to be sure this was
> how I'd been misunderstood.

I don't get this part. If mprotect_fixup is called in the tail of the
callback, why does it have to be called inside the callback and not be
called after the callback?

The reason I only partly did what you requested was to do only the part of
the change that I get. Not to oppose it.

/Jarkko


Re: [Ksummit-discuss] [PATCH] CodingStyle: Inclusive Terminology

2020-07-06 Thread Dan Williams
On Mon, Jul 6, 2020 at 9:30 AM Andy Lutomirski  wrote:
>
> On Sat, Jul 4, 2020 at 1:19 PM Dan Williams  wrote:
> >
> > Recent events have prompted a Linux position statement on inclusive
> > terminology. Given that Linux maintains a coding-style and its own
> > idiomatic set of terminology here is a proposal to answer the call to
> > replace non-inclusive terminology.
> >
> > Cc: Jonathan Corbet 
> > Cc: Kees Cook 
> > Signed-off-by: Chris Mason 
> > Signed-off-by: Greg Kroah-Hartman 
> > Signed-off-by: Dan Williams 
> > ---
> >  Documentation/process/coding-style.rst  |   12 
> >  Documentation/process/inclusive-terminology.rst |   64 
> > +++
> >  Documentation/process/index.rst |1
> >  3 files changed, 77 insertions(+)
> >  create mode 100644 Documentation/process/inclusive-terminology.rst
> >
> > diff --git a/Documentation/process/coding-style.rst 
> > b/Documentation/process/coding-style.rst
> > index 2657a55c6f12..4b15ab671089 100644
> > --- a/Documentation/process/coding-style.rst
> > +++ b/Documentation/process/coding-style.rst
> > @@ -319,6 +319,18 @@ If you are afraid to mix up your local variable names, 
> > you have another
> >  problem, which is called the function-growth-hormone-imbalance syndrome.
> >  See chapter 6 (Functions).
> >
> > +For symbol names, avoid introducing new usage of the words 'slave' and
> > +'blacklist'
>
> Can you put whitelist in the list, too?

Yes, will do. I had left it out mistakenly thinking it would help
focus the discussion, but the replacements don't make sense without
including the replacements for whitelist.

>
> >. Recommended replacements for 'slave' are: 'secondary',
> > +'subordinate', 'replica', 'responder', 'follower', 'proxy', or
> > +'performer'.
>
> Should 'target' be in this list?

Yes.

> Should there be some mention of "master" to go along with "slave"?
> This could be complicated -- as has been noted in this thread, the
> word "master" has quite a few meanings, several of which are not
> related to slavery or to any form of control, and that the meanings
> associated with "master" and its cognates in other languages vary.

Yes, I'll at least expand this with the paired terminology for each of
the replacements of 'slave'.

>
> >  Recommended replacements for blacklist are: 'blocklist' or
> > +'denylist'.
>
> As someone who has written seccomp code and described the result as a
> "whitelist" or "blacklist" in the past, I have a couple of comments.
>
> First, shouldn't whitelist be in the list?  I find it surprising to
> put 'blacklist' in the blocklist but to omit whitelist.
>
> Second, I realize that I grew up thinking that 'whitelist' and
> 'blacklist' are the common terms for lists of things to be accepted
> and rejected and that this biases my perception of what sounds good,
> but writing a seccomp "denylist" or "blocklist" doesn't seem to roll
> off the tongue.  Perhaps this language would be better:
>
> In most contexts where 'whitelist' or 'blacklist' might be used, a
> descriptive phrase could be used instead.  For example, a seccomp
> filter could have a 'list of allowed syscalls' or a 'list of
> disallowed syscalls', and just lists could be the 'allowed' or
> 'accepted' lists and the 'disallowed', 'rejected', or 'blocked' lists.
> If a single word replacement for 'whitelist' or 'blacklist' is needed,
> 'allowlist', 'blocklist', or 'denylist' could be used.

That makes practical sense to me. Now that I look at this I think the
recommendation for the shorthand replacement should only be one style
option, let's say "blocklist/passlist" because it's not as amenable to
context sensitive replacements as "slave" and benefits from a standard
single shorthand.

>
>
> > @@ -0,0 +1,64 @@
> > +.. _inclusiveterminology:
> > +
> > +Linux kernel inclusive terminology
> > +==
> > +
> > +The Linux kernel is a global software project, and in 2020 there was a
> > +global reckoning on race relations that caused many organizations to
> > +re-evaluate their policies and practices relative to the inclusion of
> > +people of African descent. This document describes why the 'Naming'
> > +section in :ref:`process/coding-style.rst ` recommends
> > +avoiding usage of 'slave' and 'blacklist' in new additions to the Linux
> > +kernel.
> > +
> > +On the triviality of replacing words
> > +
> > +
> > +The African slave trade was a brutal system of human misery deployed at
> > +global scale. Some word choice decisions in a modern software project
> > +do next to nothing to compensate for that legacy. So why put any
> > +effort into something so trivial in comparison? Because the goal is not
> > +to repair, or erase the past. The goal is to maximize availability and
> > +efficiency of the global developer community to participate in the Linux
> > +kernel development process.
>
> Should this type of historical note be in the document or in the changelog?

[PATCH] security/selinux: Fix spelling mistakes in the comments

2020-07-06 Thread lihao
Fix spelling mistakes in the comments
quering==>querying

Signed-off-by: lihao 
---
 security/selinux/netif.c   | 2 +-
 security/selinux/netnode.c | 2 +-
 security/selinux/netport.c | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/security/selinux/netif.c b/security/selinux/netif.c
index 15b8c1b..86813b4 100644
--- a/security/selinux/netif.c
+++ b/security/selinux/netif.c
@@ -124,7 +124,7 @@ static void sel_netif_destroy(struct sel_netif *netif)
  * @sid: interface SID
  *
  * Description:
- * This function determines the SID of a network interface by quering the
+ * This function determines the SID of a network interface by querying the
  * security policy.  The result is added to the network interface table to
  * speedup future queries.  Returns zero on success, negative values on
  * failure.
diff --git a/security/selinux/netnode.c b/security/selinux/netnode.c
index dff587d..461fb54 100644
--- a/security/selinux/netnode.c
+++ b/security/selinux/netnode.c
@@ -181,7 +181,7 @@ static void sel_netnode_insert(struct sel_netnode *node)
  * @sid: node SID
  *
  * Description:
- * This function determines the SID of a network address by quering the
+ * This function determines the SID of a network address by querying the
  * security policy.  The result is added to the network address table to
  * speedup future queries.  Returns zero on success, negative values on
  * failure.
diff --git a/security/selinux/netport.c b/security/selinux/netport.c
index de727f7..d340f4d 100644
--- a/security/selinux/netport.c
+++ b/security/selinux/netport.c
@@ -130,7 +130,7 @@ static void sel_netport_insert(struct sel_netport *port)
  * @sid: port SID
  *
  * Description:
- * This function determines the SID of a network port by quering the security
+ * This function determines the SID of a network port by querying the security
  * policy.  The result is added to the network port table to speedup future
  * queries.  Returns zero on success, negative values on failure.
  *
-- 
1.8.5.6



Re: [mm] 4e2c82a409: ltp.overcommit_memory01.fail

2020-07-06 Thread Huang, Ying
Feng Tang  writes:

> On Mon, Jul 06, 2020 at 06:34:34AM -0700, Andi Kleen wrote:
>> >ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
>> > -  if (ret == 0 && write)
>> > +  if (ret == 0 && write) {
>> > +  if (sysctl_overcommit_memory == OVERCOMMIT_NEVER)
>> > +  schedule_on_each_cpu(sync_overcommit_as);
>> 
>> The schedule_on_each_cpu is not atomic, so the problem could still happen
>> in that window.
>> 
>> I think it may be ok if it eventually resolves, but certainly needs
>> a comment explaining it. Can you do some stress testing toggling the
>> policy all the time on different CPUs and running the test on
>> other CPUs and see if the test fails?
>
> For the raw test case reported by 0day, this patch passed in 200 times
> run. And I will read the ltp code and try stress testing it as you
> suggested.
>
>
>> The other alternative would be to define some intermediate state
>> for the sysctl variable and only switch to never once the 
>> schedule_on_each_cpu
>> returned. But that's more complexity.
>
> One thought I had is to put this schedule_on_each_cpu() before
> the proc_dointvec_minmax() to do the sync before sysctl_overcommit_memory
> is really changed. But the window still exists, as the batch is
> still the larger one. 

Can we change the batch first, then sync the global counter, and finally
change the overcommit policy?

Best Regards,
Huang, Ying


[PATCH v35 22/24] selftests/x86: Add a selftest for SGX

2020-07-06 Thread Jarkko Sakkinen
Add a selftest for SGX. It is a trivial test where a simple enclave
copies one 64-bit word of memory between two memory locations.

Cc: linux-kselft...@vger.kernel.org
Signed-off-by: Jarkko Sakkinen 
---
 tools/testing/selftests/Makefile  |   1 +
 tools/testing/selftests/sgx/.gitignore|   2 +
 tools/testing/selftests/sgx/Makefile  |  53 +++
 tools/testing/selftests/sgx/call.S|  54 +++
 tools/testing/selftests/sgx/defines.h |  21 +
 tools/testing/selftests/sgx/load.c| 282 +
 tools/testing/selftests/sgx/main.c| 199 +
 tools/testing/selftests/sgx/main.h|  38 ++
 tools/testing/selftests/sgx/sigstruct.c   | 395 ++
 tools/testing/selftests/sgx/test_encl.c   |  20 +
 tools/testing/selftests/sgx/test_encl.lds |  40 ++
 .../selftests/sgx/test_encl_bootstrap.S   |  89 
 12 files changed, 1194 insertions(+)
 create mode 100644 tools/testing/selftests/sgx/.gitignore
 create mode 100644 tools/testing/selftests/sgx/Makefile
 create mode 100644 tools/testing/selftests/sgx/call.S
 create mode 100644 tools/testing/selftests/sgx/defines.h
 create mode 100644 tools/testing/selftests/sgx/load.c
 create mode 100644 tools/testing/selftests/sgx/main.c
 create mode 100644 tools/testing/selftests/sgx/main.h
 create mode 100644 tools/testing/selftests/sgx/sigstruct.c
 create mode 100644 tools/testing/selftests/sgx/test_encl.c
 create mode 100644 tools/testing/selftests/sgx/test_encl.lds
 create mode 100644 tools/testing/selftests/sgx/test_encl_bootstrap.S

diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 1195bd85af38..ec7be6d5a10d 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -64,6 +64,7 @@ TARGETS += user
 TARGETS += vm
 TARGETS += x86
 TARGETS += zram
+TARGETS += sgx
 #Please keep the TARGETS list alphabetically sorted
 # Run "make quicktest=1 run_tests" or
 # "make quicktest=1 kselftest" from top level Makefile
diff --git a/tools/testing/selftests/sgx/.gitignore 
b/tools/testing/selftests/sgx/.gitignore
new file mode 100644
index ..fbaf0bda9a92
--- /dev/null
+++ b/tools/testing/selftests/sgx/.gitignore
@@ -0,0 +1,2 @@
+test_sgx
+test_encl.elf
diff --git a/tools/testing/selftests/sgx/Makefile 
b/tools/testing/selftests/sgx/Makefile
new file mode 100644
index ..95e5c4df8014
--- /dev/null
+++ b/tools/testing/selftests/sgx/Makefile
@@ -0,0 +1,53 @@
+top_srcdir = ../../../..
+
+include ../lib.mk
+
+.PHONY: all clean
+
+CAN_BUILD_X86_64 := $(shell ../x86/check_cc.sh $(CC) \
+   ../x86/trivial_64bit_program.c)
+
+ifndef OBJCOPY
+OBJCOPY := $(CROSS_COMPILE)objcopy
+endif
+
+INCLUDES := -I$(top_srcdir)/tools/include
+HOST_CFLAGS := -Wall -Werror -g $(INCLUDES) -fPIC -z noexecstack
+ENCL_CFLAGS := -Wall -Werror -static -nostdlib -nostartfiles -fPIC \
+  -fno-stack-protector -mrdrnd $(INCLUDES)
+
+TEST_CUSTOM_PROGS := $(OUTPUT)/test_sgx $(OUTPUT)/test_encl.elf
+
+ifeq ($(CAN_BUILD_X86_64), 1)
+all: $(TEST_CUSTOM_PROGS)
+endif
+
+$(OUTPUT)/test_sgx: $(OUTPUT)/main.o \
+   $(OUTPUT)/load.o \
+   $(OUTPUT)/sigstruct.o \
+   $(OUTPUT)/call.o
+   $(CC) $(HOST_CFLAGS) -o $@ $^ -lcrypto
+
+$(OUTPUT)/main.o: main.c
+   $(CC) $(HOST_CFLAGS) -c $< -o $@
+
+$(OUTPUT)/load.o: load.c
+   $(CC) $(HOST_CFLAGS) -c $< -o $@
+
+$(OUTPUT)/sigstruct.o: sigstruct.c
+   $(CC) $(HOST_CFLAGS) -c $< -o $@
+
+$(OUTPUT)/call.o: call.S
+   $(CC) $(HOST_CFLAGS) -c $< -o $@
+
+$(OUTPUT)/test_encl.elf: test_encl.lds test_encl.c test_encl_bootstrap.S
+   $(CC) $(ENCL_CFLAGS) -T $^ -o $@
+
+EXTRA_CLEAN := \
+   $(OUTPUT)/test_encl.elf \
+   $(OUTPUT)/load.o \
+   $(OUTPUT)/call.o \
+   $(OUTPUT)/main.o \
+   $(OUTPUT)/sigstruct.o \
+   $(OUTPUT)/test_sgx \
+   $(OUTPUT)/test_sgx.o \
diff --git a/tools/testing/selftests/sgx/call.S 
b/tools/testing/selftests/sgx/call.S
new file mode 100644
index ..77131e83db42
--- /dev/null
+++ b/tools/testing/selftests/sgx/call.S
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
+/**
+* Copyright(c) 2016-18 Intel Corporation.
+*/
+
+   .text
+
+   .macro ENCLU
+   .byte 0x0f, 0x01, 0xd7
+   .endm
+
+   .text
+
+   .global sgx_call_vdso
+sgx_call_vdso:
+   .cfi_startproc
+   push%r15
+   .cfi_adjust_cfa_offset  8
+   .cfi_rel_offset %r15, 0
+   push%r14
+   .cfi_adjust_cfa_offset  8
+   .cfi_rel_offset %r14, 0
+   push%r13
+   .cfi_adjust_cfa_offset  8
+   .cfi_rel_offset %r13, 0
+   push%r12
+   .cfi_adjust_cfa_offset  8
+   .cfi_rel_offset %r12, 0
+   push%rbx
+   .cfi_adjust_cfa_offset  8
+   .cfi_rel_offset %rbx, 0
+   push$0
+   .cfi_adjust_cfa_offset  8
+   push   

Re: [PATCH v2 1/2] hwmon: shtc1: add support for device tree bindings

2020-07-06 Thread Guenter Roeck
On Sun, Jul 05, 2020 at 11:47:25AM +0800, Chris Ruehl wrote:
> Add support for DTS bindings for the sensirion shtc1,shtw1 and shtc3.
> 
> Signed-off-by: Chris Ruehl 
> ---
>  drivers/hwmon/shtc1.c | 22 --
>  1 file changed, 20 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/hwmon/shtc1.c b/drivers/hwmon/shtc1.c
> index a0078ccede03..61e9275eb666 100644
> --- a/drivers/hwmon/shtc1.c
> +++ b/drivers/hwmon/shtc1.c
> @@ -14,6 +14,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  /* commands (high precision mode) */
>  static const unsigned char shtc1_cmd_measure_blocking_hpm[]= { 0x7C, 
> 0xA2 };
> @@ -196,6 +197,7 @@ static int shtc1_probe(struct i2c_client *client,
>   enum shtcx_chips chip = id->driver_data;
>   struct i2c_adapter *adap = client->adapter;
>   struct device *dev = &client->dev;
> + struct device_node *np = dev->of_node;
>  
>   if (!i2c_check_functionality(adap, I2C_FUNC_I2C)) {
>   dev_err(dev, "plain i2c transactions not supported\n");
> @@ -233,8 +235,13 @@ static int shtc1_probe(struct i2c_client *client,
>   data->client = client;
>   data->chip = chip;
>  
> - if (client->dev.platform_data)
> + if (np) {
> + data->setup.blocking_io = of_property_read_bool(np, 
> "sensirion,blocking_io");
> + data->setup.high_precision = of_property_read_bool(np, 
> "sensicon,low_precision");

Just noticed. Presumably that would have to be:

data->setup.high_precision = !of_property_read_bool(np, 
"sensicon,low_precision");

> + }
> + else if (client->dev.platform_data)
>   data->setup = *(struct shtc1_platform_data *)dev->platform_data;
> +
>   shtc1_select_command(data);
>   mutex_init(&data->update_lock);
>  
> @@ -257,8 +264,19 @@ static const struct i2c_device_id shtc1_id[] = {
>  };
>  MODULE_DEVICE_TABLE(i2c, shtc1_id);
>  
> +static const struct of_device_id shtc1_of_match[] = {
> + { .compatible = "sensirion,shtc1" },
> + { .compatible = "sensirion,shtw1" },
> + { .compatible = "sensirion,shtc3" },
> + { }
> +};
> +MODULE_DEVICE_TABLE(of, shtc1_of_match);
> +
>  static struct i2c_driver shtc1_i2c_driver = {
> - .driver.name  = "shtc1",
> + .driver = {
> + .name = "shtc1",
> + .of_match_table = shtc1_of_match,
> + },
>   .probe= shtc1_probe,
>   .id_table = shtc1_id,
>  };


Re: [PATCH RFC 1/5] f2fs: introduce inmem curseg

2020-07-06 Thread Jaegeuk Kim
On 07/07, Chao Yu wrote:
> On 2020/7/7 11:21, Jaegeuk Kim wrote:
> > Hi Chao,
> > 
> > Do you have any brief design doc to present the idea?
> 
> Hi Jaegeuk,
> 
> You mean this whole patchset, right?
> 
> I can add a brief design description in patch 0/5.

Yeah, it's a bit hard to understand the whole flow.

Thanks,

> 
> > 
> > Thanks,
> > 
> > On 06/30, Chao Yu wrote:
> >> Previous implementation of aligned pinfile allocation will:
> >> - allocate new segment on cold data log no matter whether last used
> >> segment is partially used or not, it makes IOs more random;
> >> - force concurrent cold data/GCed IO going into warm data area, it
> >> can make a bad effect on hot/cold data separation;
> >>
> >> In this patch, we introduce a new type of log named 'inmem curseg',
> >> the differences from a normal curseg are:
> >> - it reuses existed segment type (CURSEG_XXX_NODE/DATA);
> >> - it only exists in memory, its segno, blkofs, summary will not be
> >>  persisted into checkpoint area;
> >>
> >> With this new feature, we can enhance scalability of log, special
> >> allocators can be created for purposes:
> >> - pure lfs allocator for aligned pinfile allocation or file
> >> defragmentation
> >> - pure ssr allocator for later feature
> >>
> >> So that, let's update aligned pinfile allocation to use this new
> >> inmem curseg fwk.
> >>
> >> Signed-off-by: Chao Yu 
> >> ---
> >>  fs/f2fs/checkpoint.c |   7 ++-
> >>  fs/f2fs/debug.c  |   6 ++-
> >>  fs/f2fs/f2fs.h   |  12 +++--
> >>  fs/f2fs/file.c   |   3 +-
> >>  fs/f2fs/gc.c |   2 +-
> >>  fs/f2fs/segment.c| 107 ++-
> >>  fs/f2fs/segment.h|  17 ---
> >>  fs/f2fs/super.c  |   9 ++--
> >>  8 files changed, 112 insertions(+), 51 deletions(-)
> >>
> >> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> >> index 1bb8278a1c4a..644a914af25a 100644
> >> --- a/fs/f2fs/checkpoint.c
> >> +++ b/fs/f2fs/checkpoint.c
> >> @@ -1623,11 +1623,16 @@ int f2fs_write_checkpoint(struct f2fs_sb_info 
> >> *sbi, struct cp_control *cpc)
> >>  
> >>f2fs_flush_sit_entries(sbi, cpc);
> >>  
> >> +  /* save inmem log status */
> >> +  f2fs_save_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
> >> +
> >>err = do_checkpoint(sbi, cpc);
> >>if (err)
> >>f2fs_release_discard_addrs(sbi);
> >>else
> >>f2fs_clear_prefree_segments(sbi, cpc);
> >> +
> >> +  f2fs_restore_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
> >>  stop:
> >>unblock_operations(sbi);
> >>stat_inc_cp_count(sbi->stat_info);
> >> @@ -1658,7 +1663,7 @@ void f2fs_init_ino_entry_info(struct f2fs_sb_info 
> >> *sbi)
> >>}
> >>  
> >>sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS -
> >> -  NR_CURSEG_TYPE - __cp_payload(sbi)) *
> >> +  NR_CURSEG_PERSIST_TYPE - __cp_payload(sbi)) *
> >>F2FS_ORPHANS_PER_BLOCK;
> >>  }
> >>  
> >> diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
> >> index 4276c0f79beb..41a91aa8c262 100644
> >> --- a/fs/f2fs/debug.c
> >> +++ b/fs/f2fs/debug.c
> >> @@ -164,7 +164,7 @@ static void update_general_status(struct f2fs_sb_info 
> >> *sbi)
> >>* 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg)
> >>/ 2;
> >>si->util_invalid = 50 - si->util_free - si->util_valid;
> >> -  for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_NODE; i++) {
> >> +  for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) {
> >>struct curseg_info *curseg = CURSEG_I(sbi, i);
> >>si->curseg[i] = curseg->segno;
> >>si->cursec[i] = GET_SEC_FROM_SEG(sbi, curseg->segno);
> >> @@ -393,6 +393,10 @@ static int stat_show(struct seq_file *s, void *v)
> >>   si->dirty_seg[CURSEG_COLD_NODE],
> >>   si->full_seg[CURSEG_COLD_NODE],
> >>   si->valid_blks[CURSEG_COLD_NODE]);
> >> +  seq_printf(s, "  - Pinned file: %8d %8d %8d\n",
> >> + si->curseg[CURSEG_COLD_DATA_PINNED],
> >> + si->cursec[CURSEG_COLD_DATA_PINNED],
> >> + si->curzone[CURSEG_COLD_DATA_PINNED]);
> >>seq_printf(s, "\n  - Valid: %d\n  - Dirty: %d\n",
> >>   si->main_area_segs - si->dirty_count -
> >>   si->prefree_count - si->free_segs,
> >> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> >> index 7d6c5f8ce16b..f06c77066284 100644
> >> --- a/fs/f2fs/f2fs.h
> >> +++ b/fs/f2fs/f2fs.h
> >> @@ -996,7 +996,9 @@ static inline void set_new_dnode(struct dnode_of_data 
> >> *dn, struct inode *inode,
> >>   */
> >>  #define   NR_CURSEG_DATA_TYPE (3)
> >>  #define NR_CURSEG_NODE_TYPE   (3)
> >> -#define NR_CURSEG_TYPE(NR_CURSEG_DATA_TYPE + NR_CURSEG_NODE_TYPE)
> >> +#define NR_CURSEG_INMEM_TYPE  (1)
> >> +#define NR_CURSEG_PERSIST_TYPE(NR_CURSEG_DATA_TYPE + 
> >> NR_CURSEG_NODE_TYPE)
> >> +#define NR_CURSEG_TYPE(NR_CURSEG_INMEM_TYPE + 
> >> 

Re: [PATCH V4 1/3] mm/sparsemem: Enable vmem_altmap support in vmemmap_populate_basepages()

2020-07-06 Thread Anshuman Khandual



On 07/06/2020 02:33 PM, David Hildenbrand wrote:
>>  return 0;
>> @@ -1505,7 +1505,7 @@ int __meminit vmemmap_populate(unsigned long start, 
>> unsigned long end, int node,
>>  int err;
>>  
>>  if (end - start < PAGES_PER_SECTION * sizeof(struct page))
>> -err = vmemmap_populate_basepages(start, end, node);
>> +err = vmemmap_populate_basepages(start, end, node, NULL);
>>  else if (boot_cpu_has(X86_FEATURE_PSE))
>>  err = vmemmap_populate_hugepages(start, end, node, altmap);
>>  else if (altmap) {
> 
> It's somewhat weird that we don't allocate basepages from altmap on x86
> (both for sub-sections and without PSE). I wonder if we can simply
> unlock that with your change. Especially, also handle the
> !X86_FEATURE_PSE case below properly with an altmap.
> 
> a) all hw with PMEM has PSE - except special QEMU setups, so nobody
> cared to implement. For the sub-section special case, nobody cared about
> a handful of memmap not ending up on the altmap. (but it's still wasted
> system memory IIRC).
> 
> b) the pagetable overhead for small pages is not negligible and might
> result in similar issues as solved by the switch to altmap on very huge
> PMEM (with small amount of system RAM).
> 
> I guess it is due to a).

Hmm, I assume these are some decisions that x86 platform will have to
make going forward in a subsequent patch as the third patch does for
the arm64 platform. But it is clearly beyond the scope of this patch
which never intended to change existing behavior on a given platform.

> 
> [...]
> 
>>  
>> -pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int 
>> node)
>> +pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int 
>> node,
>> +   struct vmem_altmap *altmap)
>>  {
>>  pte_t *pte = pte_offset_kernel(pmd, addr);
>>  if (pte_none(*pte)) {
>>  pte_t entry;
>> -void *p = vmemmap_alloc_block_buf(PAGE_SIZE, node);
>> +void *p;
>> +
>> +if (altmap)
>> +p = altmap_alloc_block_buf(PAGE_SIZE, altmap);
>> +else
>> +p = vmemmap_alloc_block_buf(PAGE_SIZE, node);
>>  if (!p)
>>  return NULL;
> 
> I was wondering if
> 
> if (altmap)
>   p = altmap_alloc_block_buf(PAGE_SIZE, altmap);
> if (!p)
>   p = vmemmap_alloc_block_buf(PAGE_SIZE, node);
> if (!p)
>   return NULL
> 
> Would make sense. But I guess this isn't really relevant in practice,
> because the altmap is usually sized properly.
> 
> In general, LGTM.

Okay, I assume that no further changes are required here.


[PATCH v6 8/8] .gitignore: add ZSTD-compressed files

2020-07-06 Thread Nick Terrell
From: Adam Borowski 

For now, that's arch/x86/boot/compressed/vmlinux.bin.zst but probably more
will come, thus let's be consistent with all other compressors.

Tested-by: Sedat Dilek 
Reviewed-by: Kees Cook 
Signed-off-by: Nick Terrell 
Signed-off-by: Adam Borowski 
---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index d5f4804ed07c..162bd2b67bdf 100644
--- a/.gitignore
+++ b/.gitignore
@@ -44,6 +44,7 @@
 *.tab.[ch]
 *.tar
 *.xz
+*.zst
 Module.symvers
 modules.builtin
 modules.order
-- 
2.27.0



[PATCH v6 6/8] x86: bump ZO_z_extra_bytes margin for zstd

2020-07-06 Thread Nick Terrell
From: Nick Terrell 

Bump the ZO_z_extra_bytes margin for zstd.

Zstd needs 3 bytes per 128 KB, and has a 22 byte fixed overhead.
Zstd needs to maintain 128 KB of space at all times, since that is
the maximum block size. See the comments regarding in-place
decompression added in lib/decompress_unzstd.c for details.

The existing code is written so that all the compression algorithms use
the same ZO_z_extra_bytes. It is taken to be the maximum of the growth
rate plus the maximum fixed overhead. The comments just above this diff
state that:

Reviewed-by: Kees Cook 
Tested-by: Sedat Dilek 
Signed-off-by: Nick Terrell 
---
 arch/x86/boot/header.S | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index 735ad7f21ab0..6dbd7e9f74c9 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -539,8 +539,14 @@ pref_address:  .quad LOAD_PHYSICAL_ADDR
# preferred load addr
 # the size-dependent part now grows so fast.
 #
 # extra_bytes = (uncompressed_size >> 8) + 65536
+#
+# ZSTD compressed data grows by at most 3 bytes per 128K, and only has a 22
+# byte fixed overhead but has a maximum block size of 128K, so it needs a
+# larger margin.
+#
+# extra_bytes = (uncompressed_size >> 8) + 131072
 
-#define ZO_z_extra_bytes   ((ZO_z_output_len >> 8) + 65536)
+#define ZO_z_extra_bytes   ((ZO_z_output_len >> 8) + 131072)
 #if ZO_z_output_len > ZO_z_input_len
 # define ZO_z_extract_offset   (ZO_z_output_len + ZO_z_extra_bytes - \
 ZO_z_input_len)
-- 
2.27.0



[PATCH v6 2/8] lib: prepare xxhash for preboot environment

2020-07-06 Thread Nick Terrell
From: Nick Terrell 

Don't export symbols if XXH_PREBOOT is defined.

This change is necessary to get xxhash to work in a preboot environment,
which is needed to support zstd-compressed kernels.

Reviewed-by: Kees Cook 
Tested-by: Sedat Dilek 
Signed-off-by: Nick Terrell 
---
 lib/xxhash.c | 21 -
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/lib/xxhash.c b/lib/xxhash.c
index aa61e2a3802f..b4364e011392 100644
--- a/lib/xxhash.c
+++ b/lib/xxhash.c
@@ -80,13 +80,11 @@ void xxh32_copy_state(struct xxh32_state *dst, const struct 
xxh32_state *src)
 {
memcpy(dst, src, sizeof(*dst));
 }
-EXPORT_SYMBOL(xxh32_copy_state);
 
 void xxh64_copy_state(struct xxh64_state *dst, const struct xxh64_state *src)
 {
memcpy(dst, src, sizeof(*dst));
 }
-EXPORT_SYMBOL(xxh64_copy_state);
 
 /*-***
  * Simple Hash Functions
@@ -151,7 +149,6 @@ uint32_t xxh32(const void *input, const size_t len, const 
uint32_t seed)
 
return h32;
 }
-EXPORT_SYMBOL(xxh32);
 
 static uint64_t xxh64_round(uint64_t acc, const uint64_t input)
 {
@@ -234,7 +231,6 @@ uint64_t xxh64(const void *input, const size_t len, const 
uint64_t seed)
 
return h64;
 }
-EXPORT_SYMBOL(xxh64);
 
 /*-**
  * Advanced Hash Functions
@@ -251,7 +247,6 @@ void xxh32_reset(struct xxh32_state *statePtr, const 
uint32_t seed)
state.v4 = seed - PRIME32_1;
memcpy(statePtr, &state, sizeof(state));
 }
-EXPORT_SYMBOL(xxh32_reset);
 
 void xxh64_reset(struct xxh64_state *statePtr, const uint64_t seed)
 {
@@ -265,7 +260,6 @@ void xxh64_reset(struct xxh64_state *statePtr, const 
uint64_t seed)
state.v4 = seed - PRIME64_1;
memcpy(statePtr, &state, sizeof(state));
 }
-EXPORT_SYMBOL(xxh64_reset);
 
 int xxh32_update(struct xxh32_state *state, const void *input, const size_t 
len)
 {
@@ -334,7 +328,6 @@ int xxh32_update(struct xxh32_state *state, const void 
*input, const size_t len)
 
return 0;
 }
-EXPORT_SYMBOL(xxh32_update);
 
 uint32_t xxh32_digest(const struct xxh32_state *state)
 {
@@ -372,7 +365,6 @@ uint32_t xxh32_digest(const struct xxh32_state *state)
 
return h32;
 }
-EXPORT_SYMBOL(xxh32_digest);
 
 int xxh64_update(struct xxh64_state *state, const void *input, const size_t 
len)
 {
@@ -439,7 +431,6 @@ int xxh64_update(struct xxh64_state *state, const void 
*input, const size_t len)
 
return 0;
 }
-EXPORT_SYMBOL(xxh64_update);
 
 uint64_t xxh64_digest(const struct xxh64_state *state)
 {
@@ -494,7 +485,19 @@ uint64_t xxh64_digest(const struct xxh64_state *state)
 
return h64;
 }
+
+#ifndef XXH_PREBOOT
+EXPORT_SYMBOL(xxh32_copy_state);
+EXPORT_SYMBOL(xxh64_copy_state);
+EXPORT_SYMBOL(xxh32);
+EXPORT_SYMBOL(xxh64);
+EXPORT_SYMBOL(xxh32_reset);
+EXPORT_SYMBOL(xxh64_reset);
+EXPORT_SYMBOL(xxh32_update);
+EXPORT_SYMBOL(xxh32_digest);
+EXPORT_SYMBOL(xxh64_update);
 EXPORT_SYMBOL(xxh64_digest);
 
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_DESCRIPTION("xxHash");
+#endif
-- 
2.27.0



[PATCH v6 1/8] lib: prepare zstd for preboot environment

2020-07-06 Thread Nick Terrell
From: Nick Terrell 

* Don't export symbols if ZSTD_PREBOOT is defined.
* Remove a double definition of the CHECK_F macro when the zstd
  library is amalgamated.
* Switch ZSTD_copy8() to __builtin_memcpy(), because in the preboot
  environment on x86 gcc can't inline `memcpy()` otherwise.
* Limit the gcc hack in ZSTD_wildcopy() to the broken gcc version. See
  https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81388.

These changes are necessary to get the build to work in the preboot
environment, and to get reasonable performance. ZSTD_copy8() and
ZSTD_wildcopy() are in the core of the zstd hot loop. So outlining
these calls to memcpy(), and having an extra branch are very
detrimental to performance.

Reviewed-by: Kees Cook 
Tested-by: Sedat Dilek 
Signed-off-by: Nick Terrell 
---
 lib/zstd/decompress.c |  2 ++
 lib/zstd/fse_decompress.c |  9 +
 lib/zstd/zstd_internal.h  | 14 --
 3 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/lib/zstd/decompress.c b/lib/zstd/decompress.c
index 269ee9a796c1..73ded63278cf 100644
--- a/lib/zstd/decompress.c
+++ b/lib/zstd/decompress.c
@@ -2490,6 +2490,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream *zds, 
ZSTD_outBuffer *output, ZSTD_inB
}
 }
 
+#ifndef ZSTD_PREBOOT
 EXPORT_SYMBOL(ZSTD_DCtxWorkspaceBound);
 EXPORT_SYMBOL(ZSTD_initDCtx);
 EXPORT_SYMBOL(ZSTD_decompressDCtx);
@@ -2529,3 +2530,4 @@ EXPORT_SYMBOL(ZSTD_insertBlock);
 
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_DESCRIPTION("Zstd Decompressor");
+#endif
diff --git a/lib/zstd/fse_decompress.c b/lib/zstd/fse_decompress.c
index a84300e5a013..0b353530fb3f 100644
--- a/lib/zstd/fse_decompress.c
+++ b/lib/zstd/fse_decompress.c
@@ -47,6 +47,7 @@
 /
 #include "bitstream.h"
 #include "fse.h"
+#include "zstd_internal.h"
 #include 
 #include 
 #include  /* memcpy, memset */
@@ -60,14 +61,6 @@
enum { FSE_static_assert = 1 / (int)(!!(c)) }; \
} /* use only *after* variable declarations */
 
-/* check and forward error code */
-#define CHECK_F(f)  \
-   {   \
-   size_t const e = f; \
-   if (FSE_isError(e)) \
-   return e;   \
-   }
-
 /* **
 *  Templates
 /
diff --git a/lib/zstd/zstd_internal.h b/lib/zstd/zstd_internal.h
index 1a79fab9e13a..dac753397f86 100644
--- a/lib/zstd/zstd_internal.h
+++ b/lib/zstd/zstd_internal.h
@@ -127,7 +127,14 @@ static const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;
 *  Shared functions to include for inlining
 */
 ZSTD_STATIC void ZSTD_copy8(void *dst, const void *src) {
-   memcpy(dst, src, 8);
+   /*
+* zstd relies heavily on gcc being able to analyze and inline this
+* memcpy() call, since it is called in a tight loop. Preboot mode
+* is compiled in freestanding mode, which stops gcc from analyzing
+* memcpy(). Use __builtin_memcpy() to tell gcc to analyze this as a
+* regular memcpy().
+*/
+   __builtin_memcpy(dst, src, 8);
 }
 /*! ZSTD_wildcopy() :
 *   custom version of memcpy(), can copy up to 7 bytes too many (8 bytes if 
length==0) */
@@ -137,13 +144,16 @@ ZSTD_STATIC void ZSTD_wildcopy(void *dst, const void 
*src, ptrdiff_t length)
const BYTE* ip = (const BYTE*)src;
BYTE* op = (BYTE*)dst;
BYTE* const oend = op + length;
-   /* Work around https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81388.
+#if defined(GCC_VERSION) && GCC_VERSION >= 70000 && GCC_VERSION < 70200
+   /*
+* Work around https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81388.
 * Avoid the bad case where the loop only runs once by handling the
 * special case separately. This doesn't trigger the bug because it
 * doesn't involve pointer/integer overflow.
 */
if (length <= 8)
return ZSTD_copy8(dst, src);
+#endif
do {
ZSTD_copy8(op, ip);
op += 8;
-- 
2.27.0



[PATCH v6 3/8] lib: add zstd support to decompress

2020-07-06 Thread Nick Terrell
From: Nick Terrell 

* Add unzstd() and the zstd decompress interface.
* Add zstd support to decompress_method().

The decompress_method() and unzstd() functions are used to decompress
the initramfs and the initrd. The __decompress() function is used in
the preboot environment to decompress a zstd compressed kernel.

The zstd decompression function allows the input and output buffers to
overlap because that is used by x86 kernel decompression.

Reviewed-by: Kees Cook 
Tested-by: Sedat Dilek 
Signed-off-by: Nick Terrell 
---
 include/linux/decompress/unzstd.h |  11 +
 lib/Kconfig   |   4 +
 lib/Makefile  |   1 +
 lib/decompress.c  |   5 +
 lib/decompress_unzstd.c   | 342 ++
 5 files changed, 363 insertions(+)
 create mode 100644 include/linux/decompress/unzstd.h
 create mode 100644 lib/decompress_unzstd.c

diff --git a/include/linux/decompress/unzstd.h 
b/include/linux/decompress/unzstd.h
new file mode 100644
index ..56d539ae880f
--- /dev/null
+++ b/include/linux/decompress/unzstd.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef LINUX_DECOMPRESS_UNZSTD_H
+#define LINUX_DECOMPRESS_UNZSTD_H
+
+int unzstd(unsigned char *inbuf, long len,
+  long (*fill)(void*, unsigned long),
+  long (*flush)(void*, unsigned long),
+  unsigned char *output,
+  long *pos,
+  void (*error_fn)(char *x));
+#endif
diff --git a/lib/Kconfig b/lib/Kconfig
index df3f3da95990..a5d6f23c4cab 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -342,6 +342,10 @@ config DECOMPRESS_LZ4
select LZ4_DECOMPRESS
tristate
 
+config DECOMPRESS_ZSTD
+   select ZSTD_DECOMPRESS
+   tristate
+
 #
 # Generic allocator support is selected if needed
 #
diff --git a/lib/Makefile b/lib/Makefile
index b1c42c10073b..2ba9642a3a87 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -170,6 +170,7 @@ lib-$(CONFIG_DECOMPRESS_LZMA) += decompress_unlzma.o
 lib-$(CONFIG_DECOMPRESS_XZ) += decompress_unxz.o
 lib-$(CONFIG_DECOMPRESS_LZO) += decompress_unlzo.o
 lib-$(CONFIG_DECOMPRESS_LZ4) += decompress_unlz4.o
+lib-$(CONFIG_DECOMPRESS_ZSTD) += decompress_unzstd.o
 
 obj-$(CONFIG_TEXTSEARCH) += textsearch.o
 obj-$(CONFIG_TEXTSEARCH_KMP) += ts_kmp.o
diff --git a/lib/decompress.c b/lib/decompress.c
index 857ab1af1ef3..ab3fc90ffc64 100644
--- a/lib/decompress.c
+++ b/lib/decompress.c
@@ -13,6 +13,7 @@
 #include 
 #include 
 #include 
+#include <linux/decompress/unzstd.h>
 
 #include 
 #include 
@@ -37,6 +38,9 @@
 #ifndef CONFIG_DECOMPRESS_LZ4
 # define unlz4 NULL
 #endif
+#ifndef CONFIG_DECOMPRESS_ZSTD
+# define unzstd NULL
+#endif
 
 struct compress_format {
unsigned char magic[2];
@@ -52,6 +56,7 @@ static const struct compress_format compressed_formats[] 
__initconst = {
{ {0xfd, 0x37}, "xz", unxz },
{ {0x89, 0x4c}, "lzo", unlzo },
{ {0x02, 0x21}, "lz4", unlz4 },
+   { {0x28, 0xb5}, "zstd", unzstd },
{ {0, 0}, NULL, NULL }
 };
 
diff --git a/lib/decompress_unzstd.c b/lib/decompress_unzstd.c
new file mode 100644
index ..f317afab502f
--- /dev/null
+++ b/lib/decompress_unzstd.c
@@ -0,0 +1,342 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Important notes about in-place decompression
+ *
+ * At least on x86, the kernel is decompressed in place: the compressed data
+ * is placed to the end of the output buffer, and the decompressor overwrites
+ * most of the compressed data. There must be enough safety margin to
+ * guarantee that the write position is always behind the read position.
+ *
+ * The safety margin for ZSTD with a 128 KB block size is calculated below.
+ * Note that the margin with ZSTD is bigger than with GZIP or XZ!
+ *
+ * The worst case for in-place decompression is that the beginning of
+ * the file is compressed extremely well, and the rest of the file is
+ * uncompressible. Thus, we must look for worst-case expansion when the
+ * compressor is encoding uncompressible data.
+ *
+ * The structure of the .zst file in case of a compressed kernel is as follows.
+ * Maximum sizes (as bytes) of the fields are in parentheses.
+ *
+ *Frame Header: (18)
+ *Blocks: (N)
+ *Checksum: (4)
+ *
+ * The frame header and checksum overhead is at most 22 bytes.
+ *
+ * ZSTD stores the data in blocks. Each block has a header whose size is
+ * 3 bytes. After the block header, there is up to 128 KB of payload.
+ * The maximum uncompressed size of the payload is 128 KB. The minimum
+ * uncompressed size of the payload is never less than the payload size
+ * (excluding the block header).
+ *
+ * The assumption, that the uncompressed size of the payload is never
+ * smaller than the payload itself, is valid only when talking about
+ * the payload as a whole. It is possible that the payload has parts where
+ * the decompressor consumes more input than it produces output. Calculating
+ * the worst case for this would be tricky. Instead of trying to do that,
+ * let's 

[PATCH v6 4/8] init: add support for zstd compressed kernel

2020-07-06 Thread Nick Terrell
From: Nick Terrell 

* Adds the zstd cmd to scripts/Makefile.lib
* Adds the HAVE_KERNEL_ZSTD and KERNEL_ZSTD options

Architecture specific support is still needed for decompression.

Reviewed-by: Kees Cook 
Tested-by: Sedat Dilek 
Signed-off-by: Nick Terrell 
---
 init/Kconfig | 15 ++-
 scripts/Makefile.lib | 15 +++
 2 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/init/Kconfig b/init/Kconfig
index 0498af567f70..8d99f0c5e240 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -191,13 +191,16 @@ config HAVE_KERNEL_LZO
 config HAVE_KERNEL_LZ4
bool
 
+config HAVE_KERNEL_ZSTD
+   bool
+
 config HAVE_KERNEL_UNCOMPRESSED
bool
 
 choice
prompt "Kernel compression mode"
default KERNEL_GZIP
-   depends on HAVE_KERNEL_GZIP || HAVE_KERNEL_BZIP2 || HAVE_KERNEL_LZMA || 
HAVE_KERNEL_XZ || HAVE_KERNEL_LZO || HAVE_KERNEL_LZ4 || HAVE_KERNEL_UNCOMPRESSED
+   depends on HAVE_KERNEL_GZIP || HAVE_KERNEL_BZIP2 || HAVE_KERNEL_LZMA || 
HAVE_KERNEL_XZ || HAVE_KERNEL_LZO || HAVE_KERNEL_LZ4 || HAVE_KERNEL_ZSTD || 
HAVE_KERNEL_UNCOMPRESSED
help
  The linux kernel is a kind of self-extracting executable.
  Several compression algorithms are available, which differ
@@ -276,6 +279,16 @@ config KERNEL_LZ4
  is about 8% bigger than LZO. But the decompression speed is
  faster than LZO.
 
+config KERNEL_ZSTD
+   bool "ZSTD"
+   depends on HAVE_KERNEL_ZSTD
+   help
+ ZSTD is a compression algorithm targeting intermediate compression
+ with fast decompression speed. It will compress better than GZIP and
+ decompress around the same speed as LZO, but slower than LZ4. You
+ will need at least 192 KB RAM or more for booting. The zstd command
+ line tool is required for compression.
+
 config KERNEL_UNCOMPRESSED
bool "None"
depends on HAVE_KERNEL_UNCOMPRESSED
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index 916b2f7f7098..d960f8815f87 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -413,6 +413,21 @@ quiet_cmd_xzkern = XZKERN  $@
 quiet_cmd_xzmisc = XZMISC  $@
   cmd_xzmisc = cat $(real-prereqs) | $(XZ) --check=crc32 --lzma2=dict=1MiB 
> $@
 
+# ZSTD
+# ---
+# Appends the uncompressed size of the data using size_append. The .zst
+# format has the size information available at the beginning of the file too,
+# but it's in a more complex format and it's good to avoid changing the part
+# of the boot code that reads the uncompressed size.
+# Note that the bytes added by size_append will make the zstd tool think that
+# the file is corrupt. This is expected.
+
+quiet_cmd_zstd = ZSTD$@
+cmd_zstd = (cat $(filter-out FORCE,$^) | \
+   zstd -19 && \
+$(call size_append, $(filter-out FORCE,$^))) > $@ || \
+   (rm -f $@ ; false)
+
 # ASM offsets
 # ---
 
-- 
2.27.0



[PATCH v6 7/8] x86: Add support for ZSTD compressed kernel

2020-07-06 Thread Nick Terrell
From: Nick Terrell 

* Add support for zstd compressed kernel
* Bump the heap size for zstd.
* Update the documentation.

Integrates the ZSTD decompression code to the x86 pre-boot code.

Zstandard requires slightly more memory during the kernel decompression
on x86 (192 KB vs 64 KB), and the memory usage is independent of the
window size.

This patch has been boot tested with both a zstd and gzip compressed
kernel on i386 and x86_64 using buildroot and QEMU.

Additionally, this has been tested in production on x86_64 devices.
We saw a 2 second boot time reduction by switching kernel compression
from xz to zstd.

Reviewed-by: Kees Cook 
Tested-by: Sedat Dilek 
Signed-off-by: Nick Terrell 
---
 Documentation/x86/boot.rst| 6 +++---
 arch/x86/Kconfig  | 1 +
 arch/x86/boot/compressed/Makefile | 5 -
 arch/x86/boot/compressed/misc.c   | 4 
 arch/x86/include/asm/boot.h   | 6 --
 5 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/Documentation/x86/boot.rst b/Documentation/x86/boot.rst
index 5325c71ca877..7fafc7ac00d7 100644
--- a/Documentation/x86/boot.rst
+++ b/Documentation/x86/boot.rst
@@ -782,9 +782,9 @@ Protocol:   2.08+
   uncompressed data should be determined using the standard magic
   numbers.  The currently supported compression formats are gzip
   (magic numbers 1F 8B or 1F 9E), bzip2 (magic number 42 5A), LZMA
-  (magic number 5D 00), XZ (magic number FD 37), and LZ4 (magic number
-  02 21).  The uncompressed payload is currently always ELF (magic
-  number 7F 45 4C 46).
+  (magic number 5D 00), XZ (magic number FD 37), LZ4 (magic number
+  02 21) and ZSTD (magic number 28 B5). The uncompressed payload is
+  currently always ELF (magic number 7F 45 4C 46).
 
    ==
 Field name:payload_length
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 883da0abf779..4a64395bc35d 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -188,6 +188,7 @@ config X86
select HAVE_KERNEL_LZMA
select HAVE_KERNEL_LZO
select HAVE_KERNEL_XZ
+   select HAVE_KERNEL_ZSTD
select HAVE_KPROBES
select HAVE_KPROBES_ON_FTRACE
select HAVE_FUNCTION_ERROR_INJECTION
diff --git a/arch/x86/boot/compressed/Makefile 
b/arch/x86/boot/compressed/Makefile
index 7619742f91c9..471e61400a2e 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -26,7 +26,7 @@ OBJECT_FILES_NON_STANDARD := y
 KCOV_INSTRUMENT:= n
 
 targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma 
\
-   vmlinux.bin.xz vmlinux.bin.lzo vmlinux.bin.lz4
+   vmlinux.bin.xz vmlinux.bin.lzo vmlinux.bin.lz4 vmlinux.bin.zst
 
 KBUILD_CFLAGS := -m$(BITS) -O2
 KBUILD_CFLAGS += -fno-strict-aliasing $(call cc-option, -fPIE, -fPIC)
@@ -145,6 +145,8 @@ $(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y) FORCE
$(call if_changed,lzo)
 $(obj)/vmlinux.bin.lz4: $(vmlinux.bin.all-y) FORCE
$(call if_changed,lz4)
+$(obj)/vmlinux.bin.zst: $(vmlinux.bin.all-y) FORCE
+   $(call if_changed,zstd)
 
 suffix-$(CONFIG_KERNEL_GZIP)   := gz
 suffix-$(CONFIG_KERNEL_BZIP2)  := bz2
@@ -152,6 +154,7 @@ suffix-$(CONFIG_KERNEL_LZMA):= lzma
 suffix-$(CONFIG_KERNEL_XZ) := xz
 suffix-$(CONFIG_KERNEL_LZO):= lzo
 suffix-$(CONFIG_KERNEL_LZ4):= lz4
+suffix-$(CONFIG_KERNEL_ZSTD)   := zst
 
 quiet_cmd_mkpiggy = MKPIGGY $@
   cmd_mkpiggy = $(obj)/mkpiggy $< > $@
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 9652d5c2afda..39e592d0e0b4 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -77,6 +77,10 @@ static int lines, cols;
 #ifdef CONFIG_KERNEL_LZ4
 #include "../../../../lib/decompress_unlz4.c"
 #endif
+
+#ifdef CONFIG_KERNEL_ZSTD
+#include "../../../../lib/decompress_unzstd.c"
+#endif
 /*
  * NOTE: When adding a new decompressor, please update the analysis in
  * ../header.S.
diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h
index 680c320363db..d6dd43d25d9f 100644
--- a/arch/x86/include/asm/boot.h
+++ b/arch/x86/include/asm/boot.h
@@ -24,9 +24,11 @@
 # error "Invalid value for CONFIG_PHYSICAL_ALIGN"
 #endif
 
-#ifdef CONFIG_KERNEL_BZIP2
+#if defined(CONFIG_KERNEL_BZIP2)
 # define BOOT_HEAP_SIZE             0x400000
-#else /* !CONFIG_KERNEL_BZIP2 */
+#elif defined(CONFIG_KERNEL_ZSTD)
+# define BOOT_HEAP_SIZE             0x30000
+#else
 # define BOOT_HEAP_SIZE             0x10000
 #endif
 
-- 
2.27.0



[PATCH v6 5/8] usr: add support for zstd compressed initramfs

2020-07-06 Thread Nick Terrell
From: Nick Terrell 

* Add support for a zstd compressed initramfs.
* Add compression for compressing built-in initramfs with zstd.

I have tested this patch by boot testing with buildroot and QEMU.
Specifically, I booted the kernel with both a zstd and gzip compressed
initramfs, both built into the kernel and separate. I ensured that the
correct compression algorithm was used. I tested on arm, aarch64, i386,
and x86_64.

This patch has been tested in production on aarch64 and x86_64 devices.

Additionally, I have performance measurements from internal use in
production. On an aarch64 device we saw a 19 second boot time improvement
from switching from lzma to zstd (27 seconds to 8 seconds). On an x86_64
device we saw a 9 second boot time reduction from switching from xz to
zstd.

Reviewed-by: Kees Cook 
Tested-by: Sedat Dilek 
Signed-off-by: Nick Terrell 
---
 usr/Kconfig  | 20 
 usr/Makefile |  1 +
 2 files changed, 21 insertions(+)

diff --git a/usr/Kconfig b/usr/Kconfig
index 96afb03b65f9..2599bc21c1b2 100644
--- a/usr/Kconfig
+++ b/usr/Kconfig
@@ -100,6 +100,15 @@ config RD_LZ4
  Support loading of a LZ4 encoded initial ramdisk or cpio buffer
  If unsure, say N.
 
+config RD_ZSTD
+   bool "Support initial ramdisk/ramfs compressed using ZSTD"
+   default y
+   depends on BLK_DEV_INITRD
+   select DECOMPRESS_ZSTD
+   help
+ Support loading of a ZSTD encoded initial ramdisk or cpio buffer.
+ If unsure, say N.
+
 choice
prompt "Built-in initramfs compression mode"
depends on INITRAMFS_SOURCE != ""
@@ -196,6 +205,17 @@ config INITRAMFS_COMPRESSION_LZ4
  If you choose this, keep in mind that most distros don't provide lz4
  by default which could cause a build failure.
 
+config INITRAMFS_COMPRESSION_ZSTD
+   bool "ZSTD"
+   depends on RD_ZSTD
+   help
+ ZSTD is a compression algorithm targeting intermediate compression
+ with fast decompression speed. It will compress better than GZIP and
+ decompress around the same speed as LZO, but slower than LZ4.
+
+ If you choose this, keep in mind that you may need to install the zstd
+ tool to be able to compress the initram.
+
 config INITRAMFS_COMPRESSION_NONE
bool "None"
help
diff --git a/usr/Makefile b/usr/Makefile
index c12e6b15ce72..b1a81a40eab1 100644
--- a/usr/Makefile
+++ b/usr/Makefile
@@ -15,6 +15,7 @@ compress-$(CONFIG_INITRAMFS_COMPRESSION_LZMA) := lzma
 compress-$(CONFIG_INITRAMFS_COMPRESSION_XZ):= xzmisc
 compress-$(CONFIG_INITRAMFS_COMPRESSION_LZO)   := lzo
 compress-$(CONFIG_INITRAMFS_COMPRESSION_LZ4)   := lz4
+compress-$(CONFIG_INITRAMFS_COMPRESSION_ZSTD)  := zstd
 
 obj-$(CONFIG_BLK_DEV_INITRD) := initramfs_data.o
 
-- 
2.27.0



[GIT PULL][PATCH v6 0/8] Add support for ZSTD-compressed kernel and initramfs

2020-07-06 Thread Nick Terrell
From: Nick Terrell 

Please pull from

  g...@github.com:terrelln/linux.git tags/v6-zstd

to get these changes. Alternatively the patchset is included.

Hi all,

This patch set adds support for a ZSTD-compressed kernel, ramdisk, and
initramfs in the kernel boot process. ZSTD-compressed ramdisk and initramfs
are supported on all architectures. The ZSTD-compressed kernel is only
hooked up to x86 in this patch set.

Zstandard requires slightly more memory during the kernel decompression
on x86 (192 KB vs 64 KB), and the memory usage is independent of the
window size.

Zstandard requires memory proportional to the window size used during
compression for decompressing the ramdisk image, since streaming mode is
used. Newer versions of zstd (1.3.2+) list the window size of a file
with `zstd -lv <file>'. The absolute maximum amount of memory required
is just over 8 MB, but it can be controlled at compression time.

This patch set has been boot tested with buildroot and QEMU based off
of linux-5.8-rc4.

On i386 and x86_64 I have tested the following configurations:
* zstd compressed kernel and a separate zstd compressed initramfs
* zstd compressed kernel and a built-in zstd compressed initramfs
* gzip compressed kernel and a separate gzip compressed initramfs
* gzip compressed kernel and a built-in gzip compressed initramfs

On arm and aarch64 I tested the same configurations, except that the kernel is
always gzip compressed.

Facebook has been using v1 of these patches on x86_64 devices for more than 6
months. When we switched from a xz compressed initramfs to a zstd compressed
initramfs decompression time shrunk from 12 seconds to 3 seconds. When we
switched from a xz compressed kernel to a zstd compressed kernel we saved 2
seconds of boot time.

Facebook has been using v2 of these patches on aarch64 devices for a few weeks.
When we switched from an lzma compressed initramfs to a zstd compressed 
initramfs
decompression time shrunk from 27 seconds to 8 seconds.

The zstd compressed kernel is smaller than the gzip compressed kernel but larger
than the xz or lzma compressed kernels, and it decompresses faster than
everything except lz4. See the table below for the measurement of an x86_64
kernel ordered by compressed size:

algo      size
xz   6,509,792
lzma 6,856,576
zstd 7,399,157
gzip 8,522,527
bzip 8,629,603
lzo  9,808,035
lz4 10,705,570
none32,565,672

Alex Xu ran benchmarks in https://lkml.org/lkml/2020/7/1/722.

v1 -> v2:
- Rebase
  - usr/Makefile and init/Kconfig were changed so the patches were updated
- No functional changes except to rebase
- Split the patches up into smaller chunks

v2 -> v3:
- Add *.zst to the .gitignore in patch 8
- Style nits in patch 3
- Rename the PREBOOT macro to ZSTD_PREBOOT and XXH_PREBOOT in patches
  1 through 3

v3 -> v4:
- Increase the ZSTD_IOBUF_SIZE from 4KB to 128KB to improve performance.
  With this change I switch from malloc() to large_malloc() for the
  buffers.
- Increase the maximum allowed window size from 8 MB to 128 MB, which is
  the max that zstd in the kernel supports.

v4 -> v5:
- Update commit message for patch 6 in response to comments
- Rebase onto next-20200408

v5 -> v6:
- Rebase onto v5.8-rc4

Best,
Nick Terrell

Adam Borowski (1):
  .gitignore: add ZSTD-compressed files

Nick Terrell (7):
  lib: prepare zstd for preboot environment
  lib: prepare xxhash for preboot environment
  lib: add zstd support to decompress
  init: add support for zstd compressed kernel
  usr: add support for zstd compressed initramfs
  x86: bump ZO_z_extra_bytes margin for zstd
  x86: Add support for ZSTD compressed kernel

 .gitignore|   1 +
 Documentation/x86/boot.rst|   6 +-
 arch/x86/Kconfig  |   1 +
 arch/x86/boot/compressed/Makefile |   5 +-
 arch/x86/boot/compressed/misc.c   |   4 +
 arch/x86/boot/header.S|   8 +-
 arch/x86/include/asm/boot.h   |   6 +-
 include/linux/decompress/unzstd.h |  11 +
 init/Kconfig  |  15 +-
 lib/Kconfig   |   4 +
 lib/Makefile  |   1 +
 lib/decompress.c  |   5 +
 lib/decompress_unzstd.c   | 342 ++
 lib/xxhash.c  |  21 +-
 lib/zstd/decompress.c |   2 +
 lib/zstd/fse_decompress.c |   9 +-
 lib/zstd/zstd_internal.h  |  14 +-
 scripts/Makefile.lib  |  15 ++
 usr/Kconfig   |  20 ++
 usr/Makefile  |   1 +
 20 files changed, 464 insertions(+), 27 deletions(-)
 create mode 100644 include/linux/decompress/unzstd.h
 create mode 100644 lib/decompress_unzstd.c

-- 
2.27.0



[PATCH RESEND] lockdep: Move list.h inclusion into lockdep.h

2020-07-06 Thread Herbert Xu
On Thu, Jun 18, 2020 at 04:37:35PM +0200, Peter Zijlstra wrote:
>
> OK, done. tip/locking/header should contain just this patch, and that
> branch also got merged into tip/locking/core.

Hi Peter:

Could you please apply this patch on top as there is still a header
loop otherwise? Thanks!

---8<---
Currently lockdep_types.h includes list.h without actually using any
of its macros or functions.  All it needs are the type definitions
which were moved into types.h long ago.  This potentially causes
inclusion loops because both are included by many core header
files.

This patch moves the list.h inclusion into lockdep.h.  Note that
we could probably remove it completely but that could potentially
result in compile failures should any end users not include list.h
directly and also be unlucky enough to not get list.h via some other
header file.

Reported-by: Petr Mladek 
Tested-by: Petr Mladek 
Signed-off-by: Herbert Xu 

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 3b73cf84f77d..b1ad5c045353 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -21,6 +21,7 @@ extern int lock_stat;
 #ifdef CONFIG_LOCKDEP
 
 #include 
+#include <linux/list.h>
 #include 
 #include 
 
diff --git a/include/linux/lockdep_types.h b/include/linux/lockdep_types.h
index 7b9350624577..bb35b449f533 100644
--- a/include/linux/lockdep_types.h
+++ b/include/linux/lockdep_types.h
@@ -32,8 +32,6 @@ enum lockdep_wait_type {
 
 #ifdef CONFIG_LOCKDEP
 
-#include <linux/list.h>
-
 /*
  * We'd rather not expose kernel/lockdep_states.h this wide, but we do need
  * the total number of states... :-(
-- 
Email: Herbert Xu 
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt


[PATCH v35 24/24] x86/sgx: Update MAINTAINERS

2020-07-06 Thread Jarkko Sakkinen
Add the maintainer information for the SGX subsystem.

Cc: Thomas Gleixner 
Cc: Borislav Petkov 
Signed-off-by: Jarkko Sakkinen 
---
 MAINTAINERS | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 1d4aa7f942de..9b5268aa26db 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8968,6 +8968,17 @@ F:   Documentation/x86/intel_txt.rst
 F: arch/x86/kernel/tboot.c
 F: include/linux/tboot.h
 
+INTEL SGX
+M: Jarkko Sakkinen 
+M: Sean Christopherson 
+L: linux-...@vger.kernel.org
+S: Maintained
+Q: https://patchwork.kernel.org/project/intel-sgx/list/
+T: git https://github.com/jsakkine-intel/linux-sgx.git
+F: arch/x86/include/uapi/asm/sgx.h
+F: arch/x86/kernel/cpu/sgx/*
+K: \bSGX_
+
 INTERCONNECT API
 M: Georgi Djakov 
 L: linux...@vger.kernel.org
-- 
2.25.1



[PATCH v35 23/24] docs: x86/sgx: Document SGX micro architecture and kernel internals

2020-07-06 Thread Jarkko Sakkinen
Document the Intel SGX kernel architecture. The fine-grained micro
architecture details can be looked up from Intel SDM Volume 3D.

Cc: linux-...@vger.kernel.org
Acked-by: Randy Dunlap 
Co-developed-by: Sean Christopherson 
Signed-off-by: Sean Christopherson 
Signed-off-by: Jarkko Sakkinen 
---
 Documentation/x86/index.rst |   1 +
 Documentation/x86/sgx.rst   | 198 
 2 files changed, 199 insertions(+)
 create mode 100644 Documentation/x86/sgx.rst

diff --git a/Documentation/x86/index.rst b/Documentation/x86/index.rst
index 265d9e9a093b..807290bf357c 100644
--- a/Documentation/x86/index.rst
+++ b/Documentation/x86/index.rst
@@ -30,3 +30,4 @@ x86-specific Documentation
usb-legacy-support
i386/index
x86_64/index
+   sgx
diff --git a/Documentation/x86/sgx.rst b/Documentation/x86/sgx.rst
new file mode 100644
index ..5bcbcf0ee7bc
--- /dev/null
+++ b/Documentation/x86/sgx.rst
@@ -0,0 +1,198 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============
+Architecture
+============
+
+*Software Guard eXtensions (SGX)* is a set of instructions that enable ring-3
+applications to set aside private regions of code and data. These regions are
+called enclaves. An enclave can be entered to a fixed set of entry points. Only
+a CPU running inside the enclave can access its code and data.
+
+The support can be determined by
+
+   ``grep sgx /proc/cpuinfo``
+
+Enclave Page Cache
+==================
+
+SGX utilizes an *Enclave Page Cache (EPC)* to store pages that are associated
+with an enclave. It is contained in a BIOS reserved region of physical memory.
+Unlike pages used for regular memory, pages can only be accessed outside the
+enclave for different purposes with the instructions **ENCLS**, **ENCLV** and
+**ENCLU**.
+
+Direct memory accesses to an enclave can be only done by a CPU executing inside
+the enclave. An enclave can be entered with **ENCLU[EENTER]** to a fixed set of
+entry points. However, a CPU executing inside the enclave can do outside memory
+accesses.
+
+Page Types
+----------
+
+**SGX Enclave Control Structure (SECS)**
+   Enclave's address range, attributes and other global data are defined
+   by this structure.
+
+**Regular (REG)**
+   Regular EPC pages contain the code and data of an enclave.
+
+**Thread Control Structure (TCS)**
+   Thread Control Structure pages define the entry points to an enclave and
+   track the execution state of an enclave thread.
+
+**Version Array (VA)**
+   Version Array pages contain 512 slots, each of which can contain a version
+   number for a page evicted from the EPC.
+
+Enclave Page Cache Map
+----------------------
+
+The processor tracks EPC pages via the *Enclave Page Cache Map (EPCM)*.  EPCM
+contains an entry for each EPC page, which describes the owning enclave, access
+rights and page type among other things.
+
+The permissions from EPCM are consulted if and only if walking the kernel page
+tables succeeds. The total permissions are thus a conjunction between page table
+and EPCM permissions.
+
+For all intents and purposes the SGX architecture allows the processor to
+invalidate all EPCM entries at will, i.e. requires that software be prepared to
+handle an EPCM fault at any time. The contents of EPC are encrypted with an
+ephemeral key, which is lost on power transitions.
+
+EPC management
+==============
+
+EPC pages do not have ``struct page`` instances. They are IO memory from kernel
+perspective. The consequence is that they are always mapped as shared memory.
+Kernel defines ``/dev/sgx/enclave`` that can be mapped as ``MAP_SHARED`` to
+define the address range for an enclave.
+
+EPC Over-subscription
+=====================
+
+When the amount of free EPC pages goes below a low watermark the swapping thread
+starts reclaiming pages. The pages that do not have the **A** bit set are
+selected as victim pages.
+
+Launch Control
+==============
+
+SGX provides a launch control mechanism. After all enclave pages have been
+copied, kernel executes **ENCLS[EINIT]**, which initializes the enclave. Only
+after this the CPU can execute inside the enclave.
+
+This leaf function takes an RSA-3072 signature of the enclave measurement and an
+optional cryptographic token. Linux does not take advantage of launch tokens.
+The instruction checks that the signature is signed with the key defined in
+**IA32_SGXLEPUBKEYHASH?** MSRs and the measurement is correct. If so, the
+enclave is allowed to be executed.
+
+MSRs can be configured by the BIOS to be either readable or writable. Linux
+supports only writable configuration in order to give full control to the kernel
+on launch control policy. Readable configuration requires the use of previously
+mentioned launch tokens.
+
+The current kernel implementation supports only writable MSRs. The launch is
+performed by setting the MSRs to the hash of the enclave signer's public key.
+The alternative would be to have *a launch enclave* that would be signed with

[PATCH v35 18/24] x86/vdso: Add support for exception fixup in vDSO functions

2020-07-06 Thread Jarkko Sakkinen
From: Sean Christopherson 

The basic concept and implementation is very similar to the kernel's
exception fixup mechanism.  The key differences are that the kernel
handler is hardcoded and the fixup entry addresses are relative to
the overall table as opposed to individual entries.

Hardcoding the kernel handler avoids the need to figure out how to
get userspace code to point at a kernel function.  Given that the
expected usage is to propagate information to userspace, dumping all
fault information into registers is likely the desired behavior for
the vast majority of yet-to-be-created functions.  Use registers
DI, SI and DX to communicate fault information, which follows Linux's
ABI for register consumption and hopefully avoids conflict with
hardware features that might leverage the fixup capabilities, e.g.
register usage for SGX instructions was at least partially designed
with calling conventions in mind.

Making fixup addresses relative to the overall table allows the table
to be stripped from the final vDSO image (it's a kernel construct)
without complicating the offset logic, e.g. entry-relative addressing
would also need to account for the table's location relative to the
image.

Regarding stripping the table, modify vdso2c to extract the table from
the raw, a.k.a. unstripped, data and dump it as a standalone byte array
in the resulting .c file.  The original base of the table, its length
and a pointer to the byte array are captured in struct vdso_image.
Alternatively, the table could be dumped directly into the struct,
but because the number of entries can vary per image, that would
require either hardcoding a max sized table into the struct definition
or defining the table as a flexible length array.  The flexible length
array approach has zero benefits, e.g. the base/size are still needed,
and prevents reusing the extraction code, while hardcoding the max size
adds ongoing maintenance just to avoid exporting the explicit size.

The immediate use case is for Intel Software Guard Extensions (SGX).
SGX introduces a new CPL3-only "enclave" mode that runs as a sort of
black box shared object that is hosted by an untrusted "normal" CPL3
process.

Entering an enclave can only be done through SGX-specific instructions,
EENTER and ERESUME, and is a non-trivial process.  Because of the
complexity of transitioning to/from an enclave, the vast majority of
enclaves are expected to utilize a library to handle the actual
transitions.  This is roughly analogous to how e.g. libc implementations
are used by most applications.

Another crucial characteristic of SGX enclaves is that they can generate
exceptions as part of their normal (at least as "normal" as SGX can be)
operation that need to be handled *in* the enclave and/or are unique
to SGX.

And because they are essentially fancy shared objects, a process can
host any number of enclaves, each of which can execute multiple threads
simultaneously.

Putting everything together, userspace enclaves will utilize a library
that must be prepared to handle any and (almost) all exceptions any time
at least one thread may be executing in an enclave.  Leveraging signals
to handle the enclave exceptions is unpleasant, to put it mildly, e.g.
the SGX library must constantly (un)register its signal handler based
on whether or not at least one thread is executing in an enclave, and
filter and forward exceptions that aren't related to its enclaves.  This
becomes particularly nasty when using multiple levels of libraries that
register signal handlers, e.g. running an enclave via cgo inside of the
Go runtime.

Enabling exception fixup in vDSO allows the kernel to provide a vDSO
function that wraps the low-level transitions to/from the enclave, i.e.
the EENTER and ERESUME instructions.  The vDSO function can intercept
exceptions that would otherwise generate a signal and return the fault
information directly to its caller, thus avoiding the need to juggle
signal handlers.

Note that unlike the kernel's _ASM_EXTABLE_HANDLE implementation, the
'C' version of _ASM_VDSO_EXTABLE_HANDLE doesn't use a pre-compiled
assembly macro.  Duplicating four lines of code is simpler than adding
the necessary infrastructure to generate pre-compiled assembly and the
intended benefit of massaging GCC's inlining algorithm is unlikely to
be realized in the vDSO any time soon, if ever.

Suggested-by: Andy Lutomirski 
Acked-by: Jethro Beekman 
Signed-off-by: Sean Christopherson 
Signed-off-by: Jarkko Sakkinen 
---
 arch/x86/entry/vdso/Makefile  |  6 ++--
 arch/x86/entry/vdso/extable.c | 46 
 arch/x86/entry/vdso/extable.h | 29 
 arch/x86/entry/vdso/vdso-layout.lds.S |  9 -
 arch/x86/entry/vdso/vdso2c.h  | 50 ++-
 arch/x86/include/asm/vdso.h   |  5 +++
 6 files changed, 140 insertions(+), 5 deletions(-)
 create mode 100644 arch/x86/entry/vdso/extable.c
 create mode 100644 arch/x86/entry/vdso/extable.h


[PATCH v35 20/24] x86/traps: Attempt to fixup exceptions in vDSO before signaling

2020-07-06 Thread Jarkko Sakkinen
From: Sean Christopherson 

vDSO functions can now leverage an exception fixup mechanism similar to
kernel exception fixup.  For vDSO exception fixup, the initial user is
Intel's Software Guard Extensions (SGX), which will wrap the low-level
transitions to/from the enclave, i.e. EENTER and ERESUME instructions,
in a vDSO function and leverage fixup to intercept exceptions that would
otherwise generate a signal.  This allows the vDSO wrapper to return the
fault information directly to its caller, obviating the need for SGX
applications and libraries to juggle signal handlers.

Attempt to fixup vDSO exceptions immediately prior to populating and
sending signal information.  Except for the delivery mechanism, an
exception in a vDSO function should be treated like any other exception
in userspace, e.g. any fault that is successfully handled by the kernel
should not be directly visible to userspace.

Although it's debatable whether or not all exceptions are of interest to
enclaves, defer to the vDSO fixup to decide whether to do fixup or
generate a signal.  Future users of vDSO fixup, if there ever are any,
will undoubtedly have different requirements than SGX enclaves, e.g. the
fixup vs. signal logic can be made function specific if/when necessary.

Suggested-by: Andy Lutomirski 
Acked-by: Jethro Beekman 
Signed-off-by: Sean Christopherson 
Signed-off-by: Jarkko Sakkinen 
---
 arch/x86/kernel/traps.c | 19 ---
 arch/x86/mm/fault.c |  8 
 2 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index b038695f36c5..c2c306f81045 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -59,6 +59,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #ifdef CONFIG_X86_64
 #include 
@@ -117,6 +118,9 @@ do_trap_no_signal(struct task_struct *tsk, int trapnr, 
const char *str,
tsk->thread.error_code = error_code;
tsk->thread.trap_nr = trapnr;
die(str, regs, error_code);
+   } else {
+   if (fixup_vdso_exception(regs, trapnr, error_code, 0))
+   return 0;
}
 
/*
@@ -548,6 +552,9 @@ DEFINE_IDTENTRY_ERRORCODE(exc_general_protection)
tsk->thread.error_code = error_code;
tsk->thread.trap_nr = X86_TRAP_GP;
 
+   if (fixup_vdso_exception(regs, X86_TRAP_GP, error_code, 0))
+   return;
+
show_signal(tsk, SIGSEGV, "", desc, regs, error_code);
force_sig(SIGSEGV);
goto exit;
@@ -829,9 +836,12 @@ static void handle_debug(struct pt_regs *regs, unsigned 
long dr6, bool user)
 #endif
 
if (notify_die(DIE_DEBUG, "debug", regs, (long), 0,
-  SIGTRAP) == NOTIFY_STOP) {
-   return;
-   }
+  SIGTRAP) == NOTIFY_STOP)
+   goto out;
+
+   if (user_mode(regs) &&
+   fixup_vdso_exception(regs, X86_TRAP_DB, 0, 0))
+   goto out;
 
/* It's safe to allow irq's after DR6 has been saved */
cond_local_irq_enable(regs);
@@ -985,6 +995,9 @@ static void math_error(struct pt_regs *regs, int trapnr)
if (!si_code)
goto exit;
 
+   if (fixup_vdso_exception(regs, trapnr, 0, 0))
+   return;
+
force_sig_fault(SIGFPE, si_code,
(void __user *)uprobe_get_trap_addr(regs));
 exit:
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 8ba78384ea73..9f44ff217418 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -31,6 +31,7 @@
 #include /* exception stack  
*/
 #include  /* VMALLOC_START, ...   */
 #include   /* kvm_handle_async_pf  */
+#include   /* fixup_vdso_exception()   */
 
 #define CREATE_TRACE_POINTS
 #include 
@@ -776,6 +777,10 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long 
error_code,
 
sanitize_error_code(address, _code);
 
+   if (fixup_vdso_exception(regs, X86_TRAP_PF, error_code,
+   address))
+   return;
+
if (likely(show_unhandled_signals))
show_signal_msg(regs, error_code, address, tsk);
 
@@ -895,6 +900,9 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, 
unsigned long address,
 
sanitize_error_code(address, _code);
 
+   if (fixup_vdso_exception(regs, X86_TRAP_PF, error_code, address))
+   return;
+
set_signal_archinfo(address, error_code);
 
 #ifdef CONFIG_MEMORY_FAILURE
-- 
2.25.1



[PATCH v35 21/24] x86/vdso: Implement a vDSO for Intel SGX enclave call

2020-07-06 Thread Jarkko Sakkinen
From: Sean Christopherson 

An SGX runtime must be aware of the exceptions, which happen inside an
enclave. Introduce a vDSO call that wraps EENTER/ERESUME cycle and returns
the CPU exception back to the caller exactly when it happens.

The kernel fixes up the exception information into RDI, RSI and RDX. The SGX
call vDSO handler fills this information into the user-provided buffer or,
alternatively, triggers a user-provided callback at the time of the exception.

The calling convention is custom and does not follow System V x86-64 ABI.

Suggested-by: Andy Lutomirski 
Acked-by: Jethro Beekman 
Tested-by: Jethro Beekman 
Signed-off-by: Sean Christopherson 
Co-developed-by: Cedric Xing 
Signed-off-by: Cedric Xing 
Signed-off-by: Jarkko Sakkinen 
---
 arch/x86/entry/vdso/Makefile |   2 +
 arch/x86/entry/vdso/vdso.lds.S   |   1 +
 arch/x86/entry/vdso/vsgx_enter_enclave.S | 131 +++
 arch/x86/include/asm/enclu.h |   8 ++
 arch/x86/include/uapi/asm/sgx.h  |  98 +
 5 files changed, 240 insertions(+)
 create mode 100644 arch/x86/entry/vdso/vsgx_enter_enclave.S
 create mode 100644 arch/x86/include/asm/enclu.h

diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index ebe82b7aecda..f71ad5ebd0c4 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -29,6 +29,7 @@ VDSO32-$(CONFIG_IA32_EMULATION)   := y
 vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o
 vobjs32-y := vdso32/note.o vdso32/system_call.o vdso32/sigreturn.o
 vobjs32-y += vdso32/vclock_gettime.o
+vobjs-$(VDSO64-y)  += vsgx_enter_enclave.o
 
 # files to link into kernel
 obj-y  += vma.o extable.o
@@ -100,6 +101,7 @@ $(vobjs): KBUILD_CFLAGS := $(filter-out 
$(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS
 CFLAGS_REMOVE_vclock_gettime.o = -pg
 CFLAGS_REMOVE_vdso32/vclock_gettime.o = -pg
 CFLAGS_REMOVE_vgetcpu.o = -pg
+CFLAGS_REMOVE_vsgx_enter_enclave.o = -pg
 
 #
 # X32 processes use x32 vDSO to access 64bit kernel data.
diff --git a/arch/x86/entry/vdso/vdso.lds.S b/arch/x86/entry/vdso/vdso.lds.S
index 36b644e16272..4bf48462fca7 100644
--- a/arch/x86/entry/vdso/vdso.lds.S
+++ b/arch/x86/entry/vdso/vdso.lds.S
@@ -27,6 +27,7 @@ VERSION {
__vdso_time;
clock_getres;
__vdso_clock_getres;
+   __vdso_sgx_enter_enclave;
local: *;
};
 }
diff --git a/arch/x86/entry/vdso/vsgx_enter_enclave.S 
b/arch/x86/entry/vdso/vsgx_enter_enclave.S
new file mode 100644
index 000000000000..be7e467e1efb
--- /dev/null
+++ b/arch/x86/entry/vdso/vsgx_enter_enclave.S
@@ -0,0 +1,131 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include 
+#include 
+#include 
+#include 
+
+#include "extable.h"
+
+#define EX_LEAF0*8
+#define EX_TRAPNR  0*8+4
+#define EX_ERROR_CODE  0*8+6
+#define EX_ADDRESS 1*8
+
+.code64
+.section .text, "ax"
+
+SYM_FUNC_START(__vdso_sgx_enter_enclave)
+   /* Prolog */
+   .cfi_startproc
+   push%rbp
+   .cfi_adjust_cfa_offset  8
+   .cfi_rel_offset %rbp, 0
+   mov %rsp, %rbp
+   .cfi_def_cfa_register   %rbp
+   push%rbx
+   .cfi_rel_offset %rbx, -8
+
+   mov %ecx, %eax
+.Lenter_enclave:
+   /* EENTER <= leaf <= ERESUME */
+   cmp $EENTER, %eax
+   jb  .Linvalid_leaf
+   cmp $ERESUME, %eax
+   ja  .Linvalid_leaf
+
+   /* Load TCS and AEP */
+   mov 0x10(%rbp), %rbx
+   lea .Lasync_exit_pointer(%rip), %rcx
+
+   /* Single ENCLU serving as both EENTER and AEP (ERESUME) */
+.Lasync_exit_pointer:
+.Lenclu_eenter_eresume:
+   enclu
+
+   /* EEXIT jumps here unless the enclave is doing something fancy. */
+   xor %eax, %eax
+
+   /* Invoke userspace's exit handler if one was provided. */
+.Lhandle_exit:
+   cmp $0, 0x20(%rbp)
+   jne .Linvoke_userspace_handler
+
+.Lout:
+   pop %rbx
+   leave
+   .cfi_def_cfa%rsp, 8
+   ret
+
+   /* The out-of-line code runs with the pre-leave stack frame. */
+   .cfi_def_cfa%rbp, 16
+
+.Linvalid_leaf:
+   mov $(-EINVAL), %eax
+   jmp .Lout
+
+.Lhandle_exception:
+   mov 0x18(%rbp), %rcx
+   test%rcx, %rcx
+   je  .Lskip_exception_info
+
+   /* Fill optional exception info. */
+   mov %eax, EX_LEAF(%rcx)
+   mov %di,  EX_TRAPNR(%rcx)
+   mov %si,  EX_ERROR_CODE(%rcx)
+   mov %rdx, EX_ADDRESS(%rcx)
+.Lskip_exception_info:
+   mov $(-EFAULT), %eax
+   jmp .Lhandle_exit
+
+.Linvoke_userspace_handler:
+   /* Pass the untrusted RSP (at exit) to the callback via %rcx. */
+   mov %rsp, %rcx
+
+   /* Save the untrusted RSP offset in %rbx (non-volatile register). */
+   mov %rsp, %rbx
+   and $0xf, %rbx
+
+   /*
+* Align stack per x86_64 ABI. Note, %rsp needs to be 

[PATCH v35 17/24] x86/sgx: ptrace() support for the SGX driver

2020-07-06 Thread Jarkko Sakkinen
Add VMA callbacks for ptrace() that can be used with debug enclaves.
With debug enclaves, memory can be read and written a word at a time
by using the ENCLS(EDBGRD) and ENCLS(EDBGWR) leaf instructions.

Acked-by: Jethro Beekman 
Signed-off-by: Jarkko Sakkinen 
---
 arch/x86/kernel/cpu/sgx/encl.c | 87 ++
 1 file changed, 87 insertions(+)

diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
index b22e7187142d..a0c84a7fb6f9 100644
--- a/arch/x86/kernel/cpu/sgx/encl.c
+++ b/arch/x86/kernel/cpu/sgx/encl.c
@@ -334,10 +334,97 @@ static int sgx_vma_mprotect(struct vm_area_struct *vma, 
unsigned long start,
calc_vm_prot_bits(prot, 0));
 }
 
+static int sgx_edbgrd(struct sgx_encl *encl, struct sgx_encl_page *page,
+ unsigned long addr, void *data)
+{
+   unsigned long offset = addr & ~PAGE_MASK;
+   int ret;
+
+
+   ret = __edbgrd(sgx_get_epc_addr(page->epc_page) + offset, data);
+   if (ret)
+   return -EIO;
+
+   return 0;
+}
+
+static int sgx_edbgwr(struct sgx_encl *encl, struct sgx_encl_page *page,
+ unsigned long addr, void *data)
+{
+   unsigned long offset = addr & ~PAGE_MASK;
+   int ret;
+
+   ret = __edbgwr(sgx_get_epc_addr(page->epc_page) + offset, data);
+   if (ret)
+   return -EIO;
+
+   return 0;
+}
+
+static int sgx_vma_access(struct vm_area_struct *vma, unsigned long addr,
+ void *buf, int len, int write)
+{
+   struct sgx_encl *encl = vma->vm_private_data;
+   struct sgx_encl_page *entry = NULL;
+   char data[sizeof(unsigned long)];
+   unsigned long align;
+   unsigned int flags;
+   int offset;
+   int cnt;
+   int ret = 0;
+   int i;
+
+   /* If process was forked, VMA is still there but vm_private_data is set
+* to NULL.
+*/
+   if (!encl)
+   return -EFAULT;
+
+   flags = atomic_read(>flags);
+
+   if (!(flags & SGX_ENCL_DEBUG) || !(flags & SGX_ENCL_INITIALIZED) ||
+   (flags & SGX_ENCL_DEAD))
+   return -EFAULT;
+
+   for (i = 0; i < len; i += cnt) {
+   entry = sgx_encl_reserve_page(encl, (addr + i) & PAGE_MASK);
+   if (IS_ERR(entry)) {
+   ret = PTR_ERR(entry);
+   break;
+   }
+
+   align = ALIGN_DOWN(addr + i, sizeof(unsigned long));
+   offset = (addr + i) & (sizeof(unsigned long) - 1);
+   cnt = sizeof(unsigned long) - offset;
+   cnt = min(cnt, len - i);
+
+   ret = sgx_edbgrd(encl, entry, align, data);
+   if (ret)
+   goto out;
+
+   if (write) {
+   memcpy(data + offset, buf + i, cnt);
+   ret = sgx_edbgwr(encl, entry, align, data);
+   if (ret)
+   goto out;
+   } else
+   memcpy(buf + i, data + offset, cnt);
+
+out:
+   mutex_unlock(>lock);
+
+   if (ret)
+   break;
+   }
+
+   return ret < 0 ? ret : i;
+}
+
 const struct vm_operations_struct sgx_vm_ops = {
.open = sgx_vma_open,
.fault = sgx_vma_fault,
.mprotect = sgx_vma_mprotect,
+   .access = sgx_vma_access,
 };
 
 /**
-- 
2.25.1



[PATCH v35 16/24] x86/sgx: Add a page reclaimer

2020-07-06 Thread Jarkko Sakkinen
There is a limited amount of EPC available. Therefore, some of it must be
copied to the regular memory, and only a subset kept in the SGX reserved
memory. While the kernel cannot directly access enclave memory, SGX provides a
set of ENCLS leaf functions to perform reclaiming.

This commit implements a page reclaimer by using these leaf functions. It
picks the victim pages in LRU fashion from all the enclaves running in the
system. The thread ksgxswapd reclaims pages on the event when the number of
free EPC pages goes below SGX_NR_LOW_PAGES up until it reaches
SGX_NR_HIGH_PAGES.

sgx_alloc_epc_page() can optionally directly reclaim pages with @reclaim
set true. A caller must also supply owner for each page so that the
reclaimer can access the associated enclaves. This is needed for locking,
as most of the ENCLS leafs cannot be executed concurrently for an enclave.
The owner is also needed for accessing SECS, which is required to be
resident when its child pages are being reclaimed.

Cc: linux...@kvack.org
Acked-by: Jethro Beekman 
Tested-by: Jethro Beekman 
Tested-by: Jordan Hand 
Tested-by: Nathaniel McCallum 
Tested-by: Chunyang Hui 
Tested-by: Seth Moore 
Co-developed-by: Sean Christopherson 
Signed-off-by: Sean Christopherson 
Signed-off-by: Jarkko Sakkinen 
---
 arch/x86/kernel/cpu/sgx/driver.c |   1 +
 arch/x86/kernel/cpu/sgx/encl.c   | 340 +-
 arch/x86/kernel/cpu/sgx/encl.h   |  41 +++
 arch/x86/kernel/cpu/sgx/ioctl.c  |  77 -
 arch/x86/kernel/cpu/sgx/main.c   | 464 +++
 arch/x86/kernel/cpu/sgx/sgx.h|   9 +
 6 files changed, 925 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kernel/cpu/sgx/driver.c b/arch/x86/kernel/cpu/sgx/driver.c
index 1cebb6e9c9b7..83e186495e67 100644
--- a/arch/x86/kernel/cpu/sgx/driver.c
+++ b/arch/x86/kernel/cpu/sgx/driver.c
@@ -32,6 +32,7 @@ static int sgx_open(struct inode *inode, struct file *file)
 
atomic_set(>flags, 0);
kref_init(>refcount);
+   INIT_LIST_HEAD(>va_pages);
INIT_RADIX_TREE(>page_tree, GFP_KERNEL);
mutex_init(>lock);
INIT_LIST_HEAD(>mm_list);
diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
index c3755f8bbcba..b22e7187142d 100644
--- a/arch/x86/kernel/cpu/sgx/encl.c
+++ b/arch/x86/kernel/cpu/sgx/encl.c
@@ -12,9 +12,84 @@
 #include "encls.h"
 #include "sgx.h"
 
+static int __sgx_encl_eldu(struct sgx_encl_page *encl_page,
+  struct sgx_epc_page *epc_page,
+  struct sgx_epc_page *secs_page)
+{
+   unsigned long va_offset = SGX_ENCL_PAGE_VA_OFFSET(encl_page);
+   struct sgx_encl *encl = encl_page->encl;
+   struct sgx_pageinfo pginfo;
+   struct sgx_backing b;
+   pgoff_t page_index;
+   int ret;
+
+   if (secs_page)
+   page_index = SGX_ENCL_PAGE_INDEX(encl_page);
+   else
+   page_index = PFN_DOWN(encl->size);
+
+   ret = sgx_encl_get_backing(encl, page_index, );
+   if (ret)
+   return ret;
+
+   pginfo.addr = SGX_ENCL_PAGE_ADDR(encl_page);
+   pginfo.contents = (unsigned long)kmap_atomic(b.contents);
+   pginfo.metadata = (unsigned long)kmap_atomic(b.pcmd) +
+ b.pcmd_offset;
+
+   if (secs_page)
+   pginfo.secs = (u64)sgx_get_epc_addr(secs_page);
+   else
+   pginfo.secs = 0;
+
+   ret = __eldu(, sgx_get_epc_addr(epc_page),
+sgx_get_epc_addr(encl_page->va_page->epc_page) +
+ va_offset);
+   if (ret) {
+   if (encls_failed(ret))
+   ENCLS_WARN(ret, "ELDU");
+
+   ret = -EFAULT;
+   }
+
+   kunmap_atomic((void *)(unsigned long)(pginfo.metadata - b.pcmd_offset));
+   kunmap_atomic((void *)(unsigned long)pginfo.contents);
+
+   sgx_encl_put_backing(, false);
+
+   return ret;
+}
+
+static struct sgx_epc_page *sgx_encl_eldu(struct sgx_encl_page *encl_page,
+ struct sgx_epc_page *secs_page)
+{
+   unsigned long va_offset = SGX_ENCL_PAGE_VA_OFFSET(encl_page);
+   struct sgx_encl *encl = encl_page->encl;
+   struct sgx_epc_page *epc_page;
+   int ret;
+
+   epc_page = sgx_alloc_epc_page(encl_page, false);
+   if (IS_ERR(epc_page))
+   return epc_page;
+
+   ret = __sgx_encl_eldu(encl_page, epc_page, secs_page);
+   if (ret) {
+   sgx_free_epc_page(epc_page);
+   return ERR_PTR(ret);
+   }
+
+   sgx_free_va_slot(encl_page->va_page, va_offset);
+   list_move(_page->va_page->list, >va_pages);
+   encl_page->desc &= ~SGX_ENCL_PAGE_VA_OFFSET_MASK;
+   encl_page->epc_page = epc_page;
+
+   return epc_page;
+}
+
 static struct sgx_encl_page *sgx_encl_load_page(struct sgx_encl *encl,
unsigned long addr)
 {
+   struct sgx_epc_page *epc_page;

[PATCH v35 19/24] x86/fault: Add helper function to sanitize error code

2020-07-06 Thread Jarkko Sakkinen
From: Sean Christopherson 

Add helper function to sanitize error code to prepare for vDSO exception
fixup, which will expose the error code to userspace and runs before
set_signal_archinfo(), i.e. suppresses the signal when fixup is successful.

Acked-by: Jethro Beekman 
Signed-off-by: Sean Christopherson 
Signed-off-by: Jarkko Sakkinen 
---
 arch/x86/mm/fault.c | 24 +---
 1 file changed, 17 insertions(+), 7 deletions(-)

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 1db6fbd7af8e..8ba78384ea73 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -556,6 +556,18 @@ pgtable_bad(struct pt_regs *regs, unsigned long error_code,
oops_end(flags, regs, sig);
 }
 
+static void sanitize_error_code(unsigned long address,
+   unsigned long *error_code)
+{
+   /*
+* To avoid leaking information about the kernel page
+* table layout, pretend that user-mode accesses to
+* kernel addresses are always protection faults.
+*/
+   if (address >= TASK_SIZE_MAX)
+   *error_code |= X86_PF_PROT;
+}
+
 static void set_signal_archinfo(unsigned long address,
unsigned long error_code)
 {
@@ -612,6 +624,8 @@ no_context(struct pt_regs *regs, unsigned long error_code,
 * faulting through the emulate_vsyscall() logic.
 */
if (current->thread.sig_on_uaccess_err && signal) {
+   sanitize_error_code(address, _code);
+
set_signal_archinfo(address, error_code);
 
/* XXX: hwpoison faults will set the wrong code. */
@@ -760,13 +774,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long 
error_code,
if (is_errata100(regs, address))
return;
 
-   /*
-* To avoid leaking information about the kernel page table
-* layout, pretend that user-mode accesses to kernel addresses
-* are always protection faults.
-*/
-   if (address >= TASK_SIZE_MAX)
-   error_code |= X86_PF_PROT;
+   sanitize_error_code(address, _code);
 
if (likely(show_unhandled_signals))
show_signal_msg(regs, error_code, address, tsk);
@@ -885,6 +893,8 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, 
unsigned long address,
if (is_prefetch(regs, error_code, address))
return;
 
+   sanitize_error_code(address, _code);
+
set_signal_archinfo(address, error_code);
 
 #ifdef CONFIG_MEMORY_FAILURE
-- 
2.25.1



[PATCH v35 14/24] x86/sgx: Add SGX_IOC_ENCLAVE_INIT

2020-07-06 Thread Jarkko Sakkinen
Add an ioctl that performs ENCLS[EINIT], which locks down the measurement
and initializes the enclave for entrance. After this, new pages can no
longer be added.

Acked-by: Jethro Beekman 
Tested-by: Jethro Beekman 
Tested-by: Haitao Huang 
Tested-by: Chunyang Hui 
Tested-by: Jordan Hand 
Tested-by: Nathaniel McCallum 
Tested-by: Seth Moore 
Co-developed-by: Sean Christopherson 
Signed-off-by: Sean Christopherson 
Co-developed-by: Suresh Siddha 
Signed-off-by: Suresh Siddha 
Signed-off-by: Jarkko Sakkinen 
---
 arch/x86/include/uapi/asm/sgx.h |  11 ++
 arch/x86/kernel/cpu/sgx/ioctl.c | 188 
 2 files changed, 199 insertions(+)

diff --git a/arch/x86/include/uapi/asm/sgx.h b/arch/x86/include/uapi/asm/sgx.h
index c8f199b3fb6f..5edb08ab8fd0 100644
--- a/arch/x86/include/uapi/asm/sgx.h
+++ b/arch/x86/include/uapi/asm/sgx.h
@@ -23,6 +23,8 @@ enum sgx_page_flags {
_IOW(SGX_MAGIC, 0x00, struct sgx_enclave_create)
 #define SGX_IOC_ENCLAVE_ADD_PAGES \
_IOWR(SGX_MAGIC, 0x01, struct sgx_enclave_add_pages)
+#define SGX_IOC_ENCLAVE_INIT \
+   _IOW(SGX_MAGIC, 0x02, struct sgx_enclave_init)
 
 /**
  * struct sgx_enclave_create - parameter structure for the
@@ -52,4 +54,13 @@ struct sgx_enclave_add_pages {
__u64   count;
 };
 
+/**
+ * struct sgx_enclave_init - parameter structure for the
+ *   %SGX_IOC_ENCLAVE_INIT ioctl
+ * @sigstruct: address for the SIGSTRUCT data
+ */
+struct sgx_enclave_init {
+   __u64 sigstruct;
+};
+
 #endif /* _UAPI_ASM_X86_SGX_H */
diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c
index 595da21a368d..599bd30c6d05 100644
--- a/arch/x86/kernel/cpu/sgx/ioctl.c
+++ b/arch/x86/kernel/cpu/sgx/ioctl.c
@@ -16,6 +16,9 @@
 #include "encl.h"
 #include "encls.h"
 
+/* A per-cpu cache for the last known values of IA32_SGXLEPUBKEYHASHx MSRs. */
+static DEFINE_PER_CPU(u64 [4], sgx_lepubkeyhash_cache);
+
 static u32 sgx_calc_ssa_frame_size(u32 miscselect, u64 xfrm)
 {
u32 size_max = PAGE_SIZE;
@@ -486,6 +489,188 @@ static long sgx_ioc_enclave_add_pages(struct sgx_encl 
*encl, void __user *arg)
return ret;
 }
 
+static int __sgx_get_key_hash(struct crypto_shash *tfm, const void *modulus,
+ void *hash)
+{
+   SHASH_DESC_ON_STACK(shash, tfm);
+
+   shash->tfm = tfm;
+
+   return crypto_shash_digest(shash, modulus, SGX_MODULUS_SIZE, hash);
+}
+
+static int sgx_get_key_hash(const void *modulus, void *hash)
+{
+   struct crypto_shash *tfm;
+   int ret;
+
+   tfm = crypto_alloc_shash("sha256", 0, CRYPTO_ALG_ASYNC);
+   if (IS_ERR(tfm))
+   return PTR_ERR(tfm);
+
+   ret = __sgx_get_key_hash(tfm, modulus, hash);
+
+   crypto_free_shash(tfm);
+   return ret;
+}
+
+static void sgx_update_lepubkeyhash_msrs(u64 *lepubkeyhash, bool enforce)
+{
+   u64 *cache;
+   int i;
+
+   cache = per_cpu(sgx_lepubkeyhash_cache, smp_processor_id());
+   for (i = 0; i < 4; i++) {
+   if (enforce || (lepubkeyhash[i] != cache[i])) {
+   wrmsrl(MSR_IA32_SGXLEPUBKEYHASH0 + i, lepubkeyhash[i]);
+   cache[i] = lepubkeyhash[i];
+   }
+   }
+}
+
+static int sgx_einit(struct sgx_sigstruct *sigstruct, void *token,
+struct sgx_epc_page *secs, u64 *lepubkeyhash)
+{
+   int ret;
+
+   preempt_disable();
+   sgx_update_lepubkeyhash_msrs(lepubkeyhash, false);
+   ret = __einit(sigstruct, token, sgx_get_epc_addr(secs));
+   if (ret == SGX_INVALID_EINITTOKEN) {
+   sgx_update_lepubkeyhash_msrs(lepubkeyhash, true);
+   ret = __einit(sigstruct, token, sgx_get_epc_addr(secs));
+   }
+   preempt_enable();
+   return ret;
+}
+
+static int sgx_encl_init(struct sgx_encl *encl, struct sgx_sigstruct 
*sigstruct,
+void *token)
+{
+   u64 mrsigner[4];
+   int ret;
+   int i;
+   int j;
+
+   /* Check that the required attributes have been authorized. */
+   if (encl->secs_attributes & ~encl->allowed_attributes)
+   return -EACCES;
+
+   ret = sgx_get_key_hash(sigstruct->modulus, mrsigner);
+   if (ret)
+   return ret;
+
+   mutex_lock(>lock);
+
+   /*
+* Periodically, EINIT polls for certain asynchronous events. If such an
+* event is detected, it completes with SGX_UNMSKED_EVENT.
+*/
+   for (i = 0; i < SGX_EINIT_SLEEP_COUNT; i++) {
+   for (j = 0; j < SGX_EINIT_SPIN_COUNT; j++) {
+   ret = sgx_einit(sigstruct, token, encl->secs.epc_page,
+   mrsigner);
+   if (ret == SGX_UNMASKED_EVENT)
+   continue;
+   else
+   break;
+   }
+
+   if (ret != SGX_UNMASKED_EVENT)
+   break;
+
+

[PATCH v35 15/24] x86/sgx: Allow a limited use of ATTRIBUTE.PROVISIONKEY for attestation

2020-07-06 Thread Jarkko Sakkinen
Provisioning Certification Enclave (PCE), the root of trust for other
enclaves, generates a signing key from a fused key called Provisioning
Certification Key. PCE can then use this key to certify an attestation key
of a Quoting Enclave (QE), e.g. we get the chain of trust down to the
hardware if the Intel signed PCE is used.

To use the needed keys, ATTRIBUTE.PROVISIONKEY is required but should be
only allowed for those who actually need it so that only the trusted
parties can certify QE's.

Obviously the attestation service should know the public key of the used
PCE and that way detect illegit attestation, but whitelisting the legit
users still adds an additional layer of defence.

Add a new device file called /dev/sgx/provision. The sole purpose of this
file is to provide file descriptors that act as privilege tokens that allow
building enclaves with ATTRIBUTE.PROVISIONKEY set. A new ioctl called
SGX_IOC_ENCLAVE_SET_ATTRIBUTE is used to assign this token to an enclave.

Cc: linux-security-mod...@vger.kernel.org
Acked-by: Jethro Beekman 
Suggested-by: Andy Lutomirski 
Signed-off-by: Jarkko Sakkinen 
---
 arch/x86/include/uapi/asm/sgx.h  | 11 
 arch/x86/kernel/cpu/sgx/driver.c | 18 
 arch/x86/kernel/cpu/sgx/driver.h |  2 ++
 arch/x86/kernel/cpu/sgx/ioctl.c  | 47 
 4 files changed, 78 insertions(+)

diff --git a/arch/x86/include/uapi/asm/sgx.h b/arch/x86/include/uapi/asm/sgx.h
index 5edb08ab8fd0..57d0d30c79b3 100644
--- a/arch/x86/include/uapi/asm/sgx.h
+++ b/arch/x86/include/uapi/asm/sgx.h
@@ -25,6 +25,8 @@ enum sgx_page_flags {
_IOWR(SGX_MAGIC, 0x01, struct sgx_enclave_add_pages)
 #define SGX_IOC_ENCLAVE_INIT \
_IOW(SGX_MAGIC, 0x02, struct sgx_enclave_init)
+#define SGX_IOC_ENCLAVE_SET_ATTRIBUTE \
+   _IOW(SGX_MAGIC, 0x03, struct sgx_enclave_set_attribute)
 
 /**
  * struct sgx_enclave_create - parameter structure for the
@@ -63,4 +65,13 @@ struct sgx_enclave_init {
__u64 sigstruct;
 };
 
+/**
+ * struct sgx_enclave_set_attribute - parameter structure for the
+ *   %SGX_IOC_ENCLAVE_SET_ATTRIBUTE ioctl
+ * @attribute_fd:  file handle of the attribute file, e.g. /dev/sgx/provision
+ */
+struct sgx_enclave_set_attribute {
+   __u64 attribute_fd;
+};
+
 #endif /* _UAPI_ASM_X86_SGX_H */
diff --git a/arch/x86/kernel/cpu/sgx/driver.c b/arch/x86/kernel/cpu/sgx/driver.c
index 20c3254675e9..1cebb6e9c9b7 100644
--- a/arch/x86/kernel/cpu/sgx/driver.c
+++ b/arch/x86/kernel/cpu/sgx/driver.c
@@ -139,6 +139,10 @@ static const struct file_operations sgx_encl_fops = {
.get_unmapped_area  = sgx_get_unmapped_area,
 };
 
+const struct file_operations sgx_provision_fops = {
+   .owner  = THIS_MODULE,
+};
+
 static struct miscdevice sgx_dev_enclave = {
.minor = MISC_DYNAMIC_MINOR,
.name = "enclave",
@@ -146,6 +150,13 @@ static struct miscdevice sgx_dev_enclave = {
.fops = &sgx_encl_fops,
 };
 
+static struct miscdevice sgx_dev_provision = {
+   .minor = MISC_DYNAMIC_MINOR,
+   .name = "provision",
+   .nodename = "sgx/provision",
+   .fops = &sgx_provision_fops,
+};
+
 int __init sgx_drv_init(void)
 {
unsigned int eax, ebx, ecx, edx;
@@ -186,5 +197,12 @@ int __init sgx_drv_init(void)
return ret;
}
 
+   ret = misc_register(&sgx_dev_provision);
+   if (ret) {
+   pr_err("Creating /dev/sgx/provision failed with %d.\n", ret);
+   misc_deregister(&sgx_dev_enclave);
+   return ret;
+   }
+
return 0;
 }
diff --git a/arch/x86/kernel/cpu/sgx/driver.h b/arch/x86/kernel/cpu/sgx/driver.h
index e4063923115b..72747d01c046 100644
--- a/arch/x86/kernel/cpu/sgx/driver.h
+++ b/arch/x86/kernel/cpu/sgx/driver.h
@@ -23,6 +23,8 @@ extern u64 sgx_attributes_reserved_mask;
 extern u64 sgx_xfrm_reserved_mask;
 extern u32 sgx_xsave_size_tbl[64];
 
+extern const struct file_operations sgx_provision_fops;
+
 long sgx_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
 
 int sgx_drv_init(void);
diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c
index 599bd30c6d05..07aa45e77dd0 100644
--- a/arch/x86/kernel/cpu/sgx/ioctl.c
+++ b/arch/x86/kernel/cpu/sgx/ioctl.c
@@ -670,6 +670,50 @@ static long sgx_ioc_enclave_init(struct sgx_encl *encl, 
void __user *arg)
return ret;
 }
 
+/**
+ * sgx_ioc_enclave_set_attribute - handler for %SGX_IOC_ENCLAVE_SET_ATTRIBUTE
+ * @filep: open file to /dev/sgx
+ * @arg:   userspace pointer to a struct sgx_enclave_set_attribute instance
+ *
+ * Mark the enclave as being allowed to access a restricted attribute bit.
+ * The requested attribute is specified via the attribute_fd field in the
+ * provided struct sgx_enclave_set_attribute.  The attribute_fd must be a
+ * handle to an SGX attribute file, e.g. "/dev/sgx/provision".
+ *
+ * Failure to explicitly request access to a restricted attribute will cause
+ * 

[PATCH v35 13/24] x86/sgx: Add SGX_IOC_ENCLAVE_ADD_PAGES

2020-07-06 Thread Jarkko Sakkinen
Add an ioctl that performs ENCLS[EADD], which adds a new visible page to an
enclave, and optionally ENCLS[EEXTEND] operations, which hash the page into
the enclave measurement. By visible we mean a page that can be mapped to the
address range of an enclave.

Acked-by: Jethro Beekman 
Tested-by: Jethro Beekman 
Tested-by: Haitao Huang 
Tested-by: Chunyang Hui 
Tested-by: Jordan Hand 
Tested-by: Nathaniel McCallum 
Tested-by: Seth Moore 
Co-developed-by: Sean Christopherson 
Signed-off-by: Sean Christopherson 
Co-developed-by: Suresh Siddha 
Signed-off-by: Suresh Siddha 
Signed-off-by: Jarkko Sakkinen 
---
 arch/x86/include/uapi/asm/sgx.h |  30 
 arch/x86/kernel/cpu/sgx/ioctl.c | 292 
 2 files changed, 322 insertions(+)

diff --git a/arch/x86/include/uapi/asm/sgx.h b/arch/x86/include/uapi/asm/sgx.h
index 3787d278e84b..c8f199b3fb6f 100644
--- a/arch/x86/include/uapi/asm/sgx.h
+++ b/arch/x86/include/uapi/asm/sgx.h
@@ -8,10 +8,21 @@
 #include 
 #include 
 
+/**
+ * enum sgx_page_flags - page control flags
+ * %SGX_PAGE_MEASURE:  Measure the page contents with a sequence of
+ * ENCLS[EEXTEND] operations.
+ */
+enum sgx_page_flags {
+   SGX_PAGE_MEASURE= 0x01,
+};
+
 #define SGX_MAGIC 0xA4
 
 #define SGX_IOC_ENCLAVE_CREATE \
_IOW(SGX_MAGIC, 0x00, struct sgx_enclave_create)
+#define SGX_IOC_ENCLAVE_ADD_PAGES \
+   _IOWR(SGX_MAGIC, 0x01, struct sgx_enclave_add_pages)
 
 /**
  * struct sgx_enclave_create - parameter structure for the
@@ -22,4 +33,23 @@ struct sgx_enclave_create  {
__u64   src;
 };
 
+/**
+ * struct sgx_enclave_add_pages - parameter structure for the
+ *%SGX_IOC_ENCLAVE_ADD_PAGES ioctl
+ * @src:   start address for the page data
+ * @offset:starting page offset
+ * @length:length of the data (multiple of the page size)
+ * @secinfo:   address for the SECINFO data
+ * @flags: page control flags
+ * @count: number of bytes added (multiple of the page size)
+ */
+struct sgx_enclave_add_pages {
+   __u64   src;
+   __u64   offset;
+   __u64   length;
+   __u64   secinfo;
+   __u64   flags;
+   __u64   count;
+};
+
 #endif /* _UAPI_ASM_X86_SGX_H */
diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c
index 7981c411b05a..595da21a368d 100644
--- a/arch/x86/kernel/cpu/sgx/ioctl.c
+++ b/arch/x86/kernel/cpu/sgx/ioctl.c
@@ -197,6 +197,295 @@ static long sgx_ioc_enclave_create(struct sgx_encl *encl, 
void __user *arg)
return ret;
 }
 
+static struct sgx_encl_page *sgx_encl_page_alloc(struct sgx_encl *encl,
+unsigned long offset,
+u64 secinfo_flags)
+{
+   struct sgx_encl_page *encl_page;
+   unsigned long prot;
+
+   encl_page = kzalloc(sizeof(*encl_page), GFP_KERNEL);
+   if (!encl_page)
+   return ERR_PTR(-ENOMEM);
+
+   encl_page->desc = encl->base + offset;
+   encl_page->encl = encl;
+
+   prot = _calc_vm_trans(secinfo_flags, SGX_SECINFO_R, PROT_READ)  |
+  _calc_vm_trans(secinfo_flags, SGX_SECINFO_W, PROT_WRITE) |
+  _calc_vm_trans(secinfo_flags, SGX_SECINFO_X, PROT_EXEC);
+
+   /*
+* TCS pages must always have RW set for CPU access while the SECINFO
+* permissions are *always* zero - the CPU ignores the user provided
+* values and silently overwrites them with zero permissions.
+*/
+   if ((secinfo_flags & SGX_SECINFO_PAGE_TYPE_MASK) == SGX_SECINFO_TCS)
+   prot |= PROT_READ | PROT_WRITE;
+
+   /* Calculate maximum of the VM flags for the page. */
+   encl_page->vm_max_prot_bits = calc_vm_prot_bits(prot, 0);
+
+   return encl_page;
+}
+
+static int sgx_validate_secinfo(struct sgx_secinfo *secinfo)
+{
+   u64 perm = secinfo->flags & SGX_SECINFO_PERMISSION_MASK;
+   u64 pt = secinfo->flags & SGX_SECINFO_PAGE_TYPE_MASK;
+
+   if (pt != SGX_SECINFO_REG && pt != SGX_SECINFO_TCS)
+   return -EINVAL;
+
+   if ((perm & SGX_SECINFO_W) && !(perm & SGX_SECINFO_R))
+   return -EINVAL;
+
+   /*
+* CPU will silently overwrite the permissions as zero, which means
+* that we need to validate it ourselves.
+*/
+   if (pt == SGX_SECINFO_TCS && perm)
+   return -EINVAL;
+
+   if (secinfo->flags & SGX_SECINFO_RESERVED_MASK)
+   return -EINVAL;
+
+   if (memchr_inv(secinfo->reserved, 0, sizeof(secinfo->reserved)))
+   return -EINVAL;
+
+   return 0;
+}
+
+static int __sgx_encl_add_page(struct sgx_encl *encl,
+  struct sgx_encl_page *encl_page,
+  struct sgx_epc_page *epc_page,
+  struct sgx_secinfo *secinfo, unsigned long src)
+{
+   struct sgx_pageinfo pginfo;
+   struct vm_area_struct *vma;
+   struct page 

[PATCH v35 12/24] x86/sgx: Add SGX_IOC_ENCLAVE_CREATE

2020-07-06 Thread Jarkko Sakkinen
Add an ioctl that performs ENCLS[ECREATE], which creates the SGX Enclave
Control Structure (SECS) for the enclave. The SECS contains attributes about
the enclave that are used by the hardware and cannot be directly accessed by
software, as the SECS resides in the EPC.

One essential field in SECS is a field that stores the SHA256 of the
measured enclave pages. This field, MRENCLAVE, is initialized by the
ECREATE instruction and updated by every EADD and EEXTEND operation.
Finally, EINIT locks down the value.

Acked-by: Jethro Beekman 
Tested-by: Jethro Beekman 
Tested-by: Haitao Huang 
Tested-by: Chunyang Hui 
Tested-by: Jordan Hand 
Tested-by: Nathaniel McCallum 
Tested-by: Seth Moore 
Co-developed-by: Sean Christopherson 
Signed-off-by: Sean Christopherson 
Co-developed-by: Suresh Siddha 
Signed-off-by: Suresh Siddha 
Signed-off-by: Jarkko Sakkinen 
---
 .../userspace-api/ioctl/ioctl-number.rst  |   1 +
 arch/x86/include/uapi/asm/sgx.h   |  25 ++
 arch/x86/kernel/cpu/sgx/Makefile  |   1 +
 arch/x86/kernel/cpu/sgx/driver.c  |  12 +
 arch/x86/kernel/cpu/sgx/driver.h  |   1 +
 arch/x86/kernel/cpu/sgx/ioctl.c   | 226 ++
 6 files changed, 266 insertions(+)
 create mode 100644 arch/x86/include/uapi/asm/sgx.h
 create mode 100644 arch/x86/kernel/cpu/sgx/ioctl.c

diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst 
b/Documentation/userspace-api/ioctl/ioctl-number.rst
index 59472cd6a11d..35f713e3a267 100644
--- a/Documentation/userspace-api/ioctl/ioctl-number.rst
+++ b/Documentation/userspace-api/ioctl/ioctl-number.rst
@@ -323,6 +323,7 @@ Code  Seq#Include File  
 Comments
  

 0xA3  90-9F  linux/dtlk.h
 0xA4  00-1F  uapi/linux/tee.hGeneric 
TEE subsystem
+0xA4  00-1F  uapi/asm/sgx.h  Intel SGX 
subsystem (a legit conflict as TEE and SGX do not co-exist)
 0xAA  00-3F  linux/uapi/linux/userfaultfd.h
 0xAB  00-1F  linux/nbd.h
 0xAC  00-1F  linux/raw.h
diff --git a/arch/x86/include/uapi/asm/sgx.h b/arch/x86/include/uapi/asm/sgx.h
new file mode 100644
index ..3787d278e84b
--- /dev/null
+++ b/arch/x86/include/uapi/asm/sgx.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) WITH Linux-syscall-note 
*/
+/*
+ * Copyright(c) 2016-19 Intel Corporation.
+ */
+#ifndef _UAPI_ASM_X86_SGX_H
+#define _UAPI_ASM_X86_SGX_H
+
+#include 
+#include 
+
+#define SGX_MAGIC 0xA4
+
+#define SGX_IOC_ENCLAVE_CREATE \
+   _IOW(SGX_MAGIC, 0x00, struct sgx_enclave_create)
+
+/**
+ * struct sgx_enclave_create - parameter structure for the
+ * %SGX_IOC_ENCLAVE_CREATE ioctl
+ * @src:   address for the SECS page data
+ */
+struct sgx_enclave_create  {
+   __u64   src;
+};
+
+#endif /* _UAPI_ASM_X86_SGX_H */
diff --git a/arch/x86/kernel/cpu/sgx/Makefile b/arch/x86/kernel/cpu/sgx/Makefile
index 3fc451120735..91d3dc784a29 100644
--- a/arch/x86/kernel/cpu/sgx/Makefile
+++ b/arch/x86/kernel/cpu/sgx/Makefile
@@ -1,4 +1,5 @@
 obj-y += \
driver.o \
encl.o \
+   ioctl.o \
main.o
diff --git a/arch/x86/kernel/cpu/sgx/driver.c b/arch/x86/kernel/cpu/sgx/driver.c
index 682ec78230ac..20c3254675e9 100644
--- a/arch/x86/kernel/cpu/sgx/driver.c
+++ b/arch/x86/kernel/cpu/sgx/driver.c
@@ -119,10 +119,22 @@ static unsigned long sgx_get_unmapped_area(struct file 
*file,
return current->mm->get_unmapped_area(file, addr, len, pgoff, flags);
 }
 
+#ifdef CONFIG_COMPAT
+static long sgx_compat_ioctl(struct file *filep, unsigned int cmd,
+ unsigned long arg)
+{
+   return sgx_ioctl(filep, cmd, arg);
+}
+#endif
+
 static const struct file_operations sgx_encl_fops = {
.owner  = THIS_MODULE,
.open   = sgx_open,
.release= sgx_release,
+   .unlocked_ioctl = sgx_ioctl,
+#ifdef CONFIG_COMPAT
+   .compat_ioctl   = sgx_compat_ioctl,
+#endif
.mmap   = sgx_mmap,
.get_unmapped_area  = sgx_get_unmapped_area,
 };
diff --git a/arch/x86/kernel/cpu/sgx/driver.h b/arch/x86/kernel/cpu/sgx/driver.h
index f7ce40dedc91..e4063923115b 100644
--- a/arch/x86/kernel/cpu/sgx/driver.h
+++ b/arch/x86/kernel/cpu/sgx/driver.h
@@ -9,6 +9,7 @@
 #include 
 #include 
 #include 
+#include 
 #include "sgx.h"
 
 #define SGX_EINIT_SPIN_COUNT   20
diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c
new file mode 100644
index ..7981c411b05a
--- /dev/null
+++ b/arch/x86/kernel/cpu/sgx/ioctl.c
@@ -0,0 +1,226 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+// Copyright(c) 2016-19 Intel Corporation.
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 

[PATCH v35 10/24] mm: Add vm_ops->mprotect()

2020-07-06 Thread Jarkko Sakkinen
From: Sean Christopherson 

Add vm_ops->mprotect() for additional constraints for a VMA.

Intel Software Guard eXtensions (SGX) will use this callback to add two
constraints:

1. Verify that the address range does not have holes: each page address
   must be filled with an enclave page.
2. Verify that VMA permissions won't surpass the permissions of any enclave
   page within the address range. An enclave has cryptographically sealed
   permissions for each page address, which set the upper limit for possible
   VMA permissions. Not respecting this can cause #GP's to be emitted.

Cc: linux...@kvack.org
Cc: Andrew Morton 
Cc: Matthew Wilcox 
Acked-by: Jethro Beekman 
Signed-off-by: Sean Christopherson 
Signed-off-by: Jarkko Sakkinen 
---
 include/linux/mm.h |  2 ++
 mm/mprotect.c  | 13 ++---
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index dc7b87310c10..fc0e3ef28873 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -542,6 +542,8 @@ struct vm_operations_struct {
void (*close)(struct vm_area_struct * area);
int (*split)(struct vm_area_struct * area, unsigned long addr);
int (*mremap)(struct vm_area_struct * area);
+   int (*mprotect)(struct vm_area_struct *vma, unsigned long start,
+   unsigned long end, unsigned long prot);
vm_fault_t (*fault)(struct vm_fault *vmf);
vm_fault_t (*huge_fault)(struct vm_fault *vmf,
enum page_entry_size pe_size);
diff --git a/mm/mprotect.c b/mm/mprotect.c
index ce8b8a5eacbb..e23dfd8d18bc 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -603,13 +603,20 @@ static int do_mprotect_pkey(unsigned long start, size_t 
len,
goto out;
}
 
+   tmp = vma->vm_end;
+   if (tmp > end)
+   tmp = end;
+
error = security_file_mprotect(vma, reqprot, prot);
if (error)
goto out;
 
-   tmp = vma->vm_end;
-   if (tmp > end)
-   tmp = end;
+   if (vma->vm_ops && vma->vm_ops->mprotect) {
+   error = vma->vm_ops->mprotect(vma, nstart, tmp, prot);
+   if (error)
+   goto out;
+   }
+
error = mprotect_fixup(vma, &prev, nstart, tmp, newflags);
if (error)
goto out;
-- 
2.25.1



[PATCH v35 11/24] x86/sgx: Add SGX enclave driver

2020-07-06 Thread Jarkko Sakkinen
Intel Software Guard eXtensions (SGX) is a set of CPU instructions that
can be used by applications to set aside private regions of code and
data. The CPU prevents code outside such an SGX-hosted software entity
from accessing the memory inside it. We call these entities enclaves.

Add a driver that provides an ioctl API to construct and run enclaves.
Enclaves are constructed from pages residing in reserved physical memory
areas. The contents of these pages can only be accessed when they are
mapped as part of an enclave, by a hardware thread running inside the
enclave.

The starting state of an enclave consists of a fixed measured set of
pages that are copied to the EPC during the construction process by
using ENCLS leaf functions and the SGX Enclave Control Structure (SECS)
that defines the enclave properties.

Enclaves are constructed by using ENCLS leaf functions ECREATE, EADD and
EINIT. ECREATE initializes SECS, EADD copies pages from system memory to
the EPC and EINIT checks a given signed measurement and moves the enclave
into a state ready for execution.

An initialized enclave can only be accessed through special Thread Control
Structure (TCS) pages by using ENCLU (ring-3 only) leaf EENTER.  This leaf
function converts a thread into enclave mode and continues the execution in
the offset defined by the TCS provided to EENTER. An enclave is exited
through syscall, exception, interrupts or by explicitly calling another
ENCLU leaf EEXIT.

The mmap() permissions are capped by the contained enclave page
permissions. The mapped areas must also be opaque, i.e. each page address
must contain a page. This logic is implemented in sgx_encl_may_map().

Cc: linux-security-mod...@vger.kernel.org
Cc: linux...@kvack.org
Cc: Andrew Morton 
Cc: Matthew Wilcox 
Acked-by: Jethro Beekman 
Tested-by: Jethro Beekman 
Tested-by: Haitao Huang 
Tested-by: Chunyang Hui 
Tested-by: Jordan Hand 
Tested-by: Nathaniel McCallum 
Tested-by: Seth Moore 
Co-developed-by: Sean Christopherson 
Signed-off-by: Sean Christopherson 
Co-developed-by: Suresh Siddha 
Signed-off-by: Suresh Siddha 
Signed-off-by: Jarkko Sakkinen 
---
 arch/x86/kernel/cpu/sgx/Makefile |   2 +
 arch/x86/kernel/cpu/sgx/driver.c | 178 
 arch/x86/kernel/cpu/sgx/driver.h |  29 +++
 arch/x86/kernel/cpu/sgx/encl.c   | 335 +++
 arch/x86/kernel/cpu/sgx/encl.h   |  87 
 arch/x86/kernel/cpu/sgx/main.c   |  11 +
 6 files changed, 642 insertions(+)
 create mode 100644 arch/x86/kernel/cpu/sgx/driver.c
 create mode 100644 arch/x86/kernel/cpu/sgx/driver.h
 create mode 100644 arch/x86/kernel/cpu/sgx/encl.c
 create mode 100644 arch/x86/kernel/cpu/sgx/encl.h

diff --git a/arch/x86/kernel/cpu/sgx/Makefile b/arch/x86/kernel/cpu/sgx/Makefile
index 79510ce01b3b..3fc451120735 100644
--- a/arch/x86/kernel/cpu/sgx/Makefile
+++ b/arch/x86/kernel/cpu/sgx/Makefile
@@ -1,2 +1,4 @@
 obj-y += \
+   driver.o \
+   encl.o \
main.o
diff --git a/arch/x86/kernel/cpu/sgx/driver.c b/arch/x86/kernel/cpu/sgx/driver.c
new file mode 100644
index ..682ec78230ac
--- /dev/null
+++ b/arch/x86/kernel/cpu/sgx/driver.c
@@ -0,0 +1,178 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+// Copyright(c) 2016-18 Intel Corporation.
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include "driver.h"
+#include "encl.h"
+
+MODULE_DESCRIPTION("Intel SGX Enclave Driver");
+MODULE_AUTHOR("Jarkko Sakkinen ");
+MODULE_LICENSE("Dual BSD/GPL");
+
+u64 sgx_encl_size_max_32;
+u64 sgx_encl_size_max_64;
+u32 sgx_misc_reserved_mask;
+u64 sgx_attributes_reserved_mask;
+u64 sgx_xfrm_reserved_mask = ~0x3;
+u32 sgx_xsave_size_tbl[64];
+
+static int sgx_open(struct inode *inode, struct file *file)
+{
+   struct sgx_encl *encl;
+   int ret;
+
+   encl = kzalloc(sizeof(*encl), GFP_KERNEL);
+   if (!encl)
+   return -ENOMEM;
+
+   atomic_set(&encl->flags, 0);
+   kref_init(&encl->refcount);
+   INIT_RADIX_TREE(&encl->page_tree, GFP_KERNEL);
+   mutex_init(&encl->lock);
+   INIT_LIST_HEAD(&encl->mm_list);
+   spin_lock_init(&encl->mm_lock);
+
+   ret = init_srcu_struct(&encl->srcu);
+   if (ret) {
+   kfree(encl);
+   return ret;
+   }
+
+   file->private_data = encl;
+
+   return 0;
+}
+
+static int sgx_release(struct inode *inode, struct file *file)
+{
+   struct sgx_encl *encl = file->private_data;
+   struct sgx_encl_mm *encl_mm;
+
+   for ( ; ; )  {
+   spin_lock(&encl->mm_lock);
+
+   if (list_empty(&encl->mm_list)) {
+   encl_mm = NULL;
+   } else {
+   encl_mm = list_first_entry(&encl->mm_list,
+  struct sgx_encl_mm, list);
+   list_del_rcu(&encl_mm->list);
+   }
+
+   spin_unlock(&encl->mm_lock);
+
+   /* The list is empty, ready to go. */
+   if (!encl_mm)
+

[PATCH v35 08/24] x86/sgx: Initialize metadata for Enclave Page Cache (EPC) sections

2020-07-06 Thread Jarkko Sakkinen
From: Sean Christopherson 

Enumerate Enclave Page Cache (EPC) sections via CPUID and add the data
structures necessary to track EPC pages so that they can be easily borrowed
for different uses.

Embed section index to the first eight bits of the EPC page descriptor.
Existing client hardware supports only a single section, while upcoming
server hardware will support at most eight sections. Thus, eight bits
should be enough for long term needs.

Acked-by: Jethro Beekman 
Signed-off-by: Sean Christopherson 
Co-developed-by: Serge Ayoun 
Signed-off-by: Serge Ayoun 
Co-developed-by: Jarkko Sakkinen 
Signed-off-by: Jarkko Sakkinen 
---
 arch/x86/Kconfig |  17 +++
 arch/x86/kernel/cpu/Makefile |   1 +
 arch/x86/kernel/cpu/sgx/Makefile |   2 +
 arch/x86/kernel/cpu/sgx/main.c   | 216 +++
 arch/x86/kernel/cpu/sgx/sgx.h|  52 
 5 files changed, 288 insertions(+)
 create mode 100644 arch/x86/kernel/cpu/sgx/Makefile
 create mode 100644 arch/x86/kernel/cpu/sgx/main.c
 create mode 100644 arch/x86/kernel/cpu/sgx/sgx.h

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 883da0abf779..0dea7fdd7a00 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1926,6 +1926,23 @@ config X86_INTEL_TSX_MODE_AUTO
  side channel attacks- equals the tsx=auto command line parameter.
 endchoice
 
+config INTEL_SGX
+   bool "Intel SGX"
+   depends on X86_64 && CPU_SUP_INTEL
+   depends on CRYPTO=y
+   depends on CRYPTO_SHA256=y
+   select SRCU
+   select MMU_NOTIFIER
+   help
+ Intel(R) Software Guard eXtensions (SGX) is a set of CPU instructions
+ that can be used by applications to set aside private regions of code
+ and data, referred to as enclaves. An enclave's private memory can
+ only be accessed by code running within the enclave. Accesses from
+ outside the enclave, including other enclaves, are disallowed by
+ hardware.
+
+ If unsure, say N.
+
 config EFI
bool "EFI runtime service support"
depends on ACPI
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index dba6a83bc349..b00f801601f3 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -49,6 +49,7 @@ obj-$(CONFIG_X86_MCE) += mce/
 obj-$(CONFIG_MTRR) += mtrr/
 obj-$(CONFIG_MICROCODE)+= microcode/
 obj-$(CONFIG_X86_CPU_RESCTRL)  += resctrl/
+obj-$(CONFIG_INTEL_SGX)+= sgx/
 
 obj-$(CONFIG_X86_LOCAL_APIC)   += perfctr-watchdog.o
 
diff --git a/arch/x86/kernel/cpu/sgx/Makefile b/arch/x86/kernel/cpu/sgx/Makefile
new file mode 100644
index ..79510ce01b3b
--- /dev/null
+++ b/arch/x86/kernel/cpu/sgx/Makefile
@@ -0,0 +1,2 @@
+obj-y += \
+   main.o
diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
new file mode 100644
index ..c5831e3db14a
--- /dev/null
+++ b/arch/x86/kernel/cpu/sgx/main.c
@@ -0,0 +1,216 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+// Copyright(c) 2016-17 Intel Corporation.
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include "encls.h"
+
+struct sgx_epc_section sgx_epc_sections[SGX_MAX_EPC_SECTIONS];
+static int sgx_nr_epc_sections;
+static struct task_struct *ksgxswapd_tsk;
+
+static void sgx_sanitize_section(struct sgx_epc_section *section)
+{
+   struct sgx_epc_page *page;
+   LIST_HEAD(secs_list);
+   int ret;
+
+   while (!list_empty(&section->unsanitized_page_list)) {
+   if (kthread_should_stop())
+   return;
+
+   spin_lock(&section->lock);
+
+   page = list_first_entry(&section->unsanitized_page_list,
+   struct sgx_epc_page, list);
+
+   ret = __eremove(sgx_get_epc_addr(page));
+   if (!ret)
+   list_move(&page->list, &section->page_list);
+   else
+   list_move_tail(&page->list, &secs_list);
+
+   spin_unlock(&section->lock);
+
+   cond_resched();
+   }
+}
+
+static int ksgxswapd(void *p)
+{
+   int i;
+
+   set_freezable();
+
+   /*
+* Reset all pages to uninitialized state. Pages could still be in an
+* initialized state after kexec.
+*/
+   for (i = 0; i < sgx_nr_epc_sections; i++)
+   sgx_sanitize_section(&sgx_epc_sections[i]);
+
+   /*
+* 2nd round for the SECS pages as they cannot be removed when they
+* still hold child pages.
+*/
+   for (i = 0; i < sgx_nr_epc_sections; i++) {
+   sgx_sanitize_section(&sgx_epc_sections[i]);
+
+   /* Should never happen. */
+   if (!list_empty(&sgx_epc_sections[i].unsanitized_page_list))
+   WARN(1, "EPC section %d has unsanitized pages.\n", i);
+   }
+
+   return 0;
+}
+
+static bool __init sgx_page_reclaimer_init(void)
+{

  1   2   3   4   5   6   7   8   9   10   >