Re: [PATCH] mm/nvdimm: Use correct #defines instead of opencoding

2019-05-21 Thread Aneesh Kumar K.V

On 5/21/19 9:37 PM, Dan Williams wrote:

On Tue, May 21, 2019 at 2:51 AM Aneesh Kumar K.V
 wrote:







Something like the below (Not tested). I am not sure what we will init the 
page_size
for minor version < 3. This will mark the namespace disabled if the
PAGE_SIZE and sizeof(struct page) doesn't match with the values used
during namespace create.


Yes, this is on the right track.

I would special-case page_size == 0 as 4096 and page_struct_size == 0
as 64. If either of those is non-zero then the info-block version
needs to be revved and it needs to be crafted to make older kernels
fail to parse it.



page_size = SZ_4K implies we fail to enable namesepaces created on ppc64 
till now. We do work fine with page_size = PAGE_SIZE. It is a few error 
check and pfn_sb->npfns that got wrong values. We do reserve the correct 
space for the required pfns even when we recorded wrong pfn_sb->npfs.




There was an earlier attempt to implement minimum info-block versions here:

https://lore.kernel.org/lkml/155000670159.348031.17631616775326330606.st...@dwillia2-desk3.amr.corp.intel.com/

...but that was dropped in favor of the the "sub-section" patches.



Ok i will pick that too.

-aneesh



Re: [PATCH 10/10] docs: fix broken documentation links

2019-05-21 Thread Federico Vaga
On Monday, May 20, 2019 4:47:39 PM CEST Mauro Carvalho Chehab wrote:
> Mostly due to x86 and acpi conversion, several documentation
> links are still pointing to the old file. Fix them.
> 
> Signed-off-by: Mauro Carvalho Chehab 
> ---
>  Documentation/acpi/dsd/leds.txt  |  2 +-
>  Documentation/admin-guide/kernel-parameters.rst  |  6 +++---
>  Documentation/admin-guide/kernel-parameters.txt  | 16 
>  Documentation/admin-guide/ras.rst|  2 +-
>  .../devicetree/bindings/net/fsl-enetc.txt|  7 +++
>  .../bindings/pci/amlogic,meson-pcie.txt  |  2 +-
>  .../bindings/regulator/qcom,rpmh-regulator.txt   |  2 +-
>  Documentation/devicetree/booting-without-of.txt  |  2 +-
>  Documentation/driver-api/gpio/board.rst  |  2 +-
>  Documentation/driver-api/gpio/consumer.rst   |  2 +-
>  .../firmware-guide/acpi/enumeration.rst  |  2 +-
>  .../firmware-guide/acpi/method-tracing.rst   |  2 +-
>  Documentation/i2c/instantiating-devices  |  2 +-
>  Documentation/sysctl/kernel.txt  |  4 ++--
>  .../translations/it_IT/process/4.Coding.rst  |  2 +-
>  .../translations/it_IT/process/howto.rst |  2 +-
>  .../it_IT/process/stable-kernel-rules.rst|  4 ++--
>  .../translations/zh_CN/process/4.Coding.rst  |  2 +-
>  Documentation/x86/x86_64/5level-paging.rst   |  2 +-
>  Documentation/x86/x86_64/boot-options.rst|  4 ++--
>  .../x86/x86_64/fake-numa-for-cpusets.rst |  2 +-
>  MAINTAINERS  |  6 +++---
>  arch/arm/Kconfig |  2 +-
>  arch/arm64/kernel/kexec_image.c  |  2 +-
>  arch/powerpc/Kconfig |  2 +-
>  arch/x86/Kconfig | 16 
>  arch/x86/Kconfig.debug   |  2 +-
>  arch/x86/boot/header.S   |  2 +-
>  arch/x86/entry/entry_64.S|  2 +-
>  arch/x86/include/asm/bootparam_utils.h   |  2 +-
>  arch/x86/include/asm/page_64_types.h |  2 +-
>  arch/x86/include/asm/pgtable_64_types.h  |  2 +-
>  arch/x86/kernel/cpu/microcode/amd.c  |  2 +-
>  arch/x86/kernel/kexec-bzimage64.c|  2 +-
>  arch/x86/kernel/pci-dma.c|  2 +-
>  arch/x86/mm/tlb.c|  2 +-
>  arch/x86/platform/pvh/enlighten.c|  2 +-
>  drivers/acpi/Kconfig | 10 +-
>  drivers/net/ethernet/faraday/ftgmac100.c |  2 +-
>  .../fieldbus/Documentation/fieldbus_dev.txt  |  4 ++--
>  drivers/vhost/vhost.c|  2 +-
>  include/acpi/acpi_drivers.h  |  2 +-
>  include/linux/fs_context.h   |  2 +-
>  include/linux/lsm_hooks.h|  2 +-
>  mm/Kconfig   |  2 +-
>  security/Kconfig |  2 +-
>  tools/include/linux/err.h|  2 +-
>  tools/objtool/Documentation/stack-validation.txt |  4 ++--
>  tools/testing/selftests/x86/protection_keys.c|  2 +-
>  49 files changed, 78 insertions(+), 79 deletions(-)
> 
> diff --git a/Documentation/acpi/dsd/leds.txt
> b/Documentation/acpi/dsd/leds.txt index 81a63af42ed2..cc58b1a574c5 100644
> --- a/Documentation/acpi/dsd/leds.txt
> +++ b/Documentation/acpi/dsd/leds.txt
> @@ -96,4 +96,4 @@ where
> 
> http://www.uefi.org/sites/default/files/resources/_DSD-hierarchical-da
> ta-extension-UUID-v1.1.pdf>, referenced 2019-02-21.
> 
> -[7] Documentation/acpi/dsd/data-node-reference.txt
> +[7] Documentation/firmware-guide/acpi/dsd/data-node-references.rst
> diff --git a/Documentation/admin-guide/kernel-parameters.rst
> b/Documentation/admin-guide/kernel-parameters.rst index
> 0124980dca2d..8d3273e32eb1 100644
> --- a/Documentation/admin-guide/kernel-parameters.rst
> +++ b/Documentation/admin-guide/kernel-parameters.rst
> @@ -167,7 +167,7 @@ parameter is applicable::
>   X86-32  X86-32, aka i386 architecture is enabled.
>   X86-64  X86-64 architecture is enabled.
>   More X86-64 boot options can be found in
> - Documentation/x86/x86_64/boot-options.txt 
.
> + Documentation/x86/x86_64/boot-options.rst.
>   X86 Either 32-bit or 64-bit x86 (same as X86-32+X86-64)
>   X86_UV  SGI UV support is enabled.
>   XEN Xen support is enabled
> @@ -181,10 +181,10 @@ In addition, the following text indicates that the
> option:: Parameters denoted with BOOT are actually interpreted by the boot
> loader, and have no meaning to the kernel directly.
>  Do not modify the syntax of boot loader parameters without extreme
> -need or coordination with .
> +need or coordination with .
> 
>  There are also arch-specific kernel-parameters not documented here.
> -See for example .
> +See 

Re: [PATCH 2/2] powerpc/perf: Fix mmcra corruption by bhrb_filter

2019-05-21 Thread Madhavan Srinivasan



On 11/05/19 8:12 AM, Ravi Bangoria wrote:

Consider a scenario where user creates two events:

   1st event:
 attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
 attr.branch_sample_type = PERF_SAMPLE_BRANCH_ANY;
 fd = perf_event_open(attr, 0, 1, -1, 0);

   This sets cpuhw->bhrb_filter to 0 and returns valid fd.

   2nd event:
 attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
 attr.branch_sample_type = PERF_SAMPLE_BRANCH_CALL;
 fd = perf_event_open(attr, 0, 1, -1, 0);

   It overrides cpuhw->bhrb_filter to -1 and returns with error.

Now if power_pmu_enable() gets called by any path other than
power_pmu_add(), ppmu->config_bhrb(-1) will set mmcra to -1.


Reviewed-by: Madhavan Srinivasan 



Signed-off-by: Ravi Bangoria 
---
  arch/powerpc/perf/core-book3s.c | 6 --
  arch/powerpc/perf/power8-pmu.c  | 3 +++
  arch/powerpc/perf/power9-pmu.c  | 3 +++
  3 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index b0723002a396..8eb5dc5df62b 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -1846,6 +1846,7 @@ static int power_pmu_event_init(struct perf_event *event)
int n;
int err;
struct cpu_hw_events *cpuhw;
+   u64 bhrb_filter;

if (!ppmu)
return -ENOENT;
@@ -1951,13 +1952,14 @@ static int power_pmu_event_init(struct perf_event 
*event)
err = power_check_constraints(cpuhw, events, cflags, n + 1);

if (has_branch_stack(event)) {
-   cpuhw->bhrb_filter = ppmu->bhrb_filter_map(
+   bhrb_filter = ppmu->bhrb_filter_map(
event->attr.branch_sample_type);

-   if (cpuhw->bhrb_filter == -1) {
+   if (bhrb_filter == -1) {
put_cpu_var(cpu_hw_events);
return -EOPNOTSUPP;
}
+   cpuhw->bhrb_filter = bhrb_filter;
}

put_cpu_var(cpu_hw_events);
diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c
index d12a2db26353..d10feef93b6b 100644
--- a/arch/powerpc/perf/power8-pmu.c
+++ b/arch/powerpc/perf/power8-pmu.c
@@ -29,6 +29,7 @@ enum {
  #define   POWER8_MMCRA_IFM1   0x4000UL
  #define   POWER8_MMCRA_IFM2   0x8000UL
  #define   POWER8_MMCRA_IFM3   0xC000UL
+#definePOWER8_MMCRA_BHRB_MASK  0xC000UL

  /*
   * Raw event encoding for PowerISA v2.07 (Power8):
@@ -243,6 +244,8 @@ static u64 power8_bhrb_filter_map(u64 branch_sample_type)

  static void power8_config_bhrb(u64 pmu_bhrb_filter)
  {
+   pmu_bhrb_filter &= POWER8_MMCRA_BHRB_MASK;
+
/* Enable BHRB filter in PMU */
mtspr(SPRN_MMCRA, (mfspr(SPRN_MMCRA) | pmu_bhrb_filter));
  }
diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c
index 030544e35959..f3987915cadc 100644
--- a/arch/powerpc/perf/power9-pmu.c
+++ b/arch/powerpc/perf/power9-pmu.c
@@ -92,6 +92,7 @@ enum {
  #define POWER9_MMCRA_IFM1 0x4000UL
  #define POWER9_MMCRA_IFM2 0x8000UL
  #define POWER9_MMCRA_IFM3 0xC000UL
+#define POWER9_MMCRA_BHRB_MASK 0xC000UL

  /* Nasty Power9 specific hack */
  #define PVR_POWER9_CUMULUS0x2000
@@ -300,6 +301,8 @@ static u64 power9_bhrb_filter_map(u64 branch_sample_type)

  static void power9_config_bhrb(u64 pmu_bhrb_filter)
  {
+   pmu_bhrb_filter &= POWER9_MMCRA_BHRB_MASK;
+
/* Enable BHRB filter in PMU */
mtspr(SPRN_MMCRA, (mfspr(SPRN_MMCRA) | pmu_bhrb_filter));
  }




Re: [PATCH] misc: remove redundant 'default n' from Kconfig-s

2019-05-21 Thread Frederic Barrat




Le 20/05/2019 à 16:10, Bartlomiej Zolnierkiewicz a écrit :

'default n' is the default value for any bool or tristate Kconfig
setting so there is no need to write it explicitly.

Also since commit f467c5640c29 ("kconfig: only write '# CONFIG_FOO
is not set' for visible symbols") the Kconfig behavior is the same
regardless of 'default n' being present or not:

 ...
 One side effect of (and the main motivation for) this change is making
 the following two definitions behave exactly the same:
 
 config FOO

 bool
 
 config FOO

 bool
 default n
 
 With this change, neither of these will generate a

 '# CONFIG_FOO is not set' line (assuming FOO isn't selected/implied).
 That might make it clearer to people that a bare 'default n' is
 redundant.
 ...

Signed-off-by: Bartlomiej Zolnierkiewicz 
---


for cxl and ocxl:
Acked-by: Frederic Barrat 



  drivers/misc/Kconfig  |   10 --
  drivers/misc/altera-stapl/Kconfig |1 -
  drivers/misc/c2port/Kconfig   |2 --
  drivers/misc/cb710/Kconfig|1 -
  drivers/misc/cxl/Kconfig  |3 ---
  drivers/misc/echo/Kconfig |1 -
  drivers/misc/genwqe/Kconfig   |1 -
  drivers/misc/lis3lv02d/Kconfig|2 --
  drivers/misc/ocxl/Kconfig |1 -
  9 files changed, 22 deletions(-)

Index: b/drivers/misc/Kconfig
===
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -8,7 +8,6 @@ config SENSORS_LIS3LV02D
tristate
depends on INPUT
select INPUT_POLLDEV
-   default n
  
  config AD525X_DPOT

tristate "Analog Devices Digital Potentiometers"
@@ -61,7 +60,6 @@ config ATMEL_TCLIB
  
  config DUMMY_IRQ

tristate "Dummy IRQ handler"
-   default n
---help---
  This module accepts a single 'irq' parameter, which it should 
register for.
  The sole purpose of this module is to help with debugging of systems 
on
@@ -117,7 +115,6 @@ config PHANTOM
  config INTEL_MID_PTI
tristate "Parallel Trace Interface for MIPI P1149.7 cJTAG standard"
depends on PCI && TTY && (X86_INTEL_MID || COMPILE_TEST)
-   default n
help
  The PTI (Parallel Trace Interface) driver directs
  trace data routed from various parts in the system out
@@ -193,7 +190,6 @@ config ATMEL_SSC
  
  config ENCLOSURE_SERVICES

tristate "Enclosure Services"
-   default n
help
  Provides support for intelligent enclosures (bays which
  contain storage devices).  You also need either a host
@@ -217,7 +213,6 @@ config SGI_XP
  config CS5535_MFGPT
tristate "CS5535/CS5536 Geode Multi-Function General Purpose Timer (MFGPT) 
support"
depends on MFD_CS5535
-   default n
help
  This driver provides access to MFGPT functionality for other
  drivers that need timers.  MFGPTs are available in the CS5535 and
@@ -250,7 +245,6 @@ config CS5535_CLOCK_EVENT_SRC
  config HP_ILO
tristate "Channel interface driver for the HP iLO processor"
depends on PCI
-   default n
help
  The channel interface driver allows applications to communicate
  with iLO management processors present on HP ProLiant servers.
@@ -285,7 +279,6 @@ config QCOM_FASTRPC
  config SGI_GRU
tristate "SGI GRU driver"
depends on X86_UV && SMP
-   default n
select MMU_NOTIFIER
---help---
The GRU is a hardware resource located in the system chipset. The GRU
@@ -300,7 +293,6 @@ config SGI_GRU
  config SGI_GRU_DEBUG
bool  "SGI GRU driver debug"
depends on SGI_GRU
-   default n
---help---
This option enables additional debugging code for the SGI GRU driver.
If you are unsure, say N.
@@ -358,7 +350,6 @@ config SENSORS_BH1770
  config SENSORS_APDS990X
 tristate "APDS990X combined als and proximity sensors"
 depends on I2C
-default n
 ---help---
   Say Y here if you want to build a driver for Avago APDS990x
   combined ambient light and proximity sensor chip.
@@ -386,7 +377,6 @@ config DS1682
  config SPEAR13XX_PCIE_GADGET
bool "PCIe gadget support for SPEAr13XX platform"
depends on ARCH_SPEAR13XX && BROKEN
-   default n
help
 This option enables gadget support for PCIe controller. If
 board file defines any controller as PCIe endpoint then a sysfs
Index: b/drivers/misc/altera-stapl/Kconfig
===
--- a/drivers/misc/altera-stapl/Kconfig
+++ b/drivers/misc/altera-stapl/Kconfig
@@ -4,6 +4,5 @@ comment "Altera FPGA firmware download m
  config ALTERA_STAPL
tristate "Altera FPGA firmware download module"
depends on I2C
-   default n
help
  

Re: [RFC PATCH 02/12] powerpc: Add support for adding an ESM blob to the zImage wrapper

2019-05-21 Thread Paul Mackerras
On Tue, May 21, 2019 at 07:13:26AM +0200, Christoph Hellwig wrote:
> On Tue, May 21, 2019 at 01:49:02AM -0300, Thiago Jung Bauermann wrote:
> > From: Benjamin Herrenschmidt 
> > 
> > For secure VMs, the signing tool will create a ticket called the "ESM blob"
> > for the Enter Secure Mode ultravisor call with the signatures of the kernel
> > and initrd among other things.
> > 
> > This adds support to the wrapper script for adding that blob via the "-e"
> > option to the zImage.pseries.
> > 
> > It also adds code to the zImage wrapper itself to retrieve and if necessary
> > relocate the blob, and pass its address to Linux via the device-tree, to be
> > later consumed by prom_init.
> 
> Where does the "BLOB" come from?  How is it licensed and how can we
> satisfy the GPL with it?

The blob is data, not code, and it will be created by a tool that will
be open source.  My understanding is that most of it will be encrypted
with a session key that is encrypted with the secret key of the
ultravisor.  Ram Pai's KVM Forum talk last year explained how this
works.

Paul.


[BISECTED] kexec regression on PowerBook G4

2019-05-21 Thread Aaro Koskinen
Hi,

I was trying to upgrade from v5.0 -> v5.1 on PowerBook G4, but when trying
to kexec a kernel the system gets stuck (no errors seen on the console).

Bisected to: 93c4a162b014 ("powerpc/6xx: Store PGDIR physical address
in a SPRG"). This commit doesn't revert cleanly anymore but I tested
that the one before works OK.

With current Linus HEAD (9c7db5004280), it gets a bit further but still
doesn't work: now I get an error on the console after kexec "Starting
new kernel! ... Bye!":

kernel tried to execute exec-protected page (...) - exploit attempt?

A.


Applied "ASoC: fsl_asrc: Unify the supported input and output rate" to the asoc tree

2019-05-21 Thread Mark Brown
The patch

   ASoC: fsl_asrc: Unify the supported input and output rate

has been applied to the asoc tree at

   https://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound.git for-5.3

All being well this means that it will be integrated into the linux-next
tree (usually sometime in the next 24 hours) and sent to Linus during
the next merge window (or sooner if it is a bug fix), however if
problems are discovered then the patch may be dropped or reverted.  

You may get further e-mails resulting from automated or manual testing
and review of the tree, please engage with people reporting problems and
send followup patches addressing any issues that are reported if needed.

If any updates are required or you are submitting further changes they
should be sent as incremental updates against current git, existing
patches will not be replaced.

Please add any relevant lists and maintainers to the CCs when replying
to this mail.

Thanks,
Mark

>From d281bf5d924a0284f0dac1f471e1d8328b3a92ca Mon Sep 17 00:00:00 2001
From: "S.j. Wang" 
Date: Wed, 15 May 2019 06:42:26 +
Subject: [PATCH] ASoC: fsl_asrc: Unify the supported input and output rate

Unify the supported input and output rate, add the
12kHz/24kHz/128kHz to the support list

Signed-off-by: Shengjiu Wang 
Acked-by: Nicolin Chen 
Signed-off-by: Mark Brown 
---
 sound/soc/fsl/fsl_asrc.c | 32 +++-
 1 file changed, 19 insertions(+), 13 deletions(-)

diff --git a/sound/soc/fsl/fsl_asrc.c b/sound/soc/fsl/fsl_asrc.c
index a8d6710f2541..cbbf6257f08a 100644
--- a/sound/soc/fsl/fsl_asrc.c
+++ b/sound/soc/fsl/fsl_asrc.c
@@ -27,13 +27,14 @@
dev_dbg(_priv->pdev->dev, "Pair %c: " fmt, 'A' + index, 
##__VA_ARGS__)
 
 /* Corresponding to process_option */
-static int supported_input_rate[] = {
-   5512, 8000, 11025, 16000, 22050, 32000, 44100, 48000, 64000, 88200,
-   96000, 176400, 192000,
+static unsigned int supported_asrc_rate[] = {
+   5512, 8000, 11025, 12000, 16000, 22050, 24000, 32000, 44100, 48000,
+   64000, 88200, 96000, 128000, 176400, 192000,
 };
 
-static int supported_asrc_rate[] = {
-   8000, 11025, 16000, 22050, 32000, 44100, 48000, 64000, 88200, 96000, 
176400, 192000,
+static struct snd_pcm_hw_constraint_list fsl_asrc_rate_constraints = {
+   .count = ARRAY_SIZE(supported_asrc_rate),
+   .list = supported_asrc_rate,
 };
 
 /**
@@ -293,11 +294,11 @@ static int fsl_asrc_config_pair(struct fsl_asrc_pair 
*pair)
ideal = config->inclk == INCLK_NONE;
 
/* Validate input and output sample rates */
-   for (in = 0; in < ARRAY_SIZE(supported_input_rate); in++)
-   if (inrate == supported_input_rate[in])
+   for (in = 0; in < ARRAY_SIZE(supported_asrc_rate); in++)
+   if (inrate == supported_asrc_rate[in])
break;
 
-   if (in == ARRAY_SIZE(supported_input_rate)) {
+   if (in == ARRAY_SIZE(supported_asrc_rate)) {
pair_err("unsupported input sample rate: %dHz\n", inrate);
return -EINVAL;
}
@@ -311,7 +312,7 @@ static int fsl_asrc_config_pair(struct fsl_asrc_pair *pair)
return -EINVAL;
}
 
-   if ((outrate >= 8000 && outrate <= 3) &&
+   if ((outrate >= 5512 && outrate <= 3) &&
(outrate > 24 * inrate || inrate > 8 * outrate)) {
pair_err("exceed supported ratio range [1/24, 8] for \
inrate/outrate: %d/%d\n", inrate, outrate);
@@ -486,7 +487,9 @@ static int fsl_asrc_dai_startup(struct snd_pcm_substream 
*substream,
snd_pcm_hw_constraint_step(substream->runtime, 0,
   SNDRV_PCM_HW_PARAM_CHANNELS, 2);
 
-   return 0;
+
+   return snd_pcm_hw_constraint_list(substream->runtime, 0,
+   SNDRV_PCM_HW_PARAM_RATE, _asrc_rate_constraints);
 }
 
 static int fsl_asrc_dai_hw_params(struct snd_pcm_substream *substream,
@@ -599,7 +602,6 @@ static int fsl_asrc_dai_probe(struct snd_soc_dai *dai)
return 0;
 }
 
-#define FSL_ASRC_RATES  SNDRV_PCM_RATE_8000_192000
 #define FSL_ASRC_FORMATS   (SNDRV_PCM_FMTBIT_S24_LE | \
 SNDRV_PCM_FMTBIT_S16_LE | \
 SNDRV_PCM_FMTBIT_S20_3LE)
@@ -610,14 +612,18 @@ static struct snd_soc_dai_driver fsl_asrc_dai = {
.stream_name = "ASRC-Playback",
.channels_min = 1,
.channels_max = 10,
-   .rates = FSL_ASRC_RATES,
+   .rate_min = 5512,
+   .rate_max = 192000,
+   .rates = SNDRV_PCM_RATE_KNOT,
.formats = FSL_ASRC_FORMATS,
},
.capture = {
.stream_name = "ASRC-Capture",
.channels_min = 1,
.channels_max = 10,
-   .rates = FSL_ASRC_RATES,
+   .rate_min = 5512,
+   .rate_max = 192000,
+   .rates 

Re: [PATCH 1/2] open: add close_range()

2019-05-21 Thread Linus Torvalds
On Tue, May 21, 2019 at 9:41 AM Christian Brauner  wrote:
>
> Yeah, you mentioned this before. I do like being able to specify an
> upper bound to have the ability to place fds strategically after said
> upper bound.

I suspect that's the case.

And if somebody really wants to just close everything and uses a large
upper bound, we can - if we really want to - just compare the upper
bound to the file table size, and do an optimized case for that. We do
that upper bound comparison anyway to limit the size of the walk, so
*if* it's a big deal, that case could then do the whole "shrink
fdtable" case too.

But I don't believe it's worth optimizing for unless somebody really
has a load where that is shown to be a big deal.   Just do the silly
and simple loop, and add a cond_resched() in the loop, like
close_files() does for the "we have a _lot_ of files open" case.

   Linus


Re: [PATCH 1/2] open: add close_range()

2019-05-21 Thread Matthew Wilcox
On Tue, May 21, 2019 at 08:20:09PM +0100, Al Viro wrote:
> On Tue, May 21, 2019 at 05:30:27PM +0100, David Howells wrote:
> 
> > If we can live with close_from(int first) rather than close_range(), then 
> > this
> > can perhaps be done a lot more efficiently by:
> > 
> > new = alloc_fdtable(first);
> > spin_lock(>file_lock);
> > old = files_fdtable(files);
> > copy_fds(new, old, 0, first - 1);
> > rcu_assign_pointer(files->fdt, new);
> > spin_unlock(>file_lock);
> > clear_fds(old, 0, first - 1);
> > close_fdt_from(old, first);
> > kfree_rcu(old);
> 
> I really hate to think how that would interact with POSIX locks...

POSIX locks store current->files in fl_owner; David's resizing the
underlying files->fdt, just like growing from 64 to 256 fds.


Re: [PATCH 1/2] open: add close_range()

2019-05-21 Thread Al Viro
On Tue, May 21, 2019 at 05:30:27PM +0100, David Howells wrote:

> If we can live with close_from(int first) rather than close_range(), then this
> can perhaps be done a lot more efficiently by:
> 
>   new = alloc_fdtable(first);
>   spin_lock(>file_lock);
>   old = files_fdtable(files);
>   copy_fds(new, old, 0, first - 1);
>   rcu_assign_pointer(files->fdt, new);
>   spin_unlock(>file_lock);
>   clear_fds(old, 0, first - 1);
>   close_fdt_from(old, first);
>   kfree_rcu(old);

I really hate to think how that would interact with POSIX locks...


Re: [PATCH v1 02/15] crypto: talitos - rename alternative AEAD algos.

2019-05-21 Thread Joe Perches
On Tue, 2019-05-21 at 17:54 +, Christophe Leroy wrote:
> Hi Joe & Andy
[]
> diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
[]
> > @@ -2356,7 +2356,7 @@ static struct talitos_alg_template driver_algs[] = {
> > .base = {
> > .cra_name = "authenc(hmac(sha1),cbc(aes))",
> > .cra_driver_name = "authenc-hmac-sha1-"
> > -  "cbc-aes-talitos",
> > +  "cbc-aes-talitos-hsna",
> 
> checkpatch reports the following warning on the above:
> 
> WARNING: quoted string split across lines
> #27: FILE: drivers/crypto/talitos.c:2359:
>   .cra_driver_name = "authenc-hmac-sha1-"
> +"cbc-aes-talitos-hsna",
> 
> 
> But when I fixes the patch as follows, I get another warning:
> 
> @@ -2355,8 +2355,7 @@ static struct talitos_alg_template driver_algs[] = {
>   .alg.aead = {
>   .base = {
>   .cra_name = "authenc(hmac(sha1),cbc(aes))",
> - .cra_driver_name = "authenc-hmac-sha1-"
> -"cbc-aes-talitos",
> + .cra_driver_name = 
> "authenc-hmac-sha1-cbc-aes-talitos-hsna",
>   .cra_blocksize = AES_BLOCK_SIZE,
>   .cra_flags = CRYPTO_ALG_ASYNC,
>   },
> 
> 
> 
> WARNING: line over 80 characters
> #28: FILE: drivers/crypto/talitos.c:2358:
> + .cra_driver_name = 
> "authenc-hmac-sha1-cbc-aes-talitos-hsna",
> 
> 
> So, how should this be fixed ?

For at least now, by ignoring this checkpatch message.

It's a brainless script.  You are not brainless.



Re: [PATCH v1 02/15] crypto: talitos - rename alternative AEAD algos.

2019-05-21 Thread Christophe Leroy

Hi Joe & Andy

On 05/21/2019 01:34 PM, Christophe Leroy wrote:

The talitos driver has two ways to perform AEAD depending on the
HW capability. Some HW support both. It is needed to give them
different names to distingish which one it is for instance when
a test fails.

Signed-off-by: Christophe Leroy 
Fixes: 7405c8d7ff97 ("crypto: talitos - templates for AEAD using 
HMAC_SNOOP_NO_AFEU")
Cc: sta...@vger.kernel.org
---
  drivers/crypto/talitos.c | 16 
  1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index f443cbe7da80..6f8bc6467706 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -2356,7 +2356,7 @@ static struct talitos_alg_template driver_algs[] = {
.base = {
.cra_name = "authenc(hmac(sha1),cbc(aes))",
.cra_driver_name = "authenc-hmac-sha1-"
-  "cbc-aes-talitos",
+  "cbc-aes-talitos-hsna",


checkpatch reports the following warning on the above:

WARNING: quoted string split across lines
#27: FILE: drivers/crypto/talitos.c:2359:
.cra_driver_name = "authenc-hmac-sha1-"
+  "cbc-aes-talitos-hsna",


But when I fixes the patch as follows, I get another warning:

@@ -2355,8 +2355,7 @@ static struct talitos_alg_template driver_algs[] = {
.alg.aead = {
.base = {
.cra_name = "authenc(hmac(sha1),cbc(aes))",
-   .cra_driver_name = "authenc-hmac-sha1-"
-  "cbc-aes-talitos",
+   .cra_driver_name = 
"authenc-hmac-sha1-cbc-aes-talitos-hsna",
.cra_blocksize = AES_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_ASYNC,
},



WARNING: line over 80 characters
#28: FILE: drivers/crypto/talitos.c:2358:
+   .cra_driver_name = 
"authenc-hmac-sha1-cbc-aes-talitos-hsna",


So, how should this be fixed ?

Thanks
Christophe


.cra_blocksize = AES_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_ASYNC,
},
@@ -2401,7 +2401,7 @@ static struct talitos_alg_template driver_algs[] = {
.cra_name = "authenc(hmac(sha1),"
"cbc(des3_ede))",
.cra_driver_name = "authenc-hmac-sha1-"
-  "cbc-3des-talitos",
+  "cbc-3des-talitos-hsna",
.cra_blocksize = DES3_EDE_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_ASYNC,
},
@@ -2444,7 +2444,7 @@ static struct talitos_alg_template driver_algs[] = {
.base = {
.cra_name = "authenc(hmac(sha224),cbc(aes))",
.cra_driver_name = "authenc-hmac-sha224-"
-  "cbc-aes-talitos",
+  "cbc-aes-talitos-hsna",
.cra_blocksize = AES_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_ASYNC,
},
@@ -2489,7 +2489,7 @@ static struct talitos_alg_template driver_algs[] = {
.cra_name = "authenc(hmac(sha224),"
"cbc(des3_ede))",
.cra_driver_name = "authenc-hmac-sha224-"
-  "cbc-3des-talitos",
+  "cbc-3des-talitos-hsna",
.cra_blocksize = DES3_EDE_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_ASYNC,
},
@@ -2532,7 +2532,7 @@ static struct talitos_alg_template driver_algs[] = {
.base = {
.cra_name = "authenc(hmac(sha256),cbc(aes))",
.cra_driver_name = "authenc-hmac-sha256-"
-  "cbc-aes-talitos",
+  "cbc-aes-talitos-hsna",
.cra_blocksize = AES_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_ASYNC,
},
@@ -2577,7 +2577,7 @@ static struct talitos_alg_template driver_algs[] = {
.cra_name = "authenc(hmac(sha256),"
"cbc(des3_ede))",
.cra_driver_name = 

Re: [PATCH v2 2/5] powerpc: Fix vDSO clock_getres()

2019-05-21 Thread Christophe Leroy




Le 21/05/2019 à 18:08, Vincenzo Frascino a écrit :

Hi Christophe,

I did not see this patch in 5.2-rc1 and I was wondering if there is anything I
can do to help with upstraming it.


As far as I can see you series still has status 'new' in patchwork so I 
guess Michael didn't have time to look at it 
(https://patchwork.ozlabs.org/patch/1086410/)




Please let me know.



You series involves several arches, how do you plan to handle it ? Do 
you expect each arch maintainer to take each patch independentely, or do 
you expect acks from arch maintainers in order to merge the entire 
series in a given tree (which one ?)


Christophe




Thanks,
Vincenzo

On 23/04/2019 17:33, Christophe Leroy wrote:



Le 16/04/2019 à 18:14, Vincenzo Frascino a écrit :

clock_getres in the vDSO library has to preserve the same behaviour
of posix_get_hrtimer_res().

In particular, posix_get_hrtimer_res() does:
  sec = 0;
  ns = hrtimer_resolution;
and hrtimer_resolution depends on the enablement of the high
resolution timers that can happen either at compile or at run time.

Fix the powerpc vdso implementation of clock_getres keeping a copy of
hrtimer_resolution in vdso data and using that directly.

Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Michael Ellerman 
Signed-off-by: Vincenzo Frascino 


Reviewed-by: Christophe Leroy 


---
   arch/powerpc/include/asm/vdso_datapage.h  | 2 ++
   arch/powerpc/kernel/asm-offsets.c | 2 +-
   arch/powerpc/kernel/time.c| 1 +
   arch/powerpc/kernel/vdso32/gettimeofday.S | 7 +--
   arch/powerpc/kernel/vdso64/gettimeofday.S | 7 +--
   5 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/vdso_datapage.h 
b/arch/powerpc/include/asm/vdso_datapage.h
index bbc06bd72b1f..4333b9a473dc 100644
--- a/arch/powerpc/include/asm/vdso_datapage.h
+++ b/arch/powerpc/include/asm/vdso_datapage.h
@@ -86,6 +86,7 @@ struct vdso_data {
__s32 wtom_clock_nsec;  /* Wall to monotonic clock nsec 
*/
__s64 wtom_clock_sec;   /* Wall to monotonic clock sec 
*/
struct timespec stamp_xtime;/* xtime as at tb_orig_stamp */
+   __u32 hrtimer_res;  /* hrtimer resolution */
__u32 syscall_map_64[SYSCALL_MAP_SIZE]; /* map of syscalls  */
__u32 syscall_map_32[SYSCALL_MAP_SIZE]; /* map of syscalls */
   };
@@ -107,6 +108,7 @@ struct vdso_data {
__s32 wtom_clock_nsec;
struct timespec stamp_xtime;/* xtime as at tb_orig_stamp */
__u32 stamp_sec_fraction;   /* fractional seconds of stamp_xtime */
+   __u32 hrtimer_res;  /* hrtimer resolution */
__u32 syscall_map_32[SYSCALL_MAP_SIZE]; /* map of syscalls */
__u32 dcache_block_size;/* L1 d-cache block size */
__u32 icache_block_size;/* L1 i-cache block size */
diff --git a/arch/powerpc/kernel/asm-offsets.c 
b/arch/powerpc/kernel/asm-offsets.c
index 86a61e5f8285..52e4b98a8492 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -383,6 +383,7 @@ int main(void)
OFFSET(WTOM_CLOCK_NSEC, vdso_data, wtom_clock_nsec);
OFFSET(STAMP_XTIME, vdso_data, stamp_xtime);
OFFSET(STAMP_SEC_FRAC, vdso_data, stamp_sec_fraction);
+   OFFSET(CLOCK_REALTIME_RES, vdso_data, hrtimer_res);
OFFSET(CFG_ICACHE_BLOCKSZ, vdso_data, icache_block_size);
OFFSET(CFG_DCACHE_BLOCKSZ, vdso_data, dcache_block_size);
OFFSET(CFG_ICACHE_LOGBLOCKSZ, vdso_data, icache_log_block_size);
@@ -413,7 +414,6 @@ int main(void)
DEFINE(CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE);
DEFINE(CLOCK_MONOTONIC_COARSE, CLOCK_MONOTONIC_COARSE);
DEFINE(NSEC_PER_SEC, NSEC_PER_SEC);
-   DEFINE(CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC);
   
   #ifdef CONFIG_BUG

DEFINE(BUG_ENTRY_SIZE, sizeof(struct bug_entry));
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index bc0503ef9c9c..62c04a6746d8 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -955,6 +955,7 @@ void update_vsyscall(struct timekeeper *tk)
vdso_data->wtom_clock_nsec = tk->wall_to_monotonic.tv_nsec;
vdso_data->stamp_xtime = xt;
vdso_data->stamp_sec_fraction = frac_sec;
+   vdso_data->hrtimer_res = hrtimer_resolution;
smp_wmb();
++(vdso_data->tb_update_count);
   }
diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S 
b/arch/powerpc/kernel/vdso32/gettimeofday.S
index afd516b572f8..2b5f9e83c610 100644
--- a/arch/powerpc/kernel/vdso32/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso32/gettimeofday.S
@@ -160,12 +160,15 @@ V_FUNCTION_BEGIN(__kernel_clock_getres)
crorcr0*4+eq,cr0*4+eq,cr1*4+eq
bne cr0,99f
   
+	mflr	r12

+  .cfi_register lr,r12
+   bl  __get_datapage@local
+   lwz r5,CLOCK_REALTIME_RES(r3)
+   mtlrr12
li  r3,0
cmpli   

Re: [PATCH v2 2/5] powerpc: Fix vDSO clock_getres()

2019-05-21 Thread Christophe Leroy




Le 21/05/2019 à 18:46, Christophe Leroy a écrit :



Le 21/05/2019 à 18:08, Vincenzo Frascino a écrit :

Hi Christophe,

I did not see this patch in 5.2-rc1 and I was wondering if there is 
anything I

can do to help with upstraming it.


As far as I can see you series still has status 'new' in patchwork so I 
guess Michael didn't have time to look at it 
(https://patchwork.ozlabs.org/patch/1086410/)


Maybe you should also provide a Fixes: tag. What about:

Fixes: a7f290dad32e ("[PATCH] powerpc: Merge vdso's and add vdso support 
to 32 bits kernel")

Cc: sta...@vger.kernel.org

Christophe





Please let me know.



You series involves several arches, how do you plan to handle it ? Do 
you expect each arch maintainer to take each patch independentely, or do 
you expect acks from arch maintainers in order to merge the entire 
series in a given tree (which one ?)


Christophe




Thanks,
Vincenzo

On 23/04/2019 17:33, Christophe Leroy wrote:



Le 16/04/2019 à 18:14, Vincenzo Frascino a écrit :

clock_getres in the vDSO library has to preserve the same behaviour
of posix_get_hrtimer_res().

In particular, posix_get_hrtimer_res() does:
  sec = 0;
  ns = hrtimer_resolution;
and hrtimer_resolution depends on the enablement of the high
resolution timers that can happen either at compile or at run time.

Fix the powerpc vdso implementation of clock_getres keeping a copy of
hrtimer_resolution in vdso data and using that directly.

Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Michael Ellerman 
Signed-off-by: Vincenzo Frascino 


Reviewed-by: Christophe Leroy 


---
   arch/powerpc/include/asm/vdso_datapage.h  | 2 ++
   arch/powerpc/kernel/asm-offsets.c | 2 +-
   arch/powerpc/kernel/time.c    | 1 +
   arch/powerpc/kernel/vdso32/gettimeofday.S | 7 +--
   arch/powerpc/kernel/vdso64/gettimeofday.S | 7 +--
   5 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/vdso_datapage.h 
b/arch/powerpc/include/asm/vdso_datapage.h

index bbc06bd72b1f..4333b9a473dc 100644
--- a/arch/powerpc/include/asm/vdso_datapage.h
+++ b/arch/powerpc/include/asm/vdso_datapage.h
@@ -86,6 +86,7 @@ struct vdso_data {
   __s32 wtom_clock_nsec;    /* Wall to monotonic clock 
nsec */
   __s64 wtom_clock_sec;    /* Wall to monotonic clock 
sec */
   struct timespec stamp_xtime;    /* xtime as at 
tb_orig_stamp */

+    __u32 hrtimer_res;    /* hrtimer resolution */
  __u32 syscall_map_64[SYSCALL_MAP_SIZE]; /* map of 
syscalls  */

  __u32 syscall_map_32[SYSCALL_MAP_SIZE]; /* map of syscalls */
   };
@@ -107,6 +108,7 @@ struct vdso_data {
   __s32 wtom_clock_nsec;
   struct timespec stamp_xtime;    /* xtime as at tb_orig_stamp */
   __u32 stamp_sec_fraction;    /* fractional seconds of 
stamp_xtime */

+    __u32 hrtimer_res;    /* hrtimer resolution */
  __u32 syscall_map_32[SYSCALL_MAP_SIZE]; /* map of syscalls */
   __u32 dcache_block_size;    /* L1 d-cache block size */
   __u32 icache_block_size;    /* L1 i-cache block size */
diff --git a/arch/powerpc/kernel/asm-offsets.c 
b/arch/powerpc/kernel/asm-offsets.c

index 86a61e5f8285..52e4b98a8492 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -383,6 +383,7 @@ int main(void)
   OFFSET(WTOM_CLOCK_NSEC, vdso_data, wtom_clock_nsec);
   OFFSET(STAMP_XTIME, vdso_data, stamp_xtime);
   OFFSET(STAMP_SEC_FRAC, vdso_data, stamp_sec_fraction);
+    OFFSET(CLOCK_REALTIME_RES, vdso_data, hrtimer_res);
   OFFSET(CFG_ICACHE_BLOCKSZ, vdso_data, icache_block_size);
   OFFSET(CFG_DCACHE_BLOCKSZ, vdso_data, dcache_block_size);
   OFFSET(CFG_ICACHE_LOGBLOCKSZ, vdso_data, icache_log_block_size);
@@ -413,7 +414,6 @@ int main(void)
   DEFINE(CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE);
   DEFINE(CLOCK_MONOTONIC_COARSE, CLOCK_MONOTONIC_COARSE);
   DEFINE(NSEC_PER_SEC, NSEC_PER_SEC);
-    DEFINE(CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC);
   #ifdef CONFIG_BUG
   DEFINE(BUG_ENTRY_SIZE, sizeof(struct bug_entry));
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index bc0503ef9c9c..62c04a6746d8 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -955,6 +955,7 @@ void update_vsyscall(struct timekeeper *tk)
   vdso_data->wtom_clock_nsec = tk->wall_to_monotonic.tv_nsec;
   vdso_data->stamp_xtime = xt;
   vdso_data->stamp_sec_fraction = frac_sec;
+    vdso_data->hrtimer_res = hrtimer_resolution;
   smp_wmb();
   ++(vdso_data->tb_update_count);
   }
diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S 
b/arch/powerpc/kernel/vdso32/gettimeofday.S

index afd516b572f8..2b5f9e83c610 100644
--- a/arch/powerpc/kernel/vdso32/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso32/gettimeofday.S
@@ -160,12 +160,15 @@ V_FUNCTION_BEGIN(__kernel_clock_getres)
   cror    cr0*4+eq,cr0*4+eq,cr1*4+eq
   bne    cr0,99f
+    

Re: [PATCH 1/2] open: add close_range()

2019-05-21 Thread Christian Brauner
On Tue, May 21, 2019 at 04:00:06PM +0100, Al Viro wrote:
> On Tue, May 21, 2019 at 01:34:47PM +0200, Christian Brauner wrote:
> 
> > This adds the close_range() syscall. It allows to efficiently close a range
> > of file descriptors up to all file descriptors of a calling task.
> > 
> > The syscall came up in a recent discussion around the new mount API and
> > making new file descriptor types cloexec by default. During this
> > discussion, Al suggested the close_range() syscall (cf. [1]). Note, a
> > syscall in this manner has been requested by various people over time.
> > 
> > First, it helps to close all file descriptors of an exec()ing task. This
> > can be done safely via (quoting Al's example from [1] verbatim):
> > 
> > /* that exec is sensitive */
> > unshare(CLONE_FILES);
> > /* we don't want anything past stderr here */
> > close_range(3, ~0U);
> > execve();
> > 
> > The code snippet above is one way of working around the problem that file
> > descriptors are not cloexec by default. This is aggravated by the fact that
> > we can't just switch them over without massively regressing userspace. For
> > a whole class of programs having an in-kernel method of closing all file
> > descriptors is very helpful (e.g. demons, service managers, programming
> > language standard libraries, container managers etc.).
> > (Please note, unshare(CLONE_FILES) should only be needed if the calling
> >  task is multi-threaded and shares the file descriptor table with another
> >  thread in which case two threads could race with one thread allocating
> >  file descriptors and the other one closing them via close_range(). For the
> >  general case close_range() before the execve() is sufficient.)
> > 
> > Second, it allows userspace to avoid implementing closing all file
> > descriptors by parsing through /proc//fd/* and calling close() on each
> > file descriptor. From looking at various large(ish) userspace code bases
> > this or similar patterns are very common in:
> > - service managers (cf. [4])
> > - libcs (cf. [6])
> > - container runtimes (cf. [5])
> > - programming language runtimes/standard libraries
> >   - Python (cf. [2])
> >   - Rust (cf. [7], [8])
> > As Dmitry pointed out there's even a long-standing glibc bug about missing
> > kernel support for this task (cf. [3]).
> > In addition, the syscall will also work for tasks that do not have procfs
> > mounted and on kernels that do not have procfs support compiled in. In such
> > situations the only way to make sure that all file descriptors are closed
> > is to call close() on each file descriptor up to UINT_MAX or RLIMIT_NOFILE,
> > OPEN_MAX trickery (cf. comment [8] on Rust).
> > 
> > The performance is striking. For good measure, comparing the following
> > simple close_all_fds() userspace implementation that is essentially just
> > glibc's version in [6]:
> > 
> > static int close_all_fds(void)
> > {
> > DIR *dir;
> > struct dirent *direntp;
> > 
> > dir = opendir("/proc/self/fd");
> > if (!dir)
> > return -1;
> > 
> > while ((direntp = readdir(dir))) {
> > int fd;
> > if (strcmp(direntp->d_name, ".") == 0)
> > continue;
> > if (strcmp(direntp->d_name, "..") == 0)
> > continue;
> > fd = atoi(direntp->d_name);
> > if (fd == 0 || fd == 1 || fd == 2)
> > continue;
> > close(fd);
> > }
> > 
> > closedir(dir); /* cannot fail */
> > return 0;
> > }
> > 
> > to close_range() yields:
> > 1. closing 4 open files:
> >- close_all_fds(): ~280 us
> >- close_range():~24 us
> > 
> > 2. closing 1000 open files:
> >- close_all_fds(): ~5000 us
> >- close_range():   ~800 us
> > 
> > close_range() is designed to allow for some flexibility. Specifically, it
> > does not simply always close all open file descriptors of a task. Instead,
> > callers can specify an upper bound.
> > This is e.g. useful for scenarios where specific file descriptors are
> > created with well-known numbers that are supposed to be excluded from
> > getting closed.
> > For extra paranoia close_range() comes with a flags argument. This can e.g.
> > be used to implement extension. Once can imagine userspace wanting to stop
> > at the first error instead of ignoring errors under certain circumstances.
> > There might be other valid ideas in the future. In any case, a flag
> > argument doesn't hurt and keeps us on the safe side.
> > 
> > >From an implementation side this is kept rather dumb. It saw some input
> > from David and Jann but all nonsense is obviously my own!
> > - Errors to close file descriptors are currently ignored. (Could be changed
> >   by setting a flag in the future if needed.)
> > - __close_range() is a rather simplistic wrapper around __close_fd().
> >   My reasoning behind 

Re: [PATCH 1/2] open: add close_range()

2019-05-21 Thread Christian Brauner
On Tue, May 21, 2019 at 05:30:27PM +0100, David Howells wrote:
> Al Viro  wrote:
> 
> > Umm...  That's going to be very painful if you dup2() something to MAX_INT 
> > and
> > then run that; roughly 2G iterations of bouncing ->file_lock up and down,
> > without anything that would yield CPU in process.
> > 
> > If anything, I would suggest something like
> > 
> > fd = *start_fd;
> > grab the lock
> > fdt = files_fdtable(files);
> > more:
> > look for the next eviction candidate in ->open_fds, starting at fd
> > if there's none up to max_fd
> > drop the lock
> > return NULL
> > *start_fd = fd + 1;
> > if the fscker is really opened and not just reserved
> > rcu_assign_pointer(fdt->fd[fd], NULL);
> > __put_unused_fd(files, fd);
> > drop the lock
> > return the file we'd got
> > if (unlikely(need_resched()))
> > drop lock
> > cond_resched();
> > grab lock
> > fdt = files_fdtable(files);
> > goto more;
> > 
> > with the main loop being basically
> > while ((file = pick_next(files, _fd, max_fd)) != NULL)
> > filp_close(file, files);
> 
> If we can live with close_from(int first) rather than close_range(), then this
> can perhaps be done a lot more efficiently by:

Yeah, you mentioned this before. I do like being able to specify an
upper bound to have the ability to place fds strategically after said
upper bound.
I have used this quite a few times where I know that given task may have
inherited up to m fds and I want to inherit a specific pipe who's fd I
know. Then I'd dup2(pipe_fd, ) and then close all
other fds. Is that too much of a corner case?

Christian


Re: [PATCH 1/2] open: add close_range()

2019-05-21 Thread David Howells
Al Viro  wrote:

> Umm...  That's going to be very painful if you dup2() something to MAX_INT and
> then run that; roughly 2G iterations of bouncing ->file_lock up and down,
> without anything that would yield CPU in process.
> 
> If anything, I would suggest something like
> 
>   fd = *start_fd;
>   grab the lock
> fdt = files_fdtable(files);
> more:
>   look for the next eviction candidate in ->open_fds, starting at fd
>   if there's none up to max_fd
>   drop the lock
>   return NULL
>   *start_fd = fd + 1;
>   if the fscker is really opened and not just reserved
>   rcu_assign_pointer(fdt->fd[fd], NULL);
>   __put_unused_fd(files, fd);
>   drop the lock
>   return the file we'd got
>   if (unlikely(need_resched()))
>   drop lock
>   cond_resched();
>   grab lock
>   fdt = files_fdtable(files);
>   goto more;
> 
> with the main loop being basically
>   while ((file = pick_next(files, _fd, max_fd)) != NULL)
>   filp_close(file, files);

If we can live with close_from(int first) rather than close_range(), then this
can perhaps be done a lot more efficiently by:

new = alloc_fdtable(first);
spin_lock(>file_lock);
old = files_fdtable(files);
copy_fds(new, old, 0, first - 1);
rcu_assign_pointer(files->fdt, new);
spin_unlock(>file_lock);
clear_fds(old, 0, first - 1);
close_fdt_from(old, first);
kfree_rcu(old);

David


Extra selftests failure on Talitos SEC1 hash algs - how can I identify the issue ?

2019-05-21 Thread Christophe Leroy

Hello,

With the new selftests I get the following failures with Talitos on SEC1 
(mpc885).


I don't get those failures with Talitos on SEC2 (mpc8321E), but the 
driver is slightly different for SEC1 as it doesn't support S/G operations.


How can I identify what problem to look for based on the below logs ? (I 
have modified the testmgr to return 0 instead of -EINVAL on wrong hash 
result in order to have the tests run up to the end).


There are four successive startups.

Thanks
Christophe

[   44.001833] alg: hash: md5-talitos test failed (wrong result) on test 
vector "random: psize=2715 ksize=0", cfg="random: use_finup 
src_divs=[2.19%@+14, 70.54%@+8554, 27.27%@+15]"
[   44.108062] alg: hash: md5-talitos test failed (wrong result) on test 
vector "random: psize=860 ksize=0", cfg="random: use_final nosimd 
src_divs=[34.53%@+6281, 32.30%@+16375, 33.17%@+9323] 
dst_divs=[72.16%@+13710, 18.5%@+14, 9.79%@+16338]"
[   44.192174] alg: hash: md5-talitos test failed (wrong result) on test 
vector "random: psize=19109 ksize=0", cfg="random: inplace may_sleep 
use_final src_divs=[70.8%@+31, 6.44%@+5996, 23.48%@+16307]"
[   44.227768] alg: hash: md5-talitos test failed (wrong result) on test 
vector "random: psize=2713 ksize=0", cfg="random: inplace may_sleep 
use_finup src_divs=[6.7%@+1661, 90.5%@+12507, 
1.2%@alignmask+16306, 2.86%@+16309] iv_offset=49"
[   44.456440] alg: hash: md5-talitos test failed (wrong result) on test 
vector "random: psize=584 ksize=0", cfg="random: may_sleep use_finup 
src_divs=[84.77%@+9, 10.43%@+10, 4.2%@+2817, 0.78%@+13809]"
[   44.679193] alg: hash: md5-talitos test failed (wrong result) on test 
vector "random: psize=11637 ksize=0", cfg="random: inplace use_final 
src_divs=[71.87%@+19, 14.59%@+4039, 13.54%@+23] 
iv_offset=43"
[   44.829884] alg: hash: md5-talitos test failed (wrong result) on test 
vector "random: psize=140 ksize=0", cfg="random: inplace may_sleep 
use_final src_divs=[12.38%@+16262, 49.7%@+16280, 
38.55%@+17]"
[   44.937608] alg: hash: md5-talitos test failed (wrong result) on test 
vector "random: psize=15581 ksize=0", cfg="random: inplace use_final 
src_divs=[63.2%@+5723, 26.27%@+0, 10.71%@+16336]"
[   45.161565] alg: hash: md5-talitos test failed (wrong result) on test 
vector "random: psize=602 ksize=0", cfg="random: may_sleep use_final 
src_divs=[40.26%@+16371, 26.4%@+7851, 
20.91%@alignmask+16336, 12.79%@+13696] dst_divs=[100.0%@+16285] 
iv_offset=24"
[   45.185838] alg: hash: md5-talitos test failed (wrong result) on test 
vector "random: psize=225 ksize=0", cfg="random: use_finup 
src_divs=[8.53%@+8836, 57.48%@alignmask+10, 33.99%@+3781] 
dst_divs=[100.0%@+13601]"
[   45.255888] alg: hash: md5-talitos test failed (wrong result) on test 
vector "random: psize=535 ksize=0", cfg="random: may_sleep use_final 
src_divs=[69.27%@+6420, 25.64%@+16364, 5.9%@+16359] iv_offset=44"
[   45.540747] alg: hash: md5-talitos test failed (wrong result) on test 
vector "random: psize=6123 ksize=0", cfg="random: inplace may_sleep 
use_finup src_divs=[13.47%@+24, 68.88%@+1540, 17.65%@+16312] 
iv_offset=38"
[   45.564728] alg: hash: md5-talitos test failed (wrong result) on test 
vector "random: psize=131 ksize=0", cfg="random: use_finup 
src_divs=[63.96%@+2313, 12.3%@+1, 19.49%@+7, 
4.52%@+388] iv_offset=48"
[   45.651347] alg: hash: md5-talitos test failed (wrong result) on test 
vector "random: psize=802 ksize=0", cfg="random: may_sleep use_final 
src_divs=[52.62%@alignmask+26, 25.69%@+16350, 
16.79%@+16276, 4.90%@+5] dst_divs=[72.18%@+12892, 
27.82%@alignmask+5] iv_offset=59"
[   45.757169] alg: hash: md5-talitos test failed (wrong result) on test 
vector "random: psize=742 ksize=0", cfg="random: may_sleep use_finup 
src_divs=[32.14%@+14908, 11.80%@+24, 26.47%@+13, 
29.59%@alignmask+26] dst_divs=[100.0%@+11250] iv_offset=3"
[   45.909272] alg: hash: md5-talitos test failed (wrong result) on test 
vector "random: psize=19022 ksize=0", cfg="random: use_finup nosimd 
src_divs=[45.28%@+14, 18.41%@alignmask+16331, 20.9%@+8817, 
13.65%@+10854, 2.1%@+10, 0.29%@+0, 0.27%@+16350] 
iv_offset=35"
[   45.959169] alg: hash: md5-talitos test failed (wrong result) on test 
vector "random: psize=2449 ksize=0", cfg="random: inplace use_final 
nosimd src_divs=[65.95%@+8789, 6.91%@alignmask+16363, 27.14%@+15] 
iv_offset=22"
[   45.995863] alg: hash: md5-talitos test failed (wrong result) on test 
vector "random: psize=963 ksize=0", cfg="random: use_final nosimd 
src_divs=[52.18%@+8311, 45.55%@+13735, 2.27%@+16371] iv_offset=5"
[   46.366870] alg: hash: md5-talitos test failed (wrong result) on test 
vector "random: psize=2591 ksize=0", cfg="random: inplace may_sleep 
use_final src_divs=[21.73%@+16354, 37.20%@alignmask+16302, 
41.7%@+5145] iv_offset=57"
[   46.411039] alg: hash: md5-talitos test failed (wrong result) on test 
vector "random: psize=417 ksize=0", cfg="random: may_sleep use_final 
src_divs=[8.51%@+11479, 53.54%@alignmask+11025, 
5.97%@+15807, 31.98%@+8761] dst_divs=[29.29%@+7, 

Re: [PATCH] mm/nvdimm: Use correct #defines instead of opencoding

2019-05-21 Thread Dan Williams
On Tue, May 21, 2019 at 2:51 AM Aneesh Kumar K.V
 wrote:
>
> Dan Williams  writes:
>
> > On Mon, May 13, 2019 at 9:46 PM Aneesh Kumar K.V
> >  wrote:
> >>
> >> On 5/14/19 9:42 AM, Dan Williams wrote:
> >> > On Mon, May 13, 2019 at 9:05 PM Aneesh Kumar K.V
> >> >  wrote:
> >> >>
> >> >> On 5/14/19 9:28 AM, Dan Williams wrote:
> >> >>> On Mon, May 13, 2019 at 7:56 PM Aneesh Kumar K.V
> >> >>>  wrote:
> >> 
> >>  The nfpn related change is needed to fix the kernel message
> >> 
> >>  "number of pfns truncated from 2617344 to 163584"
> >> 
> >>  The change makes sure the nfpns stored in the superblock is right 
> >>  value.
> >> 
> >>  Signed-off-by: Aneesh Kumar K.V 
> >>  ---
> >> drivers/nvdimm/pfn_devs.c| 6 +++---
> >> drivers/nvdimm/region_devs.c | 8 
> >> 2 files changed, 7 insertions(+), 7 deletions(-)
> >> 
> >>  diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
> >>  index 347cab166376..6751ff0296ef 100644
> >>  --- a/drivers/nvdimm/pfn_devs.c
> >>  +++ b/drivers/nvdimm/pfn_devs.c
> >>  @@ -777,8 +777,8 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
> >> * when populating the vmemmap. This *should* be 
> >>  equal to
> >> * PMD_SIZE for most architectures.
> >> */
> >>  -   offset = ALIGN(start + reserve + 64 * npfns,
> >>  -   max(nd_pfn->align, PMD_SIZE)) - start;
> >>  +   offset = ALIGN(start + reserve + sizeof(struct page) 
> >>  * npfns,
> >>  +  max(nd_pfn->align, PMD_SIZE)) - start;
> >> >>>
> >> >>> No, I think we need to record the page-size into the superblock format
> >> >>> otherwise this breaks in debug builds where the struct-page size is
> >> >>> extended.
> >> >>>
> >>    } else if (nd_pfn->mode == PFN_MODE_RAM)
> >>    offset = ALIGN(start + reserve, nd_pfn->align) - 
> >>  start;
> >>    else
> >>  @@ -790,7 +790,7 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
> >>    return -ENXIO;
> >>    }
> >> 
> >>  -   npfns = (size - offset - start_pad - end_trunc) / SZ_4K;
> >>  +   npfns = (size - offset - start_pad - end_trunc) / PAGE_SIZE;
> >> >>>
> >> >>> Similar comment, if the page size is variable then the superblock
> >> >>> needs to explicitly account for it.
> >> >>>
> >> >>
> >> >> PAGE_SIZE is not really variable. What we can run into is the issue you
> >> >> mentioned above. The size of struct page can change which means the
> >> >> reserved space for keeping vmemmap in device may not be sufficient for
> >> >> certain kernel builds.
> >> >>
> >> >> I was planning to add another patch that fails namespace init if we
> >> >> don't have enough space to keep the struct page.
> >> >>
> >> >> Why do you suggest we need to have PAGE_SIZE as part of pfn superblock?
> >> >
> >> > So that the kernel has a chance to identify cases where the superblock
> >> > it is handling was created on a system with different PAGE_SIZE
> >> > assumptions.
> >> >
> >>
> >> The reason to do that is we don't have enough space to keep struct page
> >> backing the total number of pfns? If so, what i suggested above should
> >> handle that.
> >>
> >> or are you finding any other reason why we should fail a namespace init
> >> with a different PAGE_SIZE value?
> >
> > I want the kernel to be able to start understand cross-architecture
> > and cross-configuration geometries. Which to me means incrementing the
> > info-block version and recording PAGE_SIZE and sizeof(struct page) in
> > the info-block directly.
> >
> >> My another patch handle the details w.r.t devdax alignment for which
> >> devdax got created with PAGE_SIZE 4K but we are now trying to load that
> >> in a kernel with PAGE_SIZE 64k.
> >
> > Sure, but what about the reverse? These info-block format assumptions
> > are as fundamental as the byte-order of the info-block, it needs to be
> > cross-arch compatible and the x86 assumptions need to be fully lifted.
>
> Something like the below (Not tested). I am not sure what we will init the 
> page_size
> for minor version < 3. This will mark the namespace disabled if the
> PAGE_SIZE and sizeof(struct page) doesn't match with the values used
> during namespace create.

Yes, this is on the right track.

I would special-case page_size == 0 as 4096 and page_struct_size == 0
as 64. If either of those is non-zero then the info-block version
needs to be revved and it needs to be crafted to make older kernels
fail to parse it.

There was an earlier attempt to implement minimum info-block versions here:

https://lore.kernel.org/lkml/155000670159.348031.17631616775326330606.st...@dwillia2-desk3.amr.corp.intel.com/

...but that was dropped in favor of the the "sub-section" patches.


Re: Re: [RFC PATCH 02/12] powerpc: Add support for adding an ESM blob to the zImage wrapper

2019-05-21 Thread Ram Pai
On Tue, May 21, 2019 at 07:13:26AM +0200, Christoph Hellwig wrote:
> On Tue, May 21, 2019 at 01:49:02AM -0300, Thiago Jung Bauermann wrote:
> > From: Benjamin Herrenschmidt 
> > 
> > For secure VMs, the signing tool will create a ticket called the "ESM blob"
> > for the Enter Secure Mode ultravisor call with the signatures of the kernel
> > and initrd among other things.
> > 
> > This adds support to the wrapper script for adding that blob via the "-e"
> > option to the zImage.pseries.
> > 
> > It also adds code to the zImage wrapper itself to retrieve and if necessary
> > relocate the blob, and pass its address to Linux via the device-tree, to be
> > later consumed by prom_init.
> 
> Where does the "BLOB" come from?  How is it licensed and how can we
> satisfy the GPL with it?

The "BLOB" is not a piece of code. Its just a piece of data that gets
generated by our build tools. This data contains the
signed hash of the kernel, initrd, and kernel command line parameters.
Also it contains any information that the creator the the BLOB wants to
be made available to anyone needing it, inside the
secure-virtual-machine. All of this is integrity-protected and encrypted
to safegaurd it when at rest and at runtime.
 
Bottomline -- Blob is data, and hence no licensing implication. And due
to some reason, even data needs to have licensing statement, we can
make it available to have no conflicts with GPL.


-- 
Ram Pai



Re: [PATCH 1/2] open: add close_range()

2019-05-21 Thread Al Viro
On Tue, May 21, 2019 at 01:34:47PM +0200, Christian Brauner wrote:

> This adds the close_range() syscall. It allows to efficiently close a range
> of file descriptors up to all file descriptors of a calling task.
> 
> The syscall came up in a recent discussion around the new mount API and
> making new file descriptor types cloexec by default. During this
> discussion, Al suggested the close_range() syscall (cf. [1]). Note, a
> syscall in this manner has been requested by various people over time.
> 
> First, it helps to close all file descriptors of an exec()ing task. This
> can be done safely via (quoting Al's example from [1] verbatim):
> 
> /* that exec is sensitive */
> unshare(CLONE_FILES);
> /* we don't want anything past stderr here */
> close_range(3, ~0U);
> execve();
> 
> The code snippet above is one way of working around the problem that file
> descriptors are not cloexec by default. This is aggravated by the fact that
> we can't just switch them over without massively regressing userspace. For
> a whole class of programs having an in-kernel method of closing all file
> descriptors is very helpful (e.g. demons, service managers, programming
> language standard libraries, container managers etc.).
> (Please note, unshare(CLONE_FILES) should only be needed if the calling
>  task is multi-threaded and shares the file descriptor table with another
>  thread in which case two threads could race with one thread allocating
>  file descriptors and the other one closing them via close_range(). For the
>  general case close_range() before the execve() is sufficient.)
> 
> Second, it allows userspace to avoid implementing closing all file
> descriptors by parsing through /proc//fd/* and calling close() on each
> file descriptor. From looking at various large(ish) userspace code bases
> this or similar patterns are very common in:
> - service managers (cf. [4])
> - libcs (cf. [6])
> - container runtimes (cf. [5])
> - programming language runtimes/standard libraries
>   - Python (cf. [2])
>   - Rust (cf. [7], [8])
> As Dmitry pointed out there's even a long-standing glibc bug about missing
> kernel support for this task (cf. [3]).
> In addition, the syscall will also work for tasks that do not have procfs
> mounted and on kernels that do not have procfs support compiled in. In such
> situations the only way to make sure that all file descriptors are closed
> is to call close() on each file descriptor up to UINT_MAX or RLIMIT_NOFILE,
> OPEN_MAX trickery (cf. comment [8] on Rust).
> 
> The performance is striking. For good measure, comparing the following
> simple close_all_fds() userspace implementation that is essentially just
> glibc's version in [6]:
> 
> static int close_all_fds(void)
> {
> DIR *dir;
> struct dirent *direntp;
> 
> dir = opendir("/proc/self/fd");
> if (!dir)
> return -1;
> 
> while ((direntp = readdir(dir))) {
> int fd;
> if (strcmp(direntp->d_name, ".") == 0)
> continue;
> if (strcmp(direntp->d_name, "..") == 0)
> continue;
> fd = atoi(direntp->d_name);
> if (fd == 0 || fd == 1 || fd == 2)
> continue;
> close(fd);
> }
> 
> closedir(dir); /* cannot fail */
> return 0;
> }
> 
> to close_range() yields:
> 1. closing 4 open files:
>- close_all_fds(): ~280 us
>- close_range():~24 us
> 
> 2. closing 1000 open files:
>- close_all_fds(): ~5000 us
>- close_range():   ~800 us
> 
> close_range() is designed to allow for some flexibility. Specifically, it
> does not simply always close all open file descriptors of a task. Instead,
> callers can specify an upper bound.
> This is e.g. useful for scenarios where specific file descriptors are
> created with well-known numbers that are supposed to be excluded from
> getting closed.
> For extra paranoia close_range() comes with a flags argument. This can e.g.
> be used to implement extension. Once can imagine userspace wanting to stop
> at the first error instead of ignoring errors under certain circumstances.
> There might be other valid ideas in the future. In any case, a flag
> argument doesn't hurt and keeps us on the safe side.
> 
> >From an implementation side this is kept rather dumb. It saw some input
> from David and Jann but all nonsense is obviously my own!
> - Errors to close file descriptors are currently ignored. (Could be changed
>   by setting a flag in the future if needed.)
> - __close_range() is a rather simplistic wrapper around __close_fd().
>   My reasoning behind this is based on the nature of how __close_fd() needs
>   to release an fd. But maybe I misunderstood specifics:
>   We take the files_lock and rcu-dereference the fdtable of the calling
>   task, we find the entry in the fdtable, get the file and need to release
>  

RE: [PATCH] mm/nvdimm: Use correct #defines instead of opencoding

2019-05-21 Thread Elliott, Robert (Servers)



> -Original Message-
> From: Linux-nvdimm  On Behalf Of
> Aneesh Kumar K.V
> Sent: Tuesday, May 21, 2019 4:51 AM
> Subject: Re: [PATCH] mm/nvdimm: Use correct #defines instead of
> opencoding
> 
...
> @@ -36,6 +36,9 @@ struct nd_pfn_sb {
>   __le32 end_trunc;
>   /* minor-version-2 record the base alignment of the mapping */
>   __le32 align;
> + /* minor-version-3 record the page size and struct page size
> */
> + __le32 page_size;
> + __le32 page_struct_size;
>   u8 padding[4000];
>   __le64 checksum;
>  };

You might need to reduce the padding size to offset the extra added
fields.




Re: [PATCH v3 1/2] pid: add pidfd_open()

2019-05-21 Thread Christian Brauner
On Mon, May 20, 2019 at 05:56:29PM +0200, Christian Brauner wrote:
> This adds the pidfd_open() syscall. It allows a caller to retrieve pollable
> pidfds for a process which did not get created via CLONE_PIDFD, i.e. for a
> process that is created via traditional fork()/clone() calls that is only
> referenced by a PID:
> 
> int pidfd = pidfd_open(1234, 0);
> ret = pidfd_send_signal(pidfd, SIGSTOP, NULL, 0);
> 
> With the introduction of pidfds through CLONE_PIDFD it is possible to
> created pidfds at process creation time.
> However, a lot of processes get created with traditional PID-based calls
> such as fork() or clone() (without CLONE_PIDFD). For these processes a
> caller can currently not create a pollable pidfd. This is a problem for
> Android's low memory killer (LMK) and service managers such as systemd.
> Both are examples of tools that want to make use of pidfds to get reliable
> notification of process exit for non-parents (pidfd polling) and race-free
> signal sending (pidfd_send_signal()). They intend to switch to this API for
> process supervision/management as soon as possible. Having no way to get
> pollable pidfds from PID-only processes is one of the biggest blockers for
> them in adopting this api. With pidfd_open() making it possible to retrieve
> pidfds for PID-based processes we enable them to adopt this api.
> 
> In line with Arnd's recent changes to consolidate syscall numbers across
> architectures, I have added the pidfd_open() syscall to all architectures
> at the same time.
> 
> Signed-off-by: Christian Brauner 
> Reviewed-by: Oleg Nesterov 

This now also carries a Reviewed-by from David.

> Acked-by: Arnd Bergmann 
> Cc: "Eric W. Biederman" 
> Cc: Kees Cook 
> Cc: Joel Fernandes (Google) 
> Cc: Thomas Gleixner 
> Cc: Jann Horn 
> Cc: David Howells 
> Cc: Andy Lutomirsky 
> Cc: Andrew Morton 
> Cc: Aleksa Sarai 
> Cc: Linus Torvalds 
> Cc: Al Viro 
> Cc: linux-...@vger.kernel.org

I've moved pidfd_open() into my for-next branch together with Joel's
pidfd polling changes. Everything is based on v5.2-rc1.

The chosen syscall number for now is 434. David is going to send out
another pile of mount api related syscalls. I'll coordinate with him
accordingly prior to the 5.3 merge window.

Thanks!
Christian


Re: [PATCH v2] powerpc/mm: mark more tlb functions as __always_inline

2019-05-21 Thread Christophe Leroy




Le 21/05/2019 à 15:13, Masahiro Yamada a écrit :

With CONFIG_OPTIMIZE_INLINING enabled, Laura Abbott reported error
with gcc 9.1.1:

   arch/powerpc/mm/book3s64/radix_tlb.c: In function '_tlbiel_pid':
   arch/powerpc/mm/book3s64/radix_tlb.c:104:2: warning: asm operand 3 probably 
doesn't match constraints
 104 |  asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
 |  ^~~
   arch/powerpc/mm/book3s64/radix_tlb.c:104:2: error: impossible constraint in 
'asm'

Fixing _tlbiel_pid() is enough to address the warning above, but I
inlined more functions to fix all potential issues.

To meet the "i" (immediate) constraint for the asm operands, functions
propagating "ric" must be always inlined.

Fixes: 9012d011660e ("compiler: allow all arches to enable 
CONFIG_OPTIMIZE_INLINING")
Reported-by: Laura Abbott 
Signed-off-by: Masahiro Yamada 


Reviewed-by: Christophe Leroy 


---

Changes in v2:
   - Do not split lines

  arch/powerpc/mm/book3s64/hash_native.c |  2 +-
  arch/powerpc/mm/book3s64/radix_tlb.c   | 32 
  2 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/arch/powerpc/mm/book3s64/hash_native.c 
b/arch/powerpc/mm/book3s64/hash_native.c
index aaa28fd..c854151 100644
--- a/arch/powerpc/mm/book3s64/hash_native.c
+++ b/arch/powerpc/mm/book3s64/hash_native.c
@@ -60,7 +60,7 @@ static inline void tlbiel_hash_set_isa206(unsigned int set, 
unsigned int is)
   * tlbiel instruction for hash, set invalidation
   * i.e., r=1 and is=01 or is=10 or is=11
   */
-static inline void tlbiel_hash_set_isa300(unsigned int set, unsigned int is,
+static __always_inline void tlbiel_hash_set_isa300(unsigned int set, unsigned 
int is,
unsigned int pid,
unsigned int ric, unsigned int prs)
  {
diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c 
b/arch/powerpc/mm/book3s64/radix_tlb.c
index 4d84136..4d3dc10 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -29,7 +29,7 @@
   * tlbiel instruction for radix, set invalidation
   * i.e., r=1 and is=01 or is=10 or is=11
   */
-static inline void tlbiel_radix_set_isa300(unsigned int set, unsigned int is,
+static __always_inline void tlbiel_radix_set_isa300(unsigned int set, unsigned 
int is,
unsigned int pid,
unsigned int ric, unsigned int prs)
  {
@@ -150,8 +150,8 @@ static __always_inline void __tlbie_lpid(unsigned long 
lpid, unsigned long ric)
trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
  }
  
-static inline void __tlbiel_lpid_guest(unsigned long lpid, int set,

-   unsigned long ric)
+static __always_inline void __tlbiel_lpid_guest(unsigned long lpid, int set,
+   unsigned long ric)
  {
unsigned long rb,rs,prs,r;
  
@@ -167,8 +167,8 @@ static inline void __tlbiel_lpid_guest(unsigned long lpid, int set,

  }
  
  
-static inline void __tlbiel_va(unsigned long va, unsigned long pid,

-  unsigned long ap, unsigned long ric)
+static __always_inline void __tlbiel_va(unsigned long va, unsigned long pid,
+   unsigned long ap, unsigned long ric)
  {
unsigned long rb,rs,prs,r;
  
@@ -183,8 +183,8 @@ static inline void __tlbiel_va(unsigned long va, unsigned long pid,

trace_tlbie(0, 1, rb, rs, ric, prs, r);
  }
  
-static inline void __tlbie_va(unsigned long va, unsigned long pid,

- unsigned long ap, unsigned long ric)
+static __always_inline void __tlbie_va(unsigned long va, unsigned long pid,
+  unsigned long ap, unsigned long ric)
  {
unsigned long rb,rs,prs,r;
  
@@ -199,8 +199,8 @@ static inline void __tlbie_va(unsigned long va, unsigned long pid,

trace_tlbie(0, 0, rb, rs, ric, prs, r);
  }
  
-static inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid,

- unsigned long ap, unsigned long ric)
+static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long 
lpid,
+   unsigned long ap, unsigned long ric)
  {
unsigned long rb,rs,prs,r;
  
@@ -239,7 +239,7 @@ static inline void fixup_tlbie_lpid(unsigned long lpid)

  /*
   * We use 128 set in radix mode and 256 set in hpt mode.
   */
-static inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
+static __always_inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
  {
int set;
  
@@ -341,7 +341,7 @@ static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)

asm volatile("eieio; tlbsync; ptesync": : :"memory");
  }
  
-static inline void _tlbiel_lpid_guest(unsigned long lpid, unsigned long ric)

+static __always_inline void _tlbiel_lpid_guest(unsigned long lpid, unsigned 
long ric)
  {
int set;
  

[PATCH v1 15/15] crypto: talitos - use SPDX-License-Identifier

2019-05-21 Thread Christophe Leroy
This patch drops the license text and replaces it
with an SPDX-License-Identifier tag.

Signed-off-by: Christophe Leroy 
---
 drivers/crypto/talitos.c | 15 +--
 drivers/crypto/talitos.h | 25 +
 2 files changed, 2 insertions(+), 38 deletions(-)

diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index 6fe900d8e5d8..32a7e747dc5f 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * talitos - Freescale Integrated Security Engine (SEC) device driver
  *
@@ -9,20 +10,6 @@
  * Crypto algorithm registration code copied from hifn driver:
  * 2007+ Copyright (c) Evgeniy Polyakov 
  * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
 #include 
diff --git a/drivers/crypto/talitos.h b/drivers/crypto/talitos.h
index 5699d46401e6..32ad4fc679ed 100644
--- a/drivers/crypto/talitos.h
+++ b/drivers/crypto/talitos.h
@@ -1,31 +1,8 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
 /*
  * Freescale SEC (talitos) device register and descriptor header defines
  *
  * Copyright (c) 2006-2011 Freescale Semiconductor, Inc.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- *notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *notice, this list of conditions and the following disclaimer in the
- *documentation and/or other materials provided with the distribution.
- * 3. The name of the author may not be used to endorse or promote products
- *derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
 #define TALITOS_TIMEOUT 10
-- 
2.13.3



[PATCH v1 14/15] crypto: talitos - use IS_ENABLED() in has_ftr_sec1()

2019-05-21 Thread Christophe Leroy
This patch rewrites has_ftr_sec1() using IS_ENABLED()
instead of #ifdefs

Signed-off-by: Christophe Leroy 
---
 drivers/crypto/talitos.h | 12 +---
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/drivers/crypto/talitos.h b/drivers/crypto/talitos.h
index 95e97951b924..5699d46401e6 100644
--- a/drivers/crypto/talitos.h
+++ b/drivers/crypto/talitos.h
@@ -164,13 +164,11 @@ struct talitos_private {
  */
 static inline bool has_ftr_sec1(struct talitos_private *priv)
 {
-#if defined(CONFIG_CRYPTO_DEV_TALITOS1) && defined(CONFIG_CRYPTO_DEV_TALITOS2)
-   return priv->features & TALITOS_FTR_SEC1 ? true : false;
-#elif defined(CONFIG_CRYPTO_DEV_TALITOS1)
-   return true;
-#else
-   return false;
-#endif
+   if (IS_ENABLED(CONFIG_CRYPTO_DEV_TALITOS1) &&
+   IS_ENABLED(CONFIG_CRYPTO_DEV_TALITOS2))
+   return priv->features & TALITOS_FTR_SEC1;
+
+   return IS_ENABLED(CONFIG_CRYPTO_DEV_TALITOS1);
 }
 
 /*
-- 
2.13.3



[PATCH v1 13/15] Revert "crypto: talitos - export the talitos_submit function"

2019-05-21 Thread Christophe Leroy
There is no other file using talitos_submit in the kernel tree,
so it doesn't need to be exported nor made global.

This reverts commit 865d506155b117edc7e668ced373030ce7108ce9.

Signed-off-by: Christophe Leroy 
Fixes: 865d506155b1 ("crypto: talitos - export the talitos_submit function")
---
 drivers/crypto/talitos.c | 11 +--
 drivers/crypto/talitos.h |  6 --
 2 files changed, 5 insertions(+), 12 deletions(-)

diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index 12917fd54bcb..6fe900d8e5d8 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -278,11 +278,11 @@ static int init_device(struct device *dev)
  * callback must check err and feedback in descriptor header
  * for device processing status.
  */
-int talitos_submit(struct device *dev, int ch, struct talitos_desc *desc,
-  void (*callback)(struct device *dev,
-   struct talitos_desc *desc,
-   void *context, int error),
-  void *context)
+static int talitos_submit(struct device *dev, int ch, struct talitos_desc 
*desc,
+ void (*callback)(struct device *dev,
+  struct talitos_desc *desc,
+  void *context, int error),
+ void *context)
 {
struct talitos_private *priv = dev_get_drvdata(dev);
struct talitos_request *request;
@@ -332,7 +332,6 @@ int talitos_submit(struct device *dev, int ch, struct 
talitos_desc *desc,
 
return -EINPROGRESS;
 }
-EXPORT_SYMBOL(talitos_submit);
 
 /*
  * process what was done, notify callback of error if not
diff --git a/drivers/crypto/talitos.h b/drivers/crypto/talitos.h
index dbedd0956c8a..95e97951b924 100644
--- a/drivers/crypto/talitos.h
+++ b/drivers/crypto/talitos.h
@@ -150,12 +150,6 @@ struct talitos_private {
bool rng_registered;
 };
 
-extern int talitos_submit(struct device *dev, int ch, struct talitos_desc 
*desc,
- void (*callback)(struct device *dev,
-  struct talitos_desc *desc,
-  void *context, int error),
- void *context);
-
 /* .features flag */
 #define TALITOS_FTR_SRC_LINK_TBL_LEN_INCLUDES_EXTENT 0x0001
 #define TALITOS_FTR_HW_AUTH_CHECK 0x0002
-- 
2.13.3



[PATCH v1 12/15] crypto: talitos - fix AEAD processing.

2019-05-21 Thread Christophe Leroy
This driver is working well in 'simple cases', but as soon as
more exotic SG lists are provided (dst different from src,
auth part not in a single SG fragment, ...) there are
wrong results, overruns, etc ...

This patch cleans up the AEAD processing by:
- Simplifying the location of 'out of line' ICV
- Never using 'out of line' ICV on encryp
- Always using 'out of line' ICV on decrypt
- Forcing the generation of a SG table on decrypt

Signed-off-by: Christophe Leroy 
Fixes: aeb4c132f33d ("crypto: talitos - Convert to new AEAD interface")
---
 drivers/crypto/talitos.c | 158 ---
 drivers/crypto/talitos.h |   2 +-
 2 files changed, 55 insertions(+), 105 deletions(-)

diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index 750b0159e654..12917fd54bcb 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -1040,8 +1040,8 @@ static void ipsec_esp_unmap(struct device *dev,
 DMA_FROM_DEVICE);
unmap_single_talitos_ptr(dev, civ_ptr, DMA_TO_DEVICE);
 
-   talitos_sg_unmap(dev, edesc, areq->src, areq->dst, cryptlen,
-areq->assoclen);
+   talitos_sg_unmap(dev, edesc, areq->src, areq->dst,
+cryptlen + authsize, areq->assoclen);
 
if (edesc->dma_len)
dma_unmap_single(dev, edesc->dma_link_tbl, edesc->dma_len,
@@ -1062,30 +1062,15 @@ static void ipsec_esp_encrypt_done(struct device *dev,
   struct talitos_desc *desc, void *context,
   int err)
 {
-   struct talitos_private *priv = dev_get_drvdata(dev);
-   bool is_sec1 = has_ftr_sec1(priv);
struct aead_request *areq = context;
struct crypto_aead *authenc = crypto_aead_reqtfm(areq);
-   unsigned int authsize = crypto_aead_authsize(authenc);
unsigned int ivsize = crypto_aead_ivsize(authenc);
struct talitos_edesc *edesc;
-   void *icvdata;
 
edesc = container_of(desc, struct talitos_edesc, desc);
 
ipsec_esp_unmap(dev, edesc, areq, true);
 
-   /* copy the generated ICV to dst */
-   if (edesc->icv_ool) {
-   if (is_sec1)
-   icvdata = edesc->buf + areq->assoclen + areq->cryptlen;
-   else
-   icvdata = >link_tbl[edesc->src_nents +
-  edesc->dst_nents + 2];
-   sg_pcopy_from_buffer(areq->dst, edesc->dst_nents ? : 1, icvdata,
-authsize, areq->assoclen + areq->cryptlen);
-   }
-
dma_unmap_single(dev, edesc->iv_dma, ivsize, DMA_TO_DEVICE);
 
kfree(edesc);
@@ -1102,39 +1087,15 @@ static void ipsec_esp_decrypt_swauth_done(struct device 
*dev,
unsigned int authsize = crypto_aead_authsize(authenc);
struct talitos_edesc *edesc;
char *oicv, *icv;
-   struct talitos_private *priv = dev_get_drvdata(dev);
-   bool is_sec1 = has_ftr_sec1(priv);
 
edesc = container_of(desc, struct talitos_edesc, desc);
 
ipsec_esp_unmap(dev, edesc, req, false);
 
if (!err) {
-   char icvdata[SHA512_DIGEST_SIZE];
-   int nents = edesc->dst_nents ? : 1;
-   unsigned int len = req->assoclen + req->cryptlen;
-
/* auth check */
-   if (nents > 1) {
-   sg_pcopy_to_buffer(req->dst, nents, icvdata, authsize,
-  len - authsize);
-   icv = icvdata;
-   } else {
-   icv = (char *)sg_virt(req->dst) + len - authsize;
-   }
-
-   if (edesc->dma_len) {
-   if (is_sec1)
-   oicv = (char *)>dma_link_tbl +
-  req->assoclen + req->cryptlen;
-   else
-   oicv = (char *)
-  >link_tbl[edesc->src_nents +
-   edesc->dst_nents + 2];
-   if (edesc->icv_ool)
-   icv = oicv + authsize;
-   } else
-   oicv = (char *)>link_tbl[0];
+   oicv = edesc->buf + edesc->dma_len;
+   icv = oicv - authsize;
 
err = crypto_memneq(oicv, icv, authsize) ? -EBADMSG : 0;
}
@@ -1170,11 +1131,12 @@ static void ipsec_esp_decrypt_hwauth_done(struct device 
*dev,
  * stop at cryptlen bytes
  */
 static int sg_to_link_tbl_offset(struct scatterlist *sg, int sg_count,
-unsigned int offset, int cryptlen,
+unsigned int offset, int datalen, int elen,
 struct talitos_ptr *link_tbl_ptr)
 {
-   int n_sg = sg_count;
+   int n_sg = elen ? sg_count + 1 : 

[PATCH v1 11/15] crypto: talitos - Align SEC1 accesses to 32 bits boundaries.

2019-05-21 Thread Christophe Leroy
The MPC885 reference manual states:

SEC Lite-initiated 8xx writes can occur only on 32-bit-word boundaries, but
reads can occur on any byte boundary. Writing back a header read from a
non-32-bit-word boundary will yield unpredictable results.

In order to ensure that, cra_alignmask is set to 3 for SEC1.

Signed-off-by: Christophe Leroy 
Fixes: 9c4a79653b35 ("crypto: talitos - Freescale integrated security engine 
(SEC) driver")
---
 drivers/crypto/talitos.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index 7c8a3a717b91..750b0159e654 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -3327,7 +3327,10 @@ static struct talitos_crypto_alg 
*talitos_alg_alloc(struct device *dev,
alg->cra_priority = t_alg->algt.priority;
else
alg->cra_priority = TALITOS_CRA_PRIORITY;
-   alg->cra_alignmask = 0;
+   if (has_ftr_sec1(priv))
+   alg->cra_alignmask = 3;
+   else
+   alg->cra_alignmask = 0;
alg->cra_ctxsize = sizeof(struct talitos_ctx);
alg->cra_flags |= CRYPTO_ALG_KERN_DRIVER_ONLY;
 
-- 
2.13.3



[PATCH v1 06/15] crypto: talitos - check data blocksize in ablkcipher.

2019-05-21 Thread Christophe Leroy
When data size is not a multiple of the alg's block size,
the SEC generates an error interrupt and dumps the registers.
And for NULL size, the SEC does just nothing and the interrupt
is awaited forever.

This patch ensures the data size is correct before submitting
the request to the SEC engine.

Signed-off-by: Christophe Leroy 
Fixes: 4de9d0b547b9 ("crypto: talitos - Add ablkcipher algorithms")
---
 drivers/crypto/talitos.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index 8b9a529f1b66..1e5410f92166 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -1756,6 +1756,14 @@ static int ablkcipher_encrypt(struct ablkcipher_request 
*areq)
struct crypto_ablkcipher *cipher = crypto_ablkcipher_reqtfm(areq);
struct talitos_ctx *ctx = crypto_ablkcipher_ctx(cipher);
struct talitos_edesc *edesc;
+   unsigned int blocksize =
+   crypto_tfm_alg_blocksize(crypto_ablkcipher_tfm(cipher));
+
+   if (!areq->nbytes)
+   return 0;
+
+   if (areq->nbytes % blocksize)
+   return -EINVAL;
 
/* allocate extended descriptor */
edesc = ablkcipher_edesc_alloc(areq, true);
@@ -1773,6 +1781,14 @@ static int ablkcipher_decrypt(struct ablkcipher_request 
*areq)
struct crypto_ablkcipher *cipher = crypto_ablkcipher_reqtfm(areq);
struct talitos_ctx *ctx = crypto_ablkcipher_ctx(cipher);
struct talitos_edesc *edesc;
+   unsigned int blocksize =
+   crypto_tfm_alg_blocksize(crypto_ablkcipher_tfm(cipher));
+
+   if (!areq->nbytes)
+   return 0;
+
+   if (areq->nbytes % blocksize)
+   return -EINVAL;
 
/* allocate extended descriptor */
edesc = ablkcipher_edesc_alloc(areq, false);
-- 
2.13.3



[PATCH v1 10/15] crypto: talitos - properly handle split ICV.

2019-05-21 Thread Christophe Leroy
The driver assumes that the ICV is as a single piece in the last
element of the scatterlist. This assumption is wrong.

This patch ensures that the ICV is properly handled regardless of
the scatterlist layout.

Fixes: 9c4a79653b35 ("crypto: talitos - Freescale integrated security engine 
(SEC) driver")
Signed-off-by: Christophe Leroy 
---
 drivers/crypto/talitos.c | 26 +++---
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index e35581d67315..7c8a3a717b91 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -1069,7 +1069,6 @@ static void ipsec_esp_encrypt_done(struct device *dev,
unsigned int authsize = crypto_aead_authsize(authenc);
unsigned int ivsize = crypto_aead_ivsize(authenc);
struct talitos_edesc *edesc;
-   struct scatterlist *sg;
void *icvdata;
 
edesc = container_of(desc, struct talitos_edesc, desc);
@@ -1083,9 +1082,8 @@ static void ipsec_esp_encrypt_done(struct device *dev,
else
icvdata = >link_tbl[edesc->src_nents +
   edesc->dst_nents + 2];
-   sg = sg_last(areq->dst, edesc->dst_nents);
-   memcpy((char *)sg_virt(sg) + sg->length - authsize,
-  icvdata, authsize);
+   sg_pcopy_from_buffer(areq->dst, edesc->dst_nents ? : 1, icvdata,
+authsize, areq->assoclen + areq->cryptlen);
}
 
dma_unmap_single(dev, edesc->iv_dma, ivsize, DMA_TO_DEVICE);
@@ -1103,7 +1101,6 @@ static void ipsec_esp_decrypt_swauth_done(struct device 
*dev,
struct crypto_aead *authenc = crypto_aead_reqtfm(req);
unsigned int authsize = crypto_aead_authsize(authenc);
struct talitos_edesc *edesc;
-   struct scatterlist *sg;
char *oicv, *icv;
struct talitos_private *priv = dev_get_drvdata(dev);
bool is_sec1 = has_ftr_sec1(priv);
@@ -1113,9 +1110,18 @@ static void ipsec_esp_decrypt_swauth_done(struct device 
*dev,
ipsec_esp_unmap(dev, edesc, req, false);
 
if (!err) {
+   char icvdata[SHA512_DIGEST_SIZE];
+   int nents = edesc->dst_nents ? : 1;
+   unsigned int len = req->assoclen + req->cryptlen;
+
/* auth check */
-   sg = sg_last(req->dst, edesc->dst_nents ? : 1);
-   icv = (char *)sg_virt(sg) + sg->length - authsize;
+   if (nents > 1) {
+   sg_pcopy_to_buffer(req->dst, nents, icvdata, authsize,
+  len - authsize);
+   icv = icvdata;
+   } else {
+   icv = (char *)sg_virt(req->dst) + len - authsize;
+   }
 
if (edesc->dma_len) {
if (is_sec1)
@@ -1537,7 +1543,6 @@ static int aead_decrypt(struct aead_request *req)
struct talitos_ctx *ctx = crypto_aead_ctx(authenc);
struct talitos_private *priv = dev_get_drvdata(ctx->dev);
struct talitos_edesc *edesc;
-   struct scatterlist *sg;
void *icvdata;
 
/* allocate extended descriptor */
@@ -1571,9 +1576,8 @@ static int aead_decrypt(struct aead_request *req)
else
icvdata = >link_tbl[0];
 
-   sg = sg_last(req->src, edesc->src_nents ? : 1);
-
-   memcpy(icvdata, (char *)sg_virt(sg) + sg->length - authsize, authsize);
+   sg_pcopy_to_buffer(req->src, edesc->src_nents ? : 1, icvdata, authsize,
+  req->assoclen + req->cryptlen - authsize);
 
return ipsec_esp(edesc, req, false, ipsec_esp_decrypt_swauth_done);
 }
-- 
2.13.3



[PATCH v1 09/15] crypto: talitos - HMAC SNOOP NO AFEU mode requires SW icv checking.

2019-05-21 Thread Christophe Leroy
In that mode, hardware ICV verification is not supported.

Signed-off-by: Christophe Leroy 
Fixes: 7405c8d7ff97 ("crypto: talitos - templates for AEAD using 
HMAC_SNOOP_NO_AFEU")
---
 drivers/crypto/talitos.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index a15aa6d6ec33..e35581d67315 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -1545,7 +1545,8 @@ static int aead_decrypt(struct aead_request *req)
if (IS_ERR(edesc))
return PTR_ERR(edesc);
 
-   if ((priv->features & TALITOS_FTR_HW_AUTH_CHECK) &&
+   if ((edesc->desc.hdr & DESC_HDR_TYPE_IPSEC_ESP) &&
+   (priv->features & TALITOS_FTR_HW_AUTH_CHECK) &&
((!edesc->src_nents && !edesc->dst_nents) ||
 priv->features & TALITOS_FTR_SRC_LINK_TBL_LEN_INCLUDES_EXTENT)) {
 
-- 
2.13.3



[PATCH v1 08/15] crypto: talitos - Do not modify req->cryptlen on decryption.

2019-05-21 Thread Christophe Leroy
For decrypt, req->cryptlen includes the size of the authentication
part while all functions of the driver expect cryptlen to be
the size of the encrypted data.

As it is not expected to change req->cryptlen, this patch
implements local calculation of cryptlen.

Signed-off-by: Christophe Leroy 
Fixes: 9c4a79653b35 ("crypto: talitos - Freescale integrated security engine 
(SEC) driver")
---
 drivers/crypto/talitos.c | 31 +--
 1 file changed, 17 insertions(+), 14 deletions(-)

diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index 6f6f34754ad8..a15aa6d6ec33 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -1025,11 +1025,13 @@ static void talitos_sg_unmap(struct device *dev,
 
 static void ipsec_esp_unmap(struct device *dev,
struct talitos_edesc *edesc,
-   struct aead_request *areq)
+   struct aead_request *areq, bool encrypt)
 {
struct crypto_aead *aead = crypto_aead_reqtfm(areq);
struct talitos_ctx *ctx = crypto_aead_ctx(aead);
unsigned int ivsize = crypto_aead_ivsize(aead);
+   unsigned int authsize = crypto_aead_authsize(aead);
+   unsigned int cryptlen = areq->cryptlen - (encrypt ? 0 : authsize);
bool is_ipsec_esp = edesc->desc.hdr & DESC_HDR_TYPE_IPSEC_ESP;
struct talitos_ptr *civ_ptr = >desc.ptr[is_ipsec_esp ? 2 : 3];
 
@@ -1038,7 +1040,7 @@ static void ipsec_esp_unmap(struct device *dev,
 DMA_FROM_DEVICE);
unmap_single_talitos_ptr(dev, civ_ptr, DMA_TO_DEVICE);
 
-   talitos_sg_unmap(dev, edesc, areq->src, areq->dst, areq->cryptlen,
+   talitos_sg_unmap(dev, edesc, areq->src, areq->dst, cryptlen,
 areq->assoclen);
 
if (edesc->dma_len)
@@ -1049,7 +1051,7 @@ static void ipsec_esp_unmap(struct device *dev,
unsigned int dst_nents = edesc->dst_nents ? : 1;
 
sg_pcopy_to_buffer(areq->dst, dst_nents, ctx->iv, ivsize,
-  areq->assoclen + areq->cryptlen - ivsize);
+  areq->assoclen + cryptlen - ivsize);
}
 }
 
@@ -1072,7 +1074,7 @@ static void ipsec_esp_encrypt_done(struct device *dev,
 
edesc = container_of(desc, struct talitos_edesc, desc);
 
-   ipsec_esp_unmap(dev, edesc, areq);
+   ipsec_esp_unmap(dev, edesc, areq, true);
 
/* copy the generated ICV to dst */
if (edesc->icv_ool) {
@@ -1108,7 +1110,7 @@ static void ipsec_esp_decrypt_swauth_done(struct device 
*dev,
 
edesc = container_of(desc, struct talitos_edesc, desc);
 
-   ipsec_esp_unmap(dev, edesc, req);
+   ipsec_esp_unmap(dev, edesc, req, false);
 
if (!err) {
/* auth check */
@@ -1145,7 +1147,7 @@ static void ipsec_esp_decrypt_hwauth_done(struct device 
*dev,
 
edesc = container_of(desc, struct talitos_edesc, desc);
 
-   ipsec_esp_unmap(dev, edesc, req);
+   ipsec_esp_unmap(dev, edesc, req, false);
 
/* check ICV auth status */
if (!err && ((desc->hdr_lo & DESC_HDR_LO_ICCR1_MASK) !=
@@ -1248,6 +1250,7 @@ static int talitos_sg_map(struct device *dev, struct 
scatterlist *src,
  * fill in and submit ipsec_esp descriptor
  */
 static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq,
+bool encrypt,
 void (*callback)(struct device *dev,
  struct talitos_desc *desc,
  void *context, int error))
@@ -1257,7 +1260,7 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct 
aead_request *areq,
struct talitos_ctx *ctx = crypto_aead_ctx(aead);
struct device *dev = ctx->dev;
struct talitos_desc *desc = >desc;
-   unsigned int cryptlen = areq->cryptlen;
+   unsigned int cryptlen = areq->cryptlen - (encrypt ? 0 : authsize);
unsigned int ivsize = crypto_aead_ivsize(aead);
int tbl_off = 0;
int sg_count, ret;
@@ -1384,7 +1387,7 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct 
aead_request *areq,
 
ret = talitos_submit(dev, ctx->ch, desc, callback, areq);
if (ret != -EINPROGRESS) {
-   ipsec_esp_unmap(dev, edesc, areq);
+   ipsec_esp_unmap(dev, edesc, areq, encrypt);
kfree(edesc);
}
return ret;
@@ -1502,9 +1505,10 @@ static struct talitos_edesc *aead_edesc_alloc(struct 
aead_request *areq, u8 *iv,
unsigned int authsize = crypto_aead_authsize(authenc);
struct talitos_ctx *ctx = crypto_aead_ctx(authenc);
unsigned int ivsize = crypto_aead_ivsize(authenc);
+   unsigned int cryptlen = areq->cryptlen - (encrypt ? 0 : authsize);
 
return talitos_edesc_alloc(ctx->dev, areq->src, areq->dst,
-  iv, areq->assoclen, areq->cryptlen,
+ 

[PATCH v1 07/15] crypto: talitos - fix ECB algs ivsize

2019-05-21 Thread Christophe Leroy
ECB's ivsize must be 0.

Signed-off-by: Christophe Leroy 
Fixes: 5e75ae1b3cef ("crypto: talitos - add new crypto modes")
---
 drivers/crypto/talitos.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index 1e5410f92166..6f6f34754ad8 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -2809,7 +2809,6 @@ static struct talitos_alg_template driver_algs[] = {
.cra_ablkcipher = {
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
-   .ivsize = AES_BLOCK_SIZE,
.setkey = ablkcipher_aes_setkey,
}
},
@@ -2862,7 +2861,6 @@ static struct talitos_alg_template driver_algs[] = {
.cra_ablkcipher = {
.min_keysize = DES_KEY_SIZE,
.max_keysize = DES_KEY_SIZE,
-   .ivsize = DES_BLOCK_SIZE,
.setkey = ablkcipher_des_setkey,
}
},
@@ -2897,7 +2895,6 @@ static struct talitos_alg_template driver_algs[] = {
.cra_ablkcipher = {
.min_keysize = DES3_EDE_KEY_SIZE,
.max_keysize = DES3_EDE_KEY_SIZE,
-   .ivsize = DES3_EDE_BLOCK_SIZE,
.setkey = ablkcipher_des3_setkey,
}
},
-- 
2.13.3



[PATCH v1 05/15] crypto: talitos - fix CTR alg blocksize

2019-05-21 Thread Christophe Leroy
CTR has a blocksize of 1.

Signed-off-by: Christophe Leroy 
Fixes: 5e75ae1b3cef ("crypto: talitos - add new crypto modes")
---
 drivers/crypto/talitos.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index 95f71e18bf55..8b9a529f1b66 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -2822,7 +2822,7 @@ static struct talitos_alg_template driver_algs[] = {
.alg.crypto = {
.cra_name = "ctr(aes)",
.cra_driver_name = "ctr-aes-talitos",
-   .cra_blocksize = AES_BLOCK_SIZE,
+   .cra_blocksize = 1,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
 CRYPTO_ALG_ASYNC,
.cra_ablkcipher = {
-- 
2.13.3



[PATCH v1 04/15] crypto: talitos - check AES key size

2019-05-21 Thread Christophe Leroy
Although the HW accepts any size and silently truncates
it to the correct length, the extra tests expects EINVAL
to be returned when the key size is not valid.

Signed-off-by: Christophe Leroy 
Fixes: 4de9d0b547b9 ("crypto: talitos - Add ablkcipher algorithms")
---
 drivers/crypto/talitos.c | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index 6312f8d501b1..95f71e18bf55 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -1622,6 +1622,18 @@ static int ablkcipher_des3_setkey(struct 
crypto_ablkcipher *cipher,
return ablkcipher_setkey(cipher, key, keylen);
 }
 
+static int ablkcipher_aes_setkey(struct crypto_ablkcipher *cipher,
+ const u8 *key, unsigned int keylen)
+{
+   if (keylen == AES_KEYSIZE_128 || keylen == AES_KEYSIZE_192 ||
+   keylen == AES_KEYSIZE_256)
+   return ablkcipher_setkey(cipher, key, keylen);
+
+   crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
+
+   return -EINVAL;
+}
+
 static void common_nonsnoop_unmap(struct device *dev,
  struct talitos_edesc *edesc,
  struct ablkcipher_request *areq)
@@ -2782,6 +2794,7 @@ static struct talitos_alg_template driver_algs[] = {
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
+   .setkey = ablkcipher_aes_setkey,
}
},
.desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU |
@@ -2798,6 +2811,7 @@ static struct talitos_alg_template driver_algs[] = {
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
+   .setkey = ablkcipher_aes_setkey,
}
},
.desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU |
@@ -2815,6 +2829,7 @@ static struct talitos_alg_template driver_algs[] = {
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
+   .setkey = ablkcipher_aes_setkey,
}
},
.desc_hdr_template = DESC_HDR_TYPE_AESU_CTR_NONSNOOP |
-- 
2.13.3



[PATCH v1 02/15] crypto: talitos - rename alternative AEAD algos.

2019-05-21 Thread Christophe Leroy
The talitos driver has two ways to perform AEAD depending on the
HW capability. Some HW support both. It is needed to give them
different names to distingish which one it is for instance when
a test fails.

Signed-off-by: Christophe Leroy 
Fixes: 7405c8d7ff97 ("crypto: talitos - templates for AEAD using 
HMAC_SNOOP_NO_AFEU")
Cc: sta...@vger.kernel.org
---
 drivers/crypto/talitos.c | 16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index f443cbe7da80..6f8bc6467706 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -2356,7 +2356,7 @@ static struct talitos_alg_template driver_algs[] = {
.base = {
.cra_name = "authenc(hmac(sha1),cbc(aes))",
.cra_driver_name = "authenc-hmac-sha1-"
-  "cbc-aes-talitos",
+  "cbc-aes-talitos-hsna",
.cra_blocksize = AES_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_ASYNC,
},
@@ -2401,7 +2401,7 @@ static struct talitos_alg_template driver_algs[] = {
.cra_name = "authenc(hmac(sha1),"
"cbc(des3_ede))",
.cra_driver_name = "authenc-hmac-sha1-"
-  "cbc-3des-talitos",
+  "cbc-3des-talitos-hsna",
.cra_blocksize = DES3_EDE_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_ASYNC,
},
@@ -2444,7 +2444,7 @@ static struct talitos_alg_template driver_algs[] = {
.base = {
.cra_name = "authenc(hmac(sha224),cbc(aes))",
.cra_driver_name = "authenc-hmac-sha224-"
-  "cbc-aes-talitos",
+  "cbc-aes-talitos-hsna",
.cra_blocksize = AES_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_ASYNC,
},
@@ -2489,7 +2489,7 @@ static struct talitos_alg_template driver_algs[] = {
.cra_name = "authenc(hmac(sha224),"
"cbc(des3_ede))",
.cra_driver_name = "authenc-hmac-sha224-"
-  "cbc-3des-talitos",
+  "cbc-3des-talitos-hsna",
.cra_blocksize = DES3_EDE_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_ASYNC,
},
@@ -2532,7 +2532,7 @@ static struct talitos_alg_template driver_algs[] = {
.base = {
.cra_name = "authenc(hmac(sha256),cbc(aes))",
.cra_driver_name = "authenc-hmac-sha256-"
-  "cbc-aes-talitos",
+  "cbc-aes-talitos-hsna",
.cra_blocksize = AES_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_ASYNC,
},
@@ -2577,7 +2577,7 @@ static struct talitos_alg_template driver_algs[] = {
.cra_name = "authenc(hmac(sha256),"
"cbc(des3_ede))",
.cra_driver_name = "authenc-hmac-sha256-"
-  "cbc-3des-talitos",
+  "cbc-3des-talitos-hsna",
.cra_blocksize = DES3_EDE_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_ASYNC,
},
@@ -2706,7 +2706,7 @@ static struct talitos_alg_template driver_algs[] = {
.base = {
.cra_name = "authenc(hmac(md5),cbc(aes))",
.cra_driver_name = "authenc-hmac-md5-"
-  "cbc-aes-talitos",
+  "cbc-aes-talitos-hsna",
.cra_blocksize = AES_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_ASYNC,
},
@@ -2749,7 +2749,7 @@ static struct talitos_alg_template driver_algs[] = {
.base = {
.cra_name = "authenc(hmac(md5),cbc(des3_ede))",
.cra_driver_name = "authenc-hmac-md5-"
-  "cbc-3des-talitos",
+  

[PATCH v1 01/15] crypto: talitos - fix skcipher failure due to wrong output IV

2019-05-21 Thread Christophe Leroy
Selftests report the following:

[2.984845] alg: skcipher: cbc-aes-talitos encryption test failed (wrong 
output IV) on test vector 0, cfg="in-place"
[2.995377] : 3d af ba 42 9d 9e b4 30 b4 22 da 80 2c 9f ac 41
[3.032673] alg: skcipher: cbc-des-talitos encryption test failed (wrong 
output IV) on test vector 0, cfg="in-place"
[3.043185] : fe dc ba 98 76 54 32 10
[3.063238] alg: skcipher: cbc-3des-talitos encryption test failed (wrong 
output IV) on test vector 0, cfg="in-place"
[3.073818] : 7d 33 88 93 0f 93 b2 42

This above dumps show that the actual output IV is indeed the input IV.
This is due to the IV not being copied back into the request.

This patch fixes that.

Signed-off-by: Christophe Leroy 
Fixes: 4de9d0b547b9 ("crypto: talitos - Add ablkcipher algorithms")
Cc: sta...@vger.kernel.org
Reviewed-by: Horia Geanta 
---
 drivers/crypto/talitos.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index 1d429fc073d1..f443cbe7da80 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -1637,11 +1637,15 @@ static void ablkcipher_done(struct device *dev,
int err)
 {
struct ablkcipher_request *areq = context;
+   struct crypto_ablkcipher *cipher = crypto_ablkcipher_reqtfm(areq);
+   struct talitos_ctx *ctx = crypto_ablkcipher_ctx(cipher);
+   unsigned int ivsize = crypto_ablkcipher_ivsize(cipher);
struct talitos_edesc *edesc;
 
edesc = container_of(desc, struct talitos_edesc, desc);
 
common_nonsnoop_unmap(dev, edesc, areq);
+   memcpy(areq->info, ctx->iv, ivsize);
 
kfree(edesc);
 
-- 
2.13.3



[PATCH v1 03/15] crypto: talitos - reduce max key size for SEC1

2019-05-21 Thread Christophe Leroy
SEC1 doesn't support SHA384/512, so it doesn't require
longer keys.

This patch reduces the max key size when the driver
is built for SEC1 only.

Signed-off-by: Christophe Leroy 
Fixes: 03d2c5114c95 ("crypto: talitos - Extend max key length for 
SHA384/512-HMAC and AEAD")
---
 drivers/crypto/talitos.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index 6f8bc6467706..6312f8d501b1 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -837,7 +837,11 @@ static void talitos_unregister_rng(struct device *dev)
  * HMAC_SNOOP_NO_AFEA (HSNA) instead of type IPSEC_ESP
  */
 #define TALITOS_CRA_PRIORITY_AEAD_HSNA (TALITOS_CRA_PRIORITY - 1)
+#ifdef CONFIG_CRYPTO_DEV_TALITOS_SEC2
 #define TALITOS_MAX_KEY_SIZE   (AES_MAX_KEY_SIZE + SHA512_BLOCK_SIZE)
+#else
+#define TALITOS_MAX_KEY_SIZE   (AES_MAX_KEY_SIZE + SHA256_BLOCK_SIZE)
+#endif
 #define TALITOS_MAX_IV_LENGTH  16 /* max of AES_BLOCK_SIZE, 
DES3_EDE_BLOCK_SIZE */
 
 struct talitos_ctx {
-- 
2.13.3



[PATCH v1 00/15] Fixing selftests failure on Talitos driver

2019-05-21 Thread Christophe Leroy
Several test failures have popped up following recent changes to crypto
selftests.

This series fixes (most of) them.

The last three patches are trivial cleanups.

Christophe Leroy (15):
  crypto: talitos - fix skcipher failure due to wrong output IV
  crypto: talitos - rename alternative AEAD algos.
  crypto: talitos - reduce max key size for SEC1
  crypto: talitos - check AES key size
  crypto: talitos - fix CTR alg blocksize
  crypto: talitos - check data blocksize in ablkcipher.
  crypto: talitos - fix ECB algs ivsize
  crypto: talitos - Do not modify req->cryptlen on decryption.
  crypto: talitos - HMAC SNOOP NO AFEU mode requires SW icv checking.
  crypto: talitos - properly handle split ICV.
  crypto: talitos - Align SEC1 accesses to 32 bits boundaries.
  crypto: talitos - fix AEAD processing.
  Revert "crypto: talitos - export the talitos_submit function"
  crypto: talitos - use IS_ENABLED() in has_ftr_sec1()
  crypto: talitos - use SPDX-License-Identifier

 drivers/crypto/talitos.c | 281 ++-
 drivers/crypto/talitos.h |  45 ++--
 2 files changed, 139 insertions(+), 187 deletions(-)

-- 
2.13.3



Re: [PATCH 1/2] open: add close_range()

2019-05-21 Thread Christian Brauner
On Tue, May 21, 2019 at 03:10:11PM +0200, Florian Weimer wrote:
> * Christian Brauner:
> 
> >> Solaris has an fdwalk function:
> >> 
> >>   
> >> 
> >> So a different way to implement this would expose a nextfd system call
> >
> > Meh. If nextfd() then I would like it to be able to:
> > - get the nextfd(fd) >= fd
> > - get highest open fd e.g. nextfd(-1)
> 
> The highest open descriptor isn't istering for fdwalk because nextfd
> would just fail.

Sure. I was thinking about other usecases. For example, sometimes in
userspace you want to do the following:
save_fd = dup(fd,  
> > But then I wonder if nextfd() needs to be a syscall and isn't just
> > either:
> > fcntl(fd, F_GET_NEXT)?
> > or
> > prctl(PR_GET_NEXT)?
> 
> I think the fcntl route is a bit iffy because you might need it to get
> the *first* valid descriptor.
> 
> >> to userspace, so that we can use that to implement both fdwalk and
> >> closefrom.  But maybe fdwalk is just too obscure, given the existence of
> >> /proc.
> >
> > Yeah we probably don't need fdwalk.
> 
> Agreed.  Just wanted to bring it up for completeness.  I certainly don't
> want to derail the implementation of close_range.
> 
> Thanks,
> Florian


Re: [PATCH 1/2] open: add close_range()

2019-05-21 Thread Christian Brauner
On Tue, May 21, 2019 at 03:10:11PM +0200, Florian Weimer wrote:
> * Christian Brauner:
> 
> >> Solaris has an fdwalk function:
> >> 
> >>   
> >> 
> >> So a different way to implement this would expose a nextfd system call
> >
> > Meh. If nextfd() then I would like it to be able to:
> > - get the nextfd(fd) >= fd
> > - get highest open fd e.g. nextfd(-1)
> 
> The highest open descriptor isn't istering for fdwalk because nextfd
> would just fail.
> 
> > But then I wonder if nextfd() needs to be a syscall and isn't just
> > either:
> > fcntl(fd, F_GET_NEXT)?
> > or
> > prctl(PR_GET_NEXT)?
> 
> I think the fcntl route is a bit iffy because you might need it to get
> the *first* valid descriptor.

Oh, how would that be difficult? Maybe I'm missing context.
Couldn't you just do

fcntl(0, F_GET_NEXT)

> 
> >> to userspace, so that we can use that to implement both fdwalk and
> >> closefrom.  But maybe fdwalk is just too obscure, given the existence of
> >> /proc.
> >
> > Yeah we probably don't need fdwalk.
> 
> Agreed.  Just wanted to bring it up for completeness.  I certainly don't
> want to derail the implementation of close_range.

No, that's perfectly fine. I mean, you clearly need this and are one of
the major stakeholders. For example, Rust (probably also Python) will
call down into libc and not use the syscall directly. They kinda do this
with getfdtable rn already.
So what you say makes sense for libc has some relevance for the other
tools as well.

Christian


[PATCH v2] powerpc/mm: mark more tlb functions as __always_inline

2019-05-21 Thread Masahiro Yamada
With CONFIG_OPTIMIZE_INLINING enabled, Laura Abbott reported error
with gcc 9.1.1:

  arch/powerpc/mm/book3s64/radix_tlb.c: In function '_tlbiel_pid':
  arch/powerpc/mm/book3s64/radix_tlb.c:104:2: warning: asm operand 3 probably 
doesn't match constraints
104 |  asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
|  ^~~
  arch/powerpc/mm/book3s64/radix_tlb.c:104:2: error: impossible constraint in 
'asm'

Fixing _tlbiel_pid() is enough to address the warning above, but I
inlined more functions to fix all potential issues.

To meet the "i" (immediate) constraint for the asm operands, functions
propagating "ric" must be always inlined.

Fixes: 9012d011660e ("compiler: allow all arches to enable 
CONFIG_OPTIMIZE_INLINING")
Reported-by: Laura Abbott 
Signed-off-by: Masahiro Yamada 
---

Changes in v2:
  - Do not split lines

 arch/powerpc/mm/book3s64/hash_native.c |  2 +-
 arch/powerpc/mm/book3s64/radix_tlb.c   | 32 
 2 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/arch/powerpc/mm/book3s64/hash_native.c 
b/arch/powerpc/mm/book3s64/hash_native.c
index aaa28fd..c854151 100644
--- a/arch/powerpc/mm/book3s64/hash_native.c
+++ b/arch/powerpc/mm/book3s64/hash_native.c
@@ -60,7 +60,7 @@ static inline void tlbiel_hash_set_isa206(unsigned int set, 
unsigned int is)
  * tlbiel instruction for hash, set invalidation
  * i.e., r=1 and is=01 or is=10 or is=11
  */
-static inline void tlbiel_hash_set_isa300(unsigned int set, unsigned int is,
+static __always_inline void tlbiel_hash_set_isa300(unsigned int set, unsigned 
int is,
unsigned int pid,
unsigned int ric, unsigned int prs)
 {
diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c 
b/arch/powerpc/mm/book3s64/radix_tlb.c
index 4d84136..4d3dc10 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -29,7 +29,7 @@
  * tlbiel instruction for radix, set invalidation
  * i.e., r=1 and is=01 or is=10 or is=11
  */
-static inline void tlbiel_radix_set_isa300(unsigned int set, unsigned int is,
+static __always_inline void tlbiel_radix_set_isa300(unsigned int set, unsigned 
int is,
unsigned int pid,
unsigned int ric, unsigned int prs)
 {
@@ -150,8 +150,8 @@ static __always_inline void __tlbie_lpid(unsigned long 
lpid, unsigned long ric)
trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
 }
 
-static inline void __tlbiel_lpid_guest(unsigned long lpid, int set,
-   unsigned long ric)
+static __always_inline void __tlbiel_lpid_guest(unsigned long lpid, int set,
+   unsigned long ric)
 {
unsigned long rb,rs,prs,r;
 
@@ -167,8 +167,8 @@ static inline void __tlbiel_lpid_guest(unsigned long lpid, 
int set,
 }
 
 
-static inline void __tlbiel_va(unsigned long va, unsigned long pid,
-  unsigned long ap, unsigned long ric)
+static __always_inline void __tlbiel_va(unsigned long va, unsigned long pid,
+   unsigned long ap, unsigned long ric)
 {
unsigned long rb,rs,prs,r;
 
@@ -183,8 +183,8 @@ static inline void __tlbiel_va(unsigned long va, unsigned 
long pid,
trace_tlbie(0, 1, rb, rs, ric, prs, r);
 }
 
-static inline void __tlbie_va(unsigned long va, unsigned long pid,
- unsigned long ap, unsigned long ric)
+static __always_inline void __tlbie_va(unsigned long va, unsigned long pid,
+  unsigned long ap, unsigned long ric)
 {
unsigned long rb,rs,prs,r;
 
@@ -199,8 +199,8 @@ static inline void __tlbie_va(unsigned long va, unsigned 
long pid,
trace_tlbie(0, 0, rb, rs, ric, prs, r);
 }
 
-static inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid,
- unsigned long ap, unsigned long ric)
+static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long 
lpid,
+   unsigned long ap, unsigned long ric)
 {
unsigned long rb,rs,prs,r;
 
@@ -239,7 +239,7 @@ static inline void fixup_tlbie_lpid(unsigned long lpid)
 /*
  * We use 128 set in radix mode and 256 set in hpt mode.
  */
-static inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
+static __always_inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
 {
int set;
 
@@ -341,7 +341,7 @@ static inline void _tlbie_lpid(unsigned long lpid, unsigned 
long ric)
asm volatile("eieio; tlbsync; ptesync": : :"memory");
 }
 
-static inline void _tlbiel_lpid_guest(unsigned long lpid, unsigned long ric)
+static __always_inline void _tlbiel_lpid_guest(unsigned long lpid, unsigned 
long ric)
 {
int set;
 
@@ -381,8 +381,8 @@ static inline void __tlbiel_va_range(unsigned long start, 
unsigned long end,
__tlbiel_va(addr, pid, 

Re: [PATCH 1/2] open: add close_range()

2019-05-21 Thread Florian Weimer
* Christian Brauner:

>> Solaris has an fdwalk function:
>> 
>>   
>> 
>> So a different way to implement this would expose a nextfd system call
>
> Meh. If nextfd() then I would like it to be able to:
> - get the nextfd(fd) >= fd
> - get highest open fd e.g. nextfd(-1)

The highest open descriptor isn't istering for fdwalk because nextfd
would just fail.

> But then I wonder if nextfd() needs to be a syscall and isn't just
> either:
> fcntl(fd, F_GET_NEXT)?
> or
> prctl(PR_GET_NEXT)?

I think the fcntl route is a bit iffy because you might need it to get
the *first* valid descriptor.

>> to userspace, so that we can use that to implement both fdwalk and
>> closefrom.  But maybe fdwalk is just too obscure, given the existence of
>> /proc.
>
> Yeah we probably don't need fdwalk.

Agreed.  Just wanted to bring it up for completeness.  I certainly don't
want to derail the implementation of close_range.

Thanks,
Florian


Re: [PATCH 1/2] open: add close_range()

2019-05-21 Thread Christian Brauner
On Tue, May 21, 2019 at 02:09:29PM +0200, Florian Weimer wrote:
> * Christian Brauner:
> 
> > +/**
> > + * __close_range() - Close all file descriptors in a given range.
> > + *
> > + * @fd: starting file descriptor to close
> > + * @max_fd: last file descriptor to close
> > + *
> > + * This closes a range of file descriptors. All file descriptors
> > + * from @fd up to and including @max_fd are closed.
> > + */
> > +int __close_range(struct files_struct *files, unsigned fd, unsigned max_fd)
> > +{
> > +   unsigned int cur_max;
> > +
> > +   if (fd > max_fd)
> > +   return -EINVAL;
> > +
> > +   rcu_read_lock();
> > +   cur_max = files_fdtable(files)->max_fds;
> > +   rcu_read_unlock();
> > +
> > +   /* cap to last valid index into fdtable */
> > +   if (max_fd >= cur_max)
> > +   max_fd = cur_max - 1;
> > +
> > +   while (fd <= max_fd)
> > +   __close_fd(files, fd++);
> > +
> > +   return 0;
> > +}
> 
> This seems rather drastic.  How long does this block in kernel mode?
> Maybe it's okay as long as the maximum possible value for cur_max stays
> around 4 million or so.

That's probably valid concern when you reach very high numbers though I
wonder how relevant this is in practice.
Also, you would only be blocking yourself I imagine, i.e. you can't DOS
another task with this unless your multi-threaded.

> 
> Solaris has an fdwalk function:
> 
>   
> 
> So a different way to implement this would expose a nextfd system call

Meh. If nextfd() then I would like it to be able to:
- get the nextfd(fd) >= fd
- get highest open fd e.g. nextfd(-1)

But then I wonder if nextfd() needs to be a syscall and isn't just
either:
fcntl(fd, F_GET_NEXT)?
or
prctl(PR_GET_NEXT)?

Technically, one could also do:

fd_range(unsigned fd, unsigend end_fd, unsigned flags);

fd_range(3, 50, FD_RANGE_CLOSE);

/* return highest fd within the range [3, 50] */
fd_range(3, 50, FD_RANGE_NEXT);

/* return highest fd */
fd_range(3, UINT_MAX, FD_RANGE_NEXT);

This syscall could also reasonably be extended.

> to userspace, so that we can use that to implement both fdwalk and
> closefrom.  But maybe fdwalk is just too obscure, given the existence of
> /proc.

Yeah we probably don't need fdwalk.

> 
> I'll happily implement closefrom on top of close_range in glibc (plus
> fallback for older kernels based on /proc—with an abort in case that
> doesn't work because the RLIMIT_NOFILE hack is unreliable
> unfortunately).
> 
> Thanks,
> Florian


Re: [PATCH v7 1/1] iommu: enhance IOMMU dma mode build options

2019-05-21 Thread John Garry

On 20/05/2019 14:59, Zhen Lei wrote:

First, add build option IOMMU_DEFAULT_{LAZY|STRICT}, so that we have the
opportunity to set {lazy|strict} mode as default at build time. Then put
the three config options in an choice, make people can only choose one of
the three at a time.

The default IOMMU dma modes on each ARCHs have no change.

Signed-off-by: Zhen Lei 


Apart from more minor comments, FWIW:

Reviewed-by: John Garry 


---
 arch/ia64/kernel/pci-dma.c|  2 +-
 arch/powerpc/platforms/powernv/pci-ioda.c |  3 ++-
 arch/s390/pci/pci_dma.c   |  2 +-
 arch/x86/kernel/pci-dma.c |  7 ++---
 drivers/iommu/Kconfig | 44 ++-
 drivers/iommu/amd_iommu_init.c|  3 ++-
 drivers/iommu/intel-iommu.c   |  2 +-
 drivers/iommu/iommu.c |  3 ++-
 8 files changed, 48 insertions(+), 18 deletions(-)

diff --git a/arch/ia64/kernel/pci-dma.c b/arch/ia64/kernel/pci-dma.c
index fe988c49f01ce6a..655511dbf3c3b34 100644
--- a/arch/ia64/kernel/pci-dma.c
+++ b/arch/ia64/kernel/pci-dma.c
@@ -22,7 +22,7 @@
 int force_iommu __read_mostly;
 #endif

-int iommu_pass_through;
+int iommu_pass_through = IS_ENABLED(CONFIG_IOMMU_DEFAULT_PASSTHROUGH);


As commented privately, I could never see this set for ia64, and it 
seems to exist just to keep the linker happy. Anyway, I am not sure if 
ever suitable to be set.




 static int __init pci_iommu_init(void)
 {
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index 3ead4c237ed0ec9..383e082a9bb985c 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -85,7 +85,8 @@ void pe_level_printk(const struct pnv_ioda_pe *pe, const char 
*level,
va_end(args);
 }

-static bool pnv_iommu_bypass_disabled __read_mostly;
+static bool pnv_iommu_bypass_disabled __read_mostly =
+   !IS_ENABLED(CONFIG_IOMMU_DEFAULT_PASSTHROUGH);
 static bool pci_reset_phbs __read_mostly;

 static int __init iommu_setup(char *str)
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index 9e52d1527f71495..784ad1e0acecfb1 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -17,7 +17,7 @@

 static struct kmem_cache *dma_region_table_cache;
 static struct kmem_cache *dma_page_table_cache;
-static int s390_iommu_strict;
+static int s390_iommu_strict = IS_ENABLED(CONFIG_IOMMU_DEFAULT_STRICT);

 static int zpci_refresh_global(struct zpci_dev *zdev)
 {
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index d460998ae828514..fb2bab42a0a3173 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -43,11 +43,8 @@
  * It is also possible to disable by default in kernel config, and enable with
  * iommu=nopt at boot time.
  */
-#ifdef CONFIG_IOMMU_DEFAULT_PASSTHROUGH
-int iommu_pass_through __read_mostly = 1;
-#else
-int iommu_pass_through __read_mostly;
-#endif
+int iommu_pass_through __read_mostly =
+   IS_ENABLED(CONFIG_IOMMU_DEFAULT_PASSTHROUGH);

 extern struct iommu_table_entry __iommu_table[], __iommu_table_end[];

diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 6f07f3b21816c64..8a1f1793cde76b4 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -74,17 +74,47 @@ config IOMMU_DEBUGFS
  debug/iommu directory, and then populate a subdirectory with
  entries as required.

-config IOMMU_DEFAULT_PASSTHROUGH
-   bool "IOMMU passthrough by default"
+choice
+   prompt "IOMMU default DMA mode"
depends on IOMMU_API
-help
- Enable passthrough by default, removing the need to pass in
- iommu.passthrough=on or iommu=pt through command line. If this
- is enabled, you can still disable with iommu.passthrough=off
- or iommu=nopt depending on the architecture.
+   default IOMMU_DEFAULT_PASSTHROUGH if (PPC_POWERNV && PCI)
+   default IOMMU_DEFAULT_LAZY if (AMD_IOMMU || INTEL_IOMMU || S390_IOMMU)
+   default IOMMU_DEFAULT_STRICT
+   help
+ This option allows IOMMU DMA mode to be chose at build time, to


I'd say /s/allows IOMMU/allows an IOMMU/, /s/chose/chosen/


+ override the default DMA mode of each ARCHs, removing the need to


ARCHs should be singular


+ pass in kernel parameters through command line. You can still use
+ ARCHs specific boot options to override this option again.
+
+config IOMMU_DEFAULT_PASSTHROUGH
+   bool "passthrough"
+   help
+ In this mode, the DMA access through IOMMU without any addresses
+ translation. That means, the wrong or illegal DMA access can not
+ be caught, no error information will be reported.

  If unsure, say N here.

+config IOMMU_DEFAULT_LAZY
+   bool "lazy"
+   help
+ Support lazy mode, where for every IOMMU DMA unmap operation, the
+ flush operation 

Re: [PATCH 1/2] open: add close_range()

2019-05-21 Thread Florian Weimer
* Christian Brauner:

> +/**
> + * __close_range() - Close all file descriptors in a given range.
> + *
> + * @fd: starting file descriptor to close
> + * @max_fd: last file descriptor to close
> + *
> + * This closes a range of file descriptors. All file descriptors
> + * from @fd up to and including @max_fd are closed.
> + */
> +int __close_range(struct files_struct *files, unsigned fd, unsigned max_fd)
> +{
> + unsigned int cur_max;
> +
> + if (fd > max_fd)
> + return -EINVAL;
> +
> + rcu_read_lock();
> + cur_max = files_fdtable(files)->max_fds;
> + rcu_read_unlock();
> +
> + /* cap to last valid index into fdtable */
> + if (max_fd >= cur_max)
> + max_fd = cur_max - 1;
> +
> + while (fd <= max_fd)
> + __close_fd(files, fd++);
> +
> + return 0;
> +}

This seems rather drastic.  How long does this block in kernel mode?
Maybe it's okay as long as the maximum possible value for cur_max stays
around 4 million or so.

Solaris has an fdwalk function:

  

So a different way to implement this would expose a nextfd system call
to userspace, so that we can use that to implement both fdwalk and
closefrom.  But maybe fdwalk is just too obscure, given the existence of
/proc.

I'll happily implement closefrom on top of close_range in glibc (plus
fallback for older kernels based on /proc—with an abort in case that
doesn't work because the RLIMIT_NOFILE hack is unreliable
unfortunately).

Thanks,
Florian


Re: [PATCH] misc: remove redundant 'default n' from Kconfig-s

2019-05-21 Thread Michael Ellerman
Bartlomiej Zolnierkiewicz  writes:
> 'default n' is the default value for any bool or tristate Kconfig
> setting so there is no need to write it explicitly.
>
> Also since commit f467c5640c29 ("kconfig: only write '# CONFIG_FOO
> is not set' for visible symbols") the Kconfig behavior is the same
> regardless of 'default n' being present or not:
>
> ...
> One side effect of (and the main motivation for) this change is making
> the following two definitions behave exactly the same:
> 
> config FOO
> bool
> 
> config FOO
> bool
> default n
> 
> With this change, neither of these will generate a
> '# CONFIG_FOO is not set' line (assuming FOO isn't selected/implied).
> That might make it clearer to people that a bare 'default n' is
> redundant.
> ...
>
> Signed-off-by: Bartlomiej Zolnierkiewicz 
...
> Index: b/drivers/misc/ocxl/Kconfig
> ===
> --- a/drivers/misc/ocxl/Kconfig
> +++ b/drivers/misc/ocxl/Kconfig
> @@ -4,7 +4,6 @@
>  
>  config OCXL_BASE
>   bool
> - default n
>   select PPC_COPRO_BASE
>  
>  config OCXL

Acked-by: Michael Ellerman  (ocxl)

cheers


Re: [PATCH] powerpc/powernv: Return for invalid IMC domain

2019-05-21 Thread Michael Ellerman
Anju T Sudhakar  writes:
> Currently init_imc_pmu() can be failed either because
> an IMC unit with invalid domain(i.e an IMC node not
> supported by the kernel) is attempted a pmu-registration
> or something went wrong while registering a valid IMC unit.
> In both the cases kernel provides a 'Registration failed'
> error message.
>
> Example:
> Log message, when trace-imc node is not supported by the kernel, and the
> skiboot supports trace-imc node.
>
> So for kernel, trace-imc node is now an unknown domain.
>
> [1.731870] nest_phb5_imc performance monitor hardware support registered
> [1.731944] nest_powerbus0_imc performance monitor hardware support 
> registered
> [1.734458] thread_imc performance monitor hardware support registered
> [1.734460] IMC Unknown Device type
> [1.734462] IMC PMU (null) Register failed
> [1.734558] nest_xlink0_imc performance monitor hardware support registered
> [1.734614] nest_xlink1_imc performance monitor hardware support registered
> [1.734670] nest_xlink2_imc performance monitor hardware support registered
> [1.747043] Initialise system trusted keyrings
> [1.747054] Key type blacklist registered
>
>
> To avoid ambiguity on the error message, return for invalid domain
> before attempting a pmu registration. 

What do we print once the patch is applied?

cheers

> Fixes: 8f95faaac56c1 (`powerpc/powernv: Detect and create IMC device`)
> Reported-by: Pavaman Subramaniyam 
> Signed-off-by: Anju T Sudhakar 
> ---
>  arch/powerpc/platforms/powernv/opal-imc.c | 4 
>  1 file changed, 4 insertions(+)
>
> diff --git a/arch/powerpc/platforms/powernv/opal-imc.c 
> b/arch/powerpc/platforms/powernv/opal-imc.c
> index 58a0794..4e8b0e1 100644
> --- a/arch/powerpc/platforms/powernv/opal-imc.c
> +++ b/arch/powerpc/platforms/powernv/opal-imc.c
> @@ -161,6 +161,10 @@ static int imc_pmu_create(struct device_node *parent, 
> int pmu_index, int domain)
>   struct imc_pmu *pmu_ptr;
>   u32 offset;
>  
> + /* Return for unknown domain */
> + if (domain < 0)
> + return -EINVAL;
> +
>   /* memory for pmu */
>   pmu_ptr = kzalloc(sizeof(*pmu_ptr), GFP_KERNEL);
>   if (!pmu_ptr)
> -- 
> 1.8.3.1


[PATCH 2/2] tests: add close_range() tests

2019-05-21 Thread Christian Brauner
This adds basic tests for the new close_range() syscall.
- test that no invalid flags can be passed
- test that a range of file descriptors is correctly closed
- test that a range of file descriptors is correctly closed if there there
  are already closed file descriptors in the range
- test that max_fd is correctly capped to the current fdtable maximum

Signed-off-by: Christian Brauner 
Cc: Arnd Bergmann 
Cc: Jann Horn 
Cc: David Howells 
Cc: Dmitry V. Levin 
Cc: Oleg Nesterov 
Cc: Florian Weimer 
Cc: linux-...@vger.kernel.org
---
 tools/testing/selftests/Makefile  |   1 +
 tools/testing/selftests/core/.gitignore   |   1 +
 tools/testing/selftests/core/Makefile |   6 +
 .../testing/selftests/core/close_range_test.c | 128 ++
 4 files changed, 136 insertions(+)
 create mode 100644 tools/testing/selftests/core/.gitignore
 create mode 100644 tools/testing/selftests/core/Makefile
 create mode 100644 tools/testing/selftests/core/close_range_test.c

diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 9781ca79794a..06e57fabbff9 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -4,6 +4,7 @@ TARGETS += bpf
 TARGETS += breakpoints
 TARGETS += capabilities
 TARGETS += cgroup
+TARGETS += core
 TARGETS += cpufreq
 TARGETS += cpu-hotplug
 TARGETS += drivers/dma-buf
diff --git a/tools/testing/selftests/core/.gitignore 
b/tools/testing/selftests/core/.gitignore
new file mode 100644
index ..6e6712ce5817
--- /dev/null
+++ b/tools/testing/selftests/core/.gitignore
@@ -0,0 +1 @@
+close_range_test
diff --git a/tools/testing/selftests/core/Makefile 
b/tools/testing/selftests/core/Makefile
new file mode 100644
index ..de3ae68aa345
--- /dev/null
+++ b/tools/testing/selftests/core/Makefile
@@ -0,0 +1,6 @@
+CFLAGS += -g -I../../../../usr/include/ -I../../../../include
+
+TEST_GEN_PROGS := close_range_test
+
+include ../lib.mk
+
diff --git a/tools/testing/selftests/core/close_range_test.c 
b/tools/testing/selftests/core/close_range_test.c
new file mode 100644
index ..ab10cd205ab9
--- /dev/null
+++ b/tools/testing/selftests/core/close_range_test.c
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "../kselftest.h"
+
+static inline int sys_close_range(unsigned int fd, unsigned int max_fd,
+ unsigned int flags)
+{
+   return syscall(__NR_close_range, fd, max_fd, flags);
+}
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
+int main(int argc, char **argv)
+{
+   const char *test_name = "close_range";
+   int i, ret;
+   int open_fds[100];
+   int fd_max, fd_mid, fd_min;
+
+   ksft_set_plan(7);
+
+   for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
+   int fd;
+
+   fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
+   if (fd < 0) {
+   if (errno == ENOENT)
+   ksft_exit_skip(
+   "%s test: skipping test since /dev/null 
does not exist\n",
+   test_name);
+
+   ksft_exit_fail_msg(
+   "%s test: %s - failed to open /dev/null\n",
+   strerror(errno), test_name);
+   }
+
+   open_fds[i] = fd;
+   }
+
+   fd_min = open_fds[0];
+   fd_max = open_fds[99];
+
+   ret = sys_close_range(fd_min, fd_max, 1);
+   if (!ret)
+   ksft_exit_fail_msg(
+   "%s test: managed to pass invalid flag value\n",
+   test_name);
+   ksft_test_result_pass("do not allow invalid flag values for 
close_range()\n");
+
+   fd_mid = open_fds[50];
+   ret = sys_close_range(fd_min, fd_mid, 0);
+   if (ret < 0)
+   ksft_exit_fail_msg(
+   "%s test: Failed to close range of file descriptors 
from 4 to 50\n",
+   test_name);
+   ksft_test_result_pass("close_range() from %d to %d\n", fd_min, fd_mid);
+
+   for (i = 0; i <= 50; i++) {
+   ret = fcntl(open_fds[i], F_GETFL);
+   if (ret >= 0)
+   ksft_exit_fail_msg(
+   "%s test: Failed to close range of file 
descriptors from 4 to 50\n",
+   test_name);
+   }
+   ksft_test_result_pass("fcntl() verify closed range from %d to %d\n", 
fd_min, fd_mid);
+
+   /* create a couple of gaps */
+   close(57);
+   close(78);
+   close(81);
+   close(82);
+   close(84);
+   close(90);
+
+   fd_mid = open_fds[51];
+   /* Choose slightly lower limit and leave some fds for a later test */
+   fd_max = open_fds[92];
+   ret 

[PATCH 1/2] open: add close_range()

2019-05-21 Thread Christian Brauner
This adds the close_range() syscall. It allows to efficiently close a range
of file descriptors up to all file descriptors of a calling task.

The syscall came up in a recent discussion around the new mount API and
making new file descriptor types cloexec by default. During this
discussion, Al suggested the close_range() syscall (cf. [1]). Note, a
syscall in this manner has been requested by various people over time.

First, it helps to close all file descriptors of an exec()ing task. This
can be done safely via (quoting Al's example from [1] verbatim):

/* that exec is sensitive */
unshare(CLONE_FILES);
/* we don't want anything past stderr here */
close_range(3, ~0U);
execve();

The code snippet above is one way of working around the problem that file
descriptors are not cloexec by default. This is aggravated by the fact that
we can't just switch them over without massively regressing userspace. For
a whole class of programs having an in-kernel method of closing all file
descriptors is very helpful (e.g. demons, service managers, programming
language standard libraries, container managers etc.).
(Please note, unshare(CLONE_FILES) should only be needed if the calling
 task is multi-threaded and shares the file descriptor table with another
 thread in which case two threads could race with one thread allocating
 file descriptors and the other one closing them via close_range(). For the
 general case close_range() before the execve() is sufficient.)

Second, it allows userspace to avoid implementing closing all file
descriptors by parsing through /proc//fd/* and calling close() on each
file descriptor. From looking at various large(ish) userspace code bases
this or similar patterns are very common in:
- service managers (cf. [4])
- libcs (cf. [6])
- container runtimes (cf. [5])
- programming language runtimes/standard libraries
  - Python (cf. [2])
  - Rust (cf. [7], [8])
As Dmitry pointed out there's even a long-standing glibc bug about missing
kernel support for this task (cf. [3]).
In addition, the syscall will also work for tasks that do not have procfs
mounted and on kernels that do not have procfs support compiled in. In such
situations the only way to make sure that all file descriptors are closed
is to call close() on each file descriptor up to UINT_MAX or RLIMIT_NOFILE,
OPEN_MAX trickery (cf. comment [8] on Rust).

The performance is striking. For good measure, comparing the following
simple close_all_fds() userspace implementation that is essentially just
glibc's version in [6]:

static int close_all_fds(void)
{
DIR *dir;
struct dirent *direntp;

dir = opendir("/proc/self/fd");
if (!dir)
return -1;

while ((direntp = readdir(dir))) {
int fd;
if (strcmp(direntp->d_name, ".") == 0)
continue;
if (strcmp(direntp->d_name, "..") == 0)
continue;
fd = atoi(direntp->d_name);
if (fd == 0 || fd == 1 || fd == 2)
continue;
close(fd);
}

closedir(dir); /* cannot fail */
return 0;
}

to close_range() yields:
1. closing 4 open files:
   - close_all_fds(): ~280 us
   - close_range():~24 us

2. closing 1000 open files:
   - close_all_fds(): ~5000 us
   - close_range():   ~800 us

close_range() is designed to allow for some flexibility. Specifically, it
does not simply always close all open file descriptors of a task. Instead,
callers can specify an upper bound.
This is e.g. useful for scenarios where specific file descriptors are
created with well-known numbers that are supposed to be excluded from
getting closed.
For extra paranoia close_range() comes with a flags argument. This can e.g.
be used to implement extension. Once can imagine userspace wanting to stop
at the first error instead of ignoring errors under certain circumstances.
There might be other valid ideas in the future. In any case, a flag
argument doesn't hurt and keeps us on the safe side.

>From an implementation side this is kept rather dumb. It saw some input
from David and Jann but all nonsense is obviously my own!
- Errors to close file descriptors are currently ignored. (Could be changed
  by setting a flag in the future if needed.)
- __close_range() is a rather simplistic wrapper around __close_fd().
  My reasoning behind this is based on the nature of how __close_fd() needs
  to release an fd. But maybe I misunderstood specifics:
  We take the files_lock and rcu-dereference the fdtable of the calling
  task, we find the entry in the fdtable, get the file and need to release
  files_lock before calling filp_close().
  In the meantime the fdtable might have been altered so we can't just
  retake the spinlock and keep the old rcu-reference of the fdtable
  around. Instead we need to grab a fresh reference to the fdtable.
  If my reasoning is correct then 

Re: [PATCH v2] powerpc/mm/hash: Fix get_region_id() for invalid addresses

2019-05-21 Thread Michael Ellerman
On Fri, 2019-05-17 at 13:29:58 UTC, Michael Ellerman wrote:
> From: "Aneesh Kumar K.V" 
> 
> Accesses by userspace to random addresses outside the user or kernel
> address range will generate an SLB fault. When we handle that fault we
> classify the effective address into several classes, eg. user, kernel
> linear, kernel virtual etc.
> 
> For addresses that are completely outside of any valid range, we
> should not insert an SLB entry at all, and instead immediately an
> exception.
> 
> In the past this was handled in two ways. Firstly we would check the
> top nibble of the address (using REGION_ID(ea)) and that would tell us
> if the address was user (0), kernel linear (c), kernel virtual (d), or
> vmemmap (f). If the address didn't match any of these it was invalid.
> 
> Then for each type of address we would do a secondary check. For the
> user region we check against H_PGTABLE_RANGE, for kernel linear we
> would mask the top nibble of the address and then check the address
> against MAX_PHYSMEM_BITS.
> 
> As part of commit 0034d395f89d ("powerpc/mm/hash64: Map all the kernel
> regions in the same 0xc range") we replaced REGION_ID() with
> get_region_id() and changed the masking of the top nibble to only mask
> the top two bits, which introduced a bug.
> 
> Addresses less than (4 << 60) are still handled correctly, they are
> either less than (1 << 60) in which case they are subject to the
> H_PGTABLE_RANGE check, or they are correctly checked against
> MAX_PHYSMEM_BITS.
> 
> However addresses from (4 << 60) to ((0xc << 60) - 1), are incorrectly
> treated as kernel linear addresses in get_region_id(). Then the top
> two bits are cleared by EA_MASK in slb_allocate_kernel() and the
> address is checked against MAX_PHYSMEM_BITS, which it passes due to
> the masking. The end result is we incorrectly insert SLB entries for
> those addresses.
> 
> That is not actually catastrophic, having inserted the SLB entry we
> will then go on to take a page fault for the address and at that point
> we detect the problem and report it as a bad fault.
> 
> Still we should not be inserting those entries, or treating them as
> kernel linear addresses in the first place. So fix get_region_id() to
> detect addresses in that range and return an invalid region id, which
> we cause use to not insert an SLB entry and directly report an
> exception.
> 
> Fixes: 0034d395f89d ("powerpc/mm/hash64: Map all the kernel regions in the 
> same 0xc range")
> Signed-off-by: Aneesh Kumar K.V 
> [mpe: Drop change to EA_MASK for now, rewrite change log]
> Signed-off-by: Michael Ellerman 

Applied to powerpc fixes.

https://git.kernel.org/powerpc/c/c179976cf4cbd2e65f29741d5bc07ccf

cheers


Re: [PATCH] powerpc/mm/hash: Improve address limit checks

2019-05-21 Thread Michael Ellerman
Michael Ellerman  writes:
> On Thu, 2019-05-16 at 11:50:54 UTC, "Aneesh Kumar K.V" wrote:
>> Different parts of the code do the limit check by ignoring the top nibble
>> of EA. ie. we do checks like
>> 
>>  if ((ea & EA_MASK)  >= H_PGTABLE_RANGE)
>>  error
>> 
>> This patch makes sure we don't insert SLB entries for addresses whose top 
>> nibble
>> doesn't match the ignored bits.
>> 
>> With an address like 0x4800, we can result in wrong slb entries 
>> like
>> 
>> 13 4800 400ea1b217000510   1T ESID=   40 VSID=   ea1b217000 
>> LLP:110
>> 
>> without this patch we will map that EA with LINEAR_MAP_REGION_ID and further
>> those addr limit check will return false.
>> 
>> Signed-off-by: Aneesh Kumar K.V 
>
> Applied to powerpc fixes, thanks.
>
> https://git.kernel.org/powerpc/c/c179976cf4cbd2e65f29741d5bc07ccf

Actually this patch was superseeded. This should have been a reply to:

  
https://lore.kernel.org/linuxppc-dev/20190517132958.22299-1-...@ellerman.id.au/

cheers


Re: [PATCH] mm/nvdimm: Use correct #defines instead of opencoding

2019-05-21 Thread Aneesh Kumar K.V
Dan Williams  writes:

> On Mon, May 13, 2019 at 9:46 PM Aneesh Kumar K.V
>  wrote:
>>
>> On 5/14/19 9:42 AM, Dan Williams wrote:
>> > On Mon, May 13, 2019 at 9:05 PM Aneesh Kumar K.V
>> >  wrote:
>> >>
>> >> On 5/14/19 9:28 AM, Dan Williams wrote:
>> >>> On Mon, May 13, 2019 at 7:56 PM Aneesh Kumar K.V
>> >>>  wrote:
>> 
>>  The nfpn related change is needed to fix the kernel message
>> 
>>  "number of pfns truncated from 2617344 to 163584"
>> 
>>  The change makes sure the nfpns stored in the superblock is right value.
>> 
>>  Signed-off-by: Aneesh Kumar K.V 
>>  ---
>> drivers/nvdimm/pfn_devs.c| 6 +++---
>> drivers/nvdimm/region_devs.c | 8 
>> 2 files changed, 7 insertions(+), 7 deletions(-)
>> 
>>  diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
>>  index 347cab166376..6751ff0296ef 100644
>>  --- a/drivers/nvdimm/pfn_devs.c
>>  +++ b/drivers/nvdimm/pfn_devs.c
>>  @@ -777,8 +777,8 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
>> * when populating the vmemmap. This *should* be 
>>  equal to
>> * PMD_SIZE for most architectures.
>> */
>>  -   offset = ALIGN(start + reserve + 64 * npfns,
>>  -   max(nd_pfn->align, PMD_SIZE)) - start;
>>  +   offset = ALIGN(start + reserve + sizeof(struct page) * 
>>  npfns,
>>  +  max(nd_pfn->align, PMD_SIZE)) - start;
>> >>>
>> >>> No, I think we need to record the page-size into the superblock format
>> >>> otherwise this breaks in debug builds where the struct-page size is
>> >>> extended.
>> >>>
>>    } else if (nd_pfn->mode == PFN_MODE_RAM)
>>    offset = ALIGN(start + reserve, nd_pfn->align) - 
>>  start;
>>    else
>>  @@ -790,7 +790,7 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
>>    return -ENXIO;
>>    }
>> 
>>  -   npfns = (size - offset - start_pad - end_trunc) / SZ_4K;
>>  +   npfns = (size - offset - start_pad - end_trunc) / PAGE_SIZE;
>> >>>
>> >>> Similar comment, if the page size is variable then the superblock
>> >>> needs to explicitly account for it.
>> >>>
>> >>
>> >> PAGE_SIZE is not really variable. What we can run into is the issue you
>> >> mentioned above. The size of struct page can change which means the
>> >> reserved space for keeping vmemmap in device may not be sufficient for
>> >> certain kernel builds.
>> >>
>> >> I was planning to add another patch that fails namespace init if we
>> >> don't have enough space to keep the struct page.
>> >>
>> >> Why do you suggest we need to have PAGE_SIZE as part of pfn superblock?
>> >
>> > So that the kernel has a chance to identify cases where the superblock
>> > it is handling was created on a system with different PAGE_SIZE
>> > assumptions.
>> >
>>
>> The reason to do that is we don't have enough space to keep struct page
>> backing the total number of pfns? If so, what i suggested above should
>> handle that.
>>
>> or are you finding any other reason why we should fail a namespace init
>> with a different PAGE_SIZE value?
>
> I want the kernel to be able to start understand cross-architecture
> and cross-configuration geometries. Which to me means incrementing the
> info-block version and recording PAGE_SIZE and sizeof(struct page) in
> the info-block directly.
>
>> My another patch handle the details w.r.t devdax alignment for which
>> devdax got created with PAGE_SIZE 4K but we are now trying to load that
>> in a kernel with PAGE_SIZE 64k.
>
> Sure, but what about the reverse? These info-block format assumptions
> are as fundamental as the byte-order of the info-block, it needs to be
> cross-arch compatible and the x86 assumptions need to be fully lifted.

Something like the below (Not tested). I am not sure what we will init the 
page_size
for minor version < 3. This will mark the namespace disabled if the
PAGE_SIZE and sizeof(struct page) doesn't match with the values used
during namespace create. 

diff --git a/drivers/nvdimm/pfn.h b/drivers/nvdimm/pfn.h
index dde9853453d3..d6e0933d0dd4 100644
--- a/drivers/nvdimm/pfn.h
+++ b/drivers/nvdimm/pfn.h
@@ -36,6 +36,9 @@ struct nd_pfn_sb {
__le32 end_trunc;
/* minor-version-2 record the base alignment of the mapping */
__le32 align;
+   /* minor-version-3 record the page size and struct page size */
+   __le32 page_size;
+   __le32 page_struct_size;
u8 padding[4000];
__le64 checksum;
 };
diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
index 6f9f78858018..bbc1d792d7f3 100644
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -477,6 +477,15 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
if (__le16_to_cpu(pfn_sb->version_minor) < 2)
   

[Bug 203517] WARNING: inconsistent lock state. inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage.

2019-05-21 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=203517

David Sterba (dste...@suse.com) changed:

   What|Removed |Added

 Status|NEW |RESOLVED
 CC||dste...@suse.com
 Resolution|--- |CODE_FIX

--- Comment #8 from David Sterba (dste...@suse.com) ---
https://patchwork.kernel.org/patch/10948813/ fixes the problem and is scheduled
for 5.2 and for stable.

Thanks for the bisecting efforts!

-- 
You are receiving this mail because:
You are watching the assignee of the bug.

[PATCH][next] soc: fsl: fix spelling mistake "Firmaware" -> "Firmware"

2019-05-21 Thread Colin King
From: Colin Ian King 

There is a spelling mistake in a pr_err message. Fix it.

Signed-off-by: Colin Ian King 
---
 drivers/soc/fsl/dpaa2-console.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/soc/fsl/dpaa2-console.c b/drivers/soc/fsl/dpaa2-console.c
index 9168d8ddc932..27243f706f37 100644
--- a/drivers/soc/fsl/dpaa2-console.c
+++ b/drivers/soc/fsl/dpaa2-console.c
@@ -73,7 +73,7 @@ static u64 get_mc_fw_base_address(void)
 
mcfbaregs = ioremap(mc_base_addr.start, resource_size(_base_addr));
if (!mcfbaregs) {
-   pr_err("could not map MC Firmaware Base registers\n");
+   pr_err("could not map MC Firmware Base registers\n");
return 0;
}
 
-- 
2.20.1



Re: [PATCH] powerpc/mm: mark more tlb functions as __always_inline

2019-05-21 Thread Joe Perches
On Tue, 2019-05-21 at 16:27 +0900, Masahiro Yamada wrote:
> On Tue, May 21, 2019 at 3:54 PM Christophe Leroy
> > powerpc accepts lines up to 90 chars, see arch/powerpc/tools/checkpatch.pl
> 
> Ugh, I did not know this. Horrible.
> 
> The Linux coding style should be global in the kernel tree.
> No subsystem should adopts its own coding style.

I don't see a problem using 90 column lines by arch/

There are other subsystem specific variations like the net/

/* multiline comments without initial blank comment lines
 * look like this...
 */

If there were arch specific drivers with style variations
in say drivers/net, then that might be more of an issue.



Re: [PATCH] mm/nvdimm: Use correct #defines instead of opencoding

2019-05-21 Thread Dan Williams
On Mon, May 13, 2019 at 9:46 PM Aneesh Kumar K.V
 wrote:
>
> On 5/14/19 9:42 AM, Dan Williams wrote:
> > On Mon, May 13, 2019 at 9:05 PM Aneesh Kumar K.V
> >  wrote:
> >>
> >> On 5/14/19 9:28 AM, Dan Williams wrote:
> >>> On Mon, May 13, 2019 at 7:56 PM Aneesh Kumar K.V
> >>>  wrote:
> 
>  The nfpn related change is needed to fix the kernel message
> 
>  "number of pfns truncated from 2617344 to 163584"
> 
>  The change makes sure the nfpns stored in the superblock is right value.
> 
>  Signed-off-by: Aneesh Kumar K.V 
>  ---
> drivers/nvdimm/pfn_devs.c| 6 +++---
> drivers/nvdimm/region_devs.c | 8 
> 2 files changed, 7 insertions(+), 7 deletions(-)
> 
>  diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
>  index 347cab166376..6751ff0296ef 100644
>  --- a/drivers/nvdimm/pfn_devs.c
>  +++ b/drivers/nvdimm/pfn_devs.c
>  @@ -777,8 +777,8 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
> * when populating the vmemmap. This *should* be equal 
>  to
> * PMD_SIZE for most architectures.
> */
>  -   offset = ALIGN(start + reserve + 64 * npfns,
>  -   max(nd_pfn->align, PMD_SIZE)) - start;
>  +   offset = ALIGN(start + reserve + sizeof(struct page) * 
>  npfns,
>  +  max(nd_pfn->align, PMD_SIZE)) - start;
> >>>
> >>> No, I think we need to record the page-size into the superblock format
> >>> otherwise this breaks in debug builds where the struct-page size is
> >>> extended.
> >>>
>    } else if (nd_pfn->mode == PFN_MODE_RAM)
>    offset = ALIGN(start + reserve, nd_pfn->align) - start;
>    else
>  @@ -790,7 +790,7 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
>    return -ENXIO;
>    }
> 
>  -   npfns = (size - offset - start_pad - end_trunc) / SZ_4K;
>  +   npfns = (size - offset - start_pad - end_trunc) / PAGE_SIZE;
> >>>
> >>> Similar comment, if the page size is variable then the superblock
> >>> needs to explicitly account for it.
> >>>
> >>
> >> PAGE_SIZE is not really variable. What we can run into is the issue you
> >> mentioned above. The size of struct page can change which means the
> >> reserved space for keeping vmemmap in device may not be sufficient for
> >> certain kernel builds.
> >>
> >> I was planning to add another patch that fails namespace init if we
> >> don't have enough space to keep the struct page.
> >>
> >> Why do you suggest we need to have PAGE_SIZE as part of pfn superblock?
> >
> > So that the kernel has a chance to identify cases where the superblock
> > it is handling was created on a system with different PAGE_SIZE
> > assumptions.
> >
>
> The reason to do that is we don't have enough space to keep struct page
> backing the total number of pfns? If so, what i suggested above should
> handle that.
>
> or are you finding any other reason why we should fail a namespace init
> with a different PAGE_SIZE value?

I want the kernel to be able to start understand cross-architecture
and cross-configuration geometries. Which to me means incrementing the
info-block version and recording PAGE_SIZE and sizeof(struct page) in
the info-block directly.

> My another patch handle the details w.r.t devdax alignment for which
> devdax got created with PAGE_SIZE 4K but we are now trying to load that
> in a kernel with PAGE_SIZE 64k.

Sure, but what about the reverse? These info-block format assumptions
are as fundamental as the byte-order of the info-block, it needs to be
cross-arch compatible and the x86 assumptions need to be fully lifted.


Re: [PATCH] powerpc/mm: mark more tlb functions as __always_inline

2019-05-21 Thread Michael Ellerman
Masahiro Yamada  writes:
> On Tue, May 21, 2019 at 3:54 PM Christophe Leroy
>  wrote:
>> Le 21/05/2019 à 08:16, Masahiro Yamada a écrit :
>> > With CONFIG_OPTIMIZE_INLINING enabled, Laura Abbott reported error
>> > with gcc 9.1.1:
>> >
>> >arch/powerpc/mm/book3s64/radix_tlb.c: In function '_tlbiel_pid':
>> >arch/powerpc/mm/book3s64/radix_tlb.c:104:2: warning: asm operand 3 
>> > probably doesn't match constraints
>> >  104 |  asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
>> >  |  ^~~
>> >arch/powerpc/mm/book3s64/radix_tlb.c:104:2: error: impossible 
>> > constraint in 'asm'
>> >
>> > Fixing _tlbiel_pid() is enough to address the warning above, but I
>> > inlined more functions to fix all potential issues.
>> >
>> > To meet the 'i' (immediate) constraint for the asm operands, functions
>> > propagating propagated 'ric' must be always inlined.
>> >
>> > Fixes: 9012d011660e ("compiler: allow all arches to enable 
>> > CONFIG_OPTIMIZE_INLINING")
>> > Reported-by: Laura Abbott 
>> > Signed-off-by: Masahiro Yamada 
>> > ---
>> >
>> >   arch/powerpc/mm/book3s64/hash_native.c |  8 +++--
>> >   arch/powerpc/mm/book3s64/radix_tlb.c   | 44 +++---
>> >   2 files changed, 30 insertions(+), 22 deletions(-)
>> >
>> > diff --git a/arch/powerpc/mm/book3s64/hash_native.c 
>> > b/arch/powerpc/mm/book3s64/hash_native.c
>> > index aaa28fd918fe..bc2c35c0d2b1 100644
>> > --- a/arch/powerpc/mm/book3s64/hash_native.c
>> > +++ b/arch/powerpc/mm/book3s64/hash_native.c
>> > @@ -60,9 +60,11 @@ static inline void tlbiel_hash_set_isa206(unsigned int 
>> > set, unsigned int is)
>> >* tlbiel instruction for hash, set invalidation
>> >* i.e., r=1 and is=01 or is=10 or is=11
>> >*/
>> > -static inline void tlbiel_hash_set_isa300(unsigned int set, unsigned int 
>> > is,
>> > - unsigned int pid,
>> > - unsigned int ric, unsigned int prs)
>> > +static __always_inline void tlbiel_hash_set_isa300(unsigned int set,
>> > +unsigned int is,
>> > +unsigned int pid,
>> > +unsigned int ric,
>> > +unsigned int prs)
>>
>> Please don't split the line more than it is.
>>
>> powerpc accepts lines up to 90 chars, see arch/powerpc/tools/checkpatch.pl
>
> Ugh, I did not know this. Horrible.
>
> The Linux coding style should be global in the kernel tree.
> No subsystem should adopts its own coding style.

Well that ship sailed long ago.

But we don't have our own coding style, we just don't enforce 80 columns
rigidly, there are cases where a slightly longer line (up to ~90) is
preferable to a split line.

In a case like this with a long attribute and function name I think this
is probably the least worst option:

static __always_inline
void tlbiel_hash_set_isa300(unsigned int set, unsigned int is, unsigned int pid,
unsigned int ric, unsigned int prs)
{
...

cheers


Re: [PATCH] powerpc/mm: mark more tlb functions as __always_inline

2019-05-21 Thread Masahiro Yamada
On Tue, May 21, 2019 at 3:54 PM Christophe Leroy
 wrote:
>
>
>
> Le 21/05/2019 à 08:16, Masahiro Yamada a écrit :
> > With CONFIG_OPTIMIZE_INLINING enabled, Laura Abbott reported error
> > with gcc 9.1.1:
> >
> >arch/powerpc/mm/book3s64/radix_tlb.c: In function '_tlbiel_pid':
> >arch/powerpc/mm/book3s64/radix_tlb.c:104:2: warning: asm operand 3 
> > probably doesn't match constraints
> >  104 |  asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
> >  |  ^~~
> >arch/powerpc/mm/book3s64/radix_tlb.c:104:2: error: impossible constraint 
> > in 'asm'
> >
> > Fixing _tlbiel_pid() is enough to address the warning above, but I
> > inlined more functions to fix all potential issues.
> >
> > To meet the 'i' (immediate) constraint for the asm operands, functions
> > propagating propagated 'ric' must be always inlined.
> >
> > Fixes: 9012d011660e ("compiler: allow all arches to enable 
> > CONFIG_OPTIMIZE_INLINING")
> > Reported-by: Laura Abbott 
> > Signed-off-by: Masahiro Yamada 
> > ---
> >
> >   arch/powerpc/mm/book3s64/hash_native.c |  8 +++--
> >   arch/powerpc/mm/book3s64/radix_tlb.c   | 44 +++---
> >   2 files changed, 30 insertions(+), 22 deletions(-)
> >
> > diff --git a/arch/powerpc/mm/book3s64/hash_native.c 
> > b/arch/powerpc/mm/book3s64/hash_native.c
> > index aaa28fd918fe..bc2c35c0d2b1 100644
> > --- a/arch/powerpc/mm/book3s64/hash_native.c
> > +++ b/arch/powerpc/mm/book3s64/hash_native.c
> > @@ -60,9 +60,11 @@ static inline void tlbiel_hash_set_isa206(unsigned int 
> > set, unsigned int is)
> >* tlbiel instruction for hash, set invalidation
> >* i.e., r=1 and is=01 or is=10 or is=11
> >*/
> > -static inline void tlbiel_hash_set_isa300(unsigned int set, unsigned int 
> > is,
> > - unsigned int pid,
> > - unsigned int ric, unsigned int prs)
> > +static __always_inline void tlbiel_hash_set_isa300(unsigned int set,
> > +unsigned int is,
> > +unsigned int pid,
> > +unsigned int ric,
> > +unsigned int prs)
>
> Please don't split the line more than it is.
>
> powerpc accepts lines up to 90 chars, see arch/powerpc/tools/checkpatch.pl

Ugh, I did not know this. Horrible.

The Linux coding style should be global in the kernel tree.
No subsystem should adopts its own coding style.


-- 
Best Regards
Masahiro Yamada


Re: [PATCH] powerpc/mm: mark more tlb functions as __always_inline

2019-05-21 Thread Joe Perches
On Tue, 2019-05-21 at 08:53 +0200, Christophe Leroy wrote:
> powerpc accepts lines up to 90 chars, see arch/powerpc/tools/checkpatch.pl

arch/powerpc/tools/checkpatch.sh




Re: [PATCH] powerpc/mm: mark more tlb functions as __always_inline

2019-05-21 Thread Christophe Leroy




Le 21/05/2019 à 08:16, Masahiro Yamada a écrit :

With CONFIG_OPTIMIZE_INLINING enabled, Laura Abbott reported error
with gcc 9.1.1:

   arch/powerpc/mm/book3s64/radix_tlb.c: In function '_tlbiel_pid':
   arch/powerpc/mm/book3s64/radix_tlb.c:104:2: warning: asm operand 3 probably 
doesn't match constraints
 104 |  asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
 |  ^~~
   arch/powerpc/mm/book3s64/radix_tlb.c:104:2: error: impossible constraint in 
'asm'

Fixing _tlbiel_pid() is enough to address the warning above, but I
inlined more functions to fix all potential issues.

To meet the 'i' (immediate) constraint for the asm operands, functions
propagating propagated 'ric' must be always inlined.

Fixes: 9012d011660e ("compiler: allow all arches to enable 
CONFIG_OPTIMIZE_INLINING")
Reported-by: Laura Abbott 
Signed-off-by: Masahiro Yamada 
---

  arch/powerpc/mm/book3s64/hash_native.c |  8 +++--
  arch/powerpc/mm/book3s64/radix_tlb.c   | 44 +++---
  2 files changed, 30 insertions(+), 22 deletions(-)

diff --git a/arch/powerpc/mm/book3s64/hash_native.c 
b/arch/powerpc/mm/book3s64/hash_native.c
index aaa28fd918fe..bc2c35c0d2b1 100644
--- a/arch/powerpc/mm/book3s64/hash_native.c
+++ b/arch/powerpc/mm/book3s64/hash_native.c
@@ -60,9 +60,11 @@ static inline void tlbiel_hash_set_isa206(unsigned int set, 
unsigned int is)
   * tlbiel instruction for hash, set invalidation
   * i.e., r=1 and is=01 or is=10 or is=11
   */
-static inline void tlbiel_hash_set_isa300(unsigned int set, unsigned int is,
-   unsigned int pid,
-   unsigned int ric, unsigned int prs)
+static __always_inline void tlbiel_hash_set_isa300(unsigned int set,
+  unsigned int is,
+  unsigned int pid,
+  unsigned int ric,
+  unsigned int prs)


Please don't split the line more than it is.

powerpc accepts lines up to 90 chars, see arch/powerpc/tools/checkpatch.pl


  {
unsigned long rb;
unsigned long rs;
diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c 
b/arch/powerpc/mm/book3s64/radix_tlb.c
index 4d841369399f..91c4242c1be3 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -29,9 +29,11 @@
   * tlbiel instruction for radix, set invalidation
   * i.e., r=1 and is=01 or is=10 or is=11
   */
-static inline void tlbiel_radix_set_isa300(unsigned int set, unsigned int is,
-   unsigned int pid,
-   unsigned int ric, unsigned int prs)
+static __always_inline void tlbiel_radix_set_isa300(unsigned int set,
+   unsigned int is,
+   unsigned int pid,
+   unsigned int ric,
+   unsigned int prs)


Please don't split the line more than it is.


  {
unsigned long rb;
unsigned long rs;
@@ -150,8 +152,8 @@ static __always_inline void __tlbie_lpid(unsigned long 
lpid, unsigned long ric)
trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
  }
  
-static inline void __tlbiel_lpid_guest(unsigned long lpid, int set,

-   unsigned long ric)
+static __always_inline void __tlbiel_lpid_guest(unsigned long lpid, int set,
+   unsigned long ric)
  {
unsigned long rb,rs,prs,r;
  
@@ -167,8 +169,8 @@ static inline void __tlbiel_lpid_guest(unsigned long lpid, int set,

  }
  
  
-static inline void __tlbiel_va(unsigned long va, unsigned long pid,

-  unsigned long ap, unsigned long ric)
+static __always_inline void __tlbiel_va(unsigned long va, unsigned long pid,
+   unsigned long ap, unsigned long ric)
  {
unsigned long rb,rs,prs,r;
  
@@ -183,8 +185,8 @@ static inline void __tlbiel_va(unsigned long va, unsigned long pid,

trace_tlbie(0, 1, rb, rs, ric, prs, r);
  }
  
-static inline void __tlbie_va(unsigned long va, unsigned long pid,

- unsigned long ap, unsigned long ric)
+static __always_inline void __tlbie_va(unsigned long va, unsigned long pid,
+  unsigned long ap, unsigned long ric)
  {
unsigned long rb,rs,prs,r;
  
@@ -199,8 +201,10 @@ static inline void __tlbie_va(unsigned long va, unsigned long pid,

trace_tlbie(0, 0, rb, rs, ric, prs, r);
  }
  
-static inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid,

- unsigned long ap, unsigned long ric)
+static __always_inline void __tlbie_lpid_va(unsigned long va,
+   unsigned long lpid,

[PATCH] powerpc/mm: mark more tlb functions as __always_inline

2019-05-21 Thread Masahiro Yamada
With CONFIG_OPTIMIZE_INLINING enabled, Laura Abbott reported error
with gcc 9.1.1:

  arch/powerpc/mm/book3s64/radix_tlb.c: In function '_tlbiel_pid':
  arch/powerpc/mm/book3s64/radix_tlb.c:104:2: warning: asm operand 3 probably 
doesn't match constraints
104 |  asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
|  ^~~
  arch/powerpc/mm/book3s64/radix_tlb.c:104:2: error: impossible constraint in 
'asm'

Fixing _tlbiel_pid() is enough to address the warning above, but I
inlined more functions to fix all potential issues.

To meet the 'i' (immediate) constraint for the asm operands, functions
propagating propagated 'ric' must be always inlined.

Fixes: 9012d011660e ("compiler: allow all arches to enable 
CONFIG_OPTIMIZE_INLINING")
Reported-by: Laura Abbott 
Signed-off-by: Masahiro Yamada 
---

 arch/powerpc/mm/book3s64/hash_native.c |  8 +++--
 arch/powerpc/mm/book3s64/radix_tlb.c   | 44 +++---
 2 files changed, 30 insertions(+), 22 deletions(-)

diff --git a/arch/powerpc/mm/book3s64/hash_native.c 
b/arch/powerpc/mm/book3s64/hash_native.c
index aaa28fd918fe..bc2c35c0d2b1 100644
--- a/arch/powerpc/mm/book3s64/hash_native.c
+++ b/arch/powerpc/mm/book3s64/hash_native.c
@@ -60,9 +60,11 @@ static inline void tlbiel_hash_set_isa206(unsigned int set, 
unsigned int is)
  * tlbiel instruction for hash, set invalidation
  * i.e., r=1 and is=01 or is=10 or is=11
  */
-static inline void tlbiel_hash_set_isa300(unsigned int set, unsigned int is,
-   unsigned int pid,
-   unsigned int ric, unsigned int prs)
+static __always_inline void tlbiel_hash_set_isa300(unsigned int set,
+  unsigned int is,
+  unsigned int pid,
+  unsigned int ric,
+  unsigned int prs)
 {
unsigned long rb;
unsigned long rs;
diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c 
b/arch/powerpc/mm/book3s64/radix_tlb.c
index 4d841369399f..91c4242c1be3 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -29,9 +29,11 @@
  * tlbiel instruction for radix, set invalidation
  * i.e., r=1 and is=01 or is=10 or is=11
  */
-static inline void tlbiel_radix_set_isa300(unsigned int set, unsigned int is,
-   unsigned int pid,
-   unsigned int ric, unsigned int prs)
+static __always_inline void tlbiel_radix_set_isa300(unsigned int set,
+   unsigned int is,
+   unsigned int pid,
+   unsigned int ric,
+   unsigned int prs)
 {
unsigned long rb;
unsigned long rs;
@@ -150,8 +152,8 @@ static __always_inline void __tlbie_lpid(unsigned long 
lpid, unsigned long ric)
trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
 }
 
-static inline void __tlbiel_lpid_guest(unsigned long lpid, int set,
-   unsigned long ric)
+static __always_inline void __tlbiel_lpid_guest(unsigned long lpid, int set,
+   unsigned long ric)
 {
unsigned long rb,rs,prs,r;
 
@@ -167,8 +169,8 @@ static inline void __tlbiel_lpid_guest(unsigned long lpid, 
int set,
 }
 
 
-static inline void __tlbiel_va(unsigned long va, unsigned long pid,
-  unsigned long ap, unsigned long ric)
+static __always_inline void __tlbiel_va(unsigned long va, unsigned long pid,
+   unsigned long ap, unsigned long ric)
 {
unsigned long rb,rs,prs,r;
 
@@ -183,8 +185,8 @@ static inline void __tlbiel_va(unsigned long va, unsigned 
long pid,
trace_tlbie(0, 1, rb, rs, ric, prs, r);
 }
 
-static inline void __tlbie_va(unsigned long va, unsigned long pid,
- unsigned long ap, unsigned long ric)
+static __always_inline void __tlbie_va(unsigned long va, unsigned long pid,
+  unsigned long ap, unsigned long ric)
 {
unsigned long rb,rs,prs,r;
 
@@ -199,8 +201,10 @@ static inline void __tlbie_va(unsigned long va, unsigned 
long pid,
trace_tlbie(0, 0, rb, rs, ric, prs, r);
 }
 
-static inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid,
- unsigned long ap, unsigned long ric)
+static __always_inline void __tlbie_lpid_va(unsigned long va,
+   unsigned long lpid,
+   unsigned long ap,
+   unsigned long ric)
 {
unsigned long rb,rs,prs,r;
 
@@ -239,7 +243,7 @@ static inline void fixup_tlbie_lpid(unsigned long lpid)
 /*
  * We use 128 set