[PATCH v2 3/3] arch: define CONFIG_PAGE_SIZE_*KB on all architectures

2024-03-06 Thread Arnd Bergmann
From: Arnd Bergmann 

Most architectures only support a single hardcoded page size. In order
to ensure that each one of these sets the corresponding Kconfig symbols,
change over the PAGE_SHIFT definition to the common one and allow
only the hardware page size to be selected.

Acked-by: Guo Ren 
Acked-by: Heiko Carstens 
Acked-by: Stafford Horne 
Acked-by: Johannes Berg 
Signed-off-by: Arnd Bergmann 
---
No changes from v1

 arch/alpha/Kconfig | 1 +
 arch/alpha/include/asm/page.h  | 2 +-
 arch/arm/Kconfig   | 1 +
 arch/arm/include/asm/page.h| 2 +-
 arch/csky/Kconfig  | 1 +
 arch/csky/include/asm/page.h   | 2 +-
 arch/m68k/Kconfig  | 3 +++
 arch/m68k/Kconfig.cpu  | 2 ++
 arch/m68k/include/asm/page.h   | 6 +-
 arch/microblaze/Kconfig| 1 +
 arch/microblaze/include/asm/page.h | 2 +-
 arch/nios2/Kconfig | 1 +
 arch/nios2/include/asm/page.h  | 2 +-
 arch/openrisc/Kconfig  | 1 +
 arch/openrisc/include/asm/page.h   | 2 +-
 arch/riscv/Kconfig | 1 +
 arch/riscv/include/asm/page.h  | 2 +-
 arch/s390/Kconfig  | 1 +
 arch/s390/include/asm/page.h   | 2 +-
 arch/sparc/Kconfig | 2 ++
 arch/sparc/include/asm/page_32.h   | 2 +-
 arch/sparc/include/asm/page_64.h   | 3 +--
 arch/um/Kconfig| 1 +
 arch/um/include/asm/page.h | 2 +-
 arch/x86/Kconfig   | 1 +
 arch/x86/include/asm/page_types.h  | 2 +-
 arch/xtensa/Kconfig| 1 +
 arch/xtensa/include/asm/page.h | 2 +-
 28 files changed, 32 insertions(+), 19 deletions(-)

diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index d6968d090d49..4f490250d323 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -14,6 +14,7 @@ config ALPHA
select PCI_DOMAINS if PCI
select PCI_SYSCALL if PCI
select HAVE_ASM_MODVERSIONS
+   select HAVE_PAGE_SIZE_8KB
select HAVE_PCSPKR_PLATFORM
select HAVE_PERF_EVENTS
select NEED_DMA_MAP_STATE
diff --git a/arch/alpha/include/asm/page.h b/arch/alpha/include/asm/page.h
index 4db1ebc0ed99..70419e6be1a3 100644
--- a/arch/alpha/include/asm/page.h
+++ b/arch/alpha/include/asm/page.h
@@ -6,7 +6,7 @@
 #include 
 
 /* PAGE_SHIFT determines the page size */
-#define PAGE_SHIFT 13
+#define PAGE_SHIFT CONFIG_PAGE_SHIFT
 #define PAGE_SIZE  (_AC(1,UL) << PAGE_SHIFT)
 #define PAGE_MASK  (~(PAGE_SIZE-1))
 
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 0af6709570d1..9d52ba3a8ad1 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -116,6 +116,7 @@ config ARM
select HAVE_MOD_ARCH_SPECIFIC
select HAVE_NMI
select HAVE_OPTPROBES if !THUMB2_KERNEL
+   select HAVE_PAGE_SIZE_4KB
select HAVE_PCI if MMU
select HAVE_PERF_EVENTS
select HAVE_PERF_REGS
diff --git a/arch/arm/include/asm/page.h b/arch/arm/include/asm/page.h
index 119aa85d1feb..62af9f7f9e96 100644
--- a/arch/arm/include/asm/page.h
+++ b/arch/arm/include/asm/page.h
@@ -8,7 +8,7 @@
 #define _ASMARM_PAGE_H
 
 /* PAGE_SHIFT determines the page size */
-#define PAGE_SHIFT 12
+#define PAGE_SHIFT CONFIG_PAGE_SHIFT
 #define PAGE_SIZE  (_AC(1,UL) << PAGE_SHIFT)
 #define PAGE_MASK  (~((1 << PAGE_SHIFT) - 1))
 
diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig
index cf2a6fd7dff8..9c2723ab1c94 100644
--- a/arch/csky/Kconfig
+++ b/arch/csky/Kconfig
@@ -89,6 +89,7 @@ config CSKY
select HAVE_KPROBES if !CPU_CK610
select HAVE_KPROBES_ON_FTRACE if !CPU_CK610
select HAVE_KRETPROBES if !CPU_CK610
+   select HAVE_PAGE_SIZE_4KB
select HAVE_PERF_EVENTS
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
diff --git a/arch/csky/include/asm/page.h b/arch/csky/include/asm/page.h
index 866855e1ab43..0ca6c408c07f 100644
--- a/arch/csky/include/asm/page.h
+++ b/arch/csky/include/asm/page.h
@@ -10,7 +10,7 @@
 /*
  * PAGE_SHIFT determines the page size: 4KB
  */
-#define PAGE_SHIFT 12
+#define PAGE_SHIFT CONFIG_PAGE_SHIFT
 #define PAGE_SIZE  (_AC(1, UL) << PAGE_SHIFT)
 #define PAGE_MASK  (~(PAGE_SIZE - 1))
 #define THREAD_SIZE(PAGE_SIZE * 2)
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 4b3e93cac723..7b709453d5e7 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -84,12 +84,15 @@ config MMU
 
 config MMU_MOTOROLA
bool
+   select HAVE_PAGE_SIZE_4KB
 
 config MMU_COLDFIRE
+   select HAVE_PAGE_SIZE_8KB
bool
 
 config MMU_SUN3
bool
+   select HAVE_PAGE_SIZE_8KB
depends on MMU && !MMU_MOTOROLA && !MMU_COLDFIRE
 
 config ARCH_SUPPORTS_KEXEC
diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu
index 9dcf245c9cbf..c777a129768a 100644
--- a/arch/m68k/Kconfig.cpu
+++ b/arch/m68k/Kconfig.cpu
@@ -30,6 +30,7 @@ config COLDFIRE
se

[PATCH v2 2/3] arch: simplify architecture specific page size configuration

2024-03-06 Thread Arnd Bergmann
From: Arnd Bergmann 

arc, arm64, parisc and powerpc all have their own Kconfig symbols
in place of the common CONFIG_PAGE_SIZE_4KB symbols. Change these
so the common symbols are the ones that are actually used, while
leaving the architecture-specific ones as the user visible
place for configuring it, to avoid breaking user configs.

Reviewed-by: Christophe Leroy  (powerpc32)
Acked-by: Catalin Marinas 
Acked-by: Helge Deller  # parisc
Signed-off-by: Arnd Bergmann 
---
No changes from v1

 arch/arc/Kconfig  |  3 +++
 arch/arc/include/uapi/asm/page.h  |  6 ++
 arch/arm64/Kconfig| 29 +
 arch/arm64/include/asm/page-def.h |  2 +-
 arch/parisc/Kconfig   |  3 +++
 arch/parisc/include/asm/page.h| 10 +-
 arch/powerpc/Kconfig  | 31 ++-
 arch/powerpc/include/asm/page.h   |  2 +-
 scripts/gdb/linux/constants.py.in |  2 +-
 scripts/gdb/linux/mm.py   |  2 +-
 10 files changed, 32 insertions(+), 58 deletions(-)

diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index 1b0483c51cc1..4092bec198be 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -284,14 +284,17 @@ choice
 
 config ARC_PAGE_SIZE_8K
bool "8KB"
+   select HAVE_PAGE_SIZE_8KB
help
  Choose between 8k vs 16k
 
 config ARC_PAGE_SIZE_16K
+   select HAVE_PAGE_SIZE_16KB
bool "16KB"
 
 config ARC_PAGE_SIZE_4K
bool "4KB"
+   select HAVE_PAGE_SIZE_4KB
depends on ARC_MMU_V3 || ARC_MMU_V4
 
 endchoice
diff --git a/arch/arc/include/uapi/asm/page.h b/arch/arc/include/uapi/asm/page.h
index 2a4ad619abfb..7fd9e741b527 100644
--- a/arch/arc/include/uapi/asm/page.h
+++ b/arch/arc/include/uapi/asm/page.h
@@ -13,10 +13,8 @@
 #include 
 
 /* PAGE_SHIFT determines the page size */
-#if defined(CONFIG_ARC_PAGE_SIZE_16K)
-#define PAGE_SHIFT 14
-#elif defined(CONFIG_ARC_PAGE_SIZE_4K)
-#define PAGE_SHIFT 12
+#ifdef __KERNEL__
+#define PAGE_SHIFT CONFIG_PAGE_SHIFT
 #else
 /*
  * Default 8k
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index aa7c1d435139..29290b8cb36d 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -277,27 +277,21 @@ config 64BIT
 config MMU
def_bool y
 
-config ARM64_PAGE_SHIFT
-   int
-   default 16 if ARM64_64K_PAGES
-   default 14 if ARM64_16K_PAGES
-   default 12
-
 config ARM64_CONT_PTE_SHIFT
int
-   default 5 if ARM64_64K_PAGES
-   default 7 if ARM64_16K_PAGES
+   default 5 if PAGE_SIZE_64KB
+   default 7 if PAGE_SIZE_16KB
default 4
 
 config ARM64_CONT_PMD_SHIFT
int
-   default 5 if ARM64_64K_PAGES
-   default 5 if ARM64_16K_PAGES
+   default 5 if PAGE_SIZE_64KB
+   default 5 if PAGE_SIZE_16KB
default 4
 
 config ARCH_MMAP_RND_BITS_MIN
-   default 14 if ARM64_64K_PAGES
-   default 16 if ARM64_16K_PAGES
+   default 14 if PAGE_SIZE_64KB
+   default 16 if PAGE_SIZE_16KB
default 18
 
 # max bits determined by the following formula:
@@ -1259,11 +1253,13 @@ choice
 
 config ARM64_4K_PAGES
bool "4KB"
+   select HAVE_PAGE_SIZE_4KB
help
  This feature enables 4KB pages support.
 
 config ARM64_16K_PAGES
bool "16KB"
+   select HAVE_PAGE_SIZE_16KB
help
  The system will use 16KB pages support. AArch32 emulation
  requires applications compiled with 16K (or a multiple of 16K)
@@ -1271,6 +1267,7 @@ config ARM64_16K_PAGES
 
 config ARM64_64K_PAGES
bool "64KB"
+   select HAVE_PAGE_SIZE_64KB
help
  This feature enables 64KB pages support (4KB by default)
  allowing only two levels of page tables and faster TLB
@@ -1291,19 +1288,19 @@ choice
 
 config ARM64_VA_BITS_36
bool "36-bit" if EXPERT
-   depends on ARM64_16K_PAGES
+   depends on PAGE_SIZE_16KB
 
 config ARM64_VA_BITS_39
bool "39-bit"
-   depends on ARM64_4K_PAGES
+   depends on PAGE_SIZE_4KB
 
 config ARM64_VA_BITS_42
bool "42-bit"
-   depends on ARM64_64K_PAGES
+   depends on PAGE_SIZE_64KB
 
 config ARM64_VA_BITS_47
bool "47-bit"
-   depends on ARM64_16K_PAGES
+   depends on PAGE_SIZE_16KB
 
 config ARM64_VA_BITS_48
bool "48-bit"
diff --git a/arch/arm64/include/asm/page-def.h b/arch/arm64/include/asm/page-def.h
index 2403f7b4cdbf..792e9fe881dc 100644
--- a/arch/arm64/include/asm/page-def.h
+++ b/arch/arm64/include/asm/page-def.h
@@ -11,7 +11,7 @@
 #include 
 
 /* PAGE_SHIFT determines the page size */
-#define PAGE_SHIFT CONFIG_ARM64_PAGE_SHIFT
+#define PAGE_SHIFT CONFIG_PAGE_SHIFT
 #define PAGE_SIZE  (_AC(1, UL) << PAGE_SHIFT)
 #define PAGE_MASK  (~(PAGE_SIZE-1))
 
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 5c845e8d59d9..b180e684fa0d 

[PATCH v2 1/3] arch: consolidate existing CONFIG_PAGE_SIZE_*KB definitions

2024-03-06 Thread Arnd Bergmann
From: Arnd Bergmann 

These four architectures define the same Kconfig symbols for configuring
the page size. Move the logic into a common place where it can be shared
with all other architectures.

Signed-off-by: Arnd Bergmann 
---
Changes from v1:
 - improve Kconfig help texts
 - fix Hexagon Kconfig

 arch/Kconfig  | 92 ++-
 arch/hexagon/Kconfig  | 24 ++--
 arch/hexagon/include/asm/page.h   |  6 +-
 arch/loongarch/Kconfig| 21 ++-
 arch/loongarch/include/asm/page.h | 10 +---
 arch/mips/Kconfig | 58 ++-
 arch/mips/include/asm/page.h  | 16 +-
 arch/sh/include/asm/page.h| 13 +
 arch/sh/mm/Kconfig| 42 --
 9 files changed, 121 insertions(+), 161 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index a5af0edd3eb8..c63034e092d0 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -1078,17 +1078,105 @@ config HAVE_ARCH_COMPAT_MMAP_BASES
  and vice-versa 32-bit applications to call 64-bit mmap().
  Required for applications doing different bitness syscalls.
 
+config HAVE_PAGE_SIZE_4KB
+   bool
+
+config HAVE_PAGE_SIZE_8KB
+   bool
+
+config HAVE_PAGE_SIZE_16KB
+   bool
+
+config HAVE_PAGE_SIZE_32KB
+   bool
+
+config HAVE_PAGE_SIZE_64KB
+   bool
+
+config HAVE_PAGE_SIZE_256KB
+   bool
+
+choice
+   prompt "MMU page size"
+
+config PAGE_SIZE_4KB
+   bool "4KiB pages"
+   depends on HAVE_PAGE_SIZE_4KB
+   help
+ This option select the standard 4KiB Linux page size and the only
+ available option on many architectures. Using 4KiB page size will
+ minimize memory consumption and is therefore recommended for low
+ memory systems.
+ Some software that is written for x86 systems makes incorrect
+ assumptions about the page size and only runs on 4KiB pages.
+
+config PAGE_SIZE_8KB
+   bool "8KiB pages"
+   depends on HAVE_PAGE_SIZE_8KB
+   help
+ This option is the only supported page size on a few older
+ processors, and can be slightly faster than 4KiB pages.
+
+config PAGE_SIZE_16KB
+   bool "16KiB pages"
+   depends on HAVE_PAGE_SIZE_16KB
+   help
+ This option is usually a good compromise between memory
+ consumption and performance for typical desktop and server
+ workloads, often saving a level of page table lookups compared
+ to 4KB pages as well as reducing TLB pressure and overhead of
+ per-page operations in the kernel at the expense of a larger
+ page cache.
+
+config PAGE_SIZE_32KB
+   bool "32KiB pages"
+   depends on HAVE_PAGE_SIZE_32KB
+ Using 32KiB page size will result in slightly higher performance
+ kernel at the price of higher memory consumption compared to
+ 16KiB pages.  This option is available only on cnMIPS cores.
+ Note that you will need a suitable Linux distribution to
+ support this.
+
+config PAGE_SIZE_64KB
+   bool "64KiB pages"
+   depends on HAVE_PAGE_SIZE_64KB
+ Using 64KiB page size will result in slightly higher performance
+ kernel at the price of much higher memory consumption compared to
+ 4KiB or 16KiB pages.
+ This is not suitable for general-purpose workloads but the
+ better performance may be worth the cost for certain types of
+ supercomputing or database applications that work mostly with
+ large in-memory data rather than small files.
+
+config PAGE_SIZE_256KB
+   bool "256KiB pages"
+   depends on HAVE_PAGE_SIZE_256KB
+   help
+ 256KiB pages have little practical value due to their extreme
+ memory usage.  The kernel will only be able to run applications
+ that have been compiled with '-zmax-page-size' set to 256KiB
+ (the default is 64KiB or 4KiB on most architectures).
+
+endchoice
+
 config PAGE_SIZE_LESS_THAN_64KB
def_bool y
-   depends on !ARM64_64K_PAGES
depends on !PAGE_SIZE_64KB
-   depends on !PARISC_PAGE_SIZE_64KB
depends on PAGE_SIZE_LESS_THAN_256KB
 
 config PAGE_SIZE_LESS_THAN_256KB
def_bool y
depends on !PAGE_SIZE_256KB
 
+config PAGE_SHIFT
+   int
+   default 12 if PAGE_SIZE_4KB
+   default 13 if PAGE_SIZE_8KB
+   default 14 if PAGE_SIZE_16KB
+   default 15 if PAGE_SIZE_32KB
+   default 16 if PAGE_SIZE_64KB
+   default 18 if PAGE_SIZE_256KB
+
 # This allows to use a set of generic functions to determine mmap base
 # address by giving priority to top-down scheme only if the process
 # is not in legacy mode (compat task, unlimited stack size or
diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig
index a880ee067d2e..1414052e7d6b 100644
--- a/arch/hexagon/Kconfig
+++ b/arch/hexagon/Kconfig
@@ -8,6 +8,10 @@ con

[v2 PATCH 0/3] arch: mm, vdso: consolidate PAGE_SIZE definition

2024-03-06 Thread Arnd Bergmann
From: Arnd Bergmann 

Naresh noticed that the newly added usage of the PAGE_SIZE macro in
include/vdso/datapage.h introduced a build regression. I had an older
patch that I revived to have this defined through Kconfig rather than
through including asm/page.h, which is not allowed in vdso code.

The vdso patch series now has a temporary workaround, but I still want to
get this into v6.9 so we can replace the hack with CONFIG_PAGE_SIZE
in the vdso.

I've applied this to the asm-generic tree already, please let me know if
there are still remaining issues. It's really close to the merge window
already, so I'd probably give this a few more days before I send a pull
request, or defer it to v6.10 if anything goes wrong.

Sorry for the delay, I was still waiting to resolve the m68k question,
but there were no further replies in the end, so I kept my original
version.

Changes from v1:

 - improve Kconfig help texts
 - remove an extraneous line in hexagon

  Arnd

Link: 
https://lore.kernel.org/lkml/ca+g9fytrxxm_ko9fnpz3xarxhv7ud_yqp-teupqrnrhu+_0...@mail.gmail.com/
Link: https://lore.kernel.org/all/65dc6c14.170a0220.f4a3f.9...@mx.google.com/
Link: https://lore.kernel.org/lkml/20240226161414.2316610-1-a...@kernel.org/

Arnd Bergmann (3):
  arch: consolidate existing CONFIG_PAGE_SIZE_*KB definitions
  arch: simplify architecture specific page size configuration
  arch: define CONFIG_PAGE_SIZE_*KB on all architectures

 arch/Kconfig   | 92 +-
 arch/alpha/Kconfig |  1 +
 arch/alpha/include/asm/page.h  |  2 +-
 arch/arc/Kconfig   |  3 +
 arch/arc/include/uapi/asm/page.h   |  6 +-
 arch/arm/Kconfig   |  1 +
 arch/arm/include/asm/page.h|  2 +-
 arch/arm64/Kconfig | 29 +-
 arch/arm64/include/asm/page-def.h  |  2 +-
 arch/csky/Kconfig  |  1 +
 arch/csky/include/asm/page.h   |  2 +-
 arch/hexagon/Kconfig   | 24 ++--
 arch/hexagon/include/asm/page.h|  6 +-
 arch/loongarch/Kconfig | 21 ++-
 arch/loongarch/include/asm/page.h  | 10 +---
 arch/m68k/Kconfig  |  3 +
 arch/m68k/Kconfig.cpu  |  2 +
 arch/m68k/include/asm/page.h   |  6 +-
 arch/microblaze/Kconfig|  1 +
 arch/microblaze/include/asm/page.h |  2 +-
 arch/mips/Kconfig  | 58 ++-
 arch/mips/include/asm/page.h   | 16 +-
 arch/nios2/Kconfig |  1 +
 arch/nios2/include/asm/page.h  |  2 +-
 arch/openrisc/Kconfig  |  1 +
 arch/openrisc/include/asm/page.h   |  2 +-
 arch/parisc/Kconfig|  3 +
 arch/parisc/include/asm/page.h | 10 +---
 arch/powerpc/Kconfig   | 31 ++
 arch/powerpc/include/asm/page.h|  2 +-
 arch/riscv/Kconfig |  1 +
 arch/riscv/include/asm/page.h  |  2 +-
 arch/s390/Kconfig  |  1 +
 arch/s390/include/asm/page.h   |  2 +-
 arch/sh/include/asm/page.h | 13 +
 arch/sh/mm/Kconfig | 42 --
 arch/sparc/Kconfig |  2 +
 arch/sparc/include/asm/page_32.h   |  2 +-
 arch/sparc/include/asm/page_64.h   |  3 +-
 arch/um/Kconfig|  1 +
 arch/um/include/asm/page.h |  2 +-
 arch/x86/Kconfig   |  1 +
 arch/x86/include/asm/page_types.h  |  2 +-
 arch/xtensa/Kconfig|  1 +
 arch/xtensa/include/asm/page.h |  2 +-
 scripts/gdb/linux/constants.py.in  |  2 +-
 scripts/gdb/linux/mm.py|  2 +-
 47 files changed, 185 insertions(+), 238 deletions(-)

-- 
2.39.2

To: Thomas Gleixner 
To: Vincenzo Frascino 
To: Kees Cook 
To: Anna-Maria Behnsen 
Cc: Matt Turner 
Cc: Vineet Gupta 
Cc: Russell King 
Cc: Catalin Marinas 
Cc: Guo Ren 
Cc: Brian Cain 
Cc: Huacai Chen 
Cc: Geert Uytterhoeven 
Cc: Michal Simek 
Cc: Thomas Bogendoerfer 
Cc: Helge Deller 
Cc: Michael Ellerman 
Cc: Christophe Leroy 
Cc: Palmer Dabbelt 
Cc: John Paul Adrian Glaubitz 
Cc: Andreas Larsson 
Cc: Richard Weinberger 
Cc: x...@kernel.org
Cc: Max Filippov 
Cc: Andy Lutomirski 
Cc: Vincenzo Frascino 
Cc: Jan Kiszka 
Cc: Kieran Bingham 
Cc: Andrew Morton 
Cc: Arnd Bergmann 
Cc: linux-ker...@vger.kernel.org
Cc: linux-alpha@vger.kernel.org
Cc: linux-snps-...@lists.infradead.org
Cc: linux-arm-ker...@lists.infradead.org
Cc: linux-c...@vger.kernel.org
Cc: linux-hexa...@vger.kernel.org
Cc: loonga...@lists.linux.dev
Cc: linux-m...@lists.linux-m68k.org
Cc: linux-m...@vger.kernel.org
Cc: linux-openr...@vger.kernel.org
Cc: linux-par...@vger.kernel.org
Cc: linuxppc-...@lists.ozlabs.org
Cc: linux-ri...@lists.infradead.org
Cc: linux-s...@vger.kernel.org
Cc: linux...@vger.kernel.org
Cc: sparcli...@vger.kernel.org
Cc: linux...@lists.infradead.org



Re: [PATCH 1/4] arch: consolidate existing CONFIG_PAGE_SIZE_*KB definitions

2024-02-27 Thread Arnd Bergmann
On Tue, Feb 27, 2024, at 16:44, Christophe Leroy wrote:
> Le 27/02/2024 à 16:40, Arnd Bergmann a écrit :
>> On Mon, Feb 26, 2024, at 17:55, Samuel Holland wrote:
>
>
> For 256K pages, powerpc has the following help. I think you should have 
> it too:
>
> The kernel will only be able to run applications that have been
> compiled with '-zmax-page-size' set to 256K (the default is 64K) using
> binutils later than 2.17.50.0.3, or by patching the ELF_MAXPAGESIZE
> definition from 0x10000 to 0x40000 in older versions.

I don't think we need to mention pre-2.18 binutils any more, but the
rest seems useful, changed the text now to

config PAGE_SIZE_256KB
bool "256KiB pages"
depends on HAVE_PAGE_SIZE_256KB
help
  256KiB pages have little practical value due to their extreme
  memory usage.  The kernel will only be able to run applications
  that have been compiled with '-zmax-page-size' set to 256KiB
  (the default is 64KiB or 4KiB on most architectures).

  Arnd



Re: [PATCH 1/4] arch: consolidate existing CONFIG_PAGE_SIZE_*KB definitions

2024-02-27 Thread Arnd Bergmann
On Tue, Feb 27, 2024, at 09:45, Geert Uytterhoeven wrote:
>
>> +config PAGE_SIZE_4KB
>> +   bool "4KB pages"
>
> Now you got rid of the 4000-byte ("4kB") pages and friends, please
> do not replace these by Kelvin-bytes, and use the official binary
> prefixes => "4 KiB".
>

Done, thanks.

Arnd



Re: [PATCH 1/4] arch: consolidate existing CONFIG_PAGE_SIZE_*KB definitions

2024-02-27 Thread Arnd Bergmann
On Mon, Feb 26, 2024, at 20:02, Christophe Leroy wrote:
> Le 26/02/2024 à 17:14, Arnd Bergmann a écrit :
>> From: Arnd Bergmann 
>
> That's a nice re-factor.
>
> The only drawback I see is that we are losing several interesting 
> arch-specific comments/help text. Don't know if there could be an easy 
> way to keep them.

This is what I have now, trying to write it as generic as
possible while still giving useful advice:

config PAGE_SIZE_4KB
bool "4KiB pages"
depends on HAVE_PAGE_SIZE_4KB
help
  This option select the standard 4KiB Linux page size and the only
  available option on many architectures. Using 4KiB page size will
  minimize memory consumption and is therefore recommended for low
  memory systems.
  Some software that is written for x86 systems makes incorrect
  assumptions about the page size and only runs on 4KiB pages.

config PAGE_SIZE_8KB
bool "8KiB pages"
depends on HAVE_PAGE_SIZE_8KB
help
  This option is the only supported page size on a few older
  processors, and can be slightly faster than 4KiB pages.

config PAGE_SIZE_16KB
bool "16KiB pages"
depends on HAVE_PAGE_SIZE_16KB
help
  This option is usually a good compromise between memory
  consumption and performance for typical desktop and server
  workloads, often saving a level of page table lookups compared
  to 4KB pages as well as reducing TLB pressure and overhead of
  per-page operations in the kernel at the expense of a larger
  page cache.

config PAGE_SIZE_32KB
bool "32KiB pages"
depends on HAVE_PAGE_SIZE_32KB
  Using 32KiB page size will result in slightly higher performance
  kernel at the price of higher memory consumption compared to
  16KiB pages.  This option is available only on cnMIPS cores.
  Note that you will need a suitable Linux distribution to
  support this.

config PAGE_SIZE_64KB
bool "64KiB pages"
depends on HAVE_PAGE_SIZE_64KB
  Using 64KiB page size will result in slightly higher performance
  kernel at the price of much higher memory consumption compared to
  4KiB or 16KiB pages.
  This is not suitable for general-purpose workloads but the
  better performance may be worth the cost for certain types of
  supercomputing or database applications that work mostly with
  large in-memory data rather than small files.

config PAGE_SIZE_256KB
bool "256KiB pages"
depends on HAVE_PAGE_SIZE_256KB
help
  256KB pages have little practical value due to their extreme
  memory usage.

Let me know if you think some of this should be adapted further.

>>   
>> +#define PAGE_SHIFT CONFIG_PAGE_SHIFT
>>   #define PAGE_SIZE  (1UL << PAGE_SHIFT)
>>   #define PAGE_MASK  (~((1 << PAGE_SHIFT) - 1))
>>   
>
> Could we move PAGE_SIZE and PAGE_MASK in a generic/core header instead 
> of having it duplicated for each arch ?

Yes, but I'm leaving this for a follow-up series, since I had
to stop somewhere and there is always room for cleaning up headers
further ;-)

  Arnd



Re: [PATCH 1/4] arch: consolidate existing CONFIG_PAGE_SIZE_*KB definitions

2024-02-27 Thread Arnd Bergmann
On Mon, Feb 26, 2024, at 17:55, Samuel Holland wrote:
> On 2024-02-26 10:14 AM, Arnd Bergmann wrote:
>>  
>> +config HAVE_PAGE_SIZE_4KB
>> +bool
>> +
>> +config HAVE_PAGE_SIZE_8KB
>> +bool
>> +
>> +config HAVE_PAGE_SIZE_16KB
>> +bool
>> +
>> +config HAVE_PAGE_SIZE_32KB
>> +bool
>> +
>> +config HAVE_PAGE_SIZE_64KB
>> +bool
>> +
>> +config HAVE_PAGE_SIZE_256KB
>> +bool
>> +
>> +choice
>> +prompt "MMU page size"
>
> Should this have some generic help text (at least a warning about 
> compatibility)?

Good point. I've added some of this now, based on the mips
text with some generalizations for other architectures:

config PAGE_SIZE_4KB
bool "4KiB pages" 
depends on HAVE_PAGE_SIZE_4KB
help 
  This option select the standard 4KiB Linux page size and the only
  available option on many architectures. Using 4KiB page size will
  minimize memory consumption and is therefore recommended for low
  memory systems.
  Some software that is written for x86 systems makes incorrect
  assumptions about the page size and only runs on 4KiB pages.

config PAGE_SIZE_8KB
bool "8KiB pages"
depends on HAVE_PAGE_SIZE_8KB
help
  This option is the only supported page size on a few older
  processors, and can be slightly faster than 4KiB pages.

config PAGE_SIZE_16KB
bool "16KiB pages"
depends on HAVE_PAGE_SIZE_16KB
help 
  This option is usually a good compromise between memory
  consumption and performance for typical desktop and server
  workloads, often saving a level of page table lookups compared
  to 4KB pages as well as reducing TLB pressure and overhead of
  per-page operations in the kernel at the expense of a larger
  page cache. 

config PAGE_SIZE_32KB
bool "32KiB pages"
depends on HAVE_PAGE_SIZE_32KB
  Using 32KiB page size will result in slightly higher performance
  kernel at the price of higher memory consumption compared to
  16KiB pages.  This option is available only on cnMIPS cores.
  Note that you will need a suitable Linux distribution to
  support this.

config PAGE_SIZE_64KB
bool "64KiB pages"
depends on HAVE_PAGE_SIZE_64KB
  Using 64KiB page size will result in slightly higher performance
  kernel at the price of much higher memory consumption compared to
  4KiB or 16KiB pages.
  This is not suitable for general-purpose workloads but the
  better performance may be worth the cost for certain types of
  supercomputing or database applications that work mostly with
  large in-memory data rather than small files.

config PAGE_SIZE_256KB
bool "256KiB pages"
depends on HAVE_PAGE_SIZE_256KB
help
  256KB pages have little practical value due to their extreme
  memory usage.

>> diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig
>> index a880ee067d2e..aac46ee1a000 100644
>> --- a/arch/hexagon/Kconfig
>> +++ b/arch/hexagon/Kconfig
>> @@ -8,6 +8,11 @@ config HEXAGON
>>  select ARCH_HAS_SYNC_DMA_FOR_DEVICE
>>  select ARCH_NO_PREEMPT
>>  select DMA_GLOBAL_POOL
>> +select FRAME_POINTER
>
> Looks like a paste error.
>

Fixed, thanks! I think that happened during a rebase.

>>  #ifdef CONFIG_PAGE_SIZE_1MB
>> -#define PAGE_SHIFT 20
>>  #define HEXAGON_L1_PTE_SIZE __HVM_PDE_S_1MB
>>  #endif
>
> The corresponding Kconfig option does not exist (and did not exist before this
> patch).

Yes, I noticed that as well. It's clearly harmless.

 Arnd



Re: [PATCH 3/4] arch: define CONFIG_PAGE_SIZE_*KB on all architectures

2024-02-27 Thread Arnd Bergmann
On Tue, Feb 27, 2024, at 12:12, Geert Uytterhoeven wrote:
> On Tue, Feb 27, 2024 at 11:59 AM Arnd Bergmann  wrote:
>> On Tue, Feb 27, 2024, at 09:54, Geert Uytterhoeven wrote:
>> I was a bit unsure about how to best do this since there
>> is not really a need for a fixed page size on nommu kernels,
>> whereas the three MMU configs clearly tie the page size to
>> the MMU rather than the platform.
>>
>> There should be no reason for coldfire to have a different
>> page size from dragonball if neither of them actually uses
>> hardware pages, so one of them could be changed later.
>
> Indeed, in theory, PAGE_SIZE doesn't matter for nommu, but the concept
> of pages is used all over the place in Linux.
>
> I'm mostly worried about some Coldfire code relying on the actual value
> of PAGE_SIZE in some other context. e.g. for configuring non-cacheable
> regions.

Right, any change here would have to be carefully tested. I would
expect that a 4K page size would reduce memory consumption even on
NOMMU systems that should have the same tradeoffs for representing
files in the page cache and in mem_map[].

> And does this impact running nommu binaries on a system with MMU?
> I.e. if nommu binaries were built with a 4 KiB PAGE_SIZE, do they
> still run on MMU systems with an 8 KiB PAGE_SIZE (coldfire and sun3),
> or are there some subtleties to take into account?

As far as I understand, binaries have to be built and linked for
the largest page size they can run on, so running them on a kernel
with smaller page size usually works.

One notable exception is sys_mmap2(), which on most architectures
takes units of 4KiB but on m68k is actually written to take
PAGE_SIZE units. As Al pointed out in f8b7256096a2 ("Unify
sys_mmap*"), it has always been wrong on sun3, presumably
because users of that predate modern glibc. Running coldfire
nommu binaries on coldfire mmu kernels would run into the same
bug if either of them changes PAGE_SIZE. If you can run
coldfire nommu binaries on classic m68k, that is already
broken in the same way.

  Arnd



Re: [PATCH 3/4] arch: define CONFIG_PAGE_SIZE_*KB on all architectures

2024-02-27 Thread Arnd Bergmann
On Tue, Feb 27, 2024, at 09:54, Geert Uytterhoeven wrote:
> Hi Arnd,
>> diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu
>> index 9dcf245c9cbf..c777a129768a 100644
>> --- a/arch/m68k/Kconfig.cpu
>> +++ b/arch/m68k/Kconfig.cpu
>> @@ -30,6 +30,7 @@ config COLDFIRE
>> select GENERIC_CSUM
>> select GPIOLIB
>> select HAVE_LEGACY_CLK
>> +   select HAVE_PAGE_SIZE_8KB if !MMU
>
>  if you would drop the !MMU-dependency here.
>
>>
>>  endchoice
>>
>> @@ -45,6 +46,7 @@ config M68000
>> select GENERIC_CSUM
>> select CPU_NO_EFFICIENT_FFS
>> select HAVE_ARCH_HASH
>> +   select HAVE_PAGE_SIZE_4KB
>
> Perhaps replace this by
>
> config M68KCLASSIC
> bool "Classic M68K CPU family support"
> select HAVE_ARCH_PFN_VALID
>   + select HAVE_PAGE_SIZE_4KB if !MMU
>
> so it covers all 680x0 CPUs without MMU?

I was a bit unsure about how to best do this since there
is not really a need for a fixed page size on nommu kernels,
whereas the three MMU configs clearly tie the page size to
the MMU rather than the platform.

There should be no reason for coldfire to have a different
page size from dragonball if neither of them actually uses
hardware pages, so one of them could be changed later.

Let me know if that makes sense to you, or you still
prefer me to change it like you suggested.

  Arnd



[PATCH 4/4] vdso: avoid including asm/page.h

2024-02-26 Thread Arnd Bergmann
From: Arnd Bergmann 

The recent change to the vdso_data_store broke building compat VDSO
on at least arm64 because it includes headers outside of the include/vdso/
namespace:

In file included from arch/arm64/include/asm/lse.h:5,
 from arch/arm64/include/asm/cmpxchg.h:14,
 from arch/arm64/include/asm/atomic.h:16,
 from include/linux/atomic.h:7,
 from include/asm-generic/bitops/atomic.h:5,
 from arch/arm64/include/asm/bitops.h:25,
 from include/linux/bitops.h:68,
 from arch/arm64/include/asm/memory.h:209,
 from arch/arm64/include/asm/page.h:46,
 from include/vdso/datapage.h:22,
 from lib/vdso/gettimeofday.c:5,
 from :
arch/arm64/include/asm/atomic_ll_sc.h:298:9: error: unknown type name 'u128'
  298 | u128 full;

Use an open-coded page size calculation based on the new CONFIG_PAGE_SHIFT
Kconfig symbol instead.

Reported-by: Linux Kernel Functional Testing 
Fixes: a0d2fcd62ac2 ("vdso/ARM: Make union vdso_data_store available for all architectures")
Link: 
https://lore.kernel.org/lkml/ca+g9fytrxxm_ko9fnpz3xarxhv7ud_yqp-teupqrnrhu+_0...@mail.gmail.com/
Signed-off-by: Arnd Bergmann 
---
 include/vdso/datapage.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/include/vdso/datapage.h b/include/vdso/datapage.h
index 7ba44379a095..2c39a67d7e23 100644
--- a/include/vdso/datapage.h
+++ b/include/vdso/datapage.h
@@ -19,8 +19,6 @@
 #include 
 #include 
 
-#include 
-
 #ifdef CONFIG_ARCH_HAS_VDSO_DATA
 #include 
 #else
@@ -128,7 +126,7 @@ extern struct vdso_data _timens_data[CS_BASES] 
__attribute__((visibility("hidden
  */
 union vdso_data_store {
struct vdso_datadata[CS_BASES];
-   u8  page[PAGE_SIZE];
+   u8  page[1ul << CONFIG_PAGE_SHIFT];
 };
 
 /*
-- 
2.39.2




[PATCH 3/4] arch: define CONFIG_PAGE_SIZE_*KB on all architectures

2024-02-26 Thread Arnd Bergmann
From: Arnd Bergmann 

Most architectures only support a single hardcoded page size. In order
to ensure that each one of these sets the corresponding Kconfig symbols,
change over the PAGE_SHIFT definition to the common one and allow
only the hardware page size to be selected.

Signed-off-by: Arnd Bergmann 
---
 arch/alpha/Kconfig | 1 +
 arch/alpha/include/asm/page.h  | 2 +-
 arch/arm/Kconfig   | 1 +
 arch/arm/include/asm/page.h| 2 +-
 arch/csky/Kconfig  | 1 +
 arch/csky/include/asm/page.h   | 2 +-
 arch/m68k/Kconfig  | 3 +++
 arch/m68k/Kconfig.cpu  | 2 ++
 arch/m68k/include/asm/page.h   | 6 +-
 arch/microblaze/Kconfig| 1 +
 arch/microblaze/include/asm/page.h | 2 +-
 arch/nios2/Kconfig | 1 +
 arch/nios2/include/asm/page.h  | 2 +-
 arch/openrisc/Kconfig  | 1 +
 arch/openrisc/include/asm/page.h   | 2 +-
 arch/riscv/Kconfig | 1 +
 arch/riscv/include/asm/page.h  | 2 +-
 arch/s390/Kconfig  | 1 +
 arch/s390/include/asm/page.h   | 2 +-
 arch/sparc/Kconfig | 2 ++
 arch/sparc/include/asm/page_32.h   | 2 +-
 arch/sparc/include/asm/page_64.h   | 3 +--
 arch/um/Kconfig| 1 +
 arch/um/include/asm/page.h | 2 +-
 arch/x86/Kconfig   | 1 +
 arch/x86/include/asm/page_types.h  | 2 +-
 arch/xtensa/Kconfig| 1 +
 arch/xtensa/include/asm/page.h | 2 +-
 28 files changed, 32 insertions(+), 19 deletions(-)

diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index d6968d090d49..4f490250d323 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -14,6 +14,7 @@ config ALPHA
select PCI_DOMAINS if PCI
select PCI_SYSCALL if PCI
select HAVE_ASM_MODVERSIONS
+   select HAVE_PAGE_SIZE_8KB
select HAVE_PCSPKR_PLATFORM
select HAVE_PERF_EVENTS
select NEED_DMA_MAP_STATE
diff --git a/arch/alpha/include/asm/page.h b/arch/alpha/include/asm/page.h
index 4db1ebc0ed99..70419e6be1a3 100644
--- a/arch/alpha/include/asm/page.h
+++ b/arch/alpha/include/asm/page.h
@@ -6,7 +6,7 @@
 #include 
 
 /* PAGE_SHIFT determines the page size */
-#define PAGE_SHIFT 13
+#define PAGE_SHIFT CONFIG_PAGE_SHIFT
 #define PAGE_SIZE  (_AC(1,UL) << PAGE_SHIFT)
 #define PAGE_MASK  (~(PAGE_SIZE-1))
 
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 0af6709570d1..9d52ba3a8ad1 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -116,6 +116,7 @@ config ARM
select HAVE_MOD_ARCH_SPECIFIC
select HAVE_NMI
select HAVE_OPTPROBES if !THUMB2_KERNEL
+   select HAVE_PAGE_SIZE_4KB
select HAVE_PCI if MMU
select HAVE_PERF_EVENTS
select HAVE_PERF_REGS
diff --git a/arch/arm/include/asm/page.h b/arch/arm/include/asm/page.h
index 119aa85d1feb..62af9f7f9e96 100644
--- a/arch/arm/include/asm/page.h
+++ b/arch/arm/include/asm/page.h
@@ -8,7 +8,7 @@
 #define _ASMARM_PAGE_H
 
 /* PAGE_SHIFT determines the page size */
-#define PAGE_SHIFT 12
+#define PAGE_SHIFT CONFIG_PAGE_SHIFT
 #define PAGE_SIZE  (_AC(1,UL) << PAGE_SHIFT)
 #define PAGE_MASK  (~((1 << PAGE_SHIFT) - 1))
 
diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig
index cf2a6fd7dff8..9c2723ab1c94 100644
--- a/arch/csky/Kconfig
+++ b/arch/csky/Kconfig
@@ -89,6 +89,7 @@ config CSKY
select HAVE_KPROBES if !CPU_CK610
select HAVE_KPROBES_ON_FTRACE if !CPU_CK610
select HAVE_KRETPROBES if !CPU_CK610
+   select HAVE_PAGE_SIZE_4KB
select HAVE_PERF_EVENTS
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
diff --git a/arch/csky/include/asm/page.h b/arch/csky/include/asm/page.h
index 4a0502e324a6..f70f37402d75 100644
--- a/arch/csky/include/asm/page.h
+++ b/arch/csky/include/asm/page.h
@@ -10,7 +10,7 @@
 /*
  * PAGE_SHIFT determines the page size: 4KB
  */
-#define PAGE_SHIFT 12
+#define PAGE_SHIFT CONFIG_PAGE_SHIFT
 #define PAGE_SIZE  (_AC(1, UL) << PAGE_SHIFT)
 #define PAGE_MASK  (~(PAGE_SIZE - 1))
 #define THREAD_SIZE(PAGE_SIZE * 2)
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 4b3e93cac723..7b709453d5e7 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -84,12 +84,15 @@ config MMU
 
 config MMU_MOTOROLA
bool
+   select HAVE_PAGE_SIZE_4KB
 
 config MMU_COLDFIRE
+   select HAVE_PAGE_SIZE_8KB
bool
 
 config MMU_SUN3
bool
+   select HAVE_PAGE_SIZE_8KB
depends on MMU && !MMU_MOTOROLA && !MMU_COLDFIRE
 
 config ARCH_SUPPORTS_KEXEC
diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu
index 9dcf245c9cbf..c777a129768a 100644
--- a/arch/m68k/Kconfig.cpu
+++ b/arch/m68k/Kconfig.cpu
@@ -30,6 +30,7 @@ config COLDFIRE
select GENERIC_CSUM
select GPIOLIB
select HAVE_LEGACY_CLK
+   select HAVE_PAGE_SIZE_8KB if !MMU
 
 end

[PATCH 2/4] arch: simplify architecture specific page size configuration

2024-02-26 Thread Arnd Bergmann
From: Arnd Bergmann 

arc, arm64, parisc and powerpc all have their own Kconfig symbols
in place of the common CONFIG_PAGE_SIZE_4KB symbols. Change these
so the common symbols are the ones that are actually used, while
leaving the architecture specific ones as the user visible
place for configuring it, to avoid breaking user configs.

Signed-off-by: Arnd Bergmann 
---
 arch/arc/Kconfig  |  3 +++
 arch/arc/include/uapi/asm/page.h  |  6 ++
 arch/arm64/Kconfig| 29 +
 arch/arm64/include/asm/page-def.h |  2 +-
 arch/parisc/Kconfig   |  3 +++
 arch/parisc/include/asm/page.h| 10 +-
 arch/powerpc/Kconfig  | 31 ++-
 arch/powerpc/include/asm/page.h   |  2 +-
 scripts/gdb/linux/constants.py.in |  2 +-
 scripts/gdb/linux/mm.py   |  2 +-
 10 files changed, 32 insertions(+), 58 deletions(-)

diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index 1b0483c51cc1..4092bec198be 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -284,14 +284,17 @@ choice
 
 config ARC_PAGE_SIZE_8K
bool "8KB"
+   select HAVE_PAGE_SIZE_8KB
help
  Choose between 8k vs 16k
 
 config ARC_PAGE_SIZE_16K
+   select HAVE_PAGE_SIZE_16KB
bool "16KB"
 
 config ARC_PAGE_SIZE_4K
bool "4KB"
+   select HAVE_PAGE_SIZE_4KB
depends on ARC_MMU_V3 || ARC_MMU_V4
 
 endchoice
diff --git a/arch/arc/include/uapi/asm/page.h b/arch/arc/include/uapi/asm/page.h
index 2a4ad619abfb..7fd9e741b527 100644
--- a/arch/arc/include/uapi/asm/page.h
+++ b/arch/arc/include/uapi/asm/page.h
@@ -13,10 +13,8 @@
 #include 
 
 /* PAGE_SHIFT determines the page size */
-#if defined(CONFIG_ARC_PAGE_SIZE_16K)
-#define PAGE_SHIFT 14
-#elif defined(CONFIG_ARC_PAGE_SIZE_4K)
-#define PAGE_SHIFT 12
+#ifdef __KERNEL__
+#define PAGE_SHIFT CONFIG_PAGE_SHIFT
 #else
 /*
  * Default 8k
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index aa7c1d435139..29290b8cb36d 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -277,27 +277,21 @@ config 64BIT
 config MMU
def_bool y
 
-config ARM64_PAGE_SHIFT
-   int
-   default 16 if ARM64_64K_PAGES
-   default 14 if ARM64_16K_PAGES
-   default 12
-
 config ARM64_CONT_PTE_SHIFT
int
-   default 5 if ARM64_64K_PAGES
-   default 7 if ARM64_16K_PAGES
+   default 5 if PAGE_SIZE_64KB
+   default 7 if PAGE_SIZE_16KB
default 4
 
 config ARM64_CONT_PMD_SHIFT
int
-   default 5 if ARM64_64K_PAGES
-   default 5 if ARM64_16K_PAGES
+   default 5 if PAGE_SIZE_64KB
+   default 5 if PAGE_SIZE_16KB
default 4
 
 config ARCH_MMAP_RND_BITS_MIN
-   default 14 if ARM64_64K_PAGES
-   default 16 if ARM64_16K_PAGES
+   default 14 if PAGE_SIZE_64KB
+   default 16 if PAGE_SIZE_16KB
default 18
 
 # max bits determined by the following formula:
@@ -1259,11 +1253,13 @@ choice
 
 config ARM64_4K_PAGES
bool "4KB"
+   select HAVE_PAGE_SIZE_4KB
help
  This feature enables 4KB pages support.
 
 config ARM64_16K_PAGES
bool "16KB"
+   select HAVE_PAGE_SIZE_16KB
help
  The system will use 16KB pages support. AArch32 emulation
  requires applications compiled with 16K (or a multiple of 16K)
@@ -1271,6 +1267,7 @@ config ARM64_16K_PAGES
 
 config ARM64_64K_PAGES
bool "64KB"
+   select HAVE_PAGE_SIZE_64KB
help
  This feature enables 64KB pages support (4KB by default)
  allowing only two levels of page tables and faster TLB
@@ -1291,19 +1288,19 @@ choice
 
 config ARM64_VA_BITS_36
bool "36-bit" if EXPERT
-   depends on ARM64_16K_PAGES
+   depends on PAGE_SIZE_16KB
 
 config ARM64_VA_BITS_39
bool "39-bit"
-   depends on ARM64_4K_PAGES
+   depends on PAGE_SIZE_4KB
 
 config ARM64_VA_BITS_42
bool "42-bit"
-   depends on ARM64_64K_PAGES
+   depends on PAGE_SIZE_64KB
 
 config ARM64_VA_BITS_47
bool "47-bit"
-   depends on ARM64_16K_PAGES
+   depends on PAGE_SIZE_16KB
 
 config ARM64_VA_BITS_48
bool "48-bit"
diff --git a/arch/arm64/include/asm/page-def.h 
b/arch/arm64/include/asm/page-def.h
index 2403f7b4cdbf..792e9fe881dc 100644
--- a/arch/arm64/include/asm/page-def.h
+++ b/arch/arm64/include/asm/page-def.h
@@ -11,7 +11,7 @@
 #include 
 
 /* PAGE_SHIFT determines the page size */
-#define PAGE_SHIFT CONFIG_ARM64_PAGE_SHIFT
+#define PAGE_SHIFT CONFIG_PAGE_SHIFT
 #define PAGE_SIZE  (_AC(1, UL) << PAGE_SHIFT)
 #define PAGE_MASK  (~(PAGE_SIZE-1))
 
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 5c845e8d59d9..b180e684fa0d 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -273,6 +273,7 @@ choice
 
 config PARISC_PAGE_SIZE_4KB

[PATCH 1/4] arch: consolidate existing CONFIG_PAGE_SIZE_*KB definitions

2024-02-26 Thread Arnd Bergmann
From: Arnd Bergmann 

These four architectures define the same Kconfig symbols for configuring
the page size. Move the logic into a common place where it can be shared
with all other architectures.

Signed-off-by: Arnd Bergmann 
---
 arch/Kconfig  | 58 +--
 arch/hexagon/Kconfig  | 25 +++--
 arch/hexagon/include/asm/page.h   |  6 +---
 arch/loongarch/Kconfig| 21 ---
 arch/loongarch/include/asm/page.h | 10 +-
 arch/mips/Kconfig | 58 +++
 arch/mips/include/asm/page.h  | 16 +
 arch/sh/include/asm/page.h| 13 +--
 arch/sh/mm/Kconfig| 42 +++---
 9 files changed, 88 insertions(+), 161 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index a5af0edd3eb8..237cea01ed9b 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -1078,17 +1078,71 @@ config HAVE_ARCH_COMPAT_MMAP_BASES
  and vice-versa 32-bit applications to call 64-bit mmap().
  Required for applications doing different bitness syscalls.
 
+config HAVE_PAGE_SIZE_4KB
+   bool
+
+config HAVE_PAGE_SIZE_8KB
+   bool
+
+config HAVE_PAGE_SIZE_16KB
+   bool
+
+config HAVE_PAGE_SIZE_32KB
+   bool
+
+config HAVE_PAGE_SIZE_64KB
+   bool
+
+config HAVE_PAGE_SIZE_256KB
+   bool
+
+choice
+   prompt "MMU page size"
+
+config PAGE_SIZE_4KB
+   bool "4KB pages"
+   depends on HAVE_PAGE_SIZE_4KB
+
+config PAGE_SIZE_8KB
+   bool "8KB pages"
+   depends on HAVE_PAGE_SIZE_8KB
+
+config PAGE_SIZE_16KB
+   bool "16KB pages"
+   depends on HAVE_PAGE_SIZE_16KB
+
+config PAGE_SIZE_32KB
+   bool "32KB pages"
+   depends on HAVE_PAGE_SIZE_32KB
+
+config PAGE_SIZE_64KB
+   bool "64KB pages"
+   depends on HAVE_PAGE_SIZE_64KB
+
+config PAGE_SIZE_256KB
+   bool "256KB pages"
+   depends on HAVE_PAGE_SIZE_256KB
+
+endchoice
+
 config PAGE_SIZE_LESS_THAN_64KB
def_bool y
-   depends on !ARM64_64K_PAGES
depends on !PAGE_SIZE_64KB
-   depends on !PARISC_PAGE_SIZE_64KB
depends on PAGE_SIZE_LESS_THAN_256KB
 
 config PAGE_SIZE_LESS_THAN_256KB
def_bool y
depends on !PAGE_SIZE_256KB
 
+config PAGE_SHIFT
+   int
+   default 12 if PAGE_SIZE_4KB
+   default 13 if PAGE_SIZE_8KB
+   default 14 if PAGE_SIZE_16KB
+   default 15 if PAGE_SIZE_32KB
+   default 16 if PAGE_SIZE_64KB
+   default 18 if PAGE_SIZE_256KB
+
 # This allows to use a set of generic functions to determine mmap base
 # address by giving priority to top-down scheme only if the process
 # is not in legacy mode (compat task, unlimited stack size or
diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig
index a880ee067d2e..aac46ee1a000 100644
--- a/arch/hexagon/Kconfig
+++ b/arch/hexagon/Kconfig
@@ -8,6 +8,11 @@ config HEXAGON
select ARCH_HAS_SYNC_DMA_FOR_DEVICE
select ARCH_NO_PREEMPT
select DMA_GLOBAL_POOL
+   select FRAME_POINTER
+   select HAVE_PAGE_SIZE_4KB
+   select HAVE_PAGE_SIZE_16KB
+   select HAVE_PAGE_SIZE_64KB
+   select HAVE_PAGE_SIZE_256KB
# Other pending projects/to-do items.
# select HAVE_REGS_AND_STACK_ACCESS_API
# select HAVE_HW_BREAKPOINT if PERF_EVENTS
@@ -120,26 +125,6 @@ config NR_CPUS
  This is purely to save memory - each supported CPU adds
  approximately eight kilobytes to the kernel image.
 
-choice
-   prompt "Kernel page size"
-   default PAGE_SIZE_4KB
-   help
- Changes the default page size; use with caution.
-
-config PAGE_SIZE_4KB
-   bool "4KB"
-
-config PAGE_SIZE_16KB
-   bool "16KB"
-
-config PAGE_SIZE_64KB
-   bool "64KB"
-
-config PAGE_SIZE_256KB
-   bool "256KB"
-
-endchoice
-
 source "kernel/Kconfig.hz"
 
 endmenu
diff --git a/arch/hexagon/include/asm/page.h b/arch/hexagon/include/asm/page.h
index 10f1bc07423c..65c9bac639fa 100644
--- a/arch/hexagon/include/asm/page.h
+++ b/arch/hexagon/include/asm/page.h
@@ -13,27 +13,22 @@
 /*  This is probably not the most graceful way to handle this.  */
 
 #ifdef CONFIG_PAGE_SIZE_4KB
-#define PAGE_SHIFT 12
 #define HEXAGON_L1_PTE_SIZE __HVM_PDE_S_4KB
 #endif
 
 #ifdef CONFIG_PAGE_SIZE_16KB
-#define PAGE_SHIFT 14
 #define HEXAGON_L1_PTE_SIZE __HVM_PDE_S_16KB
 #endif
 
 #ifdef CONFIG_PAGE_SIZE_64KB
-#define PAGE_SHIFT 16
 #define HEXAGON_L1_PTE_SIZE __HVM_PDE_S_64KB
 #endif
 
 #ifdef CONFIG_PAGE_SIZE_256KB
-#define PAGE_SHIFT 18
 #define HEXAGON_L1_PTE_SIZE __HVM_PDE_S_256KB
 #endif
 
 #ifdef CONFIG_PAGE_SIZE_1MB
-#define PAGE_SHIFT 20
 #define HEXAGON_L1_PTE_SIZE __HVM_PDE_S_1MB
 #endif
 
@@ -50,6 +45,7 @@
 #define HVM_HUGEPAGE_SIZE 0x5
 #endif
 
+#define PAGE_SHIFT CONFIG_PAGE_SHIFT
 #define PAGE_SIZE  (1UL << PAGE_SHIFT)
 #define PAGE_MASK  (~((1 <<

[PATCH 0/4] arch: mm, vdso: consolidate PAGE_SIZE definition

2024-02-26 Thread Arnd Bergmann
From: Arnd Bergmann 

Naresh noticed that the newly added usage of the PAGE_SIZE macro in
include/vdso/datapage.h introduced a build regression. I had an older
patch that I revived to have this defined through Kconfig rather than
through including asm/page.h, which is not allowed in vdso code.

I rebased and tested on top of the tip/timers/core branch that
introduced the regression. If these patches get added, the
compat VDSOs all build again, but the changes are a bit invasive.

  Arnd

Link: 
https://lore.kernel.org/lkml/ca+g9fytrxxm_ko9fnpz3xarxhv7ud_yqp-teupqrnrhu+_0...@mail.gmail.com/
Link: https://lore.kernel.org/all/65dc6c14.170a0220.f4a3f.9...@mx.google.com/

Arnd Bergmann (4):
  arch: consolidate existing CONFIG_PAGE_SIZE_*KB definitions
  arch: simplify architecture specific page size configuration
  arch: define CONFIG_PAGE_SIZE_*KB on all architectures
  vdso: avoid including asm/page.h

 arch/Kconfig   | 58 --
 arch/alpha/Kconfig |  1 +
 arch/alpha/include/asm/page.h  |  2 +-
 arch/arc/Kconfig   |  3 ++
 arch/arc/include/uapi/asm/page.h   |  6 ++--
 arch/arm/Kconfig   |  1 +
 arch/arm/include/asm/page.h|  2 +-
 arch/arm64/Kconfig | 29 +++
 arch/arm64/include/asm/page-def.h  |  2 +-
 arch/csky/Kconfig  |  1 +
 arch/csky/include/asm/page.h   |  2 +-
 arch/hexagon/Kconfig   | 25 +++--
 arch/hexagon/include/asm/page.h|  6 +---
 arch/loongarch/Kconfig | 21 ---
 arch/loongarch/include/asm/page.h  | 10 +-
 arch/m68k/Kconfig  |  3 ++
 arch/m68k/Kconfig.cpu  |  2 ++
 arch/m68k/include/asm/page.h   |  6 +---
 arch/microblaze/Kconfig|  1 +
 arch/microblaze/include/asm/page.h |  2 +-
 arch/mips/Kconfig  | 58 +++---
 arch/mips/include/asm/page.h   | 16 +
 arch/nios2/Kconfig |  1 +
 arch/nios2/include/asm/page.h  |  2 +-
 arch/openrisc/Kconfig  |  1 +
 arch/openrisc/include/asm/page.h   |  2 +-
 arch/parisc/Kconfig|  3 ++
 arch/parisc/include/asm/page.h | 10 +-
 arch/powerpc/Kconfig   | 31 
 arch/powerpc/include/asm/page.h|  2 +-
 arch/riscv/Kconfig |  1 +
 arch/riscv/include/asm/page.h  |  2 +-
 arch/s390/Kconfig  |  1 +
 arch/s390/include/asm/page.h   |  2 +-
 arch/sh/include/asm/page.h | 13 +--
 arch/sh/mm/Kconfig | 42 +++---
 arch/sparc/Kconfig |  2 ++
 arch/sparc/include/asm/page_32.h   |  2 +-
 arch/sparc/include/asm/page_64.h   |  3 +-
 arch/um/Kconfig|  1 +
 arch/um/include/asm/page.h |  2 +-
 arch/x86/Kconfig   |  1 +
 arch/x86/include/asm/page_types.h  |  2 +-
 arch/xtensa/Kconfig|  1 +
 arch/xtensa/include/asm/page.h |  2 +-
 include/vdso/datapage.h|  4 +--
 scripts/gdb/linux/constants.py.in  |  2 +-
 scripts/gdb/linux/mm.py|  2 +-
 48 files changed, 153 insertions(+), 241 deletions(-)

-- 
2.39.2
To: Thomas Gleixner 
To: Vincenzo Frascino 
To: Kees Cook 
To: Anna-Maria Behnsen 
Cc: Matt Turner 
Cc: Vineet Gupta 
Cc: Russell King 
Cc: Catalin Marinas 
Cc: Guo Ren 
Cc: Brian Cain 
Cc: Huacai Chen 
Cc: Geert Uytterhoeven 
Cc: Michal Simek 
Cc: Thomas Bogendoerfer 
Cc: Helge Deller 
Cc: Michael Ellerman 
Cc: Christophe Leroy 
Cc: Palmer Dabbelt 
Cc: John Paul Adrian Glaubitz 
Cc: Andreas Larsson 
Cc: Richard Weinberger 
Cc: x...@kernel.org
Cc: Max Filippov 
Cc: Andy Lutomirski 
Cc: Vincenzo Frascino 
Cc: Jan Kiszka 
Cc: Kieran Bingham 
Cc: Andrew Morton 
Cc: Arnd Bergmann 
Cc: linux-ker...@vger.kernel.org
Cc: linux-alpha@vger.kernel.org
Cc: linux-snps-...@lists.infradead.org
Cc: linux-arm-ker...@lists.infradead.org
Cc: linux-c...@vger.kernel.org
Cc: linux-hexa...@vger.kernel.org
Cc: loonga...@lists.linux.dev
Cc: linux-m...@lists.linux-m68k.org
Cc: linux-m...@vger.kernel.org
Cc: linux-openr...@vger.kernel.org
Cc: linux-par...@vger.kernel.org
Cc: linuxppc-...@lists.ozlabs.org
Cc: linux-ri...@lists.infradead.org
Cc: linux-s...@vger.kernel.org
Cc: linux...@vger.kernel.org
Cc: sparcli...@vger.kernel.org
Cc: linux...@lists.infradead.org



[PATCH 2/2] rtc/alpha: remove legacy rtc driver

2019-10-23 Thread Arnd Bergmann
The old drivers/char/rtc.c driver was originally the implementation
for x86 PCs but got subsequently replaced by the rtc class driver
on all architectures except alpha.

Move alpha over to the portable driver and remove the old one
for good.

The CONFIG_JS_RTC option was only ever used on SPARC32 but
has not been available for many years, this was used to build
the same rtc driver with a different module name.

Cc: Richard Henderson 
Cc: Ivan Kokshaysky 
Cc: Matt Turner 
Cc: linux-alpha@vger.kernel.org
Cc: Paul Gortmaker 
Signed-off-by: Arnd Bergmann 
---
This was last discussed in early 2018 in
https://lore.kernel.org/lkml/CAK8P3a0QZNY+K+V1HG056xCerz=_l2jh5ufz+2lwkdqkw5z...@mail.gmail.com/

Nobody ever replied there, so let's try this instead.
If there is any reason to keep the driver after all,
please let us know.
---
 arch/alpha/configs/defconfig |3 +-
 drivers/char/Kconfig |   56 --
 drivers/char/Makefile|4 -
 drivers/char/rtc.c   | 1311 --
 4 files changed, 2 insertions(+), 1372 deletions(-)
 delete mode 100644 drivers/char/rtc.c

diff --git a/arch/alpha/configs/defconfig b/arch/alpha/configs/defconfig
index f4ec420d7f2d..e10c1be3c0d1 100644
--- a/arch/alpha/configs/defconfig
+++ b/arch/alpha/configs/defconfig
@@ -53,7 +53,8 @@ CONFIG_NET_PCI=y
 CONFIG_YELLOWFIN=y
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_RTC=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_CMOS=y
 CONFIG_EXT2_FS=y
 CONFIG_REISERFS_FS=m
 CONFIG_ISO9660_FS=y
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index dabbf3f519c6..c2ac4f257c82 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -243,62 +243,6 @@ config NVRAM
  To compile this driver as a module, choose M here: the
  module will be called nvram.
 
-#
-# These legacy RTC drivers just cause too many conflicts with the generic
-# RTC framework ... let's not even try to coexist any more.
-#
-if RTC_LIB=n
-
-config RTC
-   tristate "Enhanced Real Time Clock Support (legacy PC RTC driver)"
-   depends on ALPHA
-   ---help---
- If you say Y here and create a character special file /dev/rtc with
- major number 10 and minor number 135 using mknod ("man mknod"), you
- will get access to the real time clock (or hardware clock) built
- into your computer.
-
- Every PC has such a clock built in. It can be used to generate
- signals from as low as 1Hz up to 8192Hz, and can also be used
- as a 24 hour alarm. It reports status information via the file
- /proc/driver/rtc and its behaviour is set by various ioctls on
- /dev/rtc.
-
- If you run Linux on a multiprocessor machine and said Y to
- "Symmetric Multi Processing" above, you should say Y here to read
- and set the RTC in an SMP compatible fashion.
-
- If you think you have a use for such a device (such as periodic data
- sampling), then say Y here, and read 

- for details.
-
- To compile this driver as a module, choose M here: the
- module will be called rtc.
-
-config JS_RTC
-   tristate "Enhanced Real Time Clock Support"
-   depends on SPARC32 && PCI
-   ---help---
- If you say Y here and create a character special file /dev/rtc with
- major number 10 and minor number 135 using mknod ("man mknod"), you
- will get access to the real time clock (or hardware clock) built
- into your computer.
-
- Every PC has such a clock built in. It can be used to generate
- signals from as low as 1Hz up to 8192Hz, and can also be used
- as a 24 hour alarm. It reports status information via the file
- /proc/driver/rtc and its behaviour is set by various ioctls on
- /dev/rtc.
-
- If you think you have a use for such a device (such as periodic data
- sampling), then say Y here, and read 

- for details.
-
- To compile this driver as a module, choose M here: the
- module will be called js-rtc.
-
-endif # RTC_LIB
-
 config DTLK
tristate "Double Talk PC internal speech card support"
depends on ISA
diff --git a/drivers/char/Makefile b/drivers/char/Makefile
index abe3138b1f5a..ffce287ef415 100644
--- a/drivers/char/Makefile
+++ b/drivers/char/Makefile
@@ -20,7 +20,6 @@ obj-$(CONFIG_APM_EMULATION)   += apm-emulation.o
 obj-$(CONFIG_DTLK) += dtlk.o
 obj-$(CONFIG_APPLICOM) += applicom.o
 obj-$(CONFIG_SONYPI)   += sonypi.o
-obj-$(CONFIG_RTC)  += rtc.o
 obj-$(CONFIG_HPET) += hpet.o
 obj-$(CONFIG_XILINX_HWICAP)+= xilinx_hwicap/
 obj-$(CONFIG_NVRAM)+= nvram.o
@@ -45,9 +44,6 @@ obj-$(CONFIG_TCG_TPM) += tpm/
 
 obj-$(CONFIG_PS3_FLASH)+= ps3flash.o
 
-obj-$(CONFIG_JS_RTC)   += js-rtc.o
-js-rtc-y = rtc.o
-
 obj-$(

Re: [PATCH v9 08/10] open: openat2(2) syscall

2019-07-18 Thread Arnd Bergmann
On Thu, Jul 18, 2019 at 6:12 PM Aleksa Sarai  wrote:
> On 2019-07-18, Arnd Bergmann  wrote:
> > On Sat, Jul 6, 2019 at 5:00 PM Aleksa Sarai  wrote:
> >
> > In fact, that seems similar enough to the existing openat() that I think
> > you could also just add the fifth argument to the existing call when
> > a newly defined flag is set, similarly to how we only use the 'mode'
> > argument when O_CREAT or O_TMPFILE are set.
>
> I considered doing this (and even had a preliminary version of it), but
> I discovered that I was not in favour of this idea -- once I started to
> write tests using it -- for a few reasons:
>
>   1. It doesn't really allow for clean extension for a future 6th
>  argument (because you are using up O_* flags to signify "use the
>  next argument", and O_* flags don't give -EINVAL if they're
>  unknown). Now, yes you can do the on-start runtime check that
>  everyone does -- but I've never really liked having to do it.
>
>  Having reserved padding for later extensions (that is actually
>  checked and gives -EINVAL) matches more modern syscall designs.
>
>   2. I really was hoping that the variadic openat(2) could be done away
>  using this union setup (Linus said he didn't like it, and suggested
>  using something like 'struct stat' as an argument for openat(2) --
>  though personally I am not sure I would personally like to use an
>  interface like that).
>
>   3. In order to avoid wasting a syscall argument for mode/mask you need
>  to either have something like your suggested mode_mask (which makes
>  the syscall arguments less consistent) or have some sort of
>  mode-like argument that is treated specially (which is really awful
>  on multiple levels -- this one I also tried and even wrote my
>  original tests using). And in both cases, the shims for
>  open{,at}(2) are somewhat less clean.

These are all good reasons, thanks for providing the background.

> All of that being said, I'd be happy to switch to whatever you think
> makes the most sense. As long as it's possible to get an O_PATH with
> RESOLVE_IN_ROOT set, I'm happy.

I don't feel I should be in charge of making the decision. I'd still
prefer avoiding the indirect argument structure because

4. it's inconsistent with most other syscalls

5. you get the same problem with seccomp and strace that
   clone3() has -- these and others only track the register
   arguments by default.

6. copying the structure adds a small overhead compared to
   passing registers

7. the calling conventions may be inconvenient for a user space
   library, so you end up with different prototypes for the low-level
   syscall and the libc abstraction.

I don't see any of the above seven points as a showstopper
either way, so I hope someone else has a strong opinion
and can make the decision easier for you.

In the meantime just keep what you have, so you don't have
to change it multiple times.

   Arnd


Re: [PATCH v9 08/10] open: openat2(2) syscall

2019-07-18 Thread Arnd Bergmann
On Sat, Jul 6, 2019 at 5:00 PM Aleksa Sarai  wrote:

> diff --git a/arch/alpha/kernel/syscalls/syscall.tbl 
> b/arch/alpha/kernel/syscalls/syscall.tbl
> index 9e7704e44f6d..1703d048c141 100644
> --- a/arch/alpha/kernel/syscalls/syscall.tbl
> +++ b/arch/alpha/kernel/syscalls/syscall.tbl
> @@ -461,6 +461,7 @@
>  530common  getegid sys_getegid
>  531common  geteuid sys_geteuid
>  532common  getppid sys_getppid
> +533common  openat2 sys_openat2
>  # all other architectures have common numbers for new syscall, alpha
>  # is the exception.
>  534common  pidfd_send_signal   sys_pidfd_send_signal

My plan here was to add new syscalls in the same order as everywhere else,
just with the number 110 higher. In the long run, I hope we can automate
this.

> diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
> index aaf479a9e92d..4ad262698396 100644
> --- a/arch/arm/tools/syscall.tbl
> +++ b/arch/arm/tools/syscall.tbl
> @@ -447,3 +447,4 @@
>  431common  fsconfigsys_fsconfig
>  432common  fsmount sys_fsmount
>  433common  fspick  sys_fspick
> +434common  openat2 sys_openat2

434 is already used in linux-next, I suggest you use 437 (Palmer
just submitted fchmodat4, which could become 436).

> +/**
> + * Arguments for how openat2(2) should open the target path. If @extra is 
> zero,
> + * then openat2(2) is identical to openat(2).
> + *
> + * @flags: O_* flags (unknown flags ignored).
> + * @mode: O_CREAT file mode (ignored otherwise).
> + * @upgrade_mask: restrict how the O_PATH may be re-opened (ignored 
> otherwise).
> + * @resolve: RESOLVE_* flags (-EINVAL on unknown flags).
> + * @reserved: reserved for future extensions, must be zeroed.
> + */
> +struct open_how {
> +   __u32 flags;
> +   union {
> +   __u16 mode;
> +   __u16 upgrade_mask;
> +   };
> +   __u16 resolve;
> +   __u64 reserved[7]; /* must be zeroed */
> +};

We can have system calls with up to six arguments on all architectures, so
this could still be done more conventionally without the indirection: like

long openat2(int dfd, const char __user * filename, int flags, mode_t
mode_mask, __u16 resolve);

In fact, that seems similar enough to the existing openat() that I think
you could also just add the fifth argument to the existing call when
a newly defined flag is set, similarly to how we only use the 'mode'
argument when O_CREAT or O_TMPFILE are set.

> --- a/include/linux/syscalls.h
> +++ b/include/linux/syscalls.h

This file seems to lack a declaration for the system call, which means it
will cause a build failure on some architectures, e.g. arch/arc/kernel/sys.c:

#define __SYSCALL(nr, call) [nr] = (call),
void *sys_call_table[NR_syscalls] = {
[0 ... NR_syscalls-1] = sys_ni_syscall,
#include 
};

Arnd


Re: [PATCH v1 1/2] open: add close_range()

2019-05-24 Thread Arnd Bergmann
On Thu, May 23, 2019 at 6:33 PM Christian Brauner  wrote:
> On Thu, May 23, 2019 at 07:22:17PM +0300, Konstantin Khlebnikov wrote:
> > On 22.05.2019 18:52, Christian Brauner wrote:> This adds the close_range() 
> > syscall. It allows to efficiently close a range
> > >   22 files changed, 100 insertions(+), 9 deletions(-)
> > >
> >
> > It would be better to split arch/ wiring into separate patch for better 
> > readability.
>
> Ok. You mean only do x86 - seems to be the standard - and then move the
> others into a separate patch? Doesn't seem worth to have a patch
> per-arch, I'd think.

I think I would prefer the first patch to just add the call without wiring it up
anywhere, and a second patch to add it on all architectures including x86.

 Arnd


Re: [PATCH v2 1/2] pid: add pidfd_open()

2019-05-20 Thread Arnd Bergmann
On Mon, May 20, 2019 at 4:48 PM Christian Brauner  wrote:
>
> On Mon, May 20, 2019 at 04:37:03PM +0200, Arnd Bergmann wrote:
> > On Mon, May 20, 2019 at 3:46 PM Christian Brauner  
> > wrote:
> > >
> > > In line with Arnd's recent changes to consolidate syscall numbers across
> > > architectures, I have added the pidfd_open() syscall to all architectures
> > > at the same time.
> >
> > Thanks! I've checked that the ones you have added are all
> > done correctly. However, double-checking that you got all of them,
> > I noticed that you missed mips-o32 and mips-n64. With those added:
> >
> > Acked-by: Arnd Bergmann 
>
> Perfect, will plumb mips-o32 and mips-n64 and resend once more with your
> ack added.
> Sidenote: You plan on merging the common syscall tables or will there be
> a script to do this work per-arch in the future?

David Howells also asked about this. I think having a common table will
be best in the long run, patches welcome.

As you noticed, there are still a few minor differences between the files
on mips, arm, x86, alpha and s390, and we are missing the .tbl files
for arm-compat and asm-generic, as well as an architecture independent
script.

Once that is all taken care of, we can move the entries for syscall
403 and higher into a common file, and change the script to pick
up the contents from there in addition to the architecture specific
file.

  Arnd


Re: [PATCH v2 1/2] pid: add pidfd_open()

2019-05-20 Thread Arnd Bergmann
On Mon, May 20, 2019 at 3:46 PM Christian Brauner  wrote:
>
> In line with Arnd's recent changes to consolidate syscall numbers across
> architectures, I have added the pidfd_open() syscall to all architectures
> at the same time.

Thanks! I've checked that the ones you have added are all
done correctly. However, double-checking that you got all of them,
I noticed that you missed mips-o32 and mips-n64. With those added:

Acked-by: Arnd Bergmann 


Re: [GIT PULL 1/4] ARM: SoC platform updates

2019-05-16 Thread Arnd Bergmann
On Thu, May 16, 2019 at 5:34 PM Linus Torvalds
 wrote:
>
> On Wed, May 15, 2019 at 11:43 PM Olof Johansson  wrote:
> >
> > SoC updates, mostly refactorings and cleanups of old legacy platforms.
> > Major themes this release:
>
> Hmm. This brings in a new warning:
>
>   drivers/clocksource/timer-ixp4xx.c:78:20: warning:
> ‘ixp4xx_read_sched_clock’ defined but not used [-Wunused-function]
>
> because that driver is enabled for build testing, but that function
> is only used under
>
>   #ifdef CONFIG_ARM
> sched_clock_register(ixp4xx_read_sched_clock, 32, timer_freq);
>   #endif
>
> It's not clear why that #ifdef is there. This driver only builds
> non-ARM when COMPILE_TEST is enabled, and that #ifdef actually breaks
> that build test.
>
> I'm going to remove that #ifdef in my merge, because I do *not* want
> to see new warnings, and it doesn't seem to make any sense.
>
> Maybe that's the wrong resolution, please holler and let me know if
> you want something else.

As far as I can tell, that is the best fix, thanks for the cleanup!

  Arnd


[PATCH net-next 3/3] net: socket: implement 64-bit timestamps

2019-04-16 Thread Arnd Bergmann
The 'timeval' and 'timespec' data structures used for socket timestamps
are going to be redefined in user space based on 64-bit time_t in future
versions of the C library to deal with the y2038 overflow problem,
which breaks the ABI definition.

Unlike many modern ioctl commands, SIOCGSTAMP and SIOCGSTAMPNS do not
use the _IOR() macro to encode the size of the transferred data, so it
remains ambiguous whether the application uses the old or new layout.

The best workaround I could find is rather ugly: we redefine the command
code based on the size of the respective data structure with a ternary
operator. This lets it get evaluated as late as possible, hopefully after
that structure is visible to the caller. We cannot use an #ifdef here,
because linux/sockios.h might have been included before any libc header
that could determine the size of time_t.

The ioctl implementation now interprets the new command codes as always
referring to the 64-bit structure on all architectures, while the old
architecture specific command code still refers to the old architecture
specific layout. The new command number is only used when they are
actually different.

Signed-off-by: Arnd Bergmann 
---
 arch/alpha/include/uapi/asm/sockios.h  |  4 ++--
 arch/mips/include/uapi/asm/sockios.h   |  4 ++--
 arch/sh/include/uapi/asm/sockios.h |  5 +++--
 arch/xtensa/include/uapi/asm/sockios.h |  4 ++--
 include/uapi/asm-generic/sockios.h |  4 ++--
 include/uapi/linux/sockios.h   | 21 +
 net/socket.c   | 24 ++--
 7 files changed, 50 insertions(+), 16 deletions(-)

diff --git a/arch/alpha/include/uapi/asm/sockios.h 
b/arch/alpha/include/uapi/asm/sockios.h
index ba287e4b01bf..af92bc27c3be 100644
--- a/arch/alpha/include/uapi/asm/sockios.h
+++ b/arch/alpha/include/uapi/asm/sockios.h
@@ -11,7 +11,7 @@
 #define SIOCSPGRP  _IOW('s', 8, pid_t)
 #define SIOCGPGRP  _IOR('s', 9, pid_t)
 
-#define SIOCGSTAMP 0x8906  /* Get stamp (timeval) */
-#define SIOCGSTAMPNS   0x8907  /* Get stamp (timespec) */
+#define SIOCGSTAMP_OLD 0x8906  /* Get stamp (timeval) */
+#define SIOCGSTAMPNS_OLD 0x8907/* Get stamp (timespec) */
 
 #endif /* _ASM_ALPHA_SOCKIOS_H */
diff --git a/arch/mips/include/uapi/asm/sockios.h 
b/arch/mips/include/uapi/asm/sockios.h
index 5b40a88593fa..66f60234f290 100644
--- a/arch/mips/include/uapi/asm/sockios.h
+++ b/arch/mips/include/uapi/asm/sockios.h
@@ -21,7 +21,7 @@
 #define SIOCSPGRP  _IOW('s', 8, pid_t)
 #define SIOCGPGRP  _IOR('s', 9, pid_t)
 
-#define SIOCGSTAMP 0x8906  /* Get stamp (timeval) */
-#define SIOCGSTAMPNS   0x8907  /* Get stamp (timespec) */
+#define SIOCGSTAMP_OLD 0x8906  /* Get stamp (timeval) */
+#define SIOCGSTAMPNS_OLD 0x8907/* Get stamp (timespec) */
 
 #endif /* _ASM_SOCKIOS_H */
diff --git a/arch/sh/include/uapi/asm/sockios.h 
b/arch/sh/include/uapi/asm/sockios.h
index 17313d2c3527..ef18a668456d 100644
--- a/arch/sh/include/uapi/asm/sockios.h
+++ b/arch/sh/include/uapi/asm/sockios.h
@@ -10,6 +10,7 @@
 #define SIOCSPGRP  _IOW('s', 8, pid_t)
 #define SIOCGPGRP  _IOR('s', 9, pid_t)
 
-#define SIOCGSTAMP _IOR('s', 100, struct timeval) /* Get stamp (timeval) */
-#define SIOCGSTAMPNS   _IOR('s', 101, struct timespec) /* Get stamp (timespec) 
*/
+#define SIOCGSTAMP_OLD _IOR('s', 100, struct timeval) /* Get stamp (timeval) */
+#define SIOCGSTAMPNS_OLD _IOR('s', 101, struct timespec) /* Get stamp 
(timespec) */
+
 #endif /* __ASM_SH_SOCKIOS_H */
diff --git a/arch/xtensa/include/uapi/asm/sockios.h 
b/arch/xtensa/include/uapi/asm/sockios.h
index fb8ac3607189..1a1f58f4b75a 100644
--- a/arch/xtensa/include/uapi/asm/sockios.h
+++ b/arch/xtensa/include/uapi/asm/sockios.h
@@ -26,7 +26,7 @@
 #define SIOCSPGRP  _IOW('s', 8, pid_t)
 #define SIOCGPGRP  _IOR('s', 9, pid_t)
 
-#define SIOCGSTAMP 0x8906  /* Get stamp (timeval) */
-#define SIOCGSTAMPNS   0x8907  /* Get stamp (timespec) */
+#define SIOCGSTAMP_OLD 0x8906  /* Get stamp (timeval) */
+#define SIOCGSTAMPNS_OLD 0x8907/* Get stamp (timespec) */
 
 #endif /* _XTENSA_SOCKIOS_H */
diff --git a/include/uapi/asm-generic/sockios.h 
b/include/uapi/asm-generic/sockios.h
index 64f658c7cec2..44fa3ed70483 100644
--- a/include/uapi/asm-generic/sockios.h
+++ b/include/uapi/asm-generic/sockios.h
@@ -8,7 +8,7 @@
 #define FIOGETOWN  0x8903
 #define SIOCGPGRP  0x8904
 #define SIOCATMARK 0x8905
-#define SIOCGSTAMP 0x8906  /* Get stamp (timeval) */
-#define SIOCGSTAMPNS   0x8907  /* Get stamp (timespec) */
+#define SIOCGSTAMP_OLD 0x8906  /* Get stamp (timeval) */
+#define SIOCGSTAMPNS_OLD 0x8907/* Get stamp (timespec) */
 
 #endif /* __ASM_GENERIC_SOCKIOS_H */
diff --git a/include/uapi/linux/sockios.h b/include/uapi/linux/sockios.h
index d393e9ed3964..7d1bccbbef78 100644
--- a/include/uapi/linux

[PATCH] [v2] arch: add pidfd and io_uring syscalls everywhere

2019-04-15 Thread Arnd Bergmann
Add the io_uring and pidfd_send_signal system calls to all architectures.

These system calls are designed to handle both native and compat tasks,
so all entries are the same across architectures, only arm-compat and
the generic table still use an old format.

Acked-by: Michael Ellerman  (powerpc)
Acked-by: Heiko Carstens  (s390)
Acked-by: Geert Uytterhoeven 
Signed-off-by: Arnd Bergmann 
---
Changes since v1:
- fix s390 table
- use 'n64' tag in mips-n64 instead of common.
---
 arch/alpha/kernel/syscalls/syscall.tbl  | 4 
 arch/arm/tools/syscall.tbl  | 4 
 arch/arm64/include/asm/unistd.h | 2 +-
 arch/arm64/include/asm/unistd32.h   | 8 
 arch/ia64/kernel/syscalls/syscall.tbl   | 4 
 arch/m68k/kernel/syscalls/syscall.tbl   | 4 
 arch/microblaze/kernel/syscalls/syscall.tbl | 4 
 arch/mips/kernel/syscalls/syscall_n32.tbl   | 4 
 arch/mips/kernel/syscalls/syscall_n64.tbl   | 4 
 arch/mips/kernel/syscalls/syscall_o32.tbl   | 4 
 arch/parisc/kernel/syscalls/syscall.tbl | 4 
 arch/powerpc/kernel/syscalls/syscall.tbl| 4 
 arch/s390/kernel/syscalls/syscall.tbl   | 4 
 arch/sh/kernel/syscalls/syscall.tbl | 4 
 arch/sparc/kernel/syscalls/syscall.tbl  | 4 
 arch/xtensa/kernel/syscalls/syscall.tbl | 4 
 16 files changed, 65 insertions(+), 1 deletion(-)

diff --git a/arch/alpha/kernel/syscalls/syscall.tbl 
b/arch/alpha/kernel/syscalls/syscall.tbl
index 63ed39cbd3bd..165f268beafc 100644
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -463,3 +463,7 @@
 532common  getppid sys_getppid
 # all other architectures have common numbers for new syscall, alpha
 # is the exception.
+534common  pidfd_send_signal   sys_pidfd_send_signal
+535common  io_uring_setup  sys_io_uring_setup
+536common  io_uring_enter  sys_io_uring_enter
+537common  io_uring_register   sys_io_uring_register
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index 9016f4081bb9..0393917eaa57 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -437,3 +437,7 @@
 421common  rt_sigtimedwait_time64  sys_rt_sigtimedwait
 422common  futex_time64sys_futex
 423common  sched_rr_get_interval_time64sys_sched_rr_get_interval
+424common  pidfd_send_signal   sys_pidfd_send_signal
+425common  io_uring_setup  sys_io_uring_setup
+426common  io_uring_enter  sys_io_uring_enter
+427common  io_uring_register   sys_io_uring_register
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index 310d8f1cae7a..c6946fe640e6 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -49,7 +49,7 @@
 #define __ARM_NR_compat_set_tls(__ARM_NR_COMPAT_BASE + 5)
 #define __ARM_NR_COMPAT_END(__ARM_NR_COMPAT_BASE + 0x800)
 
-#define __NR_compat_syscalls   424
+#define __NR_compat_syscalls   428
 #endif
 
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/arm64/include/asm/unistd32.h 
b/arch/arm64/include/asm/unistd32.h
index 5590f2623690..23f1a44acada 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -866,6 +866,14 @@ __SYSCALL(__NR_rt_sigtimedwait_time64, 
compat_sys_rt_sigtimedwait_time64)
 __SYSCALL(__NR_futex_time64, sys_futex)
 #define __NR_sched_rr_get_interval_time64 423
 __SYSCALL(__NR_sched_rr_get_interval_time64, sys_sched_rr_get_interval)
+#define __NR_pidfd_send_signal 424
+__SYSCALL(__NR_pidfd_send_signal, sys_pidfd_send_signal)
+#define __NR_io_uring_setup 425
+__SYSCALL(__NR_io_uring_setup, sys_io_uring_setup)
+#define __NR_io_uring_enter 426
+__SYSCALL(__NR_io_uring_enter, sys_io_uring_enter)
+#define __NR_io_uring_register 427
+__SYSCALL(__NR_io_uring_register, sys_io_uring_register)
 
 /*
  * Please add new compat syscalls above this comment and update
diff --git a/arch/ia64/kernel/syscalls/syscall.tbl 
b/arch/ia64/kernel/syscalls/syscall.tbl
index ab9cda5f6136..56e3d0b685e1 100644
--- a/arch/ia64/kernel/syscalls/syscall.tbl
+++ b/arch/ia64/kernel/syscalls/syscall.tbl
@@ -344,3 +344,7 @@
 332common  pkey_free   sys_pkey_free
 333common  rseqsys_rseq
 # 334 through 423 are reserved to sync up with other architectures
+424common  pidfd_send_signal   sys_pidfd_send_signal
+425common  io_uring_setup  sys_io_uring_setup
+426common  io_uring_enter  sys_io_uring_enter
+427common  io_uring_register   sys_io_uring_register
diff --git a/arch/m68k/kernel/syscalls/syscall.tbl 
b/arch/m68k/kernel/syscalls/syscall.tbl
index 125c14178979..df4ec3ec71d1 100644
--- a/arch/m68k/kernel/syscalls/syscall.tbl

Re: [PATCH 2/2] arch: add pidfd and io_uring syscalls everywhere

2019-03-31 Thread Arnd Bergmann
On Sun, Mar 31, 2019 at 5:47 PM Michael Ellerman  wrote:
>
> Arnd Bergmann  writes:
> > Add the io_uring and pidfd_send_signal system calls to all architectures.
> >
> > These system calls are designed to handle both native and compat tasks,
> > so all entries are the same across architectures, only arm-compat and
> > the generic table still use an old format.
> >
> > Signed-off-by: Arnd Bergmann 
> > ---
> >  arch/alpha/kernel/syscalls/syscall.tbl  | 4 
> >  arch/arm/tools/syscall.tbl  | 4 
> >  arch/arm64/include/asm/unistd.h | 2 +-
> >  arch/arm64/include/asm/unistd32.h   | 8 
> >  arch/ia64/kernel/syscalls/syscall.tbl   | 4 
> >  arch/m68k/kernel/syscalls/syscall.tbl   | 4 
> >  arch/microblaze/kernel/syscalls/syscall.tbl | 4 
> >  arch/mips/kernel/syscalls/syscall_n32.tbl   | 4 
> >  arch/mips/kernel/syscalls/syscall_n64.tbl   | 4 
> >  arch/mips/kernel/syscalls/syscall_o32.tbl   | 4 
> >  arch/parisc/kernel/syscalls/syscall.tbl | 4 
> >  arch/powerpc/kernel/syscalls/syscall.tbl| 4 
>
> Have you done any testing?
>
> I'd rather not wire up syscalls that have never been tested at all on
> powerpc.

No, I have not. I did review the system calls carefully and added the first
patch to fix the bug on x86 compat mode before adding the same bug
on the other compat architectures though ;-)

Generally, my feeling is that adding system calls is not fundamentally
different from adding other ABIs, and we should really do it at
the same time across all architectures, rather than waiting for each
maintainer to get around to reviewing and testing the new calls
first. This is not a problem on powerpc, but a lot of other architectures
are less active, which is how we have always ended up with
different sets of system calls across architectures.

The problem here is that this makes it harder for the C library to
know when a system call is guaranteed to be available. glibc
still needs a feature test for newly added syscalls to see if they
are working (they might be backported to an older kernel, or
disabled), but whenever the minimum kernel version is increased,
it makes sense to drop those checks and assume non-optional
system calls will work if they were part of that minimum version.

In the future, I'd hope that any new system calls get added
right away on all architectures when they land (it was a bit
tricky this time, because I still did a bunch of reworks that
conflicted with the new calls). Bugs will happen of course, but
I think adding them sooner makes it more likely to catch those
bugs early on so we have a chance to fix them properly,
and need fewer arch specific workarounds (ideally none)
for system calls.

   Arnd


Re: [PATCH 2/2] arch: add pidfd and io_uring syscalls everywhere

2019-03-26 Thread Arnd Bergmann
On Mon, Mar 25, 2019 at 6:37 PM Paul Burton  wrote:
> On Mon, Mar 25, 2019 at 03:47:37PM +0100, Arnd Bergmann wrote:
> > Add the io_uring and pidfd_send_signal system calls to all architectures.
> >
> > These system calls are designed to handle both native and compat tasks,
> > so all entries are the same across architectures, only arm-compat and
> > the generic table still use an old format.
> >
> > Signed-off-by: Arnd Bergmann 
> > ---
> >%
> > diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl 
> > b/arch/mips/kernel/syscalls/syscall_n64.tbl
> > index c85502e67b44..c4a49f7d57bb 100644
> > --- a/arch/mips/kernel/syscalls/syscall_n64.tbl
> > +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl
> > @@ -338,3 +338,7 @@
> >  327  n64 rseqsys_rseq
> >  328  n64 io_pgetevents   sys_io_pgetevents
> >  # 329 through 423 are reserved to sync up with other architectures
> > +424  common  pidfd_send_signal   sys_pidfd_send_signal
> > +425  common  io_uring_setup  sys_io_uring_setup
> > +426  common  io_uring_enter  sys_io_uring_enter
> > +427  common  io_uring_register   sys_io_uring_register
>
> Shouldn't these declare the ABI as "n64"?
>
> I don't see anywhere that it would actually change the generated code,
> but a comment at the top of the file says that every entry should use
> "n64" and so far they all do. Did you have something else in mind here?

You are right, the use of 'common' here is unintentional but harmless,
and I should have used 'n64' here.

We may decide to do things differently in the future, i.e. we could
have just a single global file for newly added system calls once
it turns out that the tables are consistent across all architectures,
but I'd probably go on with the separate identical entries for a bit
before changing that.

 Arnd


[PATCH 2/2] arch: add pidfd and io_uring syscalls everywhere

2019-03-25 Thread Arnd Bergmann
Add the io_uring and pidfd_send_signal system calls to all architectures.

These system calls are designed to handle both native and compat tasks,
so all entries are the same across architectures, only arm-compat and
the generic table still use an old format.

Signed-off-by: Arnd Bergmann 
---
 arch/alpha/kernel/syscalls/syscall.tbl  | 4 
 arch/arm/tools/syscall.tbl  | 4 
 arch/arm64/include/asm/unistd.h | 2 +-
 arch/arm64/include/asm/unistd32.h   | 8 
 arch/ia64/kernel/syscalls/syscall.tbl   | 4 
 arch/m68k/kernel/syscalls/syscall.tbl   | 4 
 arch/microblaze/kernel/syscalls/syscall.tbl | 4 
 arch/mips/kernel/syscalls/syscall_n32.tbl   | 4 
 arch/mips/kernel/syscalls/syscall_n64.tbl   | 4 
 arch/mips/kernel/syscalls/syscall_o32.tbl   | 4 
 arch/parisc/kernel/syscalls/syscall.tbl | 4 
 arch/powerpc/kernel/syscalls/syscall.tbl| 4 
 arch/s390/kernel/syscalls/syscall.tbl   | 4 
 arch/sh/kernel/syscalls/syscall.tbl | 4 
 arch/sparc/kernel/syscalls/syscall.tbl  | 4 
 arch/xtensa/kernel/syscalls/syscall.tbl | 4 
 16 files changed, 65 insertions(+), 1 deletion(-)

diff --git a/arch/alpha/kernel/syscalls/syscall.tbl 
b/arch/alpha/kernel/syscalls/syscall.tbl
index 63ed39cbd3bd..165f268beafc 100644
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -463,3 +463,7 @@
 532common  getppid sys_getppid
 # all other architectures have common numbers for new syscall, alpha
 # is the exception.
+534common  pidfd_send_signal   sys_pidfd_send_signal
+535common  io_uring_setup  sys_io_uring_setup
+536common  io_uring_enter  sys_io_uring_enter
+537common  io_uring_register   sys_io_uring_register
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index 9016f4081bb9..0393917eaa57 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -437,3 +437,7 @@
 421common  rt_sigtimedwait_time64  sys_rt_sigtimedwait
 422common  futex_time64sys_futex
 423common  sched_rr_get_interval_time64sys_sched_rr_get_interval
+424common  pidfd_send_signal   sys_pidfd_send_signal
+425common  io_uring_setup  sys_io_uring_setup
+426common  io_uring_enter  sys_io_uring_enter
+427common  io_uring_register   sys_io_uring_register
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index 310d8f1cae7a..c6946fe640e6 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -49,7 +49,7 @@
 #define __ARM_NR_compat_set_tls(__ARM_NR_COMPAT_BASE + 5)
 #define __ARM_NR_COMPAT_END(__ARM_NR_COMPAT_BASE + 0x800)
 
-#define __NR_compat_syscalls   424
+#define __NR_compat_syscalls   428
 #endif
 
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/arm64/include/asm/unistd32.h 
b/arch/arm64/include/asm/unistd32.h
index 5590f2623690..23f1a44acada 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -866,6 +866,14 @@ __SYSCALL(__NR_rt_sigtimedwait_time64, 
compat_sys_rt_sigtimedwait_time64)
 __SYSCALL(__NR_futex_time64, sys_futex)
 #define __NR_sched_rr_get_interval_time64 423
 __SYSCALL(__NR_sched_rr_get_interval_time64, sys_sched_rr_get_interval)
+#define __NR_pidfd_send_signal 424
+__SYSCALL(__NR_pidfd_send_signal, sys_pidfd_send_signal)
+#define __NR_io_uring_setup 425
+__SYSCALL(__NR_io_uring_setup, sys_io_uring_setup)
+#define __NR_io_uring_enter 426
+__SYSCALL(__NR_io_uring_enter, sys_io_uring_enter)
+#define __NR_io_uring_register 427
+__SYSCALL(__NR_io_uring_register, sys_io_uring_register)
 
 /*
  * Please add new compat syscalls above this comment and update
diff --git a/arch/ia64/kernel/syscalls/syscall.tbl 
b/arch/ia64/kernel/syscalls/syscall.tbl
index ab9cda5f6136..56e3d0b685e1 100644
--- a/arch/ia64/kernel/syscalls/syscall.tbl
+++ b/arch/ia64/kernel/syscalls/syscall.tbl
@@ -344,3 +344,7 @@
 332common  pkey_free   sys_pkey_free
 333common  rseqsys_rseq
 # 334 through 423 are reserved to sync up with other architectures
+424common  pidfd_send_signal   sys_pidfd_send_signal
+425common  io_uring_setup  sys_io_uring_setup
+426common  io_uring_enter  sys_io_uring_enter
+427common  io_uring_register   sys_io_uring_register
diff --git a/arch/m68k/kernel/syscalls/syscall.tbl 
b/arch/m68k/kernel/syscalls/syscall.tbl
index 125c14178979..df4ec3ec71d1 100644
--- a/arch/m68k/kernel/syscalls/syscall.tbl
+++ b/arch/m68k/kernel/syscalls/syscall.tbl
@@ -423,3 +423,7 @@
 421common  rt_sigtimedwait_time64  sys_rt_sigtimedwait
 422common  futex_time64sys_futex

Re: [PATCH] y2038: fix socket.h header inclusion

2019-03-18 Thread Arnd Bergmann
On Mon, Mar 18, 2019 at 2:12 PM Florian Weimer  wrote:
> > On Mon, Mar 18, 2019 at 10:25 AM Florian Weimer  wrote:
> >>
> >> * Arnd Bergmann:
> >>
> >> > Should we just remove __kernel_fd_set from the exported headers and
> >> > define the internal fd_set directly in include/linux/types.h? (Adding the
> >> > folks from the old thread to Cc).
> >>
> >> The type is used in the sanitizers, but incorrectly.  They assume that
> >> FD_SETSIZE is always 1024.  (The existence of __kernel_fd_set is
> >> itself somewhat questionable because it leads to such bugs.)
> >> Moving around the type could cause a build failure in the sanitizers, but 
> >> I'm
> >> not entirely clear how the UAPI headers are included there.
> >
> > It looks like sanitizer_platform_limits_posix.cc includes
> > linux/posix_types.h to ensure that __kernel_fd_set is the same
> > size as __sanitizer___kernel_fd_set, and then it uses the
> > latter afterwards.
> >
> > What I don't see here is what kind of operation is actually done
> > on the data, I only see a cast to void.
>
> I think it is used to assert that the select family of system calls
> writes to the 1024 bits for each of the passed pointers.

Yes, that is what I expected to see in libsanitizer, I just couldn't
find any code that actually does this check.

> Which is not actually true—the write size is controlled by the
> file descriptor count argument.

Yes, of course. In fact, I see multiple possible problems:

- kernel reading uninitialized data if 'FD_ZERO()' was
  used with a shorter size than the count argument.
- kernel writing beyond the fd_set data on stack
  when the declaration had a shorter size than the count
  argument.

Each one could happen either because __FD_SETSIZE
is smaller than 'count', or because kernel and user space
disagree on the element size (32 vs 64 bit on x32).

> > If libsanitizer actually does
> > anything interesting here, we should definitely fix it to use the
> > correct size, especially since this is actually something that
> > can trigger a buffer overflow in subtle ways when used carelessly.
> > See for example [1], which we still have not addressed
>
> The footnote is missing.

Sorry, I meant [1] https://patchwork.kernel.org/patch/10245053/

> > For this specific use (and probably others like it), renaming the
> > fds_bits member to __kernel_fds_bits or something like that
> > would keep user space still compiling. That would only break
> > if someone was using __kernel_fd_set, and actually doing
> > bit operations on it. glibc uses '__fds_bits' unless __USE_XOPEN
> > is set, so maybe we should use that name unconditionally.
>
> Please use something that is more obviously Linux-specific.

Ok, so not '__fds_bits'.

Is '__kernel_fds_bits' ok? I would prefer to keep at least the
name __kernel_ namespace that we have for typedefs and the
occasional struct tag.

Arnd


Re: [PATCH] y2038: fix socket.h header inclusion

2019-03-18 Thread Arnd Bergmann
On Sun, Mar 17, 2019 at 7:20 PM Deepa Dinamani  wrote:
> On Fri, Mar 15, 2019 at 2:20 PM Florian Weimer  wrote:
> > > On Thu, Mar 14, 2019 at 7:41 PM Florian Weimer  wrote:
> > >> > diff --git a/arch/alpha/include/uapi/asm/socket.h
> > >> > b/arch/alpha/include/uapi/asm/socket.h
> > >> > index 0d0fddb7e738..976e89b116e5 100644
> > >> > --- a/arch/alpha/include/uapi/asm/socket.h
> > >> > +++ b/arch/alpha/include/uapi/asm/socket.h
> > >> > @@ -2,8 +2,8 @@
> > >> >  #ifndef _UAPI_ASM_SOCKET_H
> > >> >  #define _UAPI_ASM_SOCKET_H
> > >> >
> > >> > +#include 
> > >> >  #include 
> > >> > -#include 
> > >>
> > >> This breaks POSIX conformance in glibc because the
> > >>  header is not namespace clean.  It contains the
> > >> identifiers fds_bits and val:
> > >>
> > >> unsigned long fds_bits[__FD_SETSIZE / (8 * sizeof(long))];
> > >>
> > >> int val[2];
> > >
> > > What is problematic about the struct members here? I had thought that
> > > only the struct names have to be in a namespace to be usable here,
> > > but not the members.
> >
> > According to POSIX, a user can do this:
> >
> > #define fds_bits 1024
> >
> > before including the  header file.  Similarly for val.
> >
> > Since glibc pulls in  indirectly, the result is a parse
> > error, even though the programmer did nothing wrong (fds_bits is not
> > an identifier used by POSIX, nor is it in the implementation
> > namespace, and  is a POSIX header).

Ok, I see. Thanks for the explanation!

> > > We could use asm/posix_types.h instead of linux/posix_types.h,
> > > would that address your concern?
> >
> > It should fix the fds_bits case, I think.  But
> >  still uses val, so that part of the issue
> > remains.
>
> Would moving kernel namespace types(__kernel prefix) to a different
> header file(kernel_types.h?) and then including this from
> linux/posix_types.h.
> And, for socket.h just including kernel_types.h make sense?

I fear we have considered linux/posix_types.h to be something that
can be included anywhere for a long time, so it may be better to
ensure that this is actually the case, and avoid the problem with those
two structures but leave the rest untouched.

I think we can move  __kernel_fsid_t into include/uapi/asm-generic/statfs.h,
which is the only thing that needs it anyway. We have two definitions of
it today, the non-generic one being for mips32, but incidentally there was
a patch the other day to remove that and use the generic one instead.

With that done, we can change asm/socket.h to just use asm/posix_types.h.

I would still prefer to solve the problem for linux/posix_types.h as well,
but I'm not sure even how __kernel_fd_set  is used today in
user space, if at all. Commit 8ded2bbc1845 ("posix_types.h: Cleanup
stale __NFDBITS and related definitions") removed most of the fd_set
definition after a long discussion [1], and since then it has been
basically impossible to use 'struct fd_set'  from the kernel in a
meaningful way without including the libc headers or duplicating
them.

Should we just remove __kernel_fd_set from the exported headers and
define the internal fd_set directly in include/linux/types.h? (Adding the
folks from the old thread to Cc).

  Arnd

[1] https://lore.kernel.org/lkml/20120724181209.ga10...@zod.bos.redhat.com/t/


Re: [PATCH] y2038: fix socket.h header inclusion

2019-03-15 Thread Arnd Bergmann
On Thu, Mar 14, 2019 at 7:41 PM Florian Weimer  wrote:
>
> * Arnd Bergmann:
>
> > diff --git a/arch/alpha/include/uapi/asm/socket.h 
> > b/arch/alpha/include/uapi/asm/socket.h
> > index 0d0fddb7e738..976e89b116e5 100644
> > --- a/arch/alpha/include/uapi/asm/socket.h
> > +++ b/arch/alpha/include/uapi/asm/socket.h
> > @@ -2,8 +2,8 @@
> >  #ifndef _UAPI_ASM_SOCKET_H
> >  #define _UAPI_ASM_SOCKET_H
> >
> > +#include 
> >  #include 
> > -#include 
>
> This breaks POSIX conformance in glibc because the
> <linux/posix_types.h> header is not namespace clean.  It contains the
> identifiers fds_bits and val:
>
> unsigned long fds_bits[__FD_SETSIZE / (8 * sizeof(long))];
>
> int val[2];

What is problematic about the struct members here? I had thought that
only the struct names have to be in a namespace to be usable here,
but not the members.

The only part that might be problematic is

#undef __FD_SETSIZE
#define __FD_SETSIZE1024

but we already get that from a number of other inclusions of
linux/posix_types.h. Is this what you mean?

> We could duplicate some of the SO_* constants for POSIX mode in glibc,
> but it would be nice to avoid that.
>
> Is there a different way of fixing this on the kernel side that avoids
> including <linux/posix_types.h>?

We could use asm/posix_types.h instead of linux/posix_types.h,
would that address your concern?

   Arnd


Re: [PATCH] x86: Deprecate a.out support

2019-03-11 Thread Arnd Bergmann
On Mon, Mar 11, 2019 at 8:47 PM Måns Rullgård  wrote:
> Linus Torvalds  writes:
> > On Mon, Mar 11, 2019 at 11:08 AM Måns Rullgård  wrote:
> >>
> >> The latest version I have is 5.1, and that uses ECOFF.
> >
> > ECOFF _is_ a.out as far as Linux is concerned.
> >
> > So Linux basically treats ECOFF as "regular a.out with just some
> > header extensions".
> >
> > We don't have any specific support for ECOFF.
> >
> > I _think_. Again, it's been years and years.
>
> Right, which is why killing a.out entirely would have the unfortunate
> effect of also removing the OSF/1 compatibility on Alpha.
>
> If we are to support Alpha as an architecture at all, it makes sense to
> support the things people actually use it for.
>
> Now, personally I can live without it.  I just don't like to see
> features removed without due consideration.

The main historic use case I've heard of was running Netscape
Navigator on Alpha Linux, before there was an open source version.
Doing this today to connect to the open internet is probably
a bit pointless, but there may be other use cases.

Looking at the system call table in the kernel
(arch/alpha/kernel/syscalls/syscall.tbl), we seem to support a
specific subset that was required for a set of applications, and
not much more. Old system calls (osf_old_open, osf_execve,
osf_old_sigaction) are listed but not implemented, and the same
is true for most of the later calls (osf_fuser, osf_sigsendset,
osf_waitid, osf_signal, ...), just the ones in the middle are there.
This would also indicate that it never really worked as a
general-purpose emulation layer but was only there for a specific
set of applications.

Another data point I have is that osf1 emulation was broken
between linux-4.13 and linux-4.16 without anyone noticing, see
47669fb6b595 ("alpha: osf_sys.c: fix put_tv32 regression").

  Arnd


[PATCH] y2038: fix socket.h header inclusion

2019-03-11 Thread Arnd Bergmann
Referencing the __kernel_long_t type caused some user space applications
to stop compiling when they had not already included linux/posix_types.h,
e.g.

s/multicast.c -o ext/sockets/multicast.lo
In file included from /builddir/build/BUILD/php-7.3.3/main/php.h:468,
 from /builddir/build/BUILD/php-7.3.3/ext/sockets/sockets.c:27:
/builddir/build/BUILD/php-7.3.3/ext/sockets/sockets.c: In function 
'zm_startup_sockets':
/builddir/build/BUILD/php-7.3.3/ext/sockets/sockets.c:776:40: error: 
'__kernel_long_t' undeclared (first use in this function)
  776 |  REGISTER_LONG_CONSTANT("SO_SNDTIMEO", SO_SNDTIMEO, CONST_CS | 
CONST_PERSISTENT);

It is safe to include that header here, since it only contains kernel
internal types that do not conflict with other user space types.

It's still possible that some related build failures remain, but those
are likely to be for code that is not already y2038 safe.

Reported-by: Laura Abbott 
Fixes: a9beb86ae6e5 ("sock: Add SO_RCVTIMEO_NEW and SO_SNDTIMEO_NEW")
Signed-off-by: Arnd Bergmann 
---
 arch/alpha/include/uapi/asm/socket.h  | 2 +-
 arch/mips/include/uapi/asm/socket.h   | 2 +-
 arch/parisc/include/uapi/asm/socket.h | 2 +-
 arch/sparc/include/uapi/asm/socket.h  | 2 +-
 include/uapi/asm-generic/socket.h | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/alpha/include/uapi/asm/socket.h 
b/arch/alpha/include/uapi/asm/socket.h
index 0d0fddb7e738..976e89b116e5 100644
--- a/arch/alpha/include/uapi/asm/socket.h
+++ b/arch/alpha/include/uapi/asm/socket.h
@@ -2,8 +2,8 @@
 #ifndef _UAPI_ASM_SOCKET_H
 #define _UAPI_ASM_SOCKET_H
 
+#include 
 #include 
-#include 
 
 /* For setsockopt(2) */
 /*
diff --git a/arch/mips/include/uapi/asm/socket.h 
b/arch/mips/include/uapi/asm/socket.h
index eb9f33f8a8b3..d41765cfbc6e 100644
--- a/arch/mips/include/uapi/asm/socket.h
+++ b/arch/mips/include/uapi/asm/socket.h
@@ -10,8 +10,8 @@
 #ifndef _UAPI_ASM_SOCKET_H
 #define _UAPI_ASM_SOCKET_H
 
+#include 
 #include 
-#include 
 
 /*
  * For setsockopt(2)
diff --git a/arch/parisc/include/uapi/asm/socket.h 
b/arch/parisc/include/uapi/asm/socket.h
index 16e428f03526..66c5dd245ac7 100644
--- a/arch/parisc/include/uapi/asm/socket.h
+++ b/arch/parisc/include/uapi/asm/socket.h
@@ -2,8 +2,8 @@
 #ifndef _UAPI_ASM_SOCKET_H
 #define _UAPI_ASM_SOCKET_H
 
+#include 
 #include 
-#include 
 
 /* For setsockopt(2) */
 #define SOL_SOCKET 0x
diff --git a/arch/sparc/include/uapi/asm/socket.h 
b/arch/sparc/include/uapi/asm/socket.h
index 88fe4f978aca..9265a9eece15 100644
--- a/arch/sparc/include/uapi/asm/socket.h
+++ b/arch/sparc/include/uapi/asm/socket.h
@@ -2,8 +2,8 @@
 #ifndef _ASM_SOCKET_H
 #define _ASM_SOCKET_H
 
+#include 
 #include 
-#include 
 
 /* For setsockopt(2) */
 #define SOL_SOCKET 0x
diff --git a/include/uapi/asm-generic/socket.h 
b/include/uapi/asm-generic/socket.h
index c8b430cb6dc4..8c1391c89171 100644
--- a/include/uapi/asm-generic/socket.h
+++ b/include/uapi/asm-generic/socket.h
@@ -2,8 +2,8 @@
 #ifndef __ASM_GENERIC_SOCKET_H
 #define __ASM_GENERIC_SOCKET_H
 
+#include 
 #include 
-#include 
 
 /* For setsockopt(2) */
 #define SOL_SOCKET 1
-- 
2.20.0



Re: [PATCH] x86: Deprecate a.out support

2019-03-11 Thread Arnd Bergmann
On Sun, Mar 10, 2019 at 11:46 PM Linus Torvalds
 wrote:
>
> On Sun, Mar 10, 2019 at 2:37 PM Matt Turner  wrote:
> >
> > I'm not aware of a reason to keep a.out support on alpha.
>
> Hmm. I was looking at removing a.out support entirely, but it's
> actually fairly incestuous on alpha.
>
> For example, arch/alpha/boot/tools/objstrip.c very much has some a.out
> support in it. Maybe it can just be removed entirely.
>
> There's also an a.out.h include in arch/alpha/kernel/binfmt_loader.c.
>
> Finally, note that CONFIG_OSF4_COMPAT also no longer makes sense
> without a.out support.
>
> So this attached patch does not compile on alpha, but it's been many
> many years since I had an alpha to test with, so I'm stuck.
>
> Matt, can you fill in the details and complete this patch?

I wonder if we could remove the osf time32 compat code as well,
this was one of the areas that kept causing problems with the y2038
rework. (I think it's all good now, but it's never been tested as far as I
can tell).

For some syscalls (e.g. brk, mmap, getxuid, ...) we definitely need to
keep the osf1 version, since it is the only supported ABI.

I just looked up some really old source trees and found that glibc-2.1
was the first release to use 64-bit time_t the way we do it today,
as implemented in [1], so all Debian and SuSE releases for alpha
had it, but any ELF binaries built on Red Hat Linux 4.x and 5.x
(released 1996 through 1998) or earlier would use 32-bit time_t
osf1 syscalls. Red Hat 2.x and 3.x were a.out based on alpha.

  Arnd

[1] https://repo.or.cz/glibc/history.git/commitdiff/64819b5c3a94e81e4


Re: [PATCH] add delay between port write and port read

2019-03-01 Thread Arnd Bergmann
On Fri, Mar 1, 2019 at 8:19 PM Linus Torvalds
 wrote:
>
> On Fri, Mar 1, 2019 at 11:13 AM Maciej W. Rozycki  
> wrote:
> >
> >  What do we do WRT straight-through vs byte-swapping properties of these
> > accessors?
>
> I think the whole point of __raw_xyz() is that it's the lowest level
> model. It gives you relaxed ordering (together with the ioremap
> model), and it gives you straight-through behavior.
>
> And yes, any driver using them needs to be aware of the byte ordering,
> which may or may not be the same as regular memory, and may or may not
> be the same as other devices.
>
> So __raw_xyz() is very much for low-level drivers that know what they
> are doing. Caveat user.
>
> "If it breaks, you get to keep both pieces"

I agree in principle, but I think we already have a lot of precedent
for __raw_xyz() being relied on to have a specific behavior in
architecture-independent drivers, and I think it makes sense for
architectures to provide that.

Specifically, I think we need __raw_xyz() to do the same as xyz()
on all little-endian kernels regarding byte ordering (not barriers), and
I would expect it to provide the same ordering and addressing
as swabX(xyz()) on big-endian kernels.

Without that, using __raw_xyz() to copy between RAM and
buffers in PCI memory space is broken, as you said, but the
assumption would be broken on certain older machines that
do a hardware endian swap by swizzling the address lines rather
than swapping bytes on the data bus.

The best idea I have for working around this is to never rely
on __raw_xyz() to not do byte swapping in platform specific
drivers with CPU-endian MMIO space, but to have a platform
specific set of wrappers around the normal I/O functions, and
make __raw_xyz() just do whatever we expect them to do on
PCI devices.

   Arnd


Re: [PATCH] add delay between port write and port read

2019-02-19 Thread Arnd Bergmann
On Tue, Feb 19, 2019 at 2:44 PM Mikulas Patocka  wrote:
> On Tue, 19 Feb 2019, Mikulas Patocka wrote:
>
> > The patches cd0e00c106722eca40b38ebf11cf134c01901086 and
> > 92d7223a74235054f2aa7227d207d9c57f84dca0 fix a theoretical issue where the
> > code didn't follow the specification. Unfortunately, they also reduce
> > timing when port write is followed by a port read.
> >
> > These reduced timing cause hang on boot on the Avanti platform when
> > probing serial ports. This patch adds memory barrier after the outb, outw,
> > outl functions, so that there is delay between port write and subsequent
> > port read - just like before.
> >
> > Fixes: cd0e00c10672 ("alpha: io: reorder barriers to guarantee writeX() and 
> > iowriteX() ordering")
> > Cc: sta...@vger.kernel.org# v4.17+
>
> you can also add:
>
> Tested-by: Mikulas Patocka 

Acked-by: Arnd Bergmann 

but I notice you are missing Signed-off-by.

We clearly need this patch, but I assumed the alpha maintainers would pick
it up, not me. I merged the original changes since they were cross-architecture,
but I don't normally take patches for a particular architecture through the
asm-generic tree (or the soc tree for that matter).

  Arnd


Re: [PATCH v2 29/29] y2038: add 64-bit time_t syscalls to all 32-bit architectures

2019-01-22 Thread Arnd Bergmann
On Fri, Jan 18, 2019 at 7:50 PM Andy Lutomirski  wrote:
> On Fri, Jan 18, 2019 at 8:25 AM Arnd Bergmann  wrote:
>
> I have a patch that I'll send soon to make x32 use its own table.  As
> far as I'm concerned, 547 is *it*.  548 is just a normal number and is
> not special.  But let's please not reuse 512..547 for other purposes
> on x86 variants -- that way lies even more confusion, IMO.

(trimming Cc list, as this is getting a little off-topic for most)

Just so I understand: do you mean duplicating the .tbl file, or just
the resulting table of entry points?

In either way, how will that work with the new io_uring_setup()
system call that will have to use the compat entry point?
Are you planning to use the same syscall number as x86_64
but point it to the compat function, or do we still need a new
syscall number for x32 in the regular range?

   Arnd


Re: [PATCH v2 29/29] y2038: add 64-bit time_t syscalls to all 32-bit architectures

2019-01-21 Thread Arnd Bergmann
On Mon, Jan 21, 2019 at 6:08 PM Arnd Bergmann  wrote:
> On Mon, Jan 21, 2019 at 9:19 AM Geert Uytterhoeven  
> wrote:
> > Regardless, I'm wondering what to do with the holes marked "room for
> > arch specific calls".
> > When is a syscall really arch-specific, and can it be added there, and
> > when does it turn out (later) that it isn't, breaking the
> > synchronization again?
>
> We've had a bit of that already, with cacheflush(), which exists on
> a couple of architectures, including some that use the first
> 'arch specific' slot (244) of the asm-generic table. I think this
> will be rare enough that we can figure out a solution when we
> get there.
>
> > The pkey syscalls may be a bad example, as AFAIU they can be implemented
> > on some architectures, but not on some others.  Still, I had skipped them
> > when adding new syscalls to m68k.
> >
> > Perhaps we should get rid of the notion of "arch-specific syscalls", and
> > reserve a slot everywhere anyway?
>
> I don't mind calling the hole something else if that helps. Out of
> principle I would already assume that anything we add for x86
> or the generic table should be added everywhere, but we can
> make it broader than that.

Applying this fixup below,

 Arnd

diff --git a/arch/x86/entry/syscalls/syscall_32.tbl
b/arch/x86/entry/syscalls/syscall_32.tbl
index d9c2d2eea044..955ab6a3b61f 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -398,7 +398,7 @@
 384i386arch_prctl  sys_arch_prctl
 __ia32_compat_sys_arch_prctl
 385i386io_pgetevents   sys_io_pgetevents_time32
 __ia32_compat_sys_io_pgetevents
 386i386rseqsys_rseq
 __ia32_sys_rseq
-# room for arch specific syscalls
+# don't use numbers 387 through 392, add new calls at the end
 393i386semget  sys_semget
 __ia32_sys_semget
 394i386semctl  sys_semctl
 __ia32_compat_sys_semctl
 395i386shmget  sys_shmget
 __ia32_sys_shmget
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl
b/arch/x86/entry/syscalls/syscall_64.tbl
index 43a622aec07e..2ae92fddb6d5 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -343,6 +343,8 @@
 332common  statx   __x64_sys_statx
 333common  io_pgetevents   __x64_sys_io_pgetevents
 334common  rseq__x64_sys_rseq
+# don't use numbers 387 through 423, add new calls after the last
+# 'common' entry

 #
 # x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/include/uapi/asm-generic/unistd.h
b/include/uapi/asm-generic/unistd.h
index 53831e4a4c86..acf9a07ab2ff 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -740,7 +740,7 @@ __SC_COMP_3264(__NR_io_pgetevents,
sys_io_pgetevents_time32, sys_io_pgetevents,
 __SYSCALL(__NR_rseq, sys_rseq)
 #define __NR_kexec_file_load 294
 __SYSCALL(__NR_kexec_file_load, sys_kexec_file_load)
-/* 295 through 402 are unassigned to sync up with generic numbers */
+/* 295 through 402 are unassigned to sync up with generic numbers, don't use */
 #if __BITS_PER_LONG == 32
 #define __NR_clock_gettime64 403
 __SYSCALL(__NR_clock_gettime64, sys_clock_gettime)


Re: [PATCH v2 14/29] arch: add pkey and rseq syscall numbers everywhere

2019-01-21 Thread Arnd Bergmann
On Mon, Jan 21, 2019 at 9:56 AM Geert Uytterhoeven  wrote:
>
> Note that all architectures that already define pkey syscalls, list
> pkey_mprotect first.

It's easy enough to change, so I've reordered them for consistency now.

> Regardless, for m68k:
> Acked-by: Geert Uytterhoeven 

Thanks,

 Arnd


Re: [PATCH v2 29/29] y2038: add 64-bit time_t syscalls to all 32-bit architectures

2019-01-21 Thread Arnd Bergmann
On Mon, Jan 21, 2019 at 9:19 AM Geert Uytterhoeven  wrote:
> On Sat, Jan 19, 2019 at 3:29 PM Russell King - ARM Linux admin
>  wrote:
> > On Fri, Jan 18, 2019 at 11:53:25AM -0800, Andy Lutomirski wrote:
> > > On Fri, Jan 18, 2019 at 11:33 AM Arnd Bergmann  wrote:
> > > > On Fri, Jan 18, 2019 at 7:50 PM Andy Lutomirski  wrote:
> > >
> > > Can we perhaps just start the consistent numbers above 547 or maybe
> > > block out 512..547 in the new regime?
> >
> > I don't think you gain much with that kind of scheme - it won't take
> > very long before an architecture misses having a syscall added, and
> > then someone else adds their own.  Been there with ARM - I was keeping
> > the syscall table in the same order as x86 for new syscalls, but now
>
> Same for m68k, and probably other architectures.
>
> > that others have been adding syscalls to the table since I converted
> > ARM to the tabular form, that's now gone out the window.
> >
> > So, I think it's completely pointless to do what you're suggesting.
> > We'll just end up with a big hole in the middle of the syscall table
> > and then revert back to random numbering of syscalls thereafter again.
>
> I believe the plan is to add future syscalls for all architectures in a
> single commit, to keep everything in sync.

Yes, that is the idea. This was not realistic before, since each one
of the old architectures had its own way of describing the system call
tables, and many needed a different set of quirks.

Since (almost) everything is now converted to the syscall.tbl format,
we have removed all obsolete architectures, and a lot of the quirks
(x32, spu, s390-31) won't matter as much in the future, I think it is
now possible to do it.

We could even extend scripts/checksyscalls.sh to warn if a new
syscall above 423 is not added to all 16 tables at the same time.

> Regardless, I'm wondering what to do with the holes marked "room for
> arch specific calls".
> When is a syscall really arch-specific, and can it be added there, and
> when does it turn out (later) that it isn't, breaking the
> synchronization again?

We've had a bit of that already, with cacheflush(), which exists on
a couple of architectures, including some that use the first
'arch specific' slot (244) of the asm-generic table. I think this
will be rare enough that we can figure out a solution when we
get there.

> The pkey syscalls may be a bad example, as AFAIU they can be implemented
> on some architectures, but not on some others.  Still, I had skipped them
> when adding new syscalls to m68k.
>
> Perhaps we should get rid of the notion of "arch-specific syscalls", and
> reserve a slot everywhere anyway?

I don't mind calling the hole something else if that helps. Out of
principle I would already assume that anything we add for x86
or the generic table should be added everywhere, but we can
make it broader than that.

  Arnd


Re: [PATCH v2 29/29] y2038: add 64-bit time_t syscalls to all 32-bit architectures

2019-01-21 Thread Arnd Bergmann
On Fri, Jan 18, 2019 at 5:25 PM Arnd Bergmann  wrote:
>
> This adds 21 new system calls on each ABI that has 32-bit time_t
> today. All of these have the exact same semantics as their existing
> counterparts, and the new ones all have macro names that end in 'time64'
> for clarification.
>
> This gets us to the point of being able to safely use a C library
> that has 64-bit time_t in user space. There are still a couple of
> loose ends to tie up in various areas of the code, but this is the
> big one, and should be entirely uncontroversial at this point.

I've successfully tested this with musl and LTP now, using an
i386 kernel. The musl port I used is at
https://git.linaro.org/people/arnd.bergmann/musl-y2038.git/
This is just an updated version of what I used for testing last
year, using the current syscall assignment, and going back
to the time32 versions of getitimer/setitimer and
wait4/waitid/getrusage.

It's certainly not intended for merging like this, but a proper
musl port is under discussion now, and this should be
sufficient if anyone else wants to try out the new syscall
ABI before we merge it.

The LTP I have is heavily hacked, and has a number of
failures resulting from differences between musl and glibc,
or from the way we convert between the kernel types and
the user space types.

The testing found one minor bug in all the kernel syscall tables:

> +418common  mq_timedsend_time64 sys_mq_timedsend
> +419common  mq_timedreceiv_time64   sys_mq_timedreceive

While this would have fit in with umount(), creat() and mknod(),
it was unintentional, and I've changed it back to
mq_timedreceive_time64 (with an added 'e').

   Arnd


Re: [PATCH v2 29/29] y2038: add 64-bit time_t syscalls to all 32-bit architectures

2019-01-18 Thread Arnd Bergmann
On Fri, Jan 18, 2019 at 8:53 PM Andy Lutomirski  wrote:
> I think we have two issues if we reuse those numbers for new syscalls.
> First, I'd really like to see new syscalls be numbered consistently
> everywhere, or at least on all x86 variants, and we can't on x32
> because they mean something else.  Perhaps more importantly, due to
> what is arguably a rather severe bug, issuing a native x86_64 syscall
> (x32 bit clear) with nr in the range 512..547 does *not* return
> -ENOSYS on a kernel with x32 enabled.  Instead it does something that
> is somewhat arbitrary.  With my patch applied, it will return -ENOSYS,
> but old kernels will still exist, and this will break syscall probing.
>
> Can we perhaps just start the consistent numbers above 547 or maybe
> block out 512..547 in the new regime?

I'm definitely fine with not reusing them ever, and jumping from 511 to
548 when we get there on all architectures, if you think that helps.

While we could also jump to 548 *now*, I think that would be a
bit wasteful. Syscall numbers are fairly cheap, but not entirely
free, especially when you consider architectures like mips that
have an upper bound of 1000 syscalls before they have to get
inventive.

 Arnd


Re: [PATCH v2 29/29] y2038: add 64-bit time_t syscalls to all 32-bit architectures

2019-01-18 Thread Arnd Bergmann
On Fri, Jan 18, 2019 at 7:50 PM Andy Lutomirski  wrote:
> On Fri, Jan 18, 2019 at 8:25 AM Arnd Bergmann  wrote:
> > - Once we get to 512, we clash with the x32 numbers (unless
> >   we remove x32 support first), and probably have to skip
> >   a few more. I also considered using the 512..547 space
> >   for 32-bit-only calls (which never clash with x32), but
> >   that also seems to add a bit of complexity.
>
> I have a patch that I'll send soon to make x32 use its own table.  As
> far as I'm concerned, 547 is *it*.  548 is just a normal number and is
> not special.  But let's please not reuse 512..547 for other purposes
> on x86 variants -- that way lies even more confusion, IMO.

Fair enough, the space for those numbers is cheap enough here.
I take it you mean we also should not reuse that number space if
we were to decide to remove x32 soon, but you are not worried
about clashing with arch/alpha when everything else uses consistent
numbers?

   Arnd


Re: [PATCH v2 13/29] arch: add split IPC system calls where needed

2019-01-18 Thread Arnd Bergmann
On Fri, Jan 18, 2019 at 6:20 PM Gabriel Paubert  wrote:
>
> On Fri, Jan 18, 2019 at 05:18:19PM +0100, Arnd Bergmann wrote:
> > The IPC system call handling is highly inconsistent across architectures,
> > some use sys_ipc, some use separate calls, and some use both.  We also
> > have some architectures that require passing IPC_64 in the flags, and
> > others that set it implicitly.
> >
> > For the additon of a y2083 safe semtimedop() system call, I chose to only
>
> It's not critical, but there are two typos in that line:
> additon -> addition
> 2083 -> 2038

Fixed both, thanks!

 Arnd


Re: [PATCH v2 00/29] y2038: add time64 syscalls

2019-01-18 Thread Arnd Bergmann
On Fri, Jan 18, 2019 at 5:57 PM Dennis Clarke  wrote:
>
> On 1/18/19 11:18 AM, Arnd Bergmann wrote:
> > This is a minor update of the patches I posted last week, I
> > would like to add this into linux-next now, but would still do
> > changes if there are concerns about the contents. The first
> > version did not see a lot of replies, which could mean that
> > either everyone is happy with it, or that it was largely ignored.
> >
> > See also the article at https://lwn.net/Articles/776435/.
>
> I would be happy to read "Approaching the kernel year-2038 end game"
> however it is behind a pay wall.  Perhaps it may be best to just
> host interesting articles about open source idea elsewhere.

It's a short summary of the current state. You can also find a
video and slides from my ELC presentation online for a little more
context.

Generally speaking, I'd recommend paying for the subscription to
lwn.net to anyone interested in the kernel, but it should become
visible to everyone within the next day (a week after the initial
publication). In the meantime, you can find the article at
https://lwn.net/SubscriberLink/776435/a59d93d01d1addfc/.

Finally, I've made a list of the remaining work that Deepa
and I are planning to still continue (this should be mostly
complete but may be missing a few things):

syscalls
 - merge big series for 5.1, to allow time64 syscalls
 - waitid/wait4/getrusage should get a replacement based on __kernel_timespec
 - getitimer/setitimer should probably follow getrusage
 - vdso, waiting for consolidation series from Vincenzo Frascino before
   adding time64 entry points

file systems
 - range checks on timestamps
 - xfs
 - NFS
 - hfs/hfsplus
 - coda
 - hostfs
 - relatime_need_update

drivers
 - media
 - alsa
 - sockets
 - af_packet
 - ppp ioctl
 - rtc ioctl
 - omap3isp

core kernel
 - fix ELF core files (elfcore.h)
 - syscall Audit code (kernel/audit.c, kernel/auditsc.c)
 - make all time32 code conditional
 - remove include/linux/timekeeping32.h
 - remove compat_time* from time32.h
 - remove timeval
 - remove timespec
 - remove time_t

 Arnd


[PATCH v2 06/29] ARM: add migrate_pages() system call

2019-01-18 Thread Arnd Bergmann
The migrate_pages system call has an assigned number on all architectures
except ARM. When it got added initially in commit d80ade7b3231 ("ARM:
Fix warning: #warning syscall migrate_pages not implemented"), it was
intentionally left out based on the observation that there are no 32-bit
ARM NUMA systems.

However, there are now arm64 NUMA machines that can in theory run 32-bit
kernels (actually enabling NUMA there would require additional work)
as well as 32-bit user space on 64-bit kernels, so that argument is no
longer very strong.

Assigning the number lets us use the system call on 64-bit kernels as well
as providing a more consistent set of syscalls across architectures.

Signed-off-by: Arnd Bergmann 
---
 arch/arm/include/asm/unistd.h | 1 -
 arch/arm/tools/syscall.tbl| 1 +
 arch/arm64/include/asm/unistd.h   | 2 +-
 arch/arm64/include/asm/unistd32.h | 2 ++
 4 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h
index 88ef2ce1f69a..d713587dfcf4 100644
--- a/arch/arm/include/asm/unistd.h
+++ b/arch/arm/include/asm/unistd.h
@@ -45,7 +45,6 @@
  * Unimplemented (or alternatively implemented) syscalls
  */
 #define __IGNORE_fadvise64_64
-#define __IGNORE_migrate_pages
 
 #ifdef __ARM_EABI__
 /*
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index 8edf93b4490f..86de9eb34296 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -414,3 +414,4 @@
 397common  statx   sys_statx
 398common  rseqsys_rseq
 399common  io_pgetevents   sys_io_pgetevents
+400common  migrate_pages   sys_migrate_pages
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index a7b1fc58ffdf..261216c3336e 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -44,7 +44,7 @@
 #define __ARM_NR_compat_set_tls(__ARM_NR_COMPAT_BASE + 5)
 #define __ARM_NR_COMPAT_END(__ARM_NR_COMPAT_BASE + 0x800)
 
-#define __NR_compat_syscalls   400
+#define __NR_compat_syscalls   401
 #endif
 
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/arm64/include/asm/unistd32.h 
b/arch/arm64/include/asm/unistd32.h
index 04ee190b90fe..f15bcbacb8f6 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -821,6 +821,8 @@ __SYSCALL(__NR_statx, sys_statx)
 __SYSCALL(__NR_rseq, sys_rseq)
 #define __NR_io_pgetevents 399
 __SYSCALL(__NR_io_pgetevents, compat_sys_io_pgetevents)
+#define __NR_migrate_pages 400
+__SYSCALL(__NR_migrate_pages, compat_sys_migrate_pages)
 
 /*
  * Please add new compat syscalls above this comment and update
-- 
2.20.0



[PATCH v2 28/29] y2038: rename old time and utime syscalls

2019-01-18 Thread Arnd Bergmann
The time, stime, utime, utimes, and futimesat system calls are only
used on older architectures, and we do not provide y2038 safe variants
of them, as they are replaced by clock_gettime64, clock_settime64,
and utimensat_time64.

However, for consistency it seems better to have the 32-bit architectures
that still use them call the "time32" entry points (leaving the
traditional handlers for the 64-bit architectures), like we do for system
calls that now require two versions.

Note: We used to always define __ARCH_WANT_SYS_TIME and
__ARCH_WANT_SYS_UTIME and only set __ARCH_WANT_COMPAT_SYS_TIME and
__ARCH_WANT_SYS_UTIME32 for compat mode on 64-bit kernels. Now this is
reversed: only 64-bit architectures set __ARCH_WANT_SYS_TIME/UTIME, while
we need __ARCH_WANT_SYS_TIME32/UTIME32 for 32-bit architectures and compat
mode. The resulting asm/unistd.h changes look a bit counterintuitive.

This is only a cleanup patch and it should not change any behavior.

Signed-off-by: Arnd Bergmann 
---
 arch/arm/include/asm/unistd.h   |  4 ++--
 arch/arm/tools/syscall.tbl  | 10 +-
 arch/m68k/include/asm/unistd.h  |  4 ++--
 arch/m68k/kernel/syscalls/syscall.tbl   | 10 +-
 arch/microblaze/include/asm/unistd.h|  4 ++--
 arch/microblaze/kernel/syscalls/syscall.tbl | 10 +-
 arch/mips/include/asm/unistd.h  |  4 ++--
 arch/mips/kernel/syscalls/syscall_o32.tbl   | 10 +-
 arch/parisc/include/asm/unistd.h|  9 ++---
 arch/parisc/kernel/syscalls/syscall.tbl | 15 ++-
 arch/powerpc/include/asm/unistd.h   |  8 
 arch/powerpc/kernel/syscalls/syscall.tbl| 19 ++-
 arch/s390/include/asm/unistd.h  |  2 +-
 arch/sh/include/asm/unistd.h|  4 ++--
 arch/sh/kernel/syscalls/syscall.tbl | 10 +-
 arch/sparc/include/asm/unistd.h |  8 
 arch/sparc/kernel/syscalls/syscall.tbl  | 14 +-
 arch/x86/entry/syscalls/syscall_32.tbl  | 10 +-
 arch/x86/include/asm/unistd.h   |  8 
 arch/xtensa/include/asm/unistd.h|  2 +-
 arch/xtensa/kernel/syscalls/syscall.tbl |  6 +++---
 kernel/time/time.c  |  4 ++--
 22 files changed, 98 insertions(+), 77 deletions(-)

diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h
index d713587dfcf4..7a39e77984ef 100644
--- a/arch/arm/include/asm/unistd.h
+++ b/arch/arm/include/asm/unistd.h
@@ -26,10 +26,10 @@
 #define __ARCH_WANT_SYS_SIGPROCMASK
 #define __ARCH_WANT_SYS_OLD_MMAP
 #define __ARCH_WANT_SYS_OLD_SELECT
-#define __ARCH_WANT_SYS_UTIME
+#define __ARCH_WANT_SYS_UTIME32
 
 #if !defined(CONFIG_AEABI) || defined(CONFIG_OABI_COMPAT)
-#define __ARCH_WANT_SYS_TIME
+#define __ARCH_WANT_SYS_TIME32
 #define __ARCH_WANT_SYS_IPC
 #define __ARCH_WANT_SYS_OLDUMOUNT
 #define __ARCH_WANT_SYS_ALARM
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index 200f4b878a46..a96d9b5ee04e 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -24,7 +24,7 @@
 10 common  unlink  sys_unlink
 11 common  execve  sys_execve
 12 common  chdir   sys_chdir
-13 oabitimesys_time
+13 oabitimesys_time32
 14 common  mknod   sys_mknod
 15 common  chmod   sys_chmod
 16 common  lchown  sys_lchown16
@@ -36,12 +36,12 @@
 22 oabiumount  sys_oldumount
 23 common  setuid  sys_setuid16
 24 common  getuid  sys_getuid16
-25 oabistime   sys_stime
+25 oabistime   sys_stime32
 26 common  ptrace  sys_ptrace
 27 oabialarm   sys_alarm
 # 28 was sys_fstat
 29 common  pause   sys_pause
-30 oabiutime   sys_utime
+30 oabiutime   sys_utime32
 # 31 was sys_stty
 # 32 was sys_gtty
 33 common  access  sys_access
@@ -283,7 +283,7 @@
 266common  statfs64sys_statfs64_wrapper
 267common  fstatfs64   sys_fstatfs64_wrapper
 268common  tgkill  sys_tgkill
-269common  utimes  sys_utimes
+269common  utimes  sys_utimes_time32
 270common  arm_fadvise64_64sys_arm_fadvise64_64
 271common  pciconfig_iobasesys_pciconfig_iobase
 272common  pciconfig_read  sys_pciconfig_read
@@ -340,7 +340,7 @@
 323common  mkdirat sys_mkdirat
 324common  mknodat sys_mknodat
 325common  fchownatsys_fchownat
-326common  futimesat   sys_futimesat
+326common  futimesat   sys_futimesat_time32
 327common 

[PATCH v2 21/29] sparc64: add custom adjtimex/clock_adjtime functions

2019-01-18 Thread Arnd Bergmann
sparc64 is the only architecture on Linux that has a 'timeval'
definition with a 32-bit tv_usec but a 64-bit tv_sec. This causes
problems for sparc32 compat mode when we convert it to use the
new __kernel_timex type that has the same layout as all other
64-bit architectures.

To avoid adding sparc64 specific code into the generic adjtimex
implementation, this adds a wrapper in the sparc64 system call handling
that converts the sparc64 'timex' into the new '__kernel_timex'.

At this point, the two structures are defined to be identical,
but that will change in the next step once we convert sparc32.

Signed-off-by: Arnd Bergmann 
---
 arch/sparc/kernel/sys_sparc_64.c   | 59 +-
 arch/sparc/kernel/syscalls/syscall.tbl |  6 ++-
 include/linux/timex.h  |  2 +
 kernel/time/posix-timers.c | 24 +--
 4 files changed, 76 insertions(+), 15 deletions(-)

diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c
index 1c079e7bab09..37de18a11207 100644
--- a/arch/sparc/kernel/sys_sparc_64.c
+++ b/arch/sparc/kernel/sys_sparc_64.c
@@ -28,8 +28,9 @@
 #include 
 #include 
 #include 
-
+#include 
 #include 
+
 #include 
 #include 
 
@@ -544,6 +545,62 @@ SYSCALL_DEFINE2(getdomainname, char __user *, name, int, 
len)
return err;
 }
 
+SYSCALL_DEFINE1(sparc_adjtimex, struct timex __user *, txc_p)
+{
+   struct timex txc;   /* Local copy of parameter */
+   struct timex *kt = (void *)&txc;
+   int ret;
+
+   /* Copy the user data space into the kernel copy
+* structure. But bear in mind that the structures
+* may change
+*/
+   if (copy_from_user(&txc, txc_p, sizeof(struct timex)))
+   return -EFAULT;
+
+   /*
+* override for sparc64 specific timeval type: tv_usec
+* is 32 bit wide instead of 64-bit in __kernel_timex
+*/
+   kt->time.tv_usec = txc.time.tv_usec;
+   ret = do_adjtimex(kt);
+   txc.time.tv_usec = kt->time.tv_usec;
+
+   return copy_to_user(txc_p, &txc, sizeof(struct timex)) ? -EFAULT : ret;
+}
+
+SYSCALL_DEFINE2(sparc_clock_adjtime, const clockid_t, which_clock,struct timex 
__user *, txc_p)
+{
+   struct timex txc;   /* Local copy of parameter */
+   struct timex *kt = (void *)&txc;
+   int ret;
+
+   if (!IS_ENABLED(CONFIG_POSIX_TIMERS)) {
+   pr_err_once("process %d (%s) attempted a POSIX timer syscall "
+   "while CONFIG_POSIX_TIMERS is not set\n",
+   current->pid, current->comm);
+
+   return -ENOSYS;
+   }
+
+   /* Copy the user data space into the kernel copy
+* structure. But bear in mind that the structures
+* may change
+*/
+   if (copy_from_user(&txc, txc_p, sizeof(struct timex)))
+   return -EFAULT;
+
+   /*
+* override for sparc64 specific timeval type: tv_usec
+* is 32 bit wide instead of 64-bit in __kernel_timex
+*/
+   kt->time.tv_usec = txc.time.tv_usec;
+   ret = do_clock_adjtime(which_clock, kt);
+   txc.time.tv_usec = kt->time.tv_usec;
+
+   return copy_to_user(txc_p, &txc, sizeof(struct timex)) ? -EFAULT : ret;
+}
+
 SYSCALL_DEFINE5(utrap_install, utrap_entry_t, type,
utrap_handler_t, new_p, utrap_handler_t, new_d,
utrap_handler_t __user *, old_p,
diff --git a/arch/sparc/kernel/syscalls/syscall.tbl 
b/arch/sparc/kernel/syscalls/syscall.tbl
index 24ebef675184..e70110375399 100644
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -258,7 +258,8 @@
 21664  sigreturn   sys_nis_syscall
 217common  clone   sys_clone
 218common  ioprio_get  sys_ioprio_get
-219common  adjtimexsys_adjtimex
compat_sys_adjtimex
+21932  adjtimexsys_adjtimex
compat_sys_adjtimex
+21964  adjtimexsys_sparc_adjtimex
 22032  sigprocmask sys_sigprocmask 
compat_sys_sigprocmask
 22064  sigprocmask sys_nis_syscall
 221common  create_module   sys_ni_syscall
@@ -377,7 +378,8 @@
 331common  prlimit64   sys_prlimit64
 332common  name_to_handle_at   sys_name_to_handle_at
 333common  open_by_handle_at   sys_open_by_handle_at   
compat_sys_open_by_handle_at
-334common  clock_adjtime   sys_clock_adjtime   
compat_sys_clock_adjtime
+33432  clock_adjtime   sys_clock_adjtime   
compat_sys_clock_adjtime
+33464  clock_adjtime   sys_sparc_clock_adjtime
 335common  syncfs  sys_syncfs
 336common  sendmmsgsys_sendmmsg
compat_sys_sendmmsg
 337common  setns   sys_setns
diff -

[PATCH v2 23/29] timex: change syscalls to use struct __kernel_timex

2019-01-18 Thread Arnd Bergmann
From: Deepa Dinamani 

struct timex is not y2038 safe.
Switch all the syscall apis to use y2038 safe __kernel_timex.

Note that sys_adjtimex() does not have a y2038 safe solution.  C libraries
can implement it by calling clock_adjtime(CLOCK_REALTIME, ...).

Signed-off-by: Deepa Dinamani 
Signed-off-by: Arnd Bergmann 
---
 include/linux/syscalls.h   | 6 +++---
 kernel/time/posix-timers.c | 2 +-
 kernel/time/time.c | 4 +++-
 3 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index baa4b70b02d3..09330d5bda0c 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -54,7 +54,7 @@ struct __sysctl_args;
 struct sysinfo;
 struct timespec;
 struct timeval;
-struct timex;
+struct __kernel_timex;
 struct timezone;
 struct tms;
 struct utimbuf;
@@ -695,7 +695,7 @@ asmlinkage long sys_gettimeofday(struct timeval __user *tv,
struct timezone __user *tz);
 asmlinkage long sys_settimeofday(struct timeval __user *tv,
struct timezone __user *tz);
-asmlinkage long sys_adjtimex(struct timex __user *txc_p);
+asmlinkage long sys_adjtimex(struct __kernel_timex __user *txc_p);
 
 /* kernel/timer.c */
 asmlinkage long sys_getpid(void);
@@ -870,7 +870,7 @@ asmlinkage long sys_open_by_handle_at(int mountdirfd,
  struct file_handle __user *handle,
  int flags);
 asmlinkage long sys_clock_adjtime(clockid_t which_clock,
-   struct timex __user *tx);
+   struct __kernel_timex __user *tx);
 asmlinkage long sys_syncfs(int fd);
 asmlinkage long sys_setns(int fd, int nstype);
 asmlinkage long sys_sendmmsg(int fd, struct mmsghdr __user *msg,
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index 2d84b3db1ade..de79f85ae14f 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -1060,7 +1060,7 @@ int do_clock_adjtime(const clockid_t which_clock, struct 
__kernel_timex * ktx)
 }
 
 SYSCALL_DEFINE2(clock_adjtime, const clockid_t, which_clock,
-   struct timex __user *, utx)
+   struct __kernel_timex __user *, utx)
 {
struct __kernel_timex ktx;
int err;
diff --git a/kernel/time/time.c b/kernel/time/time.c
index d179d33f639a..78b5c8f1495a 100644
--- a/kernel/time/time.c
+++ b/kernel/time/time.c
@@ -263,7 +263,8 @@ COMPAT_SYSCALL_DEFINE2(settimeofday, struct old_timeval32 
__user *, tv,
 }
 #endif
 
-SYSCALL_DEFINE1(adjtimex, struct timex __user *, txc_p)
+#if !defined(CONFIG_64BIT_TIME) || defined(CONFIG_64BIT)
+SYSCALL_DEFINE1(adjtimex, struct __kernel_timex __user *, txc_p)
 {
struct __kernel_timex txc;  /* Local copy of parameter */
int ret;
@@ -277,6 +278,7 @@ SYSCALL_DEFINE1(adjtimex, struct timex __user *, txc_p)
ret = do_adjtimex(&txc);
return copy_to_user(txc_p, &txc, sizeof(struct __kernel_timex)) ? 
-EFAULT : ret;
 }
+#endif
 
 #ifdef CONFIG_COMPAT_32BIT_TIME
 int get_old_timex32(struct __kernel_timex *txc, const struct old_timex32 
__user *utp)
-- 
2.20.0



[PATCH v2 02/29] ia64: add statx and io_pgetevents syscalls

2019-01-18 Thread Arnd Bergmann
All architectures should implement these two, so assign numbers
and hook them up on ia64.

Signed-off-by: Arnd Bergmann 
---
 arch/ia64/kernel/syscalls/syscall.tbl | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/ia64/kernel/syscalls/syscall.tbl 
b/arch/ia64/kernel/syscalls/syscall.tbl
index e97caf51be42..52585281205b 100644
--- a/arch/ia64/kernel/syscalls/syscall.tbl
+++ b/arch/ia64/kernel/syscalls/syscall.tbl
@@ -335,3 +335,5 @@
 323common  copy_file_range sys_copy_file_range
 324common  preadv2 sys_preadv2
 325common  pwritev2sys_pwritev2
+326common  statx   sys_statx
+327common  io_pgetevents   sys_io_pgetevents
-- 
2.20.0



[PATCH v2 03/29] ia64: assign syscall numbers for perf and seccomp

2019-01-18 Thread Arnd Bergmann
Most architectures have assigned numbers for both seccomp and
perf_event_open, even when they do not implement either.

ia64 is an exception here, so for consistency let's add numbers for both
of them. Unless CONFIG_PERF_EVENTS and CONFIG_SECCOMP are implemented,
the system calls just return -ENOSYS.

Signed-off-by: Arnd Bergmann 
---
 arch/ia64/kernel/syscalls/syscall.tbl | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/ia64/kernel/syscalls/syscall.tbl 
b/arch/ia64/kernel/syscalls/syscall.tbl
index 52585281205b..2e93dbdcdb80 100644
--- a/arch/ia64/kernel/syscalls/syscall.tbl
+++ b/arch/ia64/kernel/syscalls/syscall.tbl
@@ -337,3 +337,5 @@
 325common  pwritev2sys_pwritev2
 326common  statx   sys_statx
 327common  io_pgetevents   sys_io_pgetevents
+328common  perf_event_open sys_perf_event_open
+329common  seccomp sys_seccomp
-- 
2.20.0



[PATCH v2 29/29] y2038: add 64-bit time_t syscalls to all 32-bit architectures

2019-01-18 Thread Arnd Bergmann
This adds 21 new system calls on each ABI that has 32-bit time_t
today. All of these have the exact same semantics as their existing
counterparts, and the new ones all have macro names that end in 'time64'
for clarification.

This gets us to the point of being able to safely use a C library
that has 64-bit time_t in user space. There are still a couple of
loose ends to tie up in various areas of the code, but this is the
big one, and should be entirely uncontroversial at this point.

In particular, there are four system calls (getitimer, setitimer,
waitid, and getrusage) that don't have a 64-bit counterpart yet,
but these can all be safely implemented in the C library by wrapping
around the existing system calls because the 32-bit time_t they
pass only counts elapsed time, not time since the epoch. They
will be dealt with later.

Signed-off-by: Arnd Bergmann 
---
The one point that still needs to be agreed on is the actual
number assignment. Following the earlier patch that added
the sysv IPC calls with common numbers where possible, I also
tried the same here, using consistent numbers on all 32-bit
architectures.

There are a couple of minor issues with this:

- On asm-generic, we now leave the numbers from 295 to 402
  unassigned, which wastes a small amount of kernel .data
  segment. Originally I had asm-generic start at 300 and
  everyone else start at 400 here, which was also not
  perfect, and we have gone beyond 400 already, so I ended
  up just using the same numbers as the rest here.

- Once we get to 512, we clash with the x32 numbers (unless
  we remove x32 support first), and probably have to skip
  a few more. I also considered using the 512..547 space
  for 32-bit-only calls (which never clash with x32), but
  that also seems to add a bit of complexity.

- On alpha, we have already used up the space up to 527
  (with a small hole between 261 and 299). We could sync
  up with that as well, but my feeling was that alpha syscalls
  are already special enough that I don't care.

Let me know if you have other ideas.
---
 arch/alpha/kernel/syscalls/syscall.tbl  |  2 +
 arch/arm/tools/syscall.tbl  | 21 ++
 arch/arm64/include/asm/unistd.h |  2 +-
 arch/arm64/include/asm/unistd32.h   | 41 +++
 arch/ia64/kernel/syscalls/syscall.tbl   |  1 +
 arch/m68k/kernel/syscalls/syscall.tbl   | 20 +
 arch/microblaze/kernel/syscalls/syscall.tbl | 21 ++
 arch/mips/kernel/syscalls/syscall_n32.tbl   | 21 ++
 arch/mips/kernel/syscalls/syscall_n64.tbl   |  1 +
 arch/mips/kernel/syscalls/syscall_o32.tbl   | 20 +
 arch/parisc/kernel/syscalls/syscall.tbl | 21 ++
 arch/powerpc/kernel/syscalls/syscall.tbl| 20 +
 arch/s390/kernel/syscalls/syscall.tbl   | 20 +
 arch/sh/kernel/syscalls/syscall.tbl | 20 +
 arch/sparc/kernel/syscalls/syscall.tbl  | 20 +
 arch/x86/entry/syscalls/syscall_32.tbl  | 20 +
 arch/xtensa/kernel/syscalls/syscall.tbl | 21 ++
 include/uapi/asm-generic/unistd.h   | 45 -
 scripts/checksyscalls.sh| 40 ++
 19 files changed, 375 insertions(+), 2 deletions(-)

diff --git a/arch/alpha/kernel/syscalls/syscall.tbl 
b/arch/alpha/kernel/syscalls/syscall.tbl
index 337b8108771a..936a33fae3c9 100644
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -461,3 +461,5 @@
 530common  getegid sys_getegid
 531common  geteuid sys_geteuid
 532common  getppid sys_getppid
+# all other architectures have common numbers for new syscall, alpha
+# is the exception.
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index a96d9b5ee04e..286afdc43283 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -416,3 +416,24 @@
 399common  io_pgetevents   sys_io_pgetevents_time32
 400common  migrate_pages   sys_migrate_pages
 401common  kexec_file_load sys_kexec_file_load
+# 402 is unused
+403common  clock_gettime64 sys_clock_gettime
+404common  clock_settime64 sys_clock_settime
+405common  clock_adjtime64 sys_clock_adjtime
+406common  clock_getres_time64 sys_clock_getres
+407common  clock_nanosleep_time64  sys_clock_nanosleep
+408common  timer_gettime64 sys_timer_gettime
+409common  timer_settime64 sys_timer_settime
+410common  timerfd_gettime64   sys_timerfd_gettime
+411common  timerfd_settime64   sys_timerfd_settime
+412common  utimensat_time64sys_utimensat
+413common  pselect6_time64 sys_pselect6
+414common  ppoll_time64sys_ppoll
+416common  io_pgetevents_time64

[PATCH v2 04/29] alpha: wire up io_pgetevents system call

2019-01-18 Thread Arnd Bergmann
The io_pgetevents system call was added in linux-4.18 but has
no entry for alpha:

warning: #warning syscall io_pgetevents not implemented [-Wcpp]

Assign the next system call number here.

Cc: sta...@vger.kernel.org
Signed-off-by: Arnd Bergmann 
---
 arch/alpha/kernel/syscalls/syscall.tbl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/alpha/kernel/syscalls/syscall.tbl 
b/arch/alpha/kernel/syscalls/syscall.tbl
index 7b56a53be5e3..e09558edae73 100644
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -451,3 +451,4 @@
 520common  preadv2 sys_preadv2
 521common  pwritev2sys_pwritev2
 522common  statx   sys_statx
+523common  io_pgetevents   sys_io_pgetevents
-- 
2.20.0



[PATCH v2 07/29] ARM: add kexec_file_load system call number

2019-01-18 Thread Arnd Bergmann
A couple of architectures including arm64 already implement the
kexec_file_load system call, on many others we have assigned a system
call number for it, but not implemented it yet.

Adding the number in arch/arm/ lets us use the system call on arm64
systems in compat mode, and also reduces the number of differences
between architectures. If we want to implement kexec_file_load on ARM
in the future, the number assignment means that kexec tools can already
be built with the now current set of kernel headers.

Signed-off-by: Arnd Bergmann 
---
 arch/arm/tools/syscall.tbl| 1 +
 arch/arm64/include/asm/unistd.h   | 2 +-
 arch/arm64/include/asm/unistd32.h | 2 ++
 3 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index 86de9eb34296..20ed7e026723 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -415,3 +415,4 @@
 398common  rseqsys_rseq
 399common  io_pgetevents   sys_io_pgetevents
 400common  migrate_pages   sys_migrate_pages
+401common  kexec_file_load sys_kexec_file_load
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index 261216c3336e..2c30e6f145ff 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -44,7 +44,7 @@
 #define __ARM_NR_compat_set_tls(__ARM_NR_COMPAT_BASE + 5)
 #define __ARM_NR_COMPAT_END(__ARM_NR_COMPAT_BASE + 0x800)
 
-#define __NR_compat_syscalls   401
+#define __NR_compat_syscalls   402
 #endif
 
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/arm64/include/asm/unistd32.h 
b/arch/arm64/include/asm/unistd32.h
index f15bcbacb8f6..8ca1d4c304f4 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -823,6 +823,8 @@ __SYSCALL(__NR_rseq, sys_rseq)
 __SYSCALL(__NR_io_pgetevents, compat_sys_io_pgetevents)
 #define __NR_migrate_pages 400
 __SYSCALL(__NR_migrate_pages, compat_sys_migrate_pages)
+#define __NR_kexec_file_load 401
+__SYSCALL(__NR_kexec_file_load, sys_kexec_file_load)
 
 /*
  * Please add new compat syscalls above this comment and update
-- 
2.20.0



[PATCH v2 10/29] sh: add statx system call

2019-01-18 Thread Arnd Bergmann
statx is available on almost all other architectures but
got missed on sh, so add it now.

Signed-off-by: Arnd Bergmann 
---
 arch/sh/kernel/syscalls/syscall.tbl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/sh/kernel/syscalls/syscall.tbl 
b/arch/sh/kernel/syscalls/syscall.tbl
index 21ec75288562..a70db013dbc7 100644
--- a/arch/sh/kernel/syscalls/syscall.tbl
+++ b/arch/sh/kernel/syscalls/syscall.tbl
@@ -390,3 +390,4 @@
 380common  copy_file_range sys_copy_file_range
 381common  preadv2 sys_preadv2
 382common  pwritev2sys_pwritev2
+383common  statx   sys_statx
-- 
2.20.0



[PATCH v2 01/29] ia64: add __NR_umount2 definition

2019-01-18 Thread Arnd Bergmann
Other architectures commonly use __NR_umount2 for sys_umount,
only ia64 and alpha use __NR_umount here. In order to synchronize
the generated tables, use umount2 like everyone else, and add back
the old name from asm/unistd.h for compatibility.

The __IGNORE_* lines are now all obsolete and can be removed as
a side-effect.

Signed-off-by: Arnd Bergmann 
---
 arch/ia64/include/asm/unistd.h| 14 --
 arch/ia64/include/uapi/asm/unistd.h   |  2 ++
 arch/ia64/kernel/syscalls/syscall.tbl |  2 +-
 3 files changed, 3 insertions(+), 15 deletions(-)

diff --git a/arch/ia64/include/asm/unistd.h b/arch/ia64/include/asm/unistd.h
index 0b08ebd2dfde..9ba6110b10b9 100644
--- a/arch/ia64/include/asm/unistd.h
+++ b/arch/ia64/include/asm/unistd.h
@@ -12,20 +12,6 @@
 
 #define NR_syscalls__NR_syscalls /* length of syscall table */
 
-/*
- * The following defines stop scripts/checksyscalls.sh from complaining about
- * unimplemented system calls.  Glibc provides for each of these by using
- * more modern equivalent system calls.
- */
-#define __IGNORE_fork  /* clone() */
-#define __IGNORE_time  /* gettimeofday() */
-#define __IGNORE_alarm /* setitimer(ITIMER_REAL, ... */
-#define __IGNORE_pause /* rt_sigprocmask(), rt_sigsuspend() */
-#define __IGNORE_utime /* utimes() */
-#define __IGNORE_getpgrp   /* getpgid() */
-#define __IGNORE_vfork /* clone() */
-#define __IGNORE_umount2   /* umount() */
-
 #define __ARCH_WANT_NEW_STAT
 #define __ARCH_WANT_SYS_UTIME
 
diff --git a/arch/ia64/include/uapi/asm/unistd.h 
b/arch/ia64/include/uapi/asm/unistd.h
index b2513922dcb5..013e0bcacc39 100644
--- a/arch/ia64/include/uapi/asm/unistd.h
+++ b/arch/ia64/include/uapi/asm/unistd.h
@@ -15,6 +15,8 @@
 
 #define __NR_Linux  1024
 
+#define __NR_umount __NR_umount2
+
 #include 
 
 #endif /* _UAPI_ASM_IA64_UNISTD_H */
diff --git a/arch/ia64/kernel/syscalls/syscall.tbl 
b/arch/ia64/kernel/syscalls/syscall.tbl
index b22203b40bfe..e97caf51be42 100644
--- a/arch/ia64/kernel/syscalls/syscall.tbl
+++ b/arch/ia64/kernel/syscalls/syscall.tbl
@@ -29,7 +29,7 @@
 17 common  getpid  sys_getpid
 18 common  getppid sys_getppid
 19 common  mount   sys_mount
-20 common  umount  sys_umount
+20 common  umount2 sys_umount
 21 common  setuid  sys_setuid
 22 common  getuid  sys_getuid
 23 common  geteuid sys_geteuid
-- 
2.20.0



[PATCH v2 08/29] m68k: assign syscall number for seccomp

2019-01-18 Thread Arnd Bergmann
Most architectures have assigned a number for the seccomp syscall
even when they do not implement it.

m68k is an exception here, so for consistency let's add the number.
Unless CONFIG_SECCOMP is implemented, the system call just
returns -ENOSYS.

Signed-off-by: Arnd Bergmann 
---
 arch/m68k/kernel/syscalls/syscall.tbl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/m68k/kernel/syscalls/syscall.tbl 
b/arch/m68k/kernel/syscalls/syscall.tbl
index 1a95c4a1bc0d..85779d6ef935 100644
--- a/arch/m68k/kernel/syscalls/syscall.tbl
+++ b/arch/m68k/kernel/syscalls/syscall.tbl
@@ -387,3 +387,4 @@
 377common  preadv2 sys_preadv2
 378common  pwritev2sys_pwritev2
 379common  statx   sys_statx
+380common  seccomp sys_seccomp
-- 
2.20.0



[PATCH v2 14/29] arch: add pkey and rseq syscall numbers everywhere

2019-01-18 Thread Arnd Bergmann
Most architectures define system call numbers for the rseq and pkey system
calls, even when they don't support the features, and perhaps never will.

Only a few architectures are missing these, so just define them anyway
for consistency. If we decide to add them later to one of these, the
system call numbers won't get out of sync then.

Signed-off-by: Arnd Bergmann 
---
 arch/alpha/include/asm/unistd.h | 4 
 arch/alpha/kernel/syscalls/syscall.tbl  | 4 
 arch/ia64/kernel/syscalls/syscall.tbl   | 4 
 arch/m68k/kernel/syscalls/syscall.tbl   | 4 
 arch/parisc/include/asm/unistd.h| 3 ---
 arch/parisc/kernel/syscalls/syscall.tbl | 4 
 arch/s390/include/asm/unistd.h  | 3 ---
 arch/s390/kernel/syscalls/syscall.tbl   | 3 +++
 arch/sh/kernel/syscalls/syscall.tbl | 4 
 arch/sparc/include/asm/unistd.h | 5 -
 arch/sparc/kernel/syscalls/syscall.tbl  | 4 
 arch/xtensa/kernel/syscalls/syscall.tbl | 1 +
 12 files changed, 28 insertions(+), 15 deletions(-)

diff --git a/arch/alpha/include/asm/unistd.h b/arch/alpha/include/asm/unistd.h
index 564ba87bdc38..31ad350b58a0 100644
--- a/arch/alpha/include/asm/unistd.h
+++ b/arch/alpha/include/asm/unistd.h
@@ -29,9 +29,5 @@
 #define __IGNORE_getppid
 #define __IGNORE_getuid
 
-/* Alpha doesn't have protection keys. */
-#define __IGNORE_pkey_mprotect
-#define __IGNORE_pkey_alloc
-#define __IGNORE_pkey_free
 
 #endif /* _ALPHA_UNISTD_H */
diff --git a/arch/alpha/kernel/syscalls/syscall.tbl 
b/arch/alpha/kernel/syscalls/syscall.tbl
index b0e247287908..25b4a7e76943 100644
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -452,3 +452,7 @@
 521common  pwritev2sys_pwritev2
 522common  statx   sys_statx
 523common  io_pgetevents   sys_io_pgetevents
+524common  pkey_alloc  sys_pkey_alloc
+525common  pkey_free   sys_pkey_free
+526common  pkey_mprotect   sys_pkey_mprotect
+527common  rseqsys_rseq
diff --git a/arch/ia64/kernel/syscalls/syscall.tbl 
b/arch/ia64/kernel/syscalls/syscall.tbl
index 2e93dbdcdb80..84e03de00177 100644
--- a/arch/ia64/kernel/syscalls/syscall.tbl
+++ b/arch/ia64/kernel/syscalls/syscall.tbl
@@ -339,3 +339,7 @@
 327common  io_pgetevents   sys_io_pgetevents
 328common  perf_event_open sys_perf_event_open
 329common  seccomp sys_seccomp
+330common  pkey_alloc  sys_pkey_alloc
+331common  pkey_free   sys_pkey_free
+332common  pkey_mprotect   sys_pkey_mprotect
+333common  rseqsys_rseq
diff --git a/arch/m68k/kernel/syscalls/syscall.tbl 
b/arch/m68k/kernel/syscalls/syscall.tbl
index 5354ba02eed2..ae88b85d068e 100644
--- a/arch/m68k/kernel/syscalls/syscall.tbl
+++ b/arch/m68k/kernel/syscalls/syscall.tbl
@@ -388,6 +388,10 @@
 378common  pwritev2sys_pwritev2
 379common  statx   sys_statx
 380common  seccomp sys_seccomp
+381common  pkey_alloc  sys_pkey_alloc
+382common  pkey_free   sys_pkey_free
+383common  pkey_mprotect   sys_pkey_mprotect
+384common  rseqsys_rseq
 # room for arch specific calls
 393common  semget  sys_semget
 394common  semctl  sys_semctl
diff --git a/arch/parisc/include/asm/unistd.h b/arch/parisc/include/asm/unistd.h
index c2c2afb28941..9ec1026af877 100644
--- a/arch/parisc/include/asm/unistd.h
+++ b/arch/parisc/include/asm/unistd.h
@@ -12,9 +12,6 @@
 
 #define __IGNORE_select/* newselect */
 #define __IGNORE_fadvise64 /* fadvise64_64 */
-#define __IGNORE_pkey_mprotect
-#define __IGNORE_pkey_alloc
-#define __IGNORE_pkey_free
 
 #ifndef ASM_LINE_SEP
 # define ASM_LINE_SEP ;
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl 
b/arch/parisc/kernel/syscalls/syscall.tbl
index 9bbd2f9f56c8..e07231de3597 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -367,3 +367,7 @@
 348common  pwritev2sys_pwritev2
compat_sys_pwritev2
 349common  statx   sys_statx
 350common  io_pgetevents   sys_io_pgetevents   
compat_sys_io_pgetevents
+351common  pkey_alloc  sys_pkey_alloc
+352common  pkey_free   sys_pkey_free
+353common  pkey_mprotect   sys_pkey_mprotect
+354common  rseqsys_rseq
diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h
index a1fbf15d53aa..ed08f114ee91 100644
--- a/arch/s390/include/asm/unistd.h
+++ b/arch/s390/include/asm

Re: [PATCH v4 1/3] fs: hoist EFSCORRUPTED definition into uapi header

2019-01-18 Thread Arnd Bergmann
On Fri, Jan 18, 2019 at 5:15 PM Jann Horn  wrote:
>
> Multiple filesystems can already return EFSCORRUPTED errors to userspace;
> however, so far, definitions of EFSCORRUPTED were in filesystem-private
> headers.
>
> I wanted to use EUCLEAN to indicate data corruption in the VFS layer;
> Dave Chinner says that I should instead hoist the definitions of
> EFSCORRUPTED into the UAPI header and then use EFSCORRUPTED.
>
> This patch is marked for stable backport because it is a prerequisite for
> the following patch.
>
> Cc: sta...@vger.kernel.org
> Suggested-by: Dave Chinner 
> Signed-off-by: Jann Horn 
> ---
>  fs/ext2/ext2.h   | 1 -
>  fs/ext4/ext4.h   | 1 -
>  fs/xfs/xfs_linux.h   | 1 -
>  include/linux/jbd2.h | 1 -
>  include/uapi/asm-generic/errno.h | 1 +
>  5 files changed, 1 insertion(+), 4 deletions(-)


For asm-generic:

Acked-by: Arnd Bergmann 


[PATCH v2 13/29] arch: add split IPC system calls where needed

2019-01-18 Thread Arnd Bergmann
The IPC system call handling is highly inconsistent across architectures,
some use sys_ipc, some use separate calls, and some use both.  We also
have some architectures that require passing IPC_64 in the flags, and
others that set it implicitly.

For the addition of a y2038 safe semtimedop() system call, I chose to only
support the separate entry points, but that requires first supporting
the regular ones with their own syscall numbers.

The IPC_64 is now implied by the new semctl/shmctl/msgctl system
calls even on the architectures that require passing it with the ipc()
multiplexer.

I'm not adding the new semtimedop() or semop() on 32-bit architectures,
those will get implemented using the new semtimedop_time64() version
that gets added along with the other time64 calls.
Three 64-bit architectures (powerpc, s390 and sparc) get semtimedop().

Signed-off-by: Arnd Bergmann 
---
One aspect here that might be a bit controversial is the use of
the same system call numbers across all architectures, synchronizing
all of them with the x86-32 numbers. With the new syscall.tbl
files, I hope we can just keep doing that in the future, and no
longer require the architecture maintainers to assign a number.

This is mainly useful for implementers of the C libraries: if
we can add future system calls everywhere at the same time, using
a particular version of the kernel headers also guarantees that
the system call number macro is visible.
---
 arch/m68k/kernel/syscalls/syscall.tbl | 11 +++
 arch/mips/kernel/syscalls/syscall_o32.tbl | 11 +++
 arch/powerpc/kernel/syscalls/syscall.tbl  | 13 +
 arch/s390/kernel/syscalls/syscall.tbl | 12 
 arch/sh/kernel/syscalls/syscall.tbl   | 11 +++
 arch/sparc/kernel/syscalls/syscall.tbl| 12 
 arch/x86/entry/syscalls/syscall_32.tbl| 11 +++
 7 files changed, 81 insertions(+)

diff --git a/arch/m68k/kernel/syscalls/syscall.tbl 
b/arch/m68k/kernel/syscalls/syscall.tbl
index 85779d6ef935..5354ba02eed2 100644
--- a/arch/m68k/kernel/syscalls/syscall.tbl
+++ b/arch/m68k/kernel/syscalls/syscall.tbl
@@ -388,3 +388,14 @@
 378common  pwritev2sys_pwritev2
 379common  statx   sys_statx
 380common  seccomp sys_seccomp
+# room for arch specific calls
+393common  semget  sys_semget
+394common  semctl  sys_semctl
+395common  shmget  sys_shmget
+396common  shmctl  sys_shmctl
+397common  shmat   sys_shmat
+398common  shmdt   sys_shmdt
+399common  msgget  sys_msgget
+400common  msgsnd  sys_msgsnd
+401common  msgrcv  sys_msgrcv
+402common  msgctl  sys_msgctl
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl 
b/arch/mips/kernel/syscalls/syscall_o32.tbl
index 3d5a47b80d2b..fa47ea8cc6ef 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -380,3 +380,14 @@
 366o32 statx   sys_statx
 367o32 rseqsys_rseq
 368o32 io_pgetevents   sys_io_pgetevents   
compat_sys_io_pgetevents
+# room for arch specific calls
+393o32 semget  sys_semget
+394o32 semctl  sys_semctl  
compat_sys_semctl
+395o32 shmget  sys_shmget
+396o32 shmctl  sys_shmctl  
compat_sys_shmctl
+397o32 shmat   sys_shmat   
compat_sys_shmat
+398o32 shmdt   sys_shmdt
+399o32 msgget  sys_msgget
+400o32 msgsnd  sys_msgsnd  
compat_sys_msgsnd
+401o32 msgrcv  sys_msgrcv  
compat_sys_msgrcv
+402o32 msgctl  sys_msgctl  
compat_sys_msgctl
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl 
b/arch/powerpc/kernel/syscalls/syscall.tbl
index db3bbb8744af..7555874ce39c 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -414,6 +414,7 @@
 363spu switch_endian   sys_ni_syscall
 364common  userfaultfd sys_userfaultfd
 365common  membarrier  sys_membarrier
+# 366-377 originally left for IPC, now unused
 378nospu   mlock2  sys_mlock2
 379nospu   copy_file_range sys_copy_file_range
 380common  preadv2 sys_preadv2

[PATCH v2 15/29] alpha: add standard statfs64/fstatfs64 syscalls

2019-01-18 Thread Arnd Bergmann
As Joseph Myers points out, alpha has never had a standard statfs64
interface and instead returns only 32-bit numbers here.

While there is an old osf_statfs64 system call that returns additional
data, this has some other quirks and does not get used in glibc.

I considered making the statfs64 structure layout compatible
with the one used by the kernel on most other 64 bit architectures that
implement it (ia64, parisc, powerpc, and sparc), but in the end
decided to stay with the one that was traditionally defined in
the alpha headers but not used, since this is also what glibc
exposes to user space.

Signed-off-by: Arnd Bergmann 
---
 arch/alpha/kernel/syscalls/syscall.tbl | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/alpha/kernel/syscalls/syscall.tbl 
b/arch/alpha/kernel/syscalls/syscall.tbl
index 25b4a7e76943..0ebd59fdcb8b 100644
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -456,3 +456,5 @@
 525common  pkey_free   sys_pkey_free
 526common  pkey_mprotect   sys_pkey_mprotect
 527common  rseqsys_rseq
+528common  statfs64sys_statfs64
+529common  fstatfs64   sys_fstatfs64
-- 
2.20.0



[PATCH v2 09/29] sh: remove duplicate unistd_32.h file

2019-01-18 Thread Arnd Bergmann
When I merged this patch, the file was accidentally left intact
instead of being removed, which means any changes to syscall.tbl
have no effect.

Fixes: 2b3c5a99d5f3 ("sh: generate uapi header and syscall table header files")
Signed-off-by: Arnd Bergmann 
---
 arch/sh/include/uapi/asm/unistd_32.h | 403 ---
 1 file changed, 403 deletions(-)
 delete mode 100644 arch/sh/include/uapi/asm/unistd_32.h

diff --git a/arch/sh/include/uapi/asm/unistd_32.h 
b/arch/sh/include/uapi/asm/unistd_32.h
deleted file mode 100644
index 31c85aa251ab..
--- a/arch/sh/include/uapi/asm/unistd_32.h
+++ /dev/null
@@ -1,403 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef __ASM_SH_UNISTD_32_H
-#define __ASM_SH_UNISTD_32_H
-
-/*
- * Copyright (C) 1999  Niibe Yutaka
- */
-
-/*
- * This file contains the system call numbers.
- */
-
-#define __NR_restart_syscall 0
-#define __NR_exit1
-#define __NR_fork2
-#define __NR_read3
-#define __NR_write   4
-#define __NR_open5
-#define __NR_close   6
-#define __NR_waitpid 7
-#define __NR_creat   8
-#define __NR_link9
-#define __NR_unlink 10
-#define __NR_execve 11
-#define __NR_chdir  12
-#define __NR_time   13
-#define __NR_mknod  14
-#define __NR_chmod  15
-#define __NR_lchown 16
-/* 17 was sys_break */
-#define __NR_oldstat18
-#define __NR_lseek  19
-#define __NR_getpid 20
-#define __NR_mount  21
-#define __NR_umount 22
-#define __NR_setuid 23
-#define __NR_getuid 24
-#define __NR_stime  25
-#define __NR_ptrace 26
-#define __NR_alarm  27
-#define __NR_oldfstat   28
-#define __NR_pause  29
-#define __NR_utime  30
-/* 31 was sys_stty */
-/* 32 was sys_gtty */
-#define __NR_access 33
-#define __NR_nice   34
-/* 35 was sys_ftime */
-#define __NR_sync   36
-#define __NR_kill   37
-#define __NR_rename 38
-#define __NR_mkdir  39
-#define __NR_rmdir  40
-#define __NR_dup41
-#define __NR_pipe   42
-#define __NR_times  43
-/* 44 was sys_prof */
-#define __NR_brk45
-#define __NR_setgid 46
-#define __NR_getgid 47
-#define __NR_signal 48
-#define __NR_geteuid49
-#define __NR_getegid50
-#define __NR_acct   51
-#define __NR_umount252
-/* 53 was sys_lock */
-#define __NR_ioctl  54
-#define __NR_fcntl  55
-/* 56 was sys_mpx */
-#define __NR_setpgid57
-/* 58 was sys_ulimit */
-/* 59 was sys_olduname */
-#define __NR_umask  60
-#define __NR_chroot 61
-#define __NR_ustat  62
-#define __NR_dup2   63
-#define __NR_getppid64
-#define __NR_getpgrp65
-#define __NR_setsid 66
-#define __NR_sigaction  67
-#define __NR_sgetmask   68
-#define __NR_ssetmask   69
-#define __NR_setreuid   70
-#define __NR_setregid   71
-#define __NR_sigsuspend 72
-#define __NR_sigpending 73
-#define __NR_sethostname74
-#define __NR_setrlimit  75
-#define __NR_getrlimit  76 /* Back compatible 2Gig limited rlimit 
*/
-#define __NR_getrusage  77
-#define __NR_gettimeofday   78
-#define __NR_settimeofday   79
-#define __NR_getgroups  80
-#define __NR_setgroups  81
-/* 82 was sys_oldselect */
-#define __NR_symlink83
-#define __NR_oldlstat   84
-#define __NR_readlink   85
-#define __NR_uselib 86
-#define __NR_swapon 87
-#define __NR_reboot 88
-#define __NR_readdir89
-#define __NR_mmap   90
-#define __NR_munmap 91
-#define __NR_truncate   92
-#define __NR_ftruncate  93
-#define __NR_fchmod 94
-#define __NR_fchown 95
-#define __NR_getpriority96
-#define __NR_setpriority97
-/* 98 was sys_profil */
-#define __NR_statfs 99
-#define __NR_fstatfs   100
-   /* 101 was sys_ioperm */
-#define __NR_socketcall102
-#define __NR_syslog103
-#define __NR_setitimer 104
-#define __NR

[PATCH v2 18/29] time: make adjtime compat handling available for 32 bit

2019-01-18 Thread Arnd Bergmann
We want to reuse the compat_timex handling on 32-bit architectures the
same way we are using the compat handling for timespec when moving to
64-bit time_t.

Move all definitions related to compat_timex out of the compat code
into the normal timekeeping code, along with a rename to old_timex32,
corresponding to the timespec/timeval structures, and make it controlled
by CONFIG_COMPAT_32BIT_TIME, which 32-bit architectures will then select.

Signed-off-by: Arnd Bergmann 
---
 include/linux/compat.h | 35 ++-
 include/linux/time32.h | 32 -
 kernel/compat.c| 64 --
 kernel/time/posix-timers.c | 14 ++--
 kernel/time/time.c | 70 +++---
 5 files changed, 102 insertions(+), 113 deletions(-)

diff --git a/include/linux/compat.h b/include/linux/compat.h
index 056be0d03722..657ca6abd855 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -132,37 +132,6 @@ struct compat_tms {
compat_clock_t  tms_cstime;
 };
 
-struct compat_timex {
-   compat_uint_t modes;
-   compat_long_t offset;
-   compat_long_t freq;
-   compat_long_t maxerror;
-   compat_long_t esterror;
-   compat_int_t status;
-   compat_long_t constant;
-   compat_long_t precision;
-   compat_long_t tolerance;
-   struct old_timeval32 time;
-   compat_long_t tick;
-   compat_long_t ppsfreq;
-   compat_long_t jitter;
-   compat_int_t shift;
-   compat_long_t stabil;
-   compat_long_t jitcnt;
-   compat_long_t calcnt;
-   compat_long_t errcnt;
-   compat_long_t stbcnt;
-   compat_int_t tai;
-
-   compat_int_t:32; compat_int_t:32; compat_int_t:32; compat_int_t:32;
-   compat_int_t:32; compat_int_t:32; compat_int_t:32; compat_int_t:32;
-   compat_int_t:32; compat_int_t:32; compat_int_t:32;
-};
-
-struct timex;
-int compat_get_timex(struct timex *, const struct compat_timex __user *);
-int compat_put_timex(struct compat_timex __user *, const struct timex *);
-
 #define _COMPAT_NSIG_WORDS (_COMPAT_NSIG / _COMPAT_NSIG_BPW)
 
 typedef struct {
@@ -808,7 +777,7 @@ asmlinkage long compat_sys_gettimeofday(struct 
old_timeval32 __user *tv,
struct timezone __user *tz);
 asmlinkage long compat_sys_settimeofday(struct old_timeval32 __user *tv,
struct timezone __user *tz);
-asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp);
+asmlinkage long compat_sys_adjtimex(struct old_timex32 __user *utp);
 
 /* kernel/timer.c */
 asmlinkage long compat_sys_sysinfo(struct compat_sysinfo __user *info);
@@ -911,7 +880,7 @@ asmlinkage long compat_sys_open_by_handle_at(int mountdirfd,
 struct file_handle __user *handle,
 int flags);
 asmlinkage long compat_sys_clock_adjtime(clockid_t which_clock,
-struct compat_timex __user *tp);
+struct old_timex32 __user *tp);
 asmlinkage long compat_sys_sendmmsg(int fd, struct compat_mmsghdr __user *mmsg,
unsigned vlen, unsigned int flags);
 asmlinkage ssize_t compat_sys_process_vm_readv(compat_pid_t pid,
diff --git a/include/linux/time32.h b/include/linux/time32.h
index 118b9977080c..820a22e2b98b 100644
--- a/include/linux/time32.h
+++ b/include/linux/time32.h
@@ -10,6 +10,7 @@
  */
 
 #include 
+#include 
 
 #define TIME_T_MAX (time_t)((1UL << ((sizeof(time_t) << 3) - 1)) - 1)
 
@@ -35,13 +36,42 @@ struct old_utimbuf32 {
old_time32_tmodtime;
 };
 
+struct old_timex32 {
+   u32 modes;
+   s32 offset;
+   s32 freq;
+   s32 maxerror;
+   s32 esterror;
+   s32 status;
+   s32 constant;
+   s32 precision;
+   s32 tolerance;
+   struct old_timeval32 time;
+   s32 tick;
+   s32 ppsfreq;
+   s32 jitter;
+   s32 shift;
+   s32 stabil;
+   s32 jitcnt;
+   s32 calcnt;
+   s32 errcnt;
+   s32 stbcnt;
+   s32 tai;
+
+   s32:32; s32:32; s32:32; s32:32;
+   s32:32; s32:32; s32:32; s32:32;
+   s32:32; s32:32; s32:32;
+};
+
 extern int get_old_timespec32(struct timespec64 *, const void __user *);
 extern int put_old_timespec32(const struct timespec64 *, void __user *);
 extern int get_old_itimerspec32(struct itimerspec64 *its,
const struct old_itimerspec32 __user *uits);
 extern int put_old_itimerspec32(const struct itimerspec64 *its,
struct old_itimerspec32 __user *uits);
-
+struct timex;
+int get_old_timex32(struct timex *, const struct old_timex32 __user *);
+int put_old_timex32(struct old_timex32 __user *, const struct timex *);
 
 #if __BITS_PER_LONG == 64
 
diff --git a/kernel/compat.c b/kernel/compat.c
index f01affa17e22..d8a36c6ad7c9 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -20,7 +20,6 

[PATCH v2 17/29] syscalls: remove obsolete __IGNORE_ macros

2019-01-18 Thread Arnd Bergmann
These are all for ignoring the lack of obsolete system calls,
which have been marked the same way in scripts/checksyscalls.sh,
so these can be removed.

Signed-off-by: Arnd Bergmann 
---
 arch/mips/include/asm/unistd.h   | 16 
 arch/parisc/include/asm/unistd.h |  3 ---
 arch/s390/include/asm/unistd.h   |  2 --
 arch/xtensa/include/asm/unistd.h | 12 
 4 files changed, 33 deletions(-)

diff --git a/arch/mips/include/asm/unistd.h b/arch/mips/include/asm/unistd.h
index b23d74a601b3..5e9eeb83d8d4 100644
--- a/arch/mips/include/asm/unistd.h
+++ b/arch/mips/include/asm/unistd.h
@@ -53,22 +53,6 @@
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_CLONE
 
-/* whitelists for checksyscalls */
-#define __IGNORE_select
-#define __IGNORE_vfork
-#define __IGNORE_time
-#define __IGNORE_uselib
-#define __IGNORE_fadvise64_64
-#define __IGNORE_getdents64
-#if _MIPS_SIM == _MIPS_SIM_NABI32
-#define __IGNORE_truncate64
-#define __IGNORE_ftruncate64
-#define __IGNORE_stat64
-#define __IGNORE_lstat64
-#define __IGNORE_fstat64
-#define __IGNORE_fstatat64
-#endif
-
 #endif /* !__ASSEMBLY__ */
 
 #endif /* _ASM_UNISTD_H */
diff --git a/arch/parisc/include/asm/unistd.h b/arch/parisc/include/asm/unistd.h
index 9ec1026af877..385eae49ed02 100644
--- a/arch/parisc/include/asm/unistd.h
+++ b/arch/parisc/include/asm/unistd.h
@@ -10,9 +10,6 @@
 
 #define SYS_ify(syscall_name)   __NR_##syscall_name
 
-#define __IGNORE_select/* newselect */
-#define __IGNORE_fadvise64 /* fadvise64_64 */
-
 #ifndef ASM_LINE_SEP
 # define ASM_LINE_SEP ;
 #endif
diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h
index ed08f114ee91..59202ceea1f6 100644
--- a/arch/s390/include/asm/unistd.h
+++ b/arch/s390/include/asm/unistd.h
@@ -10,8 +10,6 @@
 #include 
 #include 
 
-#define __IGNORE_time
-
 #define __ARCH_WANT_NEW_STAT
 #define __ARCH_WANT_OLD_READDIR
 #define __ARCH_WANT_SYS_ALARM
diff --git a/arch/xtensa/include/asm/unistd.h b/arch/xtensa/include/asm/unistd.h
index 0d34629dafc5..81cc52ea1bd5 100644
--- a/arch/xtensa/include/asm/unistd.h
+++ b/arch/xtensa/include/asm/unistd.h
@@ -10,18 +10,6 @@
 #define __ARCH_WANT_SYS_UTIME
 #define __ARCH_WANT_SYS_GETPGRP
 
-/* 
- * Ignore legacy system calls in the checksyscalls.sh script
- */
-
-#define __IGNORE_fork  /* use clone */
-#define __IGNORE_time
-#define __IGNORE_alarm /* use setitimer */
-#define __IGNORE_pause
-#define __IGNORE_mmap  /* use mmap2 */
-#define __IGNORE_vfork /* use clone */
-#define __IGNORE_fadvise64 /* use fadvise64_64 */
-
 #define NR_syscalls__NR_syscalls
 
 #endif /* _XTENSA_UNISTD_H */
-- 
2.20.0



[PATCH v2 26/29] y2038: use time32 syscall names on 32-bit

2019-01-18 Thread Arnd Bergmann
This is the big flip, where all 32-bit architectures set COMPAT_32BIT_TIME
and use the _time32 system calls from the former compat layer instead
of the system calls that take __kernel_timespec and similar arguments.

The temporary redirects for __kernel_timespec, __kernel_itimerspec
and __kernel_timex can get removed with this.

It would be easy to split this commit by architecture, but with the new
generated system call tables, it's easy enough to do it all at once,
which makes it a little easier to check that the changes are the same
in each table.

Signed-off-by: Arnd Bergmann 
---
 arch/Kconfig|  2 +-
 arch/arm/kernel/sys_oabi-compat.c   |  8 +-
 arch/arm/tools/syscall.tbl  | 46 ++--
 arch/m68k/kernel/syscalls/syscall.tbl   | 42 +--
 arch/microblaze/kernel/syscalls/syscall.tbl | 46 ++--
 arch/mips/kernel/syscalls/syscall_o32.tbl   | 44 +--
 arch/parisc/kernel/syscalls/syscall.tbl | 69 +++--
 arch/powerpc/kernel/syscalls/syscall.tbl| 82 +++--
 arch/sh/kernel/syscalls/syscall.tbl | 42 +--
 arch/sparc/kernel/syscalls/syscall.tbl  | 64 ++--
 arch/x86/entry/syscalls/syscall_32.tbl  | 44 +--
 arch/xtensa/kernel/syscalls/syscall.tbl | 44 +--
 include/uapi/asm-generic/unistd.h   | 56 +++---
 13 files changed, 335 insertions(+), 254 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 4cfb6de48f79..46db715a7f42 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -759,7 +759,7 @@ config 64BIT_TIME
  handling.
 
 config COMPAT_32BIT_TIME
-   def_bool (!64BIT && 64BIT_TIME) || COMPAT
+   def_bool !64BIT || COMPAT
help
  This enables 32 bit time_t support in addition to 64 bit time_t 
support.
  This is relevant on all 32-bit architectures, and 64-bit architectures
diff --git a/arch/arm/kernel/sys_oabi-compat.c 
b/arch/arm/kernel/sys_oabi-compat.c
index 92ab36f38795..acd054a42ba2 100644
--- a/arch/arm/kernel/sys_oabi-compat.c
+++ b/arch/arm/kernel/sys_oabi-compat.c
@@ -317,10 +317,10 @@ struct oabi_sembuf {
 asmlinkage long sys_oabi_semtimedop(int semid,
struct oabi_sembuf __user *tsops,
unsigned nsops,
-   const struct timespec __user *timeout)
+   const struct old_timespec32 __user *timeout)
 {
struct sembuf *sops;
-   struct timespec local_timeout;
+   struct old_timespec32 local_timeout;
long err;
int i;
 
@@ -350,7 +350,7 @@ asmlinkage long sys_oabi_semtimedop(int semid,
} else {
mm_segment_t fs = get_fs();
set_fs(KERNEL_DS);
-   err = sys_semtimedop(semid, sops, nsops, timeout);
+   err = sys_semtimedop_time32(semid, sops, nsops, timeout);
set_fs(fs);
}
kfree(sops);
@@ -375,7 +375,7 @@ asmlinkage int sys_oabi_ipc(uint call, int first, int 
second, int third,
return  sys_oabi_semtimedop(first,
(struct oabi_sembuf __user *)ptr,
second,
-   (const struct timespec __user 
*)fifth);
+   (const struct old_timespec32 __user 
*)fifth);
default:
return sys_ipc(call, first, second, third, ptr, fifth);
}
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index b54b7f2bc24a..200f4b878a46 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -137,7 +137,7 @@
 121common  setdomainname   sys_setdomainname
 122common  uname   sys_newuname
 # 123 was sys_modify_ldt
-124common  adjtimexsys_adjtimex
+124common  adjtimexsys_adjtimex_time32
 125common  mprotectsys_mprotect
 126common  sigprocmask sys_sigprocmask
 # 127 was sys_create_module
@@ -174,8 +174,8 @@
 158common  sched_yield sys_sched_yield
 159common  sched_get_priority_max  sys_sched_get_priority_max
 160common  sched_get_priority_min  sys_sched_get_priority_min
-161common  sched_rr_get_interval   sys_sched_rr_get_interval
-162common  nanosleep   sys_nanosleep
+161common  sched_rr_get_interval   sys_sched_rr_get_interval_time32
+162common  nanosleep   sys_nanosleep_time32
 163common  mremap  sys_mremap
 164common  setresuid   sys_setresuid16
 165common  getresuid   sys_getresuid16
@@ -190,7 +190,7 @@
 174common  rt_sigactionsys_rt_sigaction
 175common  rt_sigprocmask  sys_rt_sigprocmask
 176common  rt_sigpending   sys_rt_sigpending
-177  

[PATCH v2 19/29] time: Add struct __kernel_timex

2019-01-18 Thread Arnd Bergmann
From: Deepa Dinamani 

struct timex uses struct timeval internally.
struct timeval is not y2038 safe.
Introduce a new UAPI type struct __kernel_timex
that is y2038 safe.

struct __kernel_timex uses a timeval type that is
similar to struct __kernel_timespec which preserves the
same structure size across 32 bit and 64 bit ABIs.
struct __kernel_timex also restructures other members of the
structure to make the structure the same on 64 bit and 32 bit
architectures.
Note that struct __kernel_timex is the same as struct timex
on a 64 bit architecture.

The above solution is similar to other new y2038 syscalls
that are being introduced: both 32 bit and 64 bit ABIs
have a common entry, and the compat entry supports the old 32 bit
syscall interface.

Alternatives considered were:
1. Add new time type to struct timex that makes use of padded
   bits. This time type could be based on the struct __kernel_timespec.
   modes will use a flag to notify which time structure should be
   used internally.
   This needs some application level changes on both 64 bit and 32 bit
   architectures. Although 64 bit machines could continue to use the
   older timeval structure without any changes.

2. Add a new u8 type to struct timex that makes use of padded bits. This
   can be used to save higher order tv_sec bits. modes will use a flag to
   notify presence of such a type.
   This will need some application level changes on 32 bit architectures.

3. Add a new compat_timex structure that differs in only the size of the
   time type; keep rest of struct timex the same.
   This requires extra syscalls to manage all 3 cases on 64 bit
   architectures. This will not need any application level changes but will
   add more complexity from kernel side.

Signed-off-by: Deepa Dinamani 
---
 include/linux/timex.h  |  7 +++
 include/uapi/linux/timex.h | 41 ++
 2 files changed, 48 insertions(+)

diff --git a/include/linux/timex.h b/include/linux/timex.h
index 39c25dbebfe8..7f40e9e42ecc 100644
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -53,6 +53,13 @@
 #ifndef _LINUX_TIMEX_H
 #define _LINUX_TIMEX_H
 
+/* CONFIG_64BIT_TIME enables new 64 bit time_t syscalls in the compat path
+ * and 32-bit emulation.
+ */
+#ifndef CONFIG_64BIT_TIME
+#define __kernel_timex timex
+#endif
+
 #include 
 
 #define ADJ_ADJTIME0x8000  /* switch between adjtime/adjtimex 
modes */
diff --git a/include/uapi/linux/timex.h b/include/uapi/linux/timex.h
index 92685d826444..a1c6b73016a5 100644
--- a/include/uapi/linux/timex.h
+++ b/include/uapi/linux/timex.h
@@ -92,6 +92,47 @@ struct timex {
int  :32; int  :32; int  :32;
 };
 
+struct __kernel_timex_timeval {
+   __kernel_time64_t   tv_sec;
+   long long   tv_usec;
+};
+
+#ifndef __kernel_timex
+struct __kernel_timex {
+   unsigned int modes; /* mode selector */
+   int :32;/* pad */
+   long long offset;   /* time offset (usec) */
+   long long freq; /* frequency offset (scaled ppm) */
+   long long maxerror;/* maximum error (usec) */
+   long long esterror;/* estimated error (usec) */
+   int status; /* clock command/status */
+   int :32;/* pad */
+   long long constant;/* pll time constant */
+   long long precision;/* clock precision (usec) (read only) */
+   long long tolerance;/* clock frequency tolerance (ppm)
+  * (read only)
+  */
+   struct __kernel_timex_timeval time; /* (read only, except for 
ADJ_SETOFFSET) */
+   long long tick; /* (modified) usecs between clock ticks */
+
+   long long ppsfreq;/* pps frequency (scaled ppm) (ro) */
+   long long jitter; /* pps jitter (us) (ro) */
+   int shift;  /* interval duration (s) (shift) (ro) */
+   int :32;/* pad */
+   long long stabil;/* pps stability (scaled ppm) (ro) */
+   long long jitcnt; /* jitter limit exceeded (ro) */
+   long long calcnt; /* calibration intervals (ro) */
+   long long errcnt; /* calibration errors (ro) */
+   long long stbcnt; /* stability limit exceeded (ro) */
+
+   int tai;/* TAI offset (ro) */
+
+   int  :32; int  :32; int  :32; int  :32;
+   int  :32; int  :32; int  :32; int  :32;
+   int  :32; int  :32; int  :32;
+};
+#endif
+
 /*
  * Mode codes (timex.mode)
  */
-- 
2.20.0



[PATCH v2 12/29] ipc: rename old-style shmctl/semctl/msgctl syscalls

2019-01-18 Thread Arnd Bergmann
The behavior of these system calls is slightly different between
architectures, as determined by the CONFIG_ARCH_WANT_IPC_PARSE_VERSION
symbol. Most architectures that implement the split IPC syscalls don't set
that symbol and only get the modern version, but alpha, arm, microblaze,
mips-n32, mips-n64 and xtensa expect the caller to pass the IPC_64 flag.

For the architectures that so far only implement sys_ipc(), i.e. m68k,
mips-o32, powerpc, s390, sh, sparc, and x86-32, we want the new behavior
when adding the split syscalls, so we need to distinguish between the
two groups of architectures.

The method I picked for this distinction is to have a separate system call
entry point: sys_old_*ctl() now uses ipc_parse_version, while sys_*ctl()
does not. The system call tables of the five architectures are changed
accordingly.

As an additional benefit, we no longer need the configuration specific
definition for ipc_parse_version(), it always does the same thing now,
but simply won't get called on architectures with the modern interface.

A small downside is that on architectures that do set
ARCH_WANT_IPC_PARSE_VERSION, we now have an extra set of entry points
that are never called. They only add a few bytes of bloat, so it seems
better to keep them compared to adding yet another Kconfig symbol.
I considered adding new syscall numbers for the IPC_64 variants for
consistency, but decided against that for now.

Signed-off-by: Arnd Bergmann 
---
 arch/alpha/kernel/syscalls/syscall.tbl  |  6 ++--
 arch/arm/tools/syscall.tbl  |  6 ++--
 arch/arm64/include/asm/unistd32.h   |  6 ++--
 arch/microblaze/kernel/syscalls/syscall.tbl |  6 ++--
 arch/mips/kernel/syscalls/syscall_n32.tbl   |  6 ++--
 arch/mips/kernel/syscalls/syscall_n64.tbl   |  6 ++--
 arch/xtensa/kernel/syscalls/syscall.tbl |  6 ++--
 include/linux/syscalls.h|  3 ++
 ipc/msg.c   | 39 
 ipc/sem.c   | 39 
 ipc/shm.c   | 40 +
 ipc/syscall.c   | 12 +++
 ipc/util.h  | 21 ---
 kernel/sys_ni.c |  3 ++
 14 files changed, 137 insertions(+), 62 deletions(-)

diff --git a/arch/alpha/kernel/syscalls/syscall.tbl 
b/arch/alpha/kernel/syscalls/syscall.tbl
index f920b65e8c49..b0e247287908 100644
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -174,17 +174,17 @@
 187common  osf_alt_sigpending  sys_ni_syscall
 188common  osf_alt_setsid  sys_ni_syscall
 199common  osf_swapon  sys_swapon
-200common  msgctl  sys_msgctl
+200common  msgctl  sys_old_msgctl
 201common  msgget  sys_msgget
 202common  msgrcv  sys_msgrcv
 203common  msgsnd  sys_msgsnd
-204common  semctl  sys_semctl
+204common  semctl  sys_old_semctl
 205common  semget  sys_semget
 206common  semop   sys_semop
 207common  osf_utsname sys_osf_utsname
 208common  lchown  sys_lchown
 209common  shmat   sys_shmat
-210common  shmctl  sys_shmctl
+210common  shmctl  sys_old_shmctl
 211common  shmdt   sys_shmdt
 212common  shmget  sys_shmget
 213common  osf_mvalid  sys_ni_syscall
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index 20ed7e026723..b54b7f2bc24a 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -314,15 +314,15 @@
 297common  recvmsg sys_recvmsg
 298common  semop   sys_semop   sys_oabi_semop
 299common  semget  sys_semget
-300common  semctl  sys_semctl
+300common  semctl  sys_old_semctl
 301common  msgsnd  sys_msgsnd
 302common  msgrcv  sys_msgrcv
 303common  msgget  sys_msgget
-304common  msgctl  sys_msgctl
+304common  msgctl  sys_old_msgctl
 305common  shmat   sys_shmat
 306common  shmdt   sys_shmdt
 307common  shmget  sys_shmget
-308common  shmctl  sys_shmctl
+308common  shmctl  sys_old_shmctl
 309common  add_key sys_add_key
 310common  request_key sys_request_key
 311common  keyctl  sys_keyctl
diff --git a/arch/arm64/include/asm/unistd32

[PATCH v2 05/29] alpha: update syscall macro definitions

2019-01-18 Thread Arnd Bergmann
Other architectures commonly use __NR_umount2 for sys_umount,
only ia64 and alpha use __NR_umount here. In order to synchronize
the generated tables, use umount2 like everyone else, and add back
the old name from asm/unistd.h for compatibility.

For shmat, alpha uses the osf_shmat name, we can do the same thing
here, which means we don't have to add an entry in the __IGNORE
list now that shmat is mandatory everywhere.

alarm, creat, pause, time, and utime are optional everywhere
these days, no need to list them here any more.

I considered also adding the regular versions of the get*id system
calls that have different names and calling conventions on alpha,
which would further help unify the syscall ABI, but for now
I decided against that.

Signed-off-by: Arnd Bergmann 
---
 arch/alpha/include/asm/unistd.h| 6 --
 arch/alpha/include/uapi/asm/unistd.h   | 5 +
 arch/alpha/kernel/syscalls/syscall.tbl | 4 ++--
 3 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/arch/alpha/include/asm/unistd.h b/arch/alpha/include/asm/unistd.h
index 21b706a5b772..564ba87bdc38 100644
--- a/arch/alpha/include/asm/unistd.h
+++ b/arch/alpha/include/asm/unistd.h
@@ -22,18 +22,12 @@
 /*
  * Ignore legacy syscalls that we don't use.
  */
-#define __IGNORE_alarm
-#define __IGNORE_creat
 #define __IGNORE_getegid
 #define __IGNORE_geteuid
 #define __IGNORE_getgid
 #define __IGNORE_getpid
 #define __IGNORE_getppid
 #define __IGNORE_getuid
-#define __IGNORE_pause
-#define __IGNORE_time
-#define __IGNORE_utime
-#define __IGNORE_umount2
 
 /* Alpha doesn't have protection keys. */
 #define __IGNORE_pkey_mprotect
diff --git a/arch/alpha/include/uapi/asm/unistd.h 
b/arch/alpha/include/uapi/asm/unistd.h
index 9ba724f116f1..4507071f995f 100644
--- a/arch/alpha/include/uapi/asm/unistd.h
+++ b/arch/alpha/include/uapi/asm/unistd.h
@@ -2,6 +2,11 @@
 #ifndef _UAPI_ALPHA_UNISTD_H
 #define _UAPI_ALPHA_UNISTD_H
 
+/* These are traditionally the names linux-alpha uses for
+ * the two otherwise generic system calls */
+#define __NR_umount__NR_umount2
+#define __NR_osf_shmat __NR_shmat
+
 #include 
 
 #endif /* _UAPI_ALPHA_UNISTD_H */
diff --git a/arch/alpha/kernel/syscalls/syscall.tbl 
b/arch/alpha/kernel/syscalls/syscall.tbl
index e09558edae73..f920b65e8c49 100644
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -29,7 +29,7 @@
 19 common  lseek   sys_lseek
 20 common  getxpid sys_getxpid
 21 common  osf_mount   sys_osf_mount
-22 common  umount  sys_umount
+22 common  umount2 sys_umount
 23 common  setuid  sys_setuid
 24 common  getxuid sys_getxuid
 25 common  exec_with_loadersys_ni_syscall
@@ -183,7 +183,7 @@
 206common  semop   sys_semop
 207common  osf_utsname sys_osf_utsname
 208common  lchown  sys_lchown
-209common  osf_shmat   sys_shmat
+209common  shmat   sys_shmat
 210common  shmctl  sys_shmctl
 211common  shmdt   sys_shmdt
 212common  shmget  sys_shmget
-- 
2.20.0



[PATCH v2 20/29] time: fix sys_timer_settime prototype

2019-01-18 Thread Arnd Bergmann
A small typo has crept into the y2038 conversion of the timer_settime
system call. So far this was completely harmless, but once we start
using the new version, this has to be fixed.

Fixes: 6ff847350702 ("time: Change types to new y2038 safe __kernel_itimerspec")
Signed-off-by: Arnd Bergmann 
---
 include/linux/syscalls.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 938d8908b9e0..baa4b70b02d3 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -591,7 +591,7 @@ asmlinkage long sys_timer_gettime(timer_t timer_id,
 asmlinkage long sys_timer_getoverrun(timer_t timer_id);
 asmlinkage long sys_timer_settime(timer_t timer_id, int flags,
const struct __kernel_itimerspec __user 
*new_setting,
-   struct itimerspec __user *old_setting);
+   struct __kernel_itimerspec __user *old_setting);
 asmlinkage long sys_timer_delete(timer_t timer_id);
 asmlinkage long sys_clock_settime(clockid_t which_clock,
const struct __kernel_timespec __user *tp);
-- 
2.20.0



[PATCH v2 22/29] timex: use __kernel_timex internally

2019-01-18 Thread Arnd Bergmann
From: Deepa Dinamani 

struct timex is not y2038 safe.
Replace all uses of timex with y2038 safe __kernel_timex.

Note that struct __kernel_timex is an ABI interface definition.
We could define a new structure based on __kernel_timex that
is only available internally instead. Right now, there isn't
a strong motivation for this as the structure is isolated to
a few defined struct timex interfaces and such a structure would
be exactly the same as struct timex.

The patch was generated by the following coccinelle script:

virtual patch

@depends on patch forall@
identifier ts;
expression e;
@@
(
- struct timex ts;
+ struct __kernel_timex ts;
|
- struct timex ts = {};
+ struct __kernel_timex ts = {};
|
- struct timex ts = e;
+ struct __kernel_timex ts = e;
|
- struct timex *ts;
+ struct __kernel_timex *ts;
|
(memset \| copy_from_user \| copy_to_user \)(...,
- sizeof(struct timex))
+ sizeof(struct __kernel_timex))
)

@depends on patch forall@
identifier ts;
identifier fn;
@@
fn(...,
- struct timex *ts,
+ struct __kernel_timex *ts,
...) {
...
}

@depends on patch forall@
identifier ts;
identifier fn;
@@
fn(...,
- struct timex *ts) {
+ struct __kernel_timex *ts) {
...
}

Signed-off-by: Deepa Dinamani 
Cc: linux-alpha@vger.kernel.org
Cc: net...@vger.kernel.org
---
 arch/alpha/kernel/osf_sys.c  |  5 +++--
 arch/sparc/kernel/sys_sparc_64.c |  4 ++--
 drivers/ptp/ptp_clock.c  |  2 +-
 include/linux/posix-clock.h  |  2 +-
 include/linux/time32.h   |  6 +++---
 include/linux/timex.h|  4 ++--
 kernel/time/ntp.c| 18 ++
 kernel/time/ntp_internal.h   |  2 +-
 kernel/time/posix-clock.c|  2 +-
 kernel/time/posix-timers.c   |  8 
 kernel/time/posix-timers.h   |  2 +-
 kernel/time/time.c   | 14 +++---
 kernel/time/timekeeping.c|  4 ++--
 13 files changed, 38 insertions(+), 35 deletions(-)

diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index 792586038808..bf497b8b0ec6 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -1253,7 +1253,7 @@ struct timex32 {
 
 SYSCALL_DEFINE1(old_adjtimex, struct timex32 __user *, txc_p)
 {
-struct timex txc;
+   struct __kernel_timex txc;
int ret;
 
/* copy relevant bits of struct timex. */
@@ -1270,7 +1270,8 @@ SYSCALL_DEFINE1(old_adjtimex, struct timex32 __user *, 
txc_p)
if (copy_to_user(txc_p, , offsetof(struct timex32, time)) ||
(copy_to_user(_p->tick, , sizeof(struct timex32) - 
  offsetof(struct timex32, tick))) ||
-   (put_tv_to_tv32(_p->time, )))
+   (put_user(txc.time.tv_sec, _p->time.tv_sec)) ||
+   (put_user(txc.time.tv_usec, _p->time.tv_usec)))
  return -EFAULT;
 
return ret;
diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c
index 37de18a11207..9825ca6a6020 100644
--- a/arch/sparc/kernel/sys_sparc_64.c
+++ b/arch/sparc/kernel/sys_sparc_64.c
@@ -548,7 +548,7 @@ SYSCALL_DEFINE2(getdomainname, char __user *, name, int, 
len)
 SYSCALL_DEFINE1(sparc_adjtimex, struct timex __user *, txc_p)
 {
struct timex txc;   /* Local copy of parameter */
-   struct timex *kt = (void *)
+   struct __kernel_timex *kt = (void *)
int ret;
 
/* Copy the user data space into the kernel copy
@@ -572,7 +572,7 @@ SYSCALL_DEFINE1(sparc_adjtimex, struct timex __user *, 
txc_p)
 SYSCALL_DEFINE2(sparc_clock_adjtime, const clockid_t, which_clock,struct timex 
__user *, txc_p)
 {
struct timex txc;   /* Local copy of parameter */
-   struct timex *kt = (void *)
+   struct __kernel_timex *kt = (void *)
int ret;
 
if (!IS_ENABLED(CONFIG_POSIX_TIMERS)) {
diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c
index 48f3594a7458..79bd102c9bbc 100644
--- a/drivers/ptp/ptp_clock.c
+++ b/drivers/ptp/ptp_clock.c
@@ -124,7 +124,7 @@ static int ptp_clock_gettime(struct posix_clock *pc, struct 
timespec64 *tp)
return err;
 }
 
-static int ptp_clock_adjtime(struct posix_clock *pc, struct timex *tx)
+static int ptp_clock_adjtime(struct posix_clock *pc, struct __kernel_timex *tx)
 {
struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock);
struct ptp_clock_info *ops;
diff --git a/include/linux/posix-clock.h b/include/linux/posix-clock.h
index 3a3bc71017d5..18674d7d5b1c 100644
--- a/include/linux/posix-clock.h
+++ b/include/linux/posix-clock.h
@@ -51,7 +51,7 @@ struct posix_clock;
 struct posix_clock_operations {
struct module *owner;
 
-   int  (*clock_adjtime)(struct posix_clock *pc, struct timex *tx);
+   int  (*clock_adjtime)(struct posix_clock *pc, struct __kernel_timex 
*tx);
 
int  (*clock_gettime)(struct posix_clock *pc, struct timespec64 *ts);
 
diff --git a/include/linux/time32.h b/include/linux/time32.h
index 820a22e2b98b..0a1f302a1753 100644
--- 

[PATCH v2 00/29] y2038: add time64 syscalls

2019-01-18 Thread Arnd Bergmann
This is a minor update of the patches I posted last week, I
would like to add this into linux-next now, but would still do
changes if there are concerns about the contents. The first
version did not see a lot of replies, which could mean that
either everyone is happy with it, or that it was largely ignored.

See also the article at https://lwn.net/Articles/776435/.

Changes since v1:

- posting as a combined series for simplicity
- dropped one mips patch that was merged as a 5.0 fix
- reworked s390 compat syscall handling (posted separately)
  and rebased on top of that series
- minor fixes for arm64 and powerpc
- added alpha statfs64 interfaces
- added alpha get{eg,eu,g,p,u,pp}id()

 Arnd


v1 description for cleanup:
The system call tables have diverged a bit over the years, and a number
of the recent additions never made it into all architectures, for one
reason or another.

This is an attempt to clean it up as far as we can without breaking
compatibility, doing a number of steps:

- Add system calls that have not yet been integrated into all
  architectures but that we definitely want there.

- Add the separate ipc syscalls on all architectures that
  traditionally only had sys_ipc(). This version is done without
  support for IPC_OLD that we have in sys_ipc. The
  new semtimedop_time64 syscall will only be added here, not
  in sys_ipc

- Add syscall numbers for a couple of syscalls that we probably
  don't need everywhere, in particular pkey_* and rseq,
  for the purpose of symmetry: if it's in asm-generic/unistd.h,
  it makes sense to have it everywhere.

- Prepare for having the same system call numbers for any future
  calls. In combination with the generated tables, this hopefully
  makes it easier to add new calls across all architectures
  together.

Most of the contents of this series are unrelated to the actual
y2038 work, but for the moment, that second series is based on
this one. If there are any concerns about changes here, I
can drop or rewrite any individual patch in this series.

My plan is to merge any patches in this series that are found
to be good together with the y2038 patches for linux-5.1, so
please review and provide Acks for merging through my tree,
or pick them up for 5.0 if they seem urgent enough.

v1 description for y2038 patches:

This series finally gets us to the point of having system calls with
64-bit time_t on all architectures, after a long time of incremental
preparation patches.

There was actually one conversion that I missed during the summer,
i.e. Deepa's timex series, which I now updated based on the 5.0-rc1 changes
and review comments.

I hope that the actual conversion should be uncontroversial by now,
even if some of the patches are rather large.

The one area that may need a little discussion is for the system call
numbers assigned in the final patch: Can we get consensus on whether
the idea of using the same numbers on all architectures, as well as my
choice of numbers makes sense here?

So far, I have done a lot of build testing across most architectures,
which has found a number of bugs. I have also done an LTP run on arm32
with existing user space, but not on the other architectures. I did LTP
tests with a modified musl libc[2] last summer on an older version of
this series to make sure that the new 64-bit time_t interfaces work.
The version there will need updates for testing with this new kernel
patch series; I plan to do that next.

For testing, the series plus the preparatory patches is available at
[3].  Once there is a general agreement on this series and I have done
more tests for the new system calls, I plan to add this to linux-next
through my asm-generic tree or Thomas' timers tree.

Please review and test!

  Arnd

[1] https://lore.kernel.org/lkml/20190110162435.309262-1-a...@arndb.de/T/
[2] https://git.linaro.org/people/arnd/musl-y2038.git/
[3] https://git.kernel.org/pub/scm/linux/kernel/git/arnd/playground.git 
y2038-5.0-rc1

Arnd Bergmann (26):
  ia64: add __NR_umount2 definition
  ia64: add statx and io_pgetevents syscalls
  ia64: assign syscall numbers for perf and seccomp
  alpha: wire up io_pgetevents system call
  alpha: update syscall macro definitions
  ARM: add migrate_pages() system call
  ARM: add kexec_file_load system call number
  m68k: assign syscall number for seccomp
  sh: remove duplicate unistd_32.h file
  sh: add statx system call
  sparc64: fix sparc_ipc type conversion
  ipc: rename old-style shmctl/semctl/msgctl syscalls
  arch: add split IPC system calls where needed
  arch: add pkey and rseq syscall numbers everywhere
  alpha: add standard statfs64/fstatfs64 syscalls
  alpha: add generic get{eg,eu,g,p,u,pp}id() syscalls
  syscalls: remove obsolete __IGNORE_ macros
  time: make adjtime compat handling available for 32 bit
  time: fix sys_timer_settime prototype
  sparc64: add custom adjtimex/clock_adjtime functions
  x86/x32: use time64 versions of sigtimedwait and recvmmsg
  y2038: syscalls: rename

[PATCH v2 24/29] x86/x32: use time64 versions of sigtimedwait and recvmmsg

2019-01-18 Thread Arnd Bergmann
x32 has always followed the time64 calling conventions of these
syscalls, which required a special hack in compat_get_timespec
aka get_old_timespec32 to continue working.

Since we now have the time64 syscalls, use those explicitly.

Signed-off-by: Arnd Bergmann 
---
 arch/x86/entry/syscalls/syscall_64.tbl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/entry/syscalls/syscall_64.tbl 
b/arch/x86/entry/syscalls/syscall_64.tbl
index f0b1709a5ffb..43a622aec07e 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -361,7 +361,7 @@
 520x32 execve  __x32_compat_sys_execve/ptregs
 521x32 ptrace  __x32_compat_sys_ptrace
 522x32 rt_sigpending   __x32_compat_sys_rt_sigpending
-523x32 rt_sigtimedwait __x32_compat_sys_rt_sigtimedwait
+523x32 rt_sigtimedwait __x32_compat_sys_rt_sigtimedwait_time64
 524x32 rt_sigqueueinfo __x32_compat_sys_rt_sigqueueinfo
 525x32 sigaltstack __x32_compat_sys_sigaltstack
 526x32 timer_create__x32_compat_sys_timer_create
@@ -375,7 +375,7 @@
 534 x32 preadv              __x32_compat_sys_preadv64
 535 x32 pwritev             __x32_compat_sys_pwritev64
 536 x32 rt_tgsigqueueinfo   __x32_compat_sys_rt_tgsigqueueinfo
-537 x32 recvmmsg            __x32_compat_sys_recvmmsg
+537 x32 recvmmsg            __x32_compat_sys_recvmmsg_time64
 538 x32 sendmmsg            __x32_compat_sys_sendmmsg
 539 x32 process_vm_readv    __x32_compat_sys_process_vm_readv
 540 x32 process_vm_writev   __x32_compat_sys_process_vm_writev
-- 
2.20.0



[PATCH v2 11/29] sparc64: fix sparc_ipc type conversion

2019-01-18 Thread Arnd Bergmann
__kernel_timespec and timespec are currently the same type, but once
they are different, the type cast has to be changed here.

Signed-off-by: Arnd Bergmann 
---
 arch/sparc/kernel/sys_sparc_64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c
index 274ed0b9b3e0..1c079e7bab09 100644
--- a/arch/sparc/kernel/sys_sparc_64.c
+++ b/arch/sparc/kernel/sys_sparc_64.c
@@ -344,7 +344,7 @@ SYSCALL_DEFINE6(sparc_ipc, unsigned int, call, int, first, 
unsigned long, second
goto out;
case SEMTIMEDOP:
err = sys_semtimedop(first, ptr, (unsigned int)second,
-   (const struct timespec __user *)
+   (const struct __kernel_timespec __user *)
 (unsigned long) fifth);
goto out;
case SEMGET:
-- 
2.20.0



Re: [PATCH 14/15] arch: add split IPC system calls where needed

2019-01-15 Thread Arnd Bergmann
On Tue, Jan 15, 2019 at 5:36 PM Geert Uytterhoeven  wrote:
> On Tue, Jan 15, 2019 at 4:19 PM Arnd Bergmann  wrote:
> > On Tue, Jan 15, 2019 at 4:01 PM Arnd Bergmann  wrote:
> > > On Mon, Jan 14, 2019 at 4:59 AM Michael Ellerman  
> > > wrote:
> > > > Arnd Bergmann  writes:
> > > > >  arch/m68k/kernel/syscalls/syscall.tbl | 11 +++
> > > > >  arch/mips/kernel/syscalls/syscall_o32.tbl | 11 +++
> > > > >  arch/powerpc/kernel/syscalls/syscall.tbl  | 12 
> > > >
> > > > I have some changes I'd like to make to our syscall table that will
> > > > clash with this.
> > > >
> > > > I'll try and send them today.
> > >
> > > Ok. Are those for 5.0 or 5.1? If they are intended for 5.0, it would be
> > > nice for me to have a branch based on 5.0-rc1 that I can put
> > > the other patches on top of.
> >
> > There is also another change that I considered:
> >
> > At the end of my series, we have a lot of entries like
> >
> > 245 32  clock_settime   sys_clock_settime32
> > 245 64  clock_settime   sys_clock_settime
> > 245 spu clock_settime   sys_clock_settime
> >
> > which could be folded into
> >
> > 245 32  clock_settime   sys_clock_settime32
> > 245 spu64 clock_settime   sys_clock_settime
> >
> > if we just add another option to the ABI field. Any thoughts on
> > that?
>
> So "spu64" would mean "spu + 64"?
> That makes it more difficult to read, and to grep.
> What about allowing multiple ABIs, separated by commas?
> So that line would become:
>
> 245 spu,64 clock_settime   sys_clock_settime

I agree that would be a nice representation, but doing this would
again require changing the script, which then in turn clashes with
Firoz' patches to unify it under the scripts/ directory.

   Arnd


Re: [PATCH 14/15] arch: add split IPC system calls where needed

2019-01-15 Thread Arnd Bergmann
On Tue, Jan 15, 2019 at 4:01 PM Arnd Bergmann  wrote:
>
> On Mon, Jan 14, 2019 at 4:59 AM Michael Ellerman  wrote:
> > Arnd Bergmann  writes:
> > >  arch/m68k/kernel/syscalls/syscall.tbl | 11 +++
> > >  arch/mips/kernel/syscalls/syscall_o32.tbl | 11 +++
> > >  arch/powerpc/kernel/syscalls/syscall.tbl  | 12 
> >
> > I have some changes I'd like to make to our syscall table that will
> > clash with this.
> >
> > I'll try and send them today.
>
> Ok. Are those for 5.0 or 5.1? If they are intended for 5.0, it would be
> nice for me to have a branch based on 5.0-rc1 that I can put
> the other patches on top of.

There is also another change that I considered:

At the end of my series, we have a lot of entries like

245 32  clock_settime   sys_clock_settime32
245 64  clock_settime   sys_clock_settime
245 spu clock_settime   sys_clock_settime

which could be folded into

245 32  clock_settime   sys_clock_settime32
245 spu64 clock_settime   sys_clock_settime

if we just add another option to the ABI field. Any thoughts on
that?

  Arnd


Re: [PATCH 14/15] arch: add split IPC system calls where needed

2019-01-15 Thread Arnd Bergmann
On Mon, Jan 14, 2019 at 4:59 AM Michael Ellerman  wrote:
> Arnd Bergmann  writes:
> >  arch/m68k/kernel/syscalls/syscall.tbl | 11 +++
> >  arch/mips/kernel/syscalls/syscall_o32.tbl | 11 +++
> >  arch/powerpc/kernel/syscalls/syscall.tbl  | 12 
>
> I have some changes I'd like to make to our syscall table that will
> clash with this.
>
> I'll try and send them today.

Ok. Are those for 5.0 or 5.1? If they are intended for 5.0, it would be
nice for me to have a branch based on 5.0-rc1 that I can put
the other patches on top of.

> > diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl 
> > b/arch/powerpc/kernel/syscalls/syscall.tbl
> > index db3bbb8744af..1bffab54ff35 100644
> > --- a/arch/powerpc/kernel/syscalls/syscall.tbl
> > +++ b/arch/powerpc/kernel/syscalls/syscall.tbl
> > @@ -425,3 +425,15 @@
> >  386  nospu   pkey_mprotect   sys_pkey_mprotect
> >  387  nospu   rseqsys_rseq
> >  388  nospu   io_pgetevents   sys_io_pgetevents 
> >   compat_sys_io_pgetevents
> > +# room for arch specific syscalls
> > +392  64  semtimedop  sys_semtimedop
> > +393  common  semget  sys_semget
> > +394  common  semctl  sys_semctl
> >   compat_sys_semctl
> > +395  common  shmget  sys_shmget
> > +396  common  shmctl  sys_shmctl
> >   compat_sys_shmctl
> > +397  common  shmat   sys_shmat 
> >   compat_sys_shmat
> > +398  common  shmdt   sys_shmdt
> > +399  common  msgget  sys_msgget
> > +400  common  msgsnd  sys_msgsnd
> >   compat_sys_msgsnd
> > +401  common  msgrcv  sys_msgrcv
> >   compat_sys_msgrcv
> > +402  common  msgctl  sys_msgctl
> >   compat_sys_msgctl
>
> We already have a gap at 366-377 from when we tried to add the split IPC
> calls a few years back.
>
> I guess I don't mind leaving that gap and using the common numbers as
> you've done here.
>
> But it would be good to add a comment pointing out that we have room
> at 366 for more arch specific syscalls as well.

Ah, I missed that. I've added this to my patch now:

index 5c0936d862fc..2ddfba536d5f 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -460,6 +460,7 @@
 363spu switch_endian   sys_ni_syscall
 364common  userfaultfd sys_userfaultfd
 365common  membarrier  sys_membarrier
+# 366-377 originally left for IPC, now unused
 378nospu   mlock2  sys_mlock2
 379nospu   copy_file_range sys_copy_file_range
 380common  preadv2 sys_preadv2
 compat_sys_preadv2

   Arnd


Re: [PATCH 15/15] arch: add pkey and rseq syscall numbers everywhere

2019-01-15 Thread Arnd Bergmann
On Tue, Jan 15, 2019 at 12:52 PM Russell King - ARM Linux admin
 wrote:
>
> On Thu, Jan 10, 2019 at 05:24:35PM +0100, Arnd Bergmann wrote:
> > Most architectures define system call numbers for the rseq and pkey system
> > calls, even when they don't support the features, and perhaps never will.
> >
> > Only a few architectures are missing these, so just define them anyway
> > for consistency. If we decide to add them later to one of these, the
> > system call numbers won't get out of sync then.
>
> I was lambasted for adding the pkey syscalls for 32-bit ARM in 2016,
> which will probably never support it.  Why has the attitude towards
> this kind of thing now apparently become acceptable?

I was (and still am) a bit unsure about this one. A number of architectures
that won't ever support them added the numbers anyway, but I wasn't sure if
any of those that didn't add them might need them later.

I tried to just go by the rule that anything that we list in
asm-generic/unistd.h is probably important enough that we want to list it
everywhere, even if that includes a couple that end up being rather
architecture specific.

I'm happy to drop this patch if you or others feel that we're better off
without it though.

  Arnd


Re: [PATCH 14/15] arch: add split IPC system calls where needed

2019-01-11 Thread Arnd Bergmann
On Thu, Jan 10, 2019 at 9:33 PM Heiko Carstens
 wrote:
> On Thu, Jan 10, 2019 at 05:24:34PM +0100, Arnd Bergmann wrote:

> > diff --git a/arch/s390/kernel/syscalls/syscall.tbl 
> > b/arch/s390/kernel/syscalls/syscall.tbl
> > index 022fc099b628..428cf512a757 100644
> > --- a/arch/s390/kernel/syscalls/syscall.tbl
> > +++ b/arch/s390/kernel/syscalls/syscall.tbl
> > @@ -391,3 +391,15 @@
> >  381  common  kexec_file_load sys_kexec_file_load 
> > compat_sys_kexec_file_load
> >  382  common  io_pgetevents   sys_io_pgetevents   
> > compat_sys_io_pgetevents
> >  383  common  rseqsys_rseq
> > compat_sys_rseq
> > +# room for arch specific syscalls
> > +392  64  semtimedop  sys_semtimedop  -
> > +393  common  semget  sys_semget  
> > sys_semget
> ...
> > +395  common  shmget  sys_shmget  
> > sys_shmget
> ...
> > +398  common  shmdt   sys_shmdt   
> > sys_shmdt
> > +399  common  msgget  sys_msgget  
> > sys_msgget
>
> These four need compat system call wrappers, unfortunately... (well,
> actually only shmget and shmdt require them, but let's add them for
> all four). See arch/s390/kernel/compat_wrapper.c
>
> I'm afraid this compat special handling will be even more annoying in
> the future, since s390 will be the only architecture which requires
> this special handling.
>
> _Maybe_ it would make sense to automatically generate a weak compat
> system call wrapper for s390 with the SYSCALL_DEFINE macros, but that
> probably won't work in all cases.

For some reason I was under the impression that s390 already did that.
However, it seems that x86 does, so I'll try to convert the x86 version
for s390, and see if I can get rid of all the wrappers that way.

It would certainly be safer to have the wrappers always present,
especially if we expect future system calls to be added to the
s390 table by whoever implements the syscall itself.

  Arnd


Re: [PATCH 15/15] arch: add pkey and rseq syscall numbers everywhere

2019-01-11 Thread Arnd Bergmann
On Thu, Jan 10, 2019 at 9:36 PM Heiko Carstens
 wrote:
> On Thu, Jan 10, 2019 at 05:24:35PM +0100, Arnd Bergmann wrote:

> Since you only need/want the system call numbers, could you please
> change these lines to:
>
> > +384  common  pkey_alloc  -   -
> > +385  common  pkey_free   -   -
> > +386  common  pkey_mprotect   -   -
>
> Otherwise it _looks_ like we would need compat wrappers here as well,
> even though all of them would just jump to sys_ni_syscall() in this
> case. Making this explicit seems to be better.

Ok, fair enough. I considered doing this originally and then
decided against it for consistency with the asm-generic file,
but I don't care much either way.

Is this something you may want to add later? I'm not sure exactly
how pkey compares to s390 storage keys, or if this is something
completely unrelated.

 Arnd


Re: [PATCH 00/15] arch: synchronize syscall tables in preparation for y2038

2019-01-10 Thread Arnd Bergmann
On Thu, Jan 10, 2019 at 7:11 PM Geert Uytterhoeven  wrote:
> On Thu, Jan 10, 2019 at 6:06 PM Arnd Bergmann  wrote:
> > On Thu, Jan 10, 2019 at 5:59 PM Geert Uytterhoeven  
> > wrote:
> > > On Thu, Jan 10, 2019 at 5:26 PM Arnd Bergmann  wrote:
> > > > The system call tables have diverged a bit over the years, and a number
> > > > of the recent additions never made it into all architectures, for one
> > > > reason or another.
> > > >
> > > > This is an attempt to clean it up as far as we can without breaking
> > > > compatibility, doing a number of steps:
> > >
> > > Thanks a lot!
> > >
> > > > - Add system calls that have not yet been integrated into all
> > > >   architectures but that we definitely want there.
> > >
> > > It looks like you missed wiring up io_pgetevents() on m68k.
> > > Is that intentional?
> >
> > Yes, I thought I had described that somewhere but maybe I
> > forgot: semtimedop() and io_pgetevents() get replaced with
> > time64 versions in the follow-up, so I only added them in
> > 64-bit architectures. If you think we should have both
> > io_pgetevents() and io_pgetevents_time32() on all 32-bit
> > architectures, I can add that as well.
>
> Thanks, sounds fine to me.

Just to be sure, you mean it's fine to not add it, not that we should
add it?

 Arnd


Re: [PATCH 00/15] arch: synchronize syscall tables in preparation for y2038

2019-01-10 Thread Arnd Bergmann
On Thu, Jan 10, 2019 at 7:10 PM Joseph Myers  wrote:
>
> On Thu, 10 Jan 2019, Arnd Bergmann wrote:
>
> > - Add system calls that have not yet been integrated into all
> >   architectures but that we definitely want there.
>
> glibc has a note that alpha lacks statfs64, any plans for that?

Good catch, I missed that because all other 64-bit architectures
have a statfs() call with 64-bit fields. I see that it also has an
osf_statfs64 structure and system call with lots of padding and some
oddly sized fields: f_type, f_flags and f_namemax are only 16 bits
wide, the rest is all 64-bit.

Adding the regular statfs64() should be easy enough, we just need to
decide which layout to use:

a) use the currently unused 'struct statfs64' as provided by the
alpha uapi headers, which has a 32-bit __statfs_word but
64-bit f_blocks, f_bfree, f_bavail, f_files, and f_ffree.

b) copy asm-generic/statfs.h to the alpha asm/statfs.h and
change statfs64 to have the regular layout that we use
on all other 64-bit architectures, using all 64-bit fields.

The other open question for alpha (as mentioned in one of the
patches I sent) would be whether to add get{eg,eu,g,p,pp,u}id()
with the regular calling conventions.

   Arnd


[PATCH 00/11] y2038: add time64 syscalls

2019-01-10 Thread Arnd Bergmann
This series finally gets us to the point of having system calls with
64-bit time_t on all architectures, after a long time of incremental
preparation patches.

There was actually one conversion that I missed during the summer,
i.e. Deepa's timex series, which I now updated based on the 5.0-rc1 changes
and review comments.

I hope that the actual conversion should be uncontroversial by now,
even if some of the patches are rather large.

The one area that may need a little discussion is for the system call
numbers assigned in the final patch: Can we get consensus on whether
the idea of using the same numbers on all architectures, as well as my
choice of numbers makes sense here?

So far, I have done a lot of build testing across most architectures,
which has found a number of bugs. I have also done an LTP run on arm32
with existing user space, but not on the other architectures. I did LTP
tests with a modified musl libc[2] last summer on an older version of
this series to make sure that the new 64-bit time_t interfaces work.
The version there will need updates for testing with this new kernel
patch series; I plan to do that next.

For testing, the series plus the preparatory patches is available at
[3].  Once there is a general agreement on this series and I have done
more tests for the new system calls, I plan to add this to linux-next
through my asm-generic tree or Thomas' timers tree.

Please review and test!

  Arnd

[1] https://lore.kernel.org/lkml/20190110162435.309262-1-a...@arndb.de/T/
[2] https://git.linaro.org/people/arnd/musl-y2038.git/
[3] https://git.kernel.org/pub/scm/linux/kernel/git/arnd/playground.git 
y2038-5.0-rc1

Arnd Bergmann (8):
  time: make adjtime compat handling available for 32 bit
  time: fix sys_timer_settime prototype
  sparc64: add custom adjtimex/clock_adjtime functions
  y2038: syscalls: rename y2038 compat syscalls
  y2038: use time32 syscall names on 32-bit
  y2038: remove struct definition redirects
  y2038: rename old time and utime syscalls
  y2038: add 64-bit time_t syscalls to all 32-bit architectures

Deepa Dinamani (3):
  time: Add struct __kernel_timex
  timex: use __kernel_timex internally
  timex: change syscalls to use struct __kernel_timex

 arch/Kconfig|   2 +-
 arch/alpha/kernel/osf_sys.c |   5 +-
 arch/alpha/kernel/syscalls/syscall.tbl  |   2 +
 arch/arm/include/asm/unistd.h   |   4 +-
 arch/arm/kernel/sys_oabi-compat.c   |   8 +-
 arch/arm/tools/syscall.tbl  |  77 -
 arch/arm64/include/asm/unistd.h |   2 +-
 arch/arm64/include/asm/unistd32.h   |  89 ++
 arch/ia64/kernel/syscalls/syscall.tbl   |   1 +
 arch/m68k/include/asm/unistd.h  |   4 +-
 arch/m68k/kernel/syscalls/syscall.tbl   |  72 +++-
 arch/microblaze/include/asm/unistd.h|   4 +-
 arch/microblaze/kernel/syscalls/syscall.tbl |  77 -
 arch/mips/include/asm/unistd.h  |   4 +-
 arch/mips/kernel/syscalls/syscall_n32.tbl   |  71 
 arch/mips/kernel/syscalls/syscall_n64.tbl   |   1 +
 arch/mips/kernel/syscalls/syscall_o32.tbl   |  74 +++-
 arch/parisc/include/asm/unistd.h|   9 +-
 arch/parisc/kernel/syscalls/syscall.tbl | 105 -
 arch/powerpc/include/asm/unistd.h   |   8 +-
 arch/powerpc/kernel/syscalls/syscall.tbl| 121 +++-
 arch/s390/include/asm/unistd.h  |   2 +-
 arch/s390/kernel/syscalls/syscall.tbl   |  72 +++-
 arch/sh/include/asm/unistd.h|   4 +-
 arch/sh/kernel/syscalls/syscall.tbl |  72 +++-
 arch/sparc/include/asm/unistd.h |   8 +-
 arch/sparc/kernel/sys_sparc_64.c|  59 +-
 arch/sparc/kernel/syscalls/syscall.tbl  | 100 +++-
 arch/x86/entry/syscalls/syscall_32.tbl  |  74 +++-
 arch/x86/entry/syscalls/syscall_64.tbl  |   4 +-
 arch/x86/include/asm/unistd.h   |   8 +-
 arch/xtensa/include/asm/unistd.h|   2 +-
 arch/xtensa/kernel/syscalls/syscall.tbl |  71 
 drivers/ptp/ptp_clock.c |   2 +-
 fs/aio.c|  10 +-
 fs/select.c |   4 +-
 fs/timerfd.c|   4 +-
 fs/utimes.c |  10 +-
 include/linux/compat.h  | 104 +
 include/linux/posix-clock.h |   2 +-
 include/linux/syscalls.h|  65 ++-
 include/linux/time32.h  |  32 +-
 include/linux/time64.h  |   8 --
 include/linux/timex.h   |   4 +-
 include/uapi/asm-generic/unistd.h   | 103 -
 include/uapi/linux/time.h   |   4 -
 include/uapi/linux/timex.h  |  39 +++
 ipc/mqueue.c

[PATCH 10/11] y2038: rename old time and utime syscalls

2019-01-10 Thread Arnd Bergmann
The time, stime, utime, utimes, and futimesat system calls are only
used on older architectures, and we do not provide y2038 safe variants
of them, as they are replaced by clock_gettime64, clock_settime64,
and utimensat_time64.

However, for consistency it seems better to have the 32-bit architectures
that still use them call the "time32" entry points (leaving the
traditional handlers for the 64-bit architectures), like we do for system
calls that now require two versions.

Note: We used to always define __ARCH_WANT_SYS_TIME and
__ARCH_WANT_SYS_UTIME and only set __ARCH_WANT_COMPAT_SYS_TIME and
__ARCH_WANT_SYS_UTIME32 for compat mode on 64-bit kernels. Now this is
reversed: only 64-bit architectures set __ARCH_WANT_SYS_TIME/UTIME, while
we need __ARCH_WANT_SYS_TIME32/UTIME32 for 32-bit architectures and compat
mode. The resulting asm/unistd.h changes look a bit counterintuitive.

This is only a cleanup patch and it should not change any behavior.

Signed-off-by: Arnd Bergmann 
---
 arch/arm/include/asm/unistd.h   |  4 ++--
 arch/arm/tools/syscall.tbl  | 10 +-
 arch/m68k/include/asm/unistd.h  |  4 ++--
 arch/m68k/kernel/syscalls/syscall.tbl   | 10 +-
 arch/microblaze/include/asm/unistd.h|  4 ++--
 arch/microblaze/kernel/syscalls/syscall.tbl | 10 +-
 arch/mips/include/asm/unistd.h  |  4 ++--
 arch/mips/kernel/syscalls/syscall_o32.tbl   | 10 +-
 arch/parisc/include/asm/unistd.h|  9 ++---
 arch/parisc/kernel/syscalls/syscall.tbl | 15 ++-
 arch/powerpc/include/asm/unistd.h   |  8 
 arch/powerpc/kernel/syscalls/syscall.tbl| 19 ++-
 arch/s390/include/asm/unistd.h  |  2 +-
 arch/sh/include/asm/unistd.h|  4 ++--
 arch/sh/kernel/syscalls/syscall.tbl | 10 +-
 arch/sparc/include/asm/unistd.h |  8 
 arch/sparc/kernel/syscalls/syscall.tbl  | 14 +-
 arch/x86/entry/syscalls/syscall_32.tbl  | 10 +-
 arch/x86/include/asm/unistd.h   |  8 
 arch/xtensa/include/asm/unistd.h|  2 +-
 arch/xtensa/kernel/syscalls/syscall.tbl |  6 +++---
 kernel/time/time.c  |  4 ++--
 22 files changed, 98 insertions(+), 77 deletions(-)

diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h
index d713587dfcf4..7a39e77984ef 100644
--- a/arch/arm/include/asm/unistd.h
+++ b/arch/arm/include/asm/unistd.h
@@ -26,10 +26,10 @@
 #define __ARCH_WANT_SYS_SIGPROCMASK
 #define __ARCH_WANT_SYS_OLD_MMAP
 #define __ARCH_WANT_SYS_OLD_SELECT
-#define __ARCH_WANT_SYS_UTIME
+#define __ARCH_WANT_SYS_UTIME32
 
 #if !defined(CONFIG_AEABI) || defined(CONFIG_OABI_COMPAT)
-#define __ARCH_WANT_SYS_TIME
+#define __ARCH_WANT_SYS_TIME32
 #define __ARCH_WANT_SYS_IPC
 #define __ARCH_WANT_SYS_OLDUMOUNT
 #define __ARCH_WANT_SYS_ALARM
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index 200f4b878a46..a96d9b5ee04e 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -24,7 +24,7 @@
 10 common  unlink  sys_unlink
 11 common  execve  sys_execve
 12 common  chdir   sys_chdir
-13 oabitimesys_time
+13 oabitimesys_time32
 14 common  mknod   sys_mknod
 15 common  chmod   sys_chmod
 16 common  lchown  sys_lchown16
@@ -36,12 +36,12 @@
 22 oabiumount  sys_oldumount
 23 common  setuid  sys_setuid16
 24 common  getuid  sys_getuid16
-25 oabistime   sys_stime
+25 oabistime   sys_stime32
 26 common  ptrace  sys_ptrace
 27 oabialarm   sys_alarm
 # 28 was sys_fstat
 29 common  pause   sys_pause
-30 oabiutime   sys_utime
+30 oabiutime   sys_utime32
 # 31 was sys_stty
 # 32 was sys_gtty
 33 common  access  sys_access
@@ -283,7 +283,7 @@
 266common  statfs64sys_statfs64_wrapper
 267common  fstatfs64   sys_fstatfs64_wrapper
 268common  tgkill  sys_tgkill
-269common  utimes  sys_utimes
+269common  utimes  sys_utimes_time32
 270common  arm_fadvise64_64sys_arm_fadvise64_64
 271common  pciconfig_iobasesys_pciconfig_iobase
 272common  pciconfig_read  sys_pciconfig_read
@@ -340,7 +340,7 @@
 323common  mkdirat sys_mkdirat
 324common  mknodat sys_mknodat
 325common  fchownatsys_fchownat
-326common  futimesat   sys_futimesat
+326common  futimesat   sys_futimesat_time32
 327common 

[PATCH 06/11] timex: change syscalls to use struct __kernel_timex

2019-01-10 Thread Arnd Bergmann
From: Deepa Dinamani 

struct timex is not y2038 safe.
Switch all the syscall apis to use y2038 safe __kernel_timex.

Note that sys_adjtimex() does not have a y2038 safe solution.  C libraries
can implement it by calling clock_adjtime(CLOCK_REALTIME, ...).

Signed-off-by: Deepa Dinamani 
Signed-off-by: Arnd Bergmann 
---
 include/linux/syscalls.h   | 6 +++---
 kernel/time/posix-timers.c | 2 +-
 kernel/time/time.c | 4 +++-
 3 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 8e86d9623d4e..394e8db7e57e 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -54,7 +54,7 @@ struct __sysctl_args;
 struct sysinfo;
 struct timespec;
 struct timeval;
-struct timex;
+struct __kernel_timex;
 struct timezone;
 struct tms;
 struct utimbuf;
@@ -695,7 +695,7 @@ asmlinkage long sys_gettimeofday(struct timeval __user *tv,
struct timezone __user *tz);
 asmlinkage long sys_settimeofday(struct timeval __user *tv,
struct timezone __user *tz);
-asmlinkage long sys_adjtimex(struct timex __user *txc_p);
+asmlinkage long sys_adjtimex(struct __kernel_timex __user *txc_p);
 
 /* kernel/timer.c */
 asmlinkage long sys_getpid(void);
@@ -870,7 +870,7 @@ asmlinkage long sys_open_by_handle_at(int mountdirfd,
  struct file_handle __user *handle,
  int flags);
 asmlinkage long sys_clock_adjtime(clockid_t which_clock,
-   struct timex __user *tx);
+   struct __kernel_timex __user *tx);
 asmlinkage long sys_syncfs(int fd);
 asmlinkage long sys_setns(int fd, int nstype);
 asmlinkage long sys_sendmmsg(int fd, struct mmsghdr __user *msg,
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index 2d84b3db1ade..de79f85ae14f 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -1060,7 +1060,7 @@ int do_clock_adjtime(const clockid_t which_clock, struct 
__kernel_timex * ktx)
 }
 
 SYSCALL_DEFINE2(clock_adjtime, const clockid_t, which_clock,
-   struct timex __user *, utx)
+   struct __kernel_timex __user *, utx)
 {
struct __kernel_timex ktx;
int err;
diff --git a/kernel/time/time.c b/kernel/time/time.c
index d179d33f639a..78b5c8f1495a 100644
--- a/kernel/time/time.c
+++ b/kernel/time/time.c
@@ -263,7 +263,8 @@ COMPAT_SYSCALL_DEFINE2(settimeofday, struct old_timeval32 
__user *, tv,
 }
 #endif
 
-SYSCALL_DEFINE1(adjtimex, struct timex __user *, txc_p)
+#if !defined(CONFIG_64BIT_TIME) || defined(CONFIG_64BIT)
+SYSCALL_DEFINE1(adjtimex, struct __kernel_timex __user *, txc_p)
 {
struct __kernel_timex txc;  /* Local copy of parameter */
int ret;
@@ -277,6 +278,7 @@ SYSCALL_DEFINE1(adjtimex, struct timex __user *, txc_p)
ret = do_adjtimex(&txc);
return copy_to_user(txc_p, &txc, sizeof(struct __kernel_timex)) ? -EFAULT : ret;
 }
+#endif
 
 #ifdef CONFIG_COMPAT_32BIT_TIME
 int get_old_timex32(struct __kernel_timex *txc, const struct old_timex32 
__user *utp)
-- 
2.20.0



[PATCH 11/11] y2038: add 64-bit time_t syscalls to all 32-bit architectures

2019-01-10 Thread Arnd Bergmann
This adds 21 new system calls on each ABI that has 32-bit time_t
today. All of these have the exact same semantics as their existing
counterparts, and the new ones all have macro names that end in 'time64'
for clarification.

This gets us to the point of being able to safely use a C library
that has 64-bit time_t in user space. There are still a couple of
loose ends to tie up in various areas of the code, but this is the
big one, and should be entirely uncontroversial at this point.

In particular, there are four system calls (getitimer, setitimer,
waitid, and getrusage) that don't have a 64-bit counterpart yet,
but these can all be safely implemented in the C library by wrapping
around the existing system calls because the 32-bit time_t they
pass only counts elapsed time, not time since the epoch. They
will be dealt with later.

Signed-off-by: Arnd Bergmann 
---
The one point that still needs to be agreed on is the actual
number assignment. Following the earlier patch that added
the sysv IPC calls with common numbers where possible, I also
tried the same here, using consistent numbers on all 32-bit
architectures.

There are a couple of minor issues with this:

- On asm-generic, we now leave the numbers from 295 to 402
  unassigned, which wastes a small amount of kernel .data
  segment. Originally I had asm-generic start at 300 and
  everyone else start at 400 here, which was also not
  perfect, and we have gone beyond 400 already, so I ended
  up just using the same numbers as the rest here.

- Once we get to 512, we clash with the x32 numbers (unless
  we remove x32 support first), and probably have to skip
  a few more. I also considered using the 512..547 space
  for 32-bit-only calls (which never clash with x32), but
  that also seems to add a bit of complexity.

- On alpha, we have already used up the space up to 527
  (with a small hole between 261 and 299). We could sync
  up with that as well, but my feeling was that alpha syscalls
  are already special enough that I don't care.

Let me know if you have other ideas.
---
 arch/alpha/kernel/syscalls/syscall.tbl  |  2 +
 arch/arm/tools/syscall.tbl  | 21 ++
 arch/arm64/include/asm/unistd.h |  2 +-
 arch/arm64/include/asm/unistd32.h   | 41 +++
 arch/ia64/kernel/syscalls/syscall.tbl   |  1 +
 arch/m68k/kernel/syscalls/syscall.tbl   | 20 +
 arch/microblaze/kernel/syscalls/syscall.tbl | 21 ++
 arch/mips/kernel/syscalls/syscall_n32.tbl   | 21 ++
 arch/mips/kernel/syscalls/syscall_n64.tbl   |  1 +
 arch/mips/kernel/syscalls/syscall_o32.tbl   | 20 +
 arch/parisc/kernel/syscalls/syscall.tbl | 21 ++
 arch/powerpc/kernel/syscalls/syscall.tbl| 20 +
 arch/s390/kernel/syscalls/syscall.tbl   | 20 +
 arch/sh/kernel/syscalls/syscall.tbl | 20 +
 arch/sparc/kernel/syscalls/syscall.tbl  | 20 +
 arch/x86/entry/syscalls/syscall_32.tbl  | 20 +
 arch/xtensa/kernel/syscalls/syscall.tbl | 21 ++
 include/uapi/asm-generic/unistd.h   | 45 -
 scripts/checksyscalls.sh| 40 ++
 19 files changed, 375 insertions(+), 2 deletions(-)

diff --git a/arch/alpha/kernel/syscalls/syscall.tbl 
b/arch/alpha/kernel/syscalls/syscall.tbl
index 25b4a7e76943..04d96d042180 100644
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -456,3 +456,5 @@
 525common  pkey_free   sys_pkey_free
 526common  pkey_mprotect   sys_pkey_mprotect
 527common  rseqsys_rseq
+# all other architectures have common numbers for new syscall, alpha
+# is the exception.
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index a96d9b5ee04e..286afdc43283 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -416,3 +416,24 @@
 399common  io_pgetevents   sys_io_pgetevents_time32
 400common  migrate_pages   sys_migrate_pages
 401common  kexec_file_load sys_kexec_file_load
+# 402 is unused
+403common  clock_gettime64 sys_clock_gettime
+404common  clock_settime64 sys_clock_settime
+405common  clock_adjtime64 sys_clock_adjtime
+406common  clock_getres_time64 sys_clock_getres
+407common  clock_nanosleep_time64  sys_clock_nanosleep
+408common  timer_gettime64 sys_timer_gettime
+409common  timer_settime64 sys_timer_settime
+410common  timerfd_gettime64   sys_timerfd_gettime
+411common  timerfd_settime64   sys_timerfd_settime
+412common  utimensat_time64sys_utimensat
+413common  pselect6_time64 sys_pselect6
+414common  ppoll_time64sys_ppoll
+416common  io_pgetevents_time64

[PATCH 09/11] y2038: remove struct definition redirects

2019-01-10 Thread Arnd Bergmann
We now use 64-bit time_t on all architectures, so the __kernel_timex,
__kernel_timeval and __kernel_timespec redirects can be removed
after having served their purpose.

This makes it all much less confusing, as the __kernel_* types
now always refer to the same layout based on 64-bit time_t across
all 32-bit and 64-bit architectures.

Signed-off-by: Arnd Bergmann 
---
 include/linux/time64.h | 8 
 include/linux/timex.h  | 7 ---
 include/uapi/linux/time.h  | 4 
 include/uapi/linux/timex.h | 2 --
 4 files changed, 21 deletions(-)

diff --git a/include/linux/time64.h b/include/linux/time64.h
index 05634afba0db..f38d382ffec1 100644
--- a/include/linux/time64.h
+++ b/include/linux/time64.h
@@ -7,14 +7,6 @@
 typedef __s64 time64_t;
 typedef __u64 timeu64_t;
 
-/* CONFIG_64BIT_TIME enables new 64 bit time_t syscalls in the compat path
- * and 32-bit emulation.
- */
-#ifndef CONFIG_64BIT_TIME
-#define __kernel_timespec timespec
-#define __kernel_itimerspec itimerspec
-#endif
-
 #include 
 
 struct timespec64 {
diff --git a/include/linux/timex.h b/include/linux/timex.h
index 4aff9f0d1367..ce0859763670 100644
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -53,13 +53,6 @@
 #ifndef _LINUX_TIMEX_H
 #define _LINUX_TIMEX_H
 
-/* CONFIG_64BIT_TIME enables new 64 bit time_t syscalls in the compat path
- * and 32-bit emulation.
- */
-#ifndef CONFIG_64BIT_TIME
-#define __kernel_timex timex
-#endif
-
 #include 
 
 #define ADJ_ADJTIME0x8000  /* switch between adjtime/adjtimex 
modes */
diff --git a/include/uapi/linux/time.h b/include/uapi/linux/time.h
index 6b56a2208be7..b03f8717c312 100644
--- a/include/uapi/linux/time.h
+++ b/include/uapi/linux/time.h
@@ -42,19 +42,15 @@ struct itimerval {
struct timeval it_value;/* current value */
 };
 
-#ifndef __kernel_timespec
 struct __kernel_timespec {
__kernel_time64_t   tv_sec; /* seconds */
long long   tv_nsec;/* nanoseconds */
 };
-#endif
 
-#ifndef __kernel_itimerspec
 struct __kernel_itimerspec {
struct __kernel_timespec it_interval;/* timer period */
struct __kernel_timespec it_value;   /* timer expiration */
 };
-#endif
 
 /*
  * legacy timeval structure, only embedded in structures that
diff --git a/include/uapi/linux/timex.h b/include/uapi/linux/timex.h
index a1c6b73016a5..9f517f9010bb 100644
--- a/include/uapi/linux/timex.h
+++ b/include/uapi/linux/timex.h
@@ -97,7 +97,6 @@ struct __kernel_timex_timeval {
long long   tv_usec;
 };
 
-#ifndef __kernel_timex
 struct __kernel_timex {
unsigned int modes; /* mode selector */
int :32;/* pad */
@@ -131,7 +130,6 @@ struct __kernel_timex {
int  :32; int  :32; int  :32; int  :32;
int  :32; int  :32; int  :32;
 };
-#endif
 
 /*
  * Mode codes (timex.mode)
-- 
2.20.0



[PATCH 08/11] y2038: use time32 syscall names on 32-bit

2019-01-10 Thread Arnd Bergmann
This is the big flip, where all 32-bit architectures set COMPAT_32BIT_TIME
and use the _time32 system calls from the former compat layer instead
of the system calls that take __kernel_timespec and similar arguments.

The temporary redirects for __kernel_timespec, __kernel_itimerspec
and __kernel_timex can get removed with this.

It would be easy to split this commit by architecture, but with the new
generated system call tables, it's easy enough to do it all at once,
which makes it a little easier to check that the changes are the same
in each table.

Signed-off-by: Arnd Bergmann 
---
 arch/Kconfig|  2 +-
 arch/arm/kernel/sys_oabi-compat.c   |  8 +-
 arch/arm/tools/syscall.tbl  | 46 ++--
 arch/m68k/kernel/syscalls/syscall.tbl   | 42 +--
 arch/microblaze/kernel/syscalls/syscall.tbl | 46 ++--
 arch/mips/kernel/syscalls/syscall_o32.tbl   | 44 +--
 arch/parisc/kernel/syscalls/syscall.tbl | 69 +++--
 arch/powerpc/kernel/syscalls/syscall.tbl| 82 +++--
 arch/sh/kernel/syscalls/syscall.tbl | 42 +--
 arch/sparc/kernel/syscalls/syscall.tbl  | 64 ++--
 arch/x86/entry/syscalls/syscall_32.tbl  | 44 +--
 arch/xtensa/kernel/syscalls/syscall.tbl | 44 +--
 include/uapi/asm-generic/unistd.h   | 56 +++---
 13 files changed, 335 insertions(+), 254 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 4cfb6de48f79..46db715a7f42 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -759,7 +759,7 @@ config 64BIT_TIME
  handling.
 
 config COMPAT_32BIT_TIME
-   def_bool (!64BIT && 64BIT_TIME) || COMPAT
+   def_bool !64BIT || COMPAT
help
  This enables 32 bit time_t support in addition to 64 bit time_t 
support.
  This is relevant on all 32-bit architectures, and 64-bit architectures
diff --git a/arch/arm/kernel/sys_oabi-compat.c 
b/arch/arm/kernel/sys_oabi-compat.c
index 92ab36f38795..acd054a42ba2 100644
--- a/arch/arm/kernel/sys_oabi-compat.c
+++ b/arch/arm/kernel/sys_oabi-compat.c
@@ -317,10 +317,10 @@ struct oabi_sembuf {
 asmlinkage long sys_oabi_semtimedop(int semid,
struct oabi_sembuf __user *tsops,
unsigned nsops,
-   const struct timespec __user *timeout)
+   const struct old_timespec32 __user *timeout)
 {
struct sembuf *sops;
-   struct timespec local_timeout;
+   struct old_timespec32 local_timeout;
long err;
int i;
 
@@ -350,7 +350,7 @@ asmlinkage long sys_oabi_semtimedop(int semid,
} else {
mm_segment_t fs = get_fs();
set_fs(KERNEL_DS);
-   err = sys_semtimedop(semid, sops, nsops, timeout);
+   err = sys_semtimedop_time32(semid, sops, nsops, timeout);
set_fs(fs);
}
kfree(sops);
@@ -375,7 +375,7 @@ asmlinkage int sys_oabi_ipc(uint call, int first, int 
second, int third,
return  sys_oabi_semtimedop(first,
(struct oabi_sembuf __user *)ptr,
second,
-   (const struct timespec __user 
*)fifth);
+   (const struct old_timespec32 __user 
*)fifth);
default:
return sys_ipc(call, first, second, third, ptr, fifth);
}
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index b54b7f2bc24a..200f4b878a46 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -137,7 +137,7 @@
 121common  setdomainname   sys_setdomainname
 122common  uname   sys_newuname
 # 123 was sys_modify_ldt
-124common  adjtimexsys_adjtimex
+124common  adjtimexsys_adjtimex_time32
 125common  mprotectsys_mprotect
 126common  sigprocmask sys_sigprocmask
 # 127 was sys_create_module
@@ -174,8 +174,8 @@
 158common  sched_yield sys_sched_yield
 159common  sched_get_priority_max  sys_sched_get_priority_max
 160common  sched_get_priority_min  sys_sched_get_priority_min
-161common  sched_rr_get_interval   sys_sched_rr_get_interval
-162common  nanosleep   sys_nanosleep
+161common  sched_rr_get_interval   sys_sched_rr_get_interval_time32
+162common  nanosleep   sys_nanosleep_time32
 163common  mremap  sys_mremap
 164common  setresuid   sys_setresuid16
 165common  getresuid   sys_getresuid16
@@ -190,7 +190,7 @@
 174common  rt_sigactionsys_rt_sigaction
 175common  rt_sigprocmask  sys_rt_sigprocmask
 176common  rt_sigpending   sys_rt_sigpending
-177  

[PATCH 02/11] time: Add struct __kernel_timex

2019-01-10 Thread Arnd Bergmann
From: Deepa Dinamani 

struct timex uses struct timeval internally.
struct timeval is not y2038 safe.
Introduce a new UAPI type struct __kernel_timex
that is y2038 safe.

struct __kernel_timex uses a timeval type that is
similar to struct __kernel_timespec which preserves the
same structure size across 32 bit and 64 bit ABIs.
struct __kernel_timex also restructures other members of the
structure to make the structure the same on 64 bit and 32 bit
architectures.
Note that struct __kernel_timex is the same as struct timex
on a 64 bit architecture.

The above solution is similar to other new y2038 syscalls
that are being introduced: both 32 bit and 64 bit ABIs
have a common entry, and the compat entry supports the old 32 bit
syscall interface.

Alternatives considered were:
1. Add new time type to struct timex that makes use of padded
   bits. This time type could be based on the struct __kernel_timespec.
   modes will use a flag to notify which time structure should be
   used internally.
   This needs some application level changes on both 64 bit and 32 bit
   architectures, although 64 bit machines could continue to use the
   older timeval structure without any changes.

2. Add a new u8 type to struct timex that makes use of padded bits. This
   can be used to save higher order tv_sec bits. modes will use a flag to
   notify presence of such a type.
   This will need some application level changes on 32 bit architectures.

3. Add a new compat_timex structure that differs in only the size of the
   time type; keep rest of struct timex the same.
   This requires extra syscalls to manage all 3 cases on 64 bit
   architectures. This will not need any application level changes but will
   add more complexity from kernel side.

Signed-off-by: Deepa Dinamani 
---
 include/linux/timex.h  |  7 +++
 include/uapi/linux/timex.h | 41 ++
 2 files changed, 48 insertions(+)

diff --git a/include/linux/timex.h b/include/linux/timex.h
index 39c25dbebfe8..7f40e9e42ecc 100644
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -53,6 +53,13 @@
 #ifndef _LINUX_TIMEX_H
 #define _LINUX_TIMEX_H
 
+/* CONFIG_64BIT_TIME enables new 64 bit time_t syscalls in the compat path
+ * and 32-bit emulation.
+ */
+#ifndef CONFIG_64BIT_TIME
+#define __kernel_timex timex
+#endif
+
 #include 
 
 #define ADJ_ADJTIME0x8000  /* switch between adjtime/adjtimex 
modes */
diff --git a/include/uapi/linux/timex.h b/include/uapi/linux/timex.h
index 92685d826444..a1c6b73016a5 100644
--- a/include/uapi/linux/timex.h
+++ b/include/uapi/linux/timex.h
@@ -92,6 +92,47 @@ struct timex {
int  :32; int  :32; int  :32;
 };
 
+struct __kernel_timex_timeval {
+   __kernel_time64_t   tv_sec;
+   long long   tv_usec;
+};
+
+#ifndef __kernel_timex
+struct __kernel_timex {
+   unsigned int modes; /* mode selector */
+   int :32;/* pad */
+   long long offset;   /* time offset (usec) */
+   long long freq; /* frequency offset (scaled ppm) */
+   long long maxerror;/* maximum error (usec) */
+   long long esterror;/* estimated error (usec) */
+   int status; /* clock command/status */
+   int :32;/* pad */
+   long long constant;/* pll time constant */
+   long long precision;/* clock precision (usec) (read only) */
+   long long tolerance;/* clock frequency tolerance (ppm)
+  * (read only)
+  */
+   struct __kernel_timex_timeval time; /* (read only, except for 
ADJ_SETOFFSET) */
+   long long tick; /* (modified) usecs between clock ticks */
+
+   long long ppsfreq;/* pps frequency (scaled ppm) (ro) */
+   long long jitter; /* pps jitter (us) (ro) */
+   int shift;  /* interval duration (s) (shift) (ro) */
+   int :32;/* pad */
+   long long stabil;/* pps stability (scaled ppm) (ro) */
+   long long jitcnt; /* jitter limit exceeded (ro) */
+   long long calcnt; /* calibration intervals (ro) */
+   long long errcnt; /* calibration errors (ro) */
+   long long stbcnt; /* stability limit exceeded (ro) */
+
+   int tai;/* TAI offset (ro) */
+
+   int  :32; int  :32; int  :32; int  :32;
+   int  :32; int  :32; int  :32; int  :32;
+   int  :32; int  :32; int  :32;
+};
+#endif
+
 /*
  * Mode codes (timex.mode)
  */
-- 
2.20.0



[PATCH 03/11] time: fix sys_timer_settime prototype

2019-01-10 Thread Arnd Bergmann
A small typo has crept into the y2038 conversion of the timer_settime
system call. So far this was completely harmless, but once we start
using the new version, this has to be fixed.

Fixes: 6ff847350702 ("time: Change types to new y2038 safe __kernel_itimerspec")
Signed-off-by: Arnd Bergmann 
---
 include/linux/syscalls.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 0296772e8fe5..8e86d9623d4e 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -591,7 +591,7 @@ asmlinkage long sys_timer_gettime(timer_t timer_id,
 asmlinkage long sys_timer_getoverrun(timer_t timer_id);
 asmlinkage long sys_timer_settime(timer_t timer_id, int flags,
const struct __kernel_itimerspec __user 
*new_setting,
-   struct itimerspec __user *old_setting);
+   struct __kernel_itimerspec __user *old_setting);
 asmlinkage long sys_timer_delete(timer_t timer_id);
 asmlinkage long sys_clock_settime(clockid_t which_clock,
const struct __kernel_timespec __user *tp);
-- 
2.20.0



[PATCH 04/11] sparc64: add custom adjtimex/clock_adjtime functions

2019-01-10 Thread Arnd Bergmann
sparc64 is the only architecture on Linux that has a 'timeval'
definition with a 32-bit tv_usec but a 64-bit tv_sec. This causes
problems for sparc32 compat mode when we convert it to use the
new __kernel_timex type that has the same layout as all other
64-bit architectures.

To avoid adding sparc64 specific code into the generic adjtimex
implementation, this adds a wrapper in the sparc64 system call handling
that converts the sparc64 'timex' into the new '__kernel_timex'.

At this point, the two structures are defined to be identical,
but that will change in the next step once we convert sparc32.

Signed-off-by: Arnd Bergmann 
---
 arch/sparc/kernel/sys_sparc_64.c   | 59 +-
 arch/sparc/kernel/syscalls/syscall.tbl |  6 ++-
 include/linux/timex.h  |  2 +
 kernel/time/posix-timers.c | 24 +--
 4 files changed, 76 insertions(+), 15 deletions(-)

diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c
index 1c079e7bab09..37de18a11207 100644
--- a/arch/sparc/kernel/sys_sparc_64.c
+++ b/arch/sparc/kernel/sys_sparc_64.c
@@ -28,8 +28,9 @@
 #include 
 #include 
 #include 
-
+#include 
 #include 
+
 #include 
 #include 
 
@@ -544,6 +545,62 @@ SYSCALL_DEFINE2(getdomainname, char __user *, name, int, 
len)
return err;
 }
 
+SYSCALL_DEFINE1(sparc_adjtimex, struct timex __user *, txc_p)
+{
+   struct timex txc;   /* Local copy of parameter */
+   struct timex *kt = (void *)&txc;
+   int ret;
+
+   /* Copy the user data space into the kernel copy
+* structure. But bear in mind that the structures
+* may change
+*/
+   if (copy_from_user(&txc, txc_p, sizeof(struct timex)))
+   return -EFAULT;
+
+   /*
+* override for sparc64 specific timeval type: tv_usec
+* is 32 bit wide instead of 64-bit in __kernel_timex
+*/
+   kt->time.tv_usec = txc.time.tv_usec;
+   ret = do_adjtimex(kt);
+   txc.time.tv_usec = kt->time.tv_usec;
+
+   return copy_to_user(txc_p, &txc, sizeof(struct timex)) ? -EFAULT : ret;
+}
+
+SYSCALL_DEFINE2(sparc_clock_adjtime, const clockid_t, which_clock,struct timex 
__user *, txc_p)
+{
+   struct timex txc;   /* Local copy of parameter */
+   struct timex *kt = (void *)&txc;
+   int ret;
+
+   if (!IS_ENABLED(CONFIG_POSIX_TIMERS)) {
+   pr_err_once("process %d (%s) attempted a POSIX timer syscall "
+   "while CONFIG_POSIX_TIMERS is not set\n",
+   current->pid, current->comm);
+
+   return -ENOSYS;
+   }
+
+   /* Copy the user data space into the kernel copy
+* structure. But bear in mind that the structures
+* may change
+*/
+   if (copy_from_user(&txc, txc_p, sizeof(struct timex)))
+   return -EFAULT;
+
+   /*
+* override for sparc64 specific timeval type: tv_usec
+* is 32 bit wide instead of 64-bit in __kernel_timex
+*/
+   kt->time.tv_usec = txc.time.tv_usec;
+   ret = do_clock_adjtime(which_clock, kt);
+   txc.time.tv_usec = kt->time.tv_usec;
+
+   return copy_to_user(txc_p, &txc, sizeof(struct timex)) ? -EFAULT : ret;
+}
+
 SYSCALL_DEFINE5(utrap_install, utrap_entry_t, type,
utrap_handler_t, new_p, utrap_handler_t, new_d,
utrap_handler_t __user *, old_p,
diff --git a/arch/sparc/kernel/syscalls/syscall.tbl 
b/arch/sparc/kernel/syscalls/syscall.tbl
index 24ebef675184..e70110375399 100644
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -258,7 +258,8 @@
 21664  sigreturn   sys_nis_syscall
 217common  clone   sys_clone
 218common  ioprio_get  sys_ioprio_get
-219common  adjtimexsys_adjtimex
compat_sys_adjtimex
+21932  adjtimexsys_adjtimex
compat_sys_adjtimex
+21964  adjtimexsys_sparc_adjtimex
 22032  sigprocmask sys_sigprocmask 
compat_sys_sigprocmask
 22064  sigprocmask sys_nis_syscall
 221common  create_module   sys_ni_syscall
@@ -377,7 +378,8 @@
 331common  prlimit64   sys_prlimit64
 332common  name_to_handle_at   sys_name_to_handle_at
 333common  open_by_handle_at   sys_open_by_handle_at   
compat_sys_open_by_handle_at
-334common  clock_adjtime   sys_clock_adjtime   
compat_sys_clock_adjtime
+33432  clock_adjtime   sys_clock_adjtime   
compat_sys_clock_adjtime
+33464  clock_adjtime   sys_sparc_clock_adjtime
 335common  syncfs  sys_syncfs
 336common  sendmmsgsys_sendmmsg
compat_sys_sendmmsg
 337common  setns   sys_setns
diff -

[PATCH 01/11] time: make adjtime compat handling available for 32 bit

2019-01-10 Thread Arnd Bergmann
We want to reuse the compat_timex handling on 32-bit architectures the
same way we are using the compat handling for timespec when moving to
64-bit time_t.

Move all definitions related to compat_timex out of the compat code
into the normal timekeeping code, along with a rename to old_timex32,
corresponding to the timespec/timeval structures, and make it controlled
by CONFIG_COMPAT_32BIT_TIME, which 32-bit architectures will then select.

Signed-off-by: Arnd Bergmann 
---
 include/linux/compat.h | 35 ++-
 include/linux/time32.h | 32 -
 kernel/compat.c| 64 --
 kernel/time/posix-timers.c | 14 ++--
 kernel/time/time.c | 70 +++---
 5 files changed, 102 insertions(+), 113 deletions(-)

diff --git a/include/linux/compat.h b/include/linux/compat.h
index 056be0d03722..657ca6abd855 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -132,37 +132,6 @@ struct compat_tms {
compat_clock_t  tms_cstime;
 };
 
-struct compat_timex {
-   compat_uint_t modes;
-   compat_long_t offset;
-   compat_long_t freq;
-   compat_long_t maxerror;
-   compat_long_t esterror;
-   compat_int_t status;
-   compat_long_t constant;
-   compat_long_t precision;
-   compat_long_t tolerance;
-   struct old_timeval32 time;
-   compat_long_t tick;
-   compat_long_t ppsfreq;
-   compat_long_t jitter;
-   compat_int_t shift;
-   compat_long_t stabil;
-   compat_long_t jitcnt;
-   compat_long_t calcnt;
-   compat_long_t errcnt;
-   compat_long_t stbcnt;
-   compat_int_t tai;
-
-   compat_int_t:32; compat_int_t:32; compat_int_t:32; compat_int_t:32;
-   compat_int_t:32; compat_int_t:32; compat_int_t:32; compat_int_t:32;
-   compat_int_t:32; compat_int_t:32; compat_int_t:32;
-};
-
-struct timex;
-int compat_get_timex(struct timex *, const struct compat_timex __user *);
-int compat_put_timex(struct compat_timex __user *, const struct timex *);
-
 #define _COMPAT_NSIG_WORDS (_COMPAT_NSIG / _COMPAT_NSIG_BPW)
 
 typedef struct {
@@ -808,7 +777,7 @@ asmlinkage long compat_sys_gettimeofday(struct 
old_timeval32 __user *tv,
struct timezone __user *tz);
 asmlinkage long compat_sys_settimeofday(struct old_timeval32 __user *tv,
struct timezone __user *tz);
-asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp);
+asmlinkage long compat_sys_adjtimex(struct old_timex32 __user *utp);
 
 /* kernel/timer.c */
 asmlinkage long compat_sys_sysinfo(struct compat_sysinfo __user *info);
@@ -911,7 +880,7 @@ asmlinkage long compat_sys_open_by_handle_at(int mountdirfd,
 struct file_handle __user *handle,
 int flags);
 asmlinkage long compat_sys_clock_adjtime(clockid_t which_clock,
-struct compat_timex __user *tp);
+struct old_timex32 __user *tp);
 asmlinkage long compat_sys_sendmmsg(int fd, struct compat_mmsghdr __user *mmsg,
unsigned vlen, unsigned int flags);
 asmlinkage ssize_t compat_sys_process_vm_readv(compat_pid_t pid,
diff --git a/include/linux/time32.h b/include/linux/time32.h
index 118b9977080c..820a22e2b98b 100644
--- a/include/linux/time32.h
+++ b/include/linux/time32.h
@@ -10,6 +10,7 @@
  */
 
 #include 
+#include 
 
 #define TIME_T_MAX (time_t)((1UL << ((sizeof(time_t) << 3) - 1)) - 1)
 
@@ -35,13 +36,42 @@ struct old_utimbuf32 {
old_time32_tmodtime;
 };
 
+struct old_timex32 {
+   u32 modes;
+   s32 offset;
+   s32 freq;
+   s32 maxerror;
+   s32 esterror;
+   s32 status;
+   s32 constant;
+   s32 precision;
+   s32 tolerance;
+   struct old_timeval32 time;
+   s32 tick;
+   s32 ppsfreq;
+   s32 jitter;
+   s32 shift;
+   s32 stabil;
+   s32 jitcnt;
+   s32 calcnt;
+   s32 errcnt;
+   s32 stbcnt;
+   s32 tai;
+
+   s32:32; s32:32; s32:32; s32:32;
+   s32:32; s32:32; s32:32; s32:32;
+   s32:32; s32:32; s32:32;
+};
+
 extern int get_old_timespec32(struct timespec64 *, const void __user *);
 extern int put_old_timespec32(const struct timespec64 *, void __user *);
 extern int get_old_itimerspec32(struct itimerspec64 *its,
const struct old_itimerspec32 __user *uits);
 extern int put_old_itimerspec32(const struct itimerspec64 *its,
struct old_itimerspec32 __user *uits);
-
+struct timex;
+int get_old_timex32(struct timex *, const struct old_timex32 __user *);
+int put_old_timex32(struct old_timex32 __user *, const struct timex *);
 
 #if __BITS_PER_LONG == 64
 
diff --git a/kernel/compat.c b/kernel/compat.c
index f01affa17e22..d8a36c6ad7c9 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -20,7 +20,6 

[PATCH 05/11] timex: use __kernel_timex internally

2019-01-10 Thread Arnd Bergmann
From: Deepa Dinamani 

struct timex is not y2038 safe.
Replace all uses of timex with y2038 safe __kernel_timex.

Note that struct __kernel_timex is an ABI interface definition.
We could define a new structure based on __kernel_timex that
is only available internally instead. Right now, there isn't
a strong motivation for this as the structure is isolated to
a few defined struct timex interfaces and such a structure would
be exactly the same as struct timex.

The patch was generated by the following coccinelle script:

virtual patch

@depends on patch forall@
identifier ts;
expression e;
@@
(
- struct timex ts;
+ struct __kernel_timex ts;
|
- struct timex ts = {};
+ struct __kernel_timex ts = {};
|
- struct timex ts = e;
+ struct __kernel_timex ts = e;
|
- struct timex *ts;
+ struct __kernel_timex *ts;
|
(memset \| copy_from_user \| copy_to_user \)(...,
- sizeof(struct timex))
+ sizeof(struct __kernel_timex))
)

@depends on patch forall@
identifier ts;
identifier fn;
@@
fn(...,
- struct timex *ts,
+ struct __kernel_timex *ts,
...) {
...
}

@depends on patch forall@
identifier ts;
identifier fn;
@@
fn(...,
- struct timex *ts) {
+ struct __kernel_timex *ts) {
...
}

Signed-off-by: Deepa Dinamani 
Cc: linux-alpha@vger.kernel.org
Cc: net...@vger.kernel.org
---
 arch/alpha/kernel/osf_sys.c  |  5 +++--
 arch/sparc/kernel/sys_sparc_64.c |  4 ++--
 drivers/ptp/ptp_clock.c  |  2 +-
 include/linux/posix-clock.h  |  2 +-
 include/linux/time32.h   |  6 +++---
 include/linux/timex.h|  4 ++--
 kernel/time/ntp.c| 18 ++
 kernel/time/ntp_internal.h   |  2 +-
 kernel/time/posix-clock.c|  2 +-
 kernel/time/posix-timers.c   |  8 
 kernel/time/posix-timers.h   |  2 +-
 kernel/time/time.c   | 14 +++---
 kernel/time/timekeeping.c|  4 ++--
 13 files changed, 38 insertions(+), 35 deletions(-)

diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index 792586038808..bf497b8b0ec6 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -1253,7 +1253,7 @@ struct timex32 {
 
 SYSCALL_DEFINE1(old_adjtimex, struct timex32 __user *, txc_p)
 {
-struct timex txc;
+   struct __kernel_timex txc;
int ret;
 
/* copy relevant bits of struct timex. */
@@ -1270,7 +1270,8 @@ SYSCALL_DEFINE1(old_adjtimex, struct timex32 __user *, 
txc_p)
if (copy_to_user(txc_p, &txc, offsetof(struct timex32, time)) ||
(copy_to_user(&txc_p->tick, &txc.tick, sizeof(struct timex32) - 
  offsetof(struct timex32, tick))) ||
-   (put_tv_to_tv32(&txc_p->time, &txc.time)))
+   (put_user(txc.time.tv_sec, &txc_p->time.tv_sec)) ||
+   (put_user(txc.time.tv_usec, &txc_p->time.tv_usec)))
  return -EFAULT;
 
return ret;
diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c
index 37de18a11207..9825ca6a6020 100644
--- a/arch/sparc/kernel/sys_sparc_64.c
+++ b/arch/sparc/kernel/sys_sparc_64.c
@@ -548,7 +548,7 @@ SYSCALL_DEFINE2(getdomainname, char __user *, name, int, 
len)
 SYSCALL_DEFINE1(sparc_adjtimex, struct timex __user *, txc_p)
 {
struct timex txc;   /* Local copy of parameter */
-   struct timex *kt = (void *)&txc;
+   struct __kernel_timex *kt = (void *)&txc;
int ret;
 
/* Copy the user data space into the kernel copy
@@ -572,7 +572,7 @@ SYSCALL_DEFINE1(sparc_adjtimex, struct timex __user *, 
txc_p)
 SYSCALL_DEFINE2(sparc_clock_adjtime, const clockid_t, which_clock,struct timex 
__user *, txc_p)
 {
struct timex txc;   /* Local copy of parameter */
-   struct timex *kt = (void *)&txc;
+   struct __kernel_timex *kt = (void *)&txc;
int ret;
 
if (!IS_ENABLED(CONFIG_POSIX_TIMERS)) {
diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c
index 48f3594a7458..79bd102c9bbc 100644
--- a/drivers/ptp/ptp_clock.c
+++ b/drivers/ptp/ptp_clock.c
@@ -124,7 +124,7 @@ static int ptp_clock_gettime(struct posix_clock *pc, struct 
timespec64 *tp)
return err;
 }
 
-static int ptp_clock_adjtime(struct posix_clock *pc, struct timex *tx)
+static int ptp_clock_adjtime(struct posix_clock *pc, struct __kernel_timex *tx)
 {
struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock);
struct ptp_clock_info *ops;
diff --git a/include/linux/posix-clock.h b/include/linux/posix-clock.h
index 3a3bc71017d5..18674d7d5b1c 100644
--- a/include/linux/posix-clock.h
+++ b/include/linux/posix-clock.h
@@ -51,7 +51,7 @@ struct posix_clock;
 struct posix_clock_operations {
struct module *owner;
 
-   int  (*clock_adjtime)(struct posix_clock *pc, struct timex *tx);
+   int  (*clock_adjtime)(struct posix_clock *pc, struct __kernel_timex 
*tx);
 
int  (*clock_gettime)(struct posix_clock *pc, struct timespec64 *ts);
 
diff --git a/include/linux/time32.h b/include/linux/time32.h
index 820a22e2b98b..0a1f302a1753 100644
--- 

Re: [PATCH 07/15] ARM: add kexec_file_load system call number

2019-01-10 Thread Arnd Bergmann
On Thu, Jan 10, 2019 at 5:39 PM Will Deacon  wrote:
>
> > diff --git a/arch/arm64/include/asm/unistd32.h 
> > b/arch/arm64/include/asm/unistd32.h
> > index 355fe2bc035b..19f3f58b6146 100644
> > --- a/arch/arm64/include/asm/unistd32.h
> > +++ b/arch/arm64/include/asm/unistd32.h
> > @@ -823,6 +823,8 @@ __SYSCALL(__NR_rseq, sys_rseq)
> >  __SYSCALL(__NR_io_pgetevents, compat_sys_io_pgetevents)
> >  #define __NR_migrate_pages 400
> >  __SYSCALL(__NR_migrate_pages, sys_migrate_pages)
> > +#define __NR_kexec_file_load 401
> > +__SYSCALL(__NR_kexec_file_load, sys_kexec_file_load)
>
> Hmm, I wonder if we need a compat wrapper for this, or are we assuming
> that the early entry code has already zero-extended the long and pointer
> arguments?

Yes, that is generally the assumption for compat syscalls.

s390 needs some extra magic to do a 31-to-64 extension on pointer
arguments, and I think sometimes we need a special wrapper to
do sign-extension of 32-bit arguments into 64-bit, but the arguments
here should not need that.

 Arnd


Re: [PATCH 06/15] ARM: add migrate_pages() system call

2019-01-10 Thread Arnd Bergmann
On Thu, Jan 10, 2019 at 5:32 PM Will Deacon  wrote:

> > diff --git a/arch/arm64/include/asm/unistd32.h 
> > b/arch/arm64/include/asm/unistd32.h
> > index 04ee190b90fe..355fe2bc035b 100644
> > --- a/arch/arm64/include/asm/unistd32.h
> > +++ b/arch/arm64/include/asm/unistd32.h
> > @@ -821,6 +821,8 @@ __SYSCALL(__NR_statx, sys_statx)
> >  __SYSCALL(__NR_rseq, sys_rseq)
> >  #define __NR_io_pgetevents 399
> >  __SYSCALL(__NR_io_pgetevents, compat_sys_io_pgetevents)
> > +#define __NR_migrate_pages 400
> > +__SYSCALL(__NR_migrate_pages, sys_migrate_pages)
>
> Should be compat_sys_migrate_pages instead?

Yes, good catch! Fixed now.

Thanks,

Arnd


Re: [PATCH 00/15] arch: synchronize syscall tables in preparation for y2038

2019-01-10 Thread Arnd Bergmann
On Thu, Jan 10, 2019 at 5:59 PM Geert Uytterhoeven  wrote:
>
> Hi Arnd,
>
> On Thu, Jan 10, 2019 at 5:26 PM Arnd Bergmann  wrote:
> > The system call tables have diverged a bit over the years, and a number
> > of the recent additions never made it into all architectures, for one
> > reason or another.
> >
> > This is an attempt to clean it up as far as we can without breaking
> > compatibility, doing a number of steps:
>
> Thanks a lot!
>
> > - Add system calls that have not yet been integrated into all
> >   architectures but that we definitely want there.
>
> It looks like you missed wiring up io_pgetevents() on m68k.
> Is that intentional?

Yes, I thought I had described that somewhere but maybe I
forgot: semtimedop() and io_pgetevents() get replaced with
time64 versions in the follow-up, so I only added them in
64-bit architectures. If you think we should have both
io_pgetevents() and io_pgetevents_time32() on all 32-bit
architectures, I can add that as well.

  Arnd


[PATCH 11/15] mips: fix n32 compat_ipc_parse_version

2019-01-10 Thread Arnd Bergmann
While reading through the sysvipc implementation, I noticed that the n32
semctl/shmctl/msgctl system calls behave differently based on whether
o32 support is enabled or not: Without o32, the IPC_64 flag passed by
user space is rejected but calls without that flag get IPC_64 behavior.

As far as I can tell, this was inadvertently changed by a cleanup patch
but never noticed by anyone; possibly nobody has tried using sysvipc
on n32 after linux-3.19.

Change it back to the old behavior now.

Fixes: 78aaf956ba3a ("MIPS: Compat: Fix build error if CONFIG_MIPS32_COMPAT but 
no compat ABI.")
Cc: sta...@vger.kernel.org
Signed-off-by: Arnd Bergmann 
---
As stated above, this was only found by inspection, the patch is not
tested. Please review accordingly.
---
 arch/mips/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 787290781b8c..0d14f51d0002 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -3155,6 +3155,7 @@ config MIPS32_O32
 config MIPS32_N32
bool "Kernel support for n32 binaries"
depends on 64BIT
+   select ARCH_WANT_COMPAT_IPC_PARSE_VERSION
select COMPAT
select MIPS32_COMPAT
select SYSVIPC_COMPAT if SYSVIPC
-- 
2.20.0



[PATCH 12/15] sparc64: fix sparc_ipc type conversion

2019-01-10 Thread Arnd Bergmann
__kernel_timespec and timespec are currently the same type, but once
they are different, the type cast has to be changed here.

Signed-off-by: Arnd Bergmann 
---
 arch/sparc/kernel/sys_sparc_64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c
index 274ed0b9b3e0..1c079e7bab09 100644
--- a/arch/sparc/kernel/sys_sparc_64.c
+++ b/arch/sparc/kernel/sys_sparc_64.c
@@ -344,7 +344,7 @@ SYSCALL_DEFINE6(sparc_ipc, unsigned int, call, int, first, 
unsigned long, second
goto out;
case SEMTIMEDOP:
err = sys_semtimedop(first, ptr, (unsigned int)second,
-   (const struct timespec __user *)
+   (const struct __kernel_timespec __user *)
 (unsigned long) fifth);
goto out;
case SEMGET:
-- 
2.20.0



[PATCH 05/15] alpha: update syscall macro definitions

2019-01-10 Thread Arnd Bergmann
Other architectures commonly use __NR_umount2 for sys_umount,
only ia64 and alpha use __NR_umount here. In order to synchronize
the generated tables, use umount2 like everyone else, and add back
the old name from asm/unistd.h for compatibility.

For shmat, alpha uses the osf_shmat name, we can do the same thing
here, which means we don't have to add an entry in the __IGNORE
list now that shmat is mandatory everywhere.

alarm, creat, pause, time, and utime are optional everywhere
these days, no need to list them here any more.

I considered also adding the regular versions of the get*id system
calls that have different names and calling conventions on alpha,
which would further help unify the syscall ABI, but for now
I decided against that.

Signed-off-by: Arnd Bergmann 
---
 arch/alpha/include/asm/unistd.h| 6 --
 arch/alpha/include/uapi/asm/unistd.h   | 5 +
 arch/alpha/kernel/syscalls/syscall.tbl | 4 ++--
 3 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/arch/alpha/include/asm/unistd.h b/arch/alpha/include/asm/unistd.h
index 21b706a5b772..564ba87bdc38 100644
--- a/arch/alpha/include/asm/unistd.h
+++ b/arch/alpha/include/asm/unistd.h
@@ -22,18 +22,12 @@
 /*
  * Ignore legacy syscalls that we don't use.
  */
-#define __IGNORE_alarm
-#define __IGNORE_creat
 #define __IGNORE_getegid
 #define __IGNORE_geteuid
 #define __IGNORE_getgid
 #define __IGNORE_getpid
 #define __IGNORE_getppid
 #define __IGNORE_getuid
-#define __IGNORE_pause
-#define __IGNORE_time
-#define __IGNORE_utime
-#define __IGNORE_umount2
 
 /* Alpha doesn't have protection keys. */
 #define __IGNORE_pkey_mprotect
diff --git a/arch/alpha/include/uapi/asm/unistd.h 
b/arch/alpha/include/uapi/asm/unistd.h
index 9ba724f116f1..4507071f995f 100644
--- a/arch/alpha/include/uapi/asm/unistd.h
+++ b/arch/alpha/include/uapi/asm/unistd.h
@@ -2,6 +2,11 @@
 #ifndef _UAPI_ALPHA_UNISTD_H
 #define _UAPI_ALPHA_UNISTD_H
 
+/* These are traditionally the names linux-alpha uses for
+ * the two otherwise generic system calls */
+#define __NR_umount__NR_umount2
+#define __NR_osf_shmat __NR_shmat
+
 #include 
 
 #endif /* _UAPI_ALPHA_UNISTD_H */
diff --git a/arch/alpha/kernel/syscalls/syscall.tbl 
b/arch/alpha/kernel/syscalls/syscall.tbl
index e09558edae73..f920b65e8c49 100644
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -29,7 +29,7 @@
 19 common  lseek   sys_lseek
 20 common  getxpid sys_getxpid
 21 common  osf_mount   sys_osf_mount
-22 common  umount  sys_umount
+22 common  umount2 sys_umount
 23 common  setuid  sys_setuid
 24 common  getxuid sys_getxuid
 25 common  exec_with_loadersys_ni_syscall
@@ -183,7 +183,7 @@
 206common  semop   sys_semop
 207common  osf_utsname sys_osf_utsname
 208common  lchown  sys_lchown
-209common  osf_shmat   sys_shmat
+209common  shmat   sys_shmat
 210common  shmctl  sys_shmctl
 211common  shmdt   sys_shmdt
 212common  shmget  sys_shmget
-- 
2.20.0



[PATCH 02/15] ia64: add statx and io_pgetevents syscalls

2019-01-10 Thread Arnd Bergmann
All architectures should implement these two, so assign numbers
and hook them up on ia64.

Signed-off-by: Arnd Bergmann 
---
 arch/ia64/kernel/syscalls/syscall.tbl | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/ia64/kernel/syscalls/syscall.tbl 
b/arch/ia64/kernel/syscalls/syscall.tbl
index e97caf51be42..52585281205b 100644
--- a/arch/ia64/kernel/syscalls/syscall.tbl
+++ b/arch/ia64/kernel/syscalls/syscall.tbl
@@ -335,3 +335,5 @@
 323 common  copy_file_range sys_copy_file_range
 324 common  preadv2 sys_preadv2
 325 common  pwritev2    sys_pwritev2
+326 common  statx   sys_statx
+327 common  io_pgetevents   sys_io_pgetevents
-- 
2.20.0



[PATCH 03/15] ia64: assign syscall numbers for perf and seccomp

2019-01-10 Thread Arnd Bergmann
Most architectures have assigned numbers for both seccomp and
perf_event_open, even when they do not implement either.

ia64 is an exception here, so for consistency let's add numbers for both
of them. Unless CONFIG_PERF_EVENTS and CONFIG_SECCOMP are implemented,
the system calls just return -ENOSYS.

Signed-off-by: Arnd Bergmann 
---
 arch/ia64/kernel/syscalls/syscall.tbl | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl
index 52585281205b..2e93dbdcdb80 100644
--- a/arch/ia64/kernel/syscalls/syscall.tbl
+++ b/arch/ia64/kernel/syscalls/syscall.tbl
@@ -337,3 +337,5 @@
 325 common  pwritev2    sys_pwritev2
 326 common  statx   sys_statx
 327 common  io_pgetevents   sys_io_pgetevents
+328 common  perf_event_open sys_perf_event_open
+329 common  seccomp sys_seccomp
-- 
2.20.0



[PATCH 06/15] ARM: add migrate_pages() system call

2019-01-10 Thread Arnd Bergmann
The migrate_pages system call has an assigned number on all architectures
except ARM. When it got added initially in commit d80ade7b3231 ("ARM:
Fix warning: #warning syscall migrate_pages not implemented"), it was
intentionally left out based on the observation that there are no 32-bit
ARM NUMA systems.

However, there are now arm64 NUMA machines that can in theory run 32-bit
kernels (actually enabling NUMA there would require additional work)
as well as 32-bit user space on 64-bit kernels, so that argument is no
longer very strong.

Assigning the number lets us use the system call on 64-bit kernels as well
as providing a more consistent set of syscalls across architectures.

Signed-off-by: Arnd Bergmann 
---
 arch/arm/include/asm/unistd.h | 1 -
 arch/arm/tools/syscall.tbl| 1 +
 arch/arm64/include/asm/unistd.h   | 2 +-
 arch/arm64/include/asm/unistd32.h | 2 ++
 4 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h
index 88ef2ce1f69a..d713587dfcf4 100644
--- a/arch/arm/include/asm/unistd.h
+++ b/arch/arm/include/asm/unistd.h
@@ -45,7 +45,6 @@
  * Unimplemented (or alternatively implemented) syscalls
  */
 #define __IGNORE_fadvise64_64
-#define __IGNORE_migrate_pages
 
 #ifdef __ARM_EABI__
 /*
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index 8edf93b4490f..86de9eb34296 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -414,3 +414,4 @@
 397 common  statx   sys_statx
 398 common  rseq    sys_rseq
 399 common  io_pgetevents   sys_io_pgetevents
+400 common  migrate_pages   sys_migrate_pages
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index a7b1fc58ffdf..261216c3336e 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -44,7 +44,7 @@
 #define __ARM_NR_compat_set_tls    (__ARM_NR_COMPAT_BASE + 5)
 #define __ARM_NR_COMPAT_END        (__ARM_NR_COMPAT_BASE + 0x800)
 
-#define __NR_compat_syscalls   400
+#define __NR_compat_syscalls   401
 #endif
 
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 04ee190b90fe..355fe2bc035b 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -821,6 +821,8 @@ __SYSCALL(__NR_statx, sys_statx)
 __SYSCALL(__NR_rseq, sys_rseq)
 #define __NR_io_pgetevents 399
 __SYSCALL(__NR_io_pgetevents, compat_sys_io_pgetevents)
+#define __NR_migrate_pages 400
+__SYSCALL(__NR_migrate_pages, sys_migrate_pages)
 
 /*
  * Please add new compat syscalls above this comment and update
-- 
2.20.0



  1   2   >