[PATCH v2 3/3] arch: define CONFIG_PAGE_SIZE_*KB on all architectures
From: Arnd Bergmann Most architectures only support a single hardcoded page size. In order to ensure that each one of these sets the corresponding Kconfig symbols, change over the PAGE_SHIFT definition to the common one and allow only the hardware page size to be selected. Acked-by: Guo Ren Acked-by: Heiko Carstens Acked-by: Stafford Horne Acked-by: Johannes Berg Signed-off-by: Arnd Bergmann --- No changes from v1 arch/alpha/Kconfig | 1 + arch/alpha/include/asm/page.h | 2 +- arch/arm/Kconfig | 1 + arch/arm/include/asm/page.h| 2 +- arch/csky/Kconfig | 1 + arch/csky/include/asm/page.h | 2 +- arch/m68k/Kconfig | 3 +++ arch/m68k/Kconfig.cpu | 2 ++ arch/m68k/include/asm/page.h | 6 +- arch/microblaze/Kconfig| 1 + arch/microblaze/include/asm/page.h | 2 +- arch/nios2/Kconfig | 1 + arch/nios2/include/asm/page.h | 2 +- arch/openrisc/Kconfig | 1 + arch/openrisc/include/asm/page.h | 2 +- arch/riscv/Kconfig | 1 + arch/riscv/include/asm/page.h | 2 +- arch/s390/Kconfig | 1 + arch/s390/include/asm/page.h | 2 +- arch/sparc/Kconfig | 2 ++ arch/sparc/include/asm/page_32.h | 2 +- arch/sparc/include/asm/page_64.h | 3 +-- arch/um/Kconfig| 1 + arch/um/include/asm/page.h | 2 +- arch/x86/Kconfig | 1 + arch/x86/include/asm/page_types.h | 2 +- arch/xtensa/Kconfig| 1 + arch/xtensa/include/asm/page.h | 2 +- 28 files changed, 32 insertions(+), 19 deletions(-) diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index d6968d090d49..4f490250d323 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -14,6 +14,7 @@ config ALPHA select PCI_DOMAINS if PCI select PCI_SYSCALL if PCI select HAVE_ASM_MODVERSIONS + select HAVE_PAGE_SIZE_8KB select HAVE_PCSPKR_PLATFORM select HAVE_PERF_EVENTS select NEED_DMA_MAP_STATE diff --git a/arch/alpha/include/asm/page.h b/arch/alpha/include/asm/page.h index 4db1ebc0ed99..70419e6be1a3 100644 --- a/arch/alpha/include/asm/page.h +++ b/arch/alpha/include/asm/page.h @@ -6,7 +6,7 @@ #include /* PAGE_SHIFT determines the page size */ -#define PAGE_SHIFT 13 +#define 
PAGE_SHIFT CONFIG_PAGE_SHIFT #define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) #define PAGE_MASK (~(PAGE_SIZE-1)) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 0af6709570d1..9d52ba3a8ad1 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -116,6 +116,7 @@ config ARM select HAVE_MOD_ARCH_SPECIFIC select HAVE_NMI select HAVE_OPTPROBES if !THUMB2_KERNEL + select HAVE_PAGE_SIZE_4KB select HAVE_PCI if MMU select HAVE_PERF_EVENTS select HAVE_PERF_REGS diff --git a/arch/arm/include/asm/page.h b/arch/arm/include/asm/page.h index 119aa85d1feb..62af9f7f9e96 100644 --- a/arch/arm/include/asm/page.h +++ b/arch/arm/include/asm/page.h @@ -8,7 +8,7 @@ #define _ASMARM_PAGE_H /* PAGE_SHIFT determines the page size */ -#define PAGE_SHIFT 12 +#define PAGE_SHIFT CONFIG_PAGE_SHIFT #define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) #define PAGE_MASK (~((1 << PAGE_SHIFT) - 1)) diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig index cf2a6fd7dff8..9c2723ab1c94 100644 --- a/arch/csky/Kconfig +++ b/arch/csky/Kconfig @@ -89,6 +89,7 @@ config CSKY select HAVE_KPROBES if !CPU_CK610 select HAVE_KPROBES_ON_FTRACE if !CPU_CK610 select HAVE_KRETPROBES if !CPU_CK610 + select HAVE_PAGE_SIZE_4KB select HAVE_PERF_EVENTS select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP diff --git a/arch/csky/include/asm/page.h b/arch/csky/include/asm/page.h index 866855e1ab43..0ca6c408c07f 100644 --- a/arch/csky/include/asm/page.h +++ b/arch/csky/include/asm/page.h @@ -10,7 +10,7 @@ /* * PAGE_SHIFT determines the page size: 4KB */ -#define PAGE_SHIFT 12 +#define PAGE_SHIFT CONFIG_PAGE_SHIFT #define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT) #define PAGE_MASK (~(PAGE_SIZE - 1)) #define THREAD_SIZE(PAGE_SIZE * 2) diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index 4b3e93cac723..7b709453d5e7 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -84,12 +84,15 @@ config MMU config MMU_MOTOROLA bool + select HAVE_PAGE_SIZE_4KB config MMU_COLDFIRE + select HAVE_PAGE_SIZE_8KB bool config MMU_SUN3 bool + select 
HAVE_PAGE_SIZE_8KB depends on MMU && !MMU_MOTOROLA && !MMU_COLDFIRE config ARCH_SUPPORTS_KEXEC diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu index 9dcf245c9cbf..c777a129768a 100644 --- a/arch/m68k/Kconfig.cpu +++ b/arch/m68k/Kconfig.cpu @@ -30,6 +30,7 @@ config COLDFIRE se
[PATCH v2 2/3] arch: simplify architecture specific page size configuration
From: Arnd Bergmann arc, arm64, parisc and powerpc all have their own Kconfig symbols in place of the common CONFIG_PAGE_SIZE_4KB symbols. Change these so the common symbols are the ones that are actually used, while leaving the arhcitecture specific ones as the user visible place for configuring it, to avoid breaking user configs. Reviewed-by: Christophe Leroy (powerpc32) Acked-by: Catalin Marinas Acked-by: Helge Deller # parisc Signed-off-by: Arnd Bergmann --- No changes from v1 arch/arc/Kconfig | 3 +++ arch/arc/include/uapi/asm/page.h | 6 ++ arch/arm64/Kconfig| 29 + arch/arm64/include/asm/page-def.h | 2 +- arch/parisc/Kconfig | 3 +++ arch/parisc/include/asm/page.h| 10 +- arch/powerpc/Kconfig | 31 ++- arch/powerpc/include/asm/page.h | 2 +- scripts/gdb/linux/constants.py.in | 2 +- scripts/gdb/linux/mm.py | 2 +- 10 files changed, 32 insertions(+), 58 deletions(-) diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 1b0483c51cc1..4092bec198be 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -284,14 +284,17 @@ choice config ARC_PAGE_SIZE_8K bool "8KB" + select HAVE_PAGE_SIZE_8KB help Choose between 8k vs 16k config ARC_PAGE_SIZE_16K + select HAVE_PAGE_SIZE_16KB bool "16KB" config ARC_PAGE_SIZE_4K bool "4KB" + select HAVE_PAGE_SIZE_4KB depends on ARC_MMU_V3 || ARC_MMU_V4 endchoice diff --git a/arch/arc/include/uapi/asm/page.h b/arch/arc/include/uapi/asm/page.h index 2a4ad619abfb..7fd9e741b527 100644 --- a/arch/arc/include/uapi/asm/page.h +++ b/arch/arc/include/uapi/asm/page.h @@ -13,10 +13,8 @@ #include /* PAGE_SHIFT determines the page size */ -#if defined(CONFIG_ARC_PAGE_SIZE_16K) -#define PAGE_SHIFT 14 -#elif defined(CONFIG_ARC_PAGE_SIZE_4K) -#define PAGE_SHIFT 12 +#ifdef __KERNEL__ +#define PAGE_SHIFT CONFIG_PAGE_SHIFT #else /* * Default 8k diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index aa7c1d435139..29290b8cb36d 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -277,27 +277,21 @@ config 64BIT config MMU def_bool y -config 
ARM64_PAGE_SHIFT - int - default 16 if ARM64_64K_PAGES - default 14 if ARM64_16K_PAGES - default 12 - config ARM64_CONT_PTE_SHIFT int - default 5 if ARM64_64K_PAGES - default 7 if ARM64_16K_PAGES + default 5 if PAGE_SIZE_64KB + default 7 if PAGE_SIZE_16KB default 4 config ARM64_CONT_PMD_SHIFT int - default 5 if ARM64_64K_PAGES - default 5 if ARM64_16K_PAGES + default 5 if PAGE_SIZE_64KB + default 5 if PAGE_SIZE_16KB default 4 config ARCH_MMAP_RND_BITS_MIN - default 14 if ARM64_64K_PAGES - default 16 if ARM64_16K_PAGES + default 14 if PAGE_SIZE_64KB + default 16 if PAGE_SIZE_16KB default 18 # max bits determined by the following formula: @@ -1259,11 +1253,13 @@ choice config ARM64_4K_PAGES bool "4KB" + select HAVE_PAGE_SIZE_4KB help This feature enables 4KB pages support. config ARM64_16K_PAGES bool "16KB" + select HAVE_PAGE_SIZE_16KB help The system will use 16KB pages support. AArch32 emulation requires applications compiled with 16K (or a multiple of 16K) @@ -1271,6 +1267,7 @@ config ARM64_16K_PAGES config ARM64_64K_PAGES bool "64KB" + select HAVE_PAGE_SIZE_64KB help This feature enables 64KB pages support (4KB by default) allowing only two levels of page tables and faster TLB @@ -1291,19 +1288,19 @@ choice config ARM64_VA_BITS_36 bool "36-bit" if EXPERT - depends on ARM64_16K_PAGES + depends on PAGE_SIZE_16KB config ARM64_VA_BITS_39 bool "39-bit" - depends on ARM64_4K_PAGES + depends on PAGE_SIZE_4KB config ARM64_VA_BITS_42 bool "42-bit" - depends on ARM64_64K_PAGES + depends on PAGE_SIZE_64KB config ARM64_VA_BITS_47 bool "47-bit" - depends on ARM64_16K_PAGES + depends on PAGE_SIZE_16KB config ARM64_VA_BITS_48 bool "48-bit" diff --git a/arch/arm64/include/asm/page-def.h b/arch/arm64/include/asm/page-def.h index 2403f7b4cdbf..792e9fe881dc 100644 --- a/arch/arm64/include/asm/page-def.h +++ b/arch/arm64/include/asm/page-def.h @@ -11,7 +11,7 @@ #include /* PAGE_SHIFT determines the page size */ -#define PAGE_SHIFT CONFIG_ARM64_PAGE_SHIFT +#define PAGE_SHIFT 
CONFIG_PAGE_SHIFT #define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT) #define PAGE_MASK (~(PAGE_SIZE-1)) diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 5c845e8d59d9..b180e684fa0d
[PATCH v2 1/3] arch: consolidate existing CONFIG_PAGE_SIZE_*KB definitions
From: Arnd Bergmann These four architectures define the same Kconfig symbols for configuring the page size. Move the logic into a common place where it can be shared with all other architectures. Signed-off-by: Arnd Bergmann --- Changes from v1: - improve Kconfig help texts - fix Hexagon Kconfig arch/Kconfig | 92 ++- arch/hexagon/Kconfig | 24 ++-- arch/hexagon/include/asm/page.h | 6 +- arch/loongarch/Kconfig| 21 ++- arch/loongarch/include/asm/page.h | 10 +--- arch/mips/Kconfig | 58 ++- arch/mips/include/asm/page.h | 16 +- arch/sh/include/asm/page.h| 13 + arch/sh/mm/Kconfig| 42 -- 9 files changed, 121 insertions(+), 161 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index a5af0edd3eb8..c63034e092d0 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -1078,17 +1078,105 @@ config HAVE_ARCH_COMPAT_MMAP_BASES and vice-versa 32-bit applications to call 64-bit mmap(). Required for applications doing different bitness syscalls. +config HAVE_PAGE_SIZE_4KB + bool + +config HAVE_PAGE_SIZE_8KB + bool + +config HAVE_PAGE_SIZE_16KB + bool + +config HAVE_PAGE_SIZE_32KB + bool + +config HAVE_PAGE_SIZE_64KB + bool + +config HAVE_PAGE_SIZE_256KB + bool + +choice + prompt "MMU page size" + +config PAGE_SIZE_4KB + bool "4KiB pages" + depends on HAVE_PAGE_SIZE_4KB + help + This option select the standard 4KiB Linux page size and the only + available option on many architectures. Using 4KiB page size will + minimize memory consumption and is therefore recommended for low + memory systems. + Some software that is written for x86 systems makes incorrect + assumptions about the page size and only runs on 4KiB pages. + +config PAGE_SIZE_8KB + bool "8KiB pages" + depends on HAVE_PAGE_SIZE_8KB + help + This option is the only supported page size on a few older + processors, and can be slightly faster than 4KiB pages. 
+ +config PAGE_SIZE_16KB + bool "16KiB pages" + depends on HAVE_PAGE_SIZE_16KB + help + This option is usually a good compromise between memory + consumption and performance for typical desktop and server + workloads, often saving a level of page table lookups compared + to 4KB pages as well as reducing TLB pressure and overhead of + per-page operations in the kernel at the expense of a larger + page cache. + +config PAGE_SIZE_32KB + bool "32KiB pages" + depends on HAVE_PAGE_SIZE_32KB + Using 32KiB page size will result in slightly higher performance + kernel at the price of higher memory consumption compared to + 16KiB pages. This option is available only on cnMIPS cores. + Note that you will need a suitable Linux distribution to + support this. + +config PAGE_SIZE_64KB + bool "64KiB pages" + depends on HAVE_PAGE_SIZE_64KB + Using 64KiB page size will result in slightly higher performance + kernel at the price of much higher memory consumption compared to + 4KiB or 16KiB pages. + This is not suitable for general-purpose workloads but the + better performance may be worth the cost for certain types of + supercomputing or database applications that work mostly with + large in-memory data rather than small files. + +config PAGE_SIZE_256KB + bool "256KiB pages" + depends on HAVE_PAGE_SIZE_256KB + help + 256KiB pages have little practical value due to their extreme + memory usage. The kernel will only be able to run applications + that have been compiled with '-zmax-page-size' set to 256KiB + (the default is 64KiB or 4KiB on most architectures). 
+ +endchoice + config PAGE_SIZE_LESS_THAN_64KB def_bool y - depends on !ARM64_64K_PAGES depends on !PAGE_SIZE_64KB - depends on !PARISC_PAGE_SIZE_64KB depends on PAGE_SIZE_LESS_THAN_256KB config PAGE_SIZE_LESS_THAN_256KB def_bool y depends on !PAGE_SIZE_256KB +config PAGE_SHIFT + int + default 12 if PAGE_SIZE_4KB + default 13 if PAGE_SIZE_8KB + default 14 if PAGE_SIZE_16KB + default 15 if PAGE_SIZE_32KB + default 16 if PAGE_SIZE_64KB + default 18 if PAGE_SIZE_256KB + # This allows to use a set of generic functions to determine mmap base # address by giving priority to top-down scheme only if the process # is not in legacy mode (compat task, unlimited stack size or diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig index a880ee067d2e..1414052e7d6b 100644 --- a/arch/hexagon/Kconfig +++ b/arch/hexagon/Kconfig @@ -8,6 +8,10 @@ con
[v2 PATCH 0/3] arch: mm, vdso: consolidate PAGE_SIZE definition
From: Arnd Bergmann Naresh noticed that the newly added usage of the PAGE_SIZE macro in include/vdso/datapage.h introduced a build regression. I had an older patch that I revived to have this defined through Kconfig rather than through including asm/page.h, which is not allowed in vdso code. The vdso patch series now has a temporary workaround, but I still want to get this into v6.9 so we can place the hack with CONFIG_PAGE_SIZE in the vdso. I've applied this to the asm-generic tree already, please let me know if there are still remaining issues. It's really close to the merge window already, so I'd probably give this a few more days before I send a pull request, or defer it to v6.10 if anything goes wrong. Sorry for the delay, I was still waiting to resolve the m68k question, but there were no further replies in the end, so I kept my original version. Changes from v1: - improve Kconfig help texts - remove an extraneous line in hexagon Arnd Link: https://lore.kernel.org/lkml/ca+g9fytrxxm_ko9fnpz3xarxhv7ud_yqp-teupqrnrhu+_0...@mail.gmail.com/ Link: https://lore.kernel.org/all/65dc6c14.170a0220.f4a3f.9...@mx.google.com/ Link: https://lore.kernel.org/lkml/20240226161414.2316610-1-a...@kernel.org/ Arnd Bergmann (3): arch: consolidate existing CONFIG_PAGE_SIZE_*KB definitions arch: simplify architecture specific page size configuration arch: define CONFIG_PAGE_SIZE_*KB on all architectures arch/Kconfig | 92 +- arch/alpha/Kconfig | 1 + arch/alpha/include/asm/page.h | 2 +- arch/arc/Kconfig | 3 + arch/arc/include/uapi/asm/page.h | 6 +- arch/arm/Kconfig | 1 + arch/arm/include/asm/page.h| 2 +- arch/arm64/Kconfig | 29 +- arch/arm64/include/asm/page-def.h | 2 +- arch/csky/Kconfig | 1 + arch/csky/include/asm/page.h | 2 +- arch/hexagon/Kconfig | 24 ++-- arch/hexagon/include/asm/page.h| 6 +- arch/loongarch/Kconfig | 21 ++- arch/loongarch/include/asm/page.h | 10 +--- arch/m68k/Kconfig | 3 + arch/m68k/Kconfig.cpu | 2 + arch/m68k/include/asm/page.h | 6 +- arch/microblaze/Kconfig| 1 
+ arch/microblaze/include/asm/page.h | 2 +- arch/mips/Kconfig | 58 ++- arch/mips/include/asm/page.h | 16 +- arch/nios2/Kconfig | 1 + arch/nios2/include/asm/page.h | 2 +- arch/openrisc/Kconfig | 1 + arch/openrisc/include/asm/page.h | 2 +- arch/parisc/Kconfig| 3 + arch/parisc/include/asm/page.h | 10 +--- arch/powerpc/Kconfig | 31 ++ arch/powerpc/include/asm/page.h| 2 +- arch/riscv/Kconfig | 1 + arch/riscv/include/asm/page.h | 2 +- arch/s390/Kconfig | 1 + arch/s390/include/asm/page.h | 2 +- arch/sh/include/asm/page.h | 13 + arch/sh/mm/Kconfig | 42 -- arch/sparc/Kconfig | 2 + arch/sparc/include/asm/page_32.h | 2 +- arch/sparc/include/asm/page_64.h | 3 +- arch/um/Kconfig| 1 + arch/um/include/asm/page.h | 2 +- arch/x86/Kconfig | 1 + arch/x86/include/asm/page_types.h | 2 +- arch/xtensa/Kconfig| 1 + arch/xtensa/include/asm/page.h | 2 +- scripts/gdb/linux/constants.py.in | 2 +- scripts/gdb/linux/mm.py| 2 +- 47 files changed, 185 insertions(+), 238 deletions(-) -- 2.39.2 To: Thomas Gleixner To: Vincenzo Frascino To: Kees Cook To: Anna-Maria Behnsen Cc: Matt Turner Cc: Vineet Gupta Cc: Russell King Cc: Catalin Marinas Cc: Guo Ren Cc: Brian Cain Cc: Huacai Chen Cc: Geert Uytterhoeven Cc: Michal Simek Cc: Thomas Bogendoerfer Cc: Helge Deller Cc: Michael Ellerman Cc: Christophe Leroy Cc: Palmer Dabbelt Cc: John Paul Adrian Glaubitz Cc: Andreas Larsson Cc: Richard Weinberger Cc: x...@kernel.org Cc: Max Filippov Cc: Andy Lutomirski Cc: Vincenzo Frascino Cc: Jan Kiszka Cc: Kieran Bingham Cc: Andrew Morton Cc: Arnd Bergmann Cc: linux-ker...@vger.kernel.org Cc: linux-alpha@vger.kernel.org Cc: linux-snps-...@lists.infradead.org Cc: linux-arm-ker...@lists.infradead.org Cc: linux-c...@vger.kernel.org Cc: linux-hexa...@vger.kernel.org Cc: loonga...@lists.linux.dev Cc: linux-m...@lists.linux-m68k.org Cc: linux-m...@vger.kernel.org Cc: linux-openr...@vger.kernel.org Cc: linux-par...@vger.kernel.org Cc: linuxppc-...@lists.ozlabs.org Cc: linux-ri...@lists.infradead.org Cc: 
linux-s...@vger.kernel.org Cc: linux...@vger.kernel.org Cc: sparcli...@vger.kernel.org Cc: linux...@lists.infradead.org
Re: [PATCH 1/4] arch: consolidate existing CONFIG_PAGE_SIZE_*KB definitions
On Tue, Feb 27, 2024, at 16:44, Christophe Leroy wrote: > Le 27/02/2024 à 16:40, Arnd Bergmann a écrit : >> On Mon, Feb 26, 2024, at 17:55, Samuel Holland wrote: > > > For 256K pages, powerpc has the following help. I think you should have > it too: > > The kernel will only be able to run applications that have been > compiled with '-zmax-page-size' set to 256K (the default is 64K) using > binutils later than 2.17.50.0.3, or by patching the ELF_MAXPAGESIZE > definition from 0x10000 to 0x40000 in older versions. I don't think we need to mention pre-2.18 binutils any more, but the rest seems useful, changed the text now to config PAGE_SIZE_256KB bool "256KiB pages" depends on HAVE_PAGE_SIZE_256KB help 256KiB pages have little practical value due to their extreme memory usage. The kernel will only be able to run applications that have been compiled with '-zmax-page-size' set to 256KiB (the default is 64KiB or 4KiB on most architectures). Arnd
Re: [PATCH 1/4] arch: consolidate existing CONFIG_PAGE_SIZE_*KB definitions
On Tue, Feb 27, 2024, at 09:45, Geert Uytterhoeven wrote: > >> +config PAGE_SIZE_4KB >> + bool "4KB pages" > > Now you got rid of the 4000-byte ("4kB") pages and friends, please > do not replace these by Kelvin-bytes, and use the official binary > prefixes => "4 KiB". > Done, thanks. Arnd
Re: [PATCH 1/4] arch: consolidate existing CONFIG_PAGE_SIZE_*KB definitions
On Mon, Feb 26, 2024, at 20:02, Christophe Leroy wrote: > Le 26/02/2024 à 17:14, Arnd Bergmann a écrit : >> From: Arnd Bergmann > > That's a nice re-factor. > > The only drawback I see is that we are loosing several interesting > arch-specific comments/help text. Don't know if there could be an easy > way to keep them. This is what I have now, trying to write it as generic as possible while still giving useful advice: config PAGE_SIZE_4KB bool "4KiB pages" depends on HAVE_PAGE_SIZE_4KB help This option select the standard 4KiB Linux page size and the only available option on many architectures. Using 4KiB page size will minimize memory consumption and is therefore recommended for low memory systems. Some software that is written for x86 systems makes incorrect assumptions about the page size and only runs on 4KiB pages. config PAGE_SIZE_8KB bool "8KiB pages" depends on HAVE_PAGE_SIZE_8KB help This option is the only supported page size on a few older processors, and can be slightly faster than 4KiB pages. config PAGE_SIZE_16KB bool "16KiB pages" depends on HAVE_PAGE_SIZE_16KB help This option is usually a good compromise between memory consumption and performance for typical desktop and server workloads, often saving a level of page table lookups compared to 4KB pages as well as reducing TLB pressure and overhead of per-page operations in the kernel at the expense of a larger page cache. config PAGE_SIZE_32KB bool "32KiB pages" depends on HAVE_PAGE_SIZE_32KB Using 32KiB page size will result in slightly higher performance kernel at the price of higher memory consumption compared to 16KiB pages. This option is available only on cnMIPS cores. Note that you will need a suitable Linux distribution to support this. config PAGE_SIZE_64KB bool "64KiB pages" depends on HAVE_PAGE_SIZE_64KB Using 64KiB page size will result in slightly higher performance kernel at the price of much higher memory consumption compared to 4KiB or 16KiB pages. 
This is not suitable for general-purpose workloads but the better performance may be worth the cost for certain types of supercomputing or database applications that work mostly with large in-memory data rather than small files. config PAGE_SIZE_256KB bool "256KiB pages" depends on HAVE_PAGE_SIZE_256KB help 256KB pages have little practical value due to their extreme memory usage. Let me know if you think some of this should be adapted further. >> >> +#define PAGE_SHIFT CONFIG_PAGE_SHIFT >> #define PAGE_SIZE (1UL << PAGE_SHIFT) >> #define PAGE_MASK (~((1 << PAGE_SHIFT) - 1)) >> > > Could we move PAGE_SIZE and PAGE_MASK in a generic/core header instead > of having it duplicated for each arch ? Yes, but I'm leaving this for a follow-up series, since I had to stop somewhere and there is always room for cleaning up headers further ;-) Arnd
Re: [PATCH 1/4] arch: consolidate existing CONFIG_PAGE_SIZE_*KB definitions
On Mon, Feb 26, 2024, at 17:55, Samuel Holland wrote: > On 2024-02-26 10:14 AM, Arnd Bergmann wrote: >> >> +config HAVE_PAGE_SIZE_4KB >> +bool >> + >> +config HAVE_PAGE_SIZE_8KB >> +bool >> + >> +config HAVE_PAGE_SIZE_16KB >> +bool >> + >> +config HAVE_PAGE_SIZE_32KB >> +bool >> + >> +config HAVE_PAGE_SIZE_64KB >> +bool >> + >> +config HAVE_PAGE_SIZE_256KB >> +bool >> + >> +choice >> +prompt "MMU page size" > > Should this have some generic help text (at least a warning about > compatibility)? Good point. I've added some of this now, based on the mips text with some generalizations for other architectures: config PAGE_SIZE_4KB bool "4KiB pages" depends on HAVE_PAGE_SIZE_4KB help This option select the standard 4KiB Linux page size and the only available option on many architectures. Using 4KiB page size will minimize memory consumption and is therefore recommended for low memory systems. Some software that is written for x86 systems makes incorrect assumptions about the page size and only runs on 4KiB pages. config PAGE_SIZE_8KB bool "8KiB pages" depends on HAVE_PAGE_SIZE_8KB help This option is the only supported page size on a few older processors, and can be slightly faster than 4KiB pages. config PAGE_SIZE_16KB bool "16KiB pages" depends on HAVE_PAGE_SIZE_16KB help This option is usually a good compromise between memory consumption and performance for typical desktop and server workloads, often saving a level of page table lookups compared to 4KB pages as well as reducing TLB pressure and overhead of per-page operations in the kernel at the expense of a larger page cache. config PAGE_SIZE_32KB bool "32KiB pages" depends on HAVE_PAGE_SIZE_32KB Using 32KiB page size will result in slightly higher performance kernel at the price of higher memory consumption compared to 16KiB pages. This option is available only on cnMIPS cores. Note that you will need a suitable Linux distribution to support this. 
config PAGE_SIZE_64KB bool "64KiB pages" depends on HAVE_PAGE_SIZE_64KB Using 64KiB page size will result in slightly higher performance kernel at the price of much higher memory consumption compared to 4KiB or 16KiB pages. This is not suitable for general-purpose workloads but the better performance may be worth the cost for certain types of supercomputing or database applications that work mostly with large in-memory data rather than small files. config PAGE_SIZE_256KB bool "256KiB pages" depends on HAVE_PAGE_SIZE_256KB help 256KB pages have little practical value due to their extreme memory usage. >> diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig >> index a880ee067d2e..aac46ee1a000 100644 >> --- a/arch/hexagon/Kconfig >> +++ b/arch/hexagon/Kconfig >> @@ -8,6 +8,11 @@ config HEXAGON >> select ARCH_HAS_SYNC_DMA_FOR_DEVICE >> select ARCH_NO_PREEMPT >> select DMA_GLOBAL_POOL >> +select FRAME_POINTER > > Looks like a paste error. > Fixed, thanks! I think that happened during a rebase. >> #ifdef CONFIG_PAGE_SIZE_1MB >> -#define PAGE_SHIFT 20 >> #define HEXAGON_L1_PTE_SIZE __HVM_PDE_S_1MB >> #endif > > The corresponding Kconfig option does not exist (and did not exist before this > patch). Yes, I noticed that as well. It's clearly harmless. Arnd
Re: [PATCH 3/4] arch: define CONFIG_PAGE_SIZE_*KB on all architectures
On Tue, Feb 27, 2024, at 12:12, Geert Uytterhoeven wrote: > On Tue, Feb 27, 2024 at 11:59 AM Arnd Bergmann wrote: >> On Tue, Feb 27, 2024, at 09:54, Geert Uytterhoeven wrote: >> I was a bit unsure about how to best do this since there >> is not really a need for a fixed page size on nommu kernels, >> whereas the three MMU configs clearly tie the page size to >> the MMU rather than the platform. >> >> There should be no reason for coldfire to have a different >> page size from dragonball if neither of them actually uses >> hardware pages, so one of them could be changed later. > > Indeed, in theory, PAGE_SIZE doesn't matter for nommu, but the concept > of pages is used all over the place in Linux. > > I'm mostly worried about some Coldfire code relying on the actual value > of PAGE_SIZE in some other context. e.g. for configuring non-cacheable > regions. Right, any change here would have to be carefully tested. I would expect that a 4K page size would reduce memory consumption even on NOMMU systems that should have the same tradeoffs for representing files in the page cache and in mem_map[]. > And does this impact running nommu binaries on a system with MMU? > I.e. if nommu binaries were built with a 4 KiB PAGE_SIZE, do they > still run on MMU systems with an 8 KiB PAGE_SIZE (coldfire and sun3), > or are there some subtleties to take into account? As far as I understand, binaries have to be built and linked for the largest page size they can run on, so running them on a kernel with smaller page size usually works. One notable exception is sys_mmap2(), which on most architectures takes units of 4KiB but on m68k is actually written to take PAGE_SIZE units. As Al pointed out in f8b7256096a2 ("Unify sys_mmap*"), it has always been wrong on sun3, presumably because users of that predate modern glibc. Running coldfire nommu binaries on coldfire mmu kernels would run into the same bug if either of them changes PAGE_SIZE. 
If you can run coldfire nommu binaries on classic m68k, that is already broken in the same way. Arnd
Re: [PATCH 3/4] arch: define CONFIG_PAGE_SIZE_*KB on all architectures
On Tue, Feb 27, 2024, at 09:54, Geert Uytterhoeven wrote: > Hi Arnd, >> diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu >> index 9dcf245c9cbf..c777a129768a 100644 >> --- a/arch/m68k/Kconfig.cpu >> +++ b/arch/m68k/Kconfig.cpu >> @@ -30,6 +30,7 @@ config COLDFIRE >> select GENERIC_CSUM >> select GPIOLIB >> select HAVE_LEGACY_CLK >> + select HAVE_PAGE_SIZE_8KB if !MMU > > if you would drop the !MMU-dependency here. > >> >> endchoice >> >> @@ -45,6 +46,7 @@ config M68000 >> select GENERIC_CSUM >> select CPU_NO_EFFICIENT_FFS >> select HAVE_ARCH_HASH >> + select HAVE_PAGE_SIZE_4KB > > Perhaps replace this by > > config M68KCLASSIC > bool "Classic M68K CPU family support" > select HAVE_ARCH_PFN_VALID > + select HAVE_PAGE_SIZE_4KB if !MMU > > so it covers all 680x0 CPUs without MMU? I was a bit unsure about how to best do this since there is not really a need for a fixed page size on nommu kernels, whereas the three MMU configs clearly tie the page size to the MMU rather than the platform. There should be no reason for coldfire to have a different page size from dragonball if neither of them actually uses hardware pages, so one of them could be changed later. Let me know if that makes sense to you, or you still prefer me to change it like you suggested. Arnd
[PATCH 4/4] vdso: avoid including asm/page.h
From: Arnd Bergmann The recent change to the vdso_data_store broke building compat VDSO on at least arm64 because it includes headers outside of the include/vdso/ namespace: In file included from arch/arm64/include/asm/lse.h:5, from arch/arm64/include/asm/cmpxchg.h:14, from arch/arm64/include/asm/atomic.h:16, from include/linux/atomic.h:7, from include/asm-generic/bitops/atomic.h:5, from arch/arm64/include/asm/bitops.h:25, from include/linux/bitops.h:68, from arch/arm64/include/asm/memory.h:209, from arch/arm64/include/asm/page.h:46, from include/vdso/datapage.h:22, from lib/vdso/gettimeofday.c:5, from <command-line>: arch/arm64/include/asm/atomic_ll_sc.h:298:9: error: unknown type name 'u128' 298 | u128 full; Use an open-coded page size calculation based on the new CONFIG_PAGE_SHIFT Kconfig symbol instead. Reported-by: Linux Kernel Functional Testing Fixes: a0d2fcd62ac2 ("vdso/ARM: Make union vdso_data_store available for all architectures") Link: https://lore.kernel.org/lkml/ca+g9fytrxxm_ko9fnpz3xarxhv7ud_yqp-teupqrnrhu+_0...@mail.gmail.com/ Signed-off-by: Arnd Bergmann --- include/vdso/datapage.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/include/vdso/datapage.h b/include/vdso/datapage.h index 7ba44379a095..2c39a67d7e23 100644 --- a/include/vdso/datapage.h +++ b/include/vdso/datapage.h @@ -19,8 +19,6 @@ #include <vdso/time32.h> #include <vdso/time64.h> -#include <asm/page.h> - #ifdef CONFIG_ARCH_HAS_VDSO_DATA #include <asm/vdso/data.h> #else @@ -128,7 +126,7 @@ extern struct vdso_data _timens_data[CS_BASES] __attribute__((visibility("hidden */ union vdso_data_store { struct vdso_data data[CS_BASES]; - u8 page[PAGE_SIZE]; + u8 page[1ul << CONFIG_PAGE_SHIFT]; }; /* -- 2.39.2
[PATCH 3/4] arch: define CONFIG_PAGE_SIZE_*KB on all architectures
From: Arnd Bergmann Most architectures only support a single hardcoded page size. In order to ensure that each one of these sets the corresponding Kconfig symbols, change over the PAGE_SHIFT definition to the common one and allow only the hardware page size to be selected. Signed-off-by: Arnd Bergmann --- arch/alpha/Kconfig | 1 + arch/alpha/include/asm/page.h | 2 +- arch/arm/Kconfig | 1 + arch/arm/include/asm/page.h| 2 +- arch/csky/Kconfig | 1 + arch/csky/include/asm/page.h | 2 +- arch/m68k/Kconfig | 3 +++ arch/m68k/Kconfig.cpu | 2 ++ arch/m68k/include/asm/page.h | 6 +- arch/microblaze/Kconfig| 1 + arch/microblaze/include/asm/page.h | 2 +- arch/nios2/Kconfig | 1 + arch/nios2/include/asm/page.h | 2 +- arch/openrisc/Kconfig | 1 + arch/openrisc/include/asm/page.h | 2 +- arch/riscv/Kconfig | 1 + arch/riscv/include/asm/page.h | 2 +- arch/s390/Kconfig | 1 + arch/s390/include/asm/page.h | 2 +- arch/sparc/Kconfig | 2 ++ arch/sparc/include/asm/page_32.h | 2 +- arch/sparc/include/asm/page_64.h | 3 +-- arch/um/Kconfig| 1 + arch/um/include/asm/page.h | 2 +- arch/x86/Kconfig | 1 + arch/x86/include/asm/page_types.h | 2 +- arch/xtensa/Kconfig| 1 + arch/xtensa/include/asm/page.h | 2 +- 28 files changed, 32 insertions(+), 19 deletions(-) diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index d6968d090d49..4f490250d323 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -14,6 +14,7 @@ config ALPHA select PCI_DOMAINS if PCI select PCI_SYSCALL if PCI select HAVE_ASM_MODVERSIONS + select HAVE_PAGE_SIZE_8KB select HAVE_PCSPKR_PLATFORM select HAVE_PERF_EVENTS select NEED_DMA_MAP_STATE diff --git a/arch/alpha/include/asm/page.h b/arch/alpha/include/asm/page.h index 4db1ebc0ed99..70419e6be1a3 100644 --- a/arch/alpha/include/asm/page.h +++ b/arch/alpha/include/asm/page.h @@ -6,7 +6,7 @@ #include /* PAGE_SHIFT determines the page size */ -#define PAGE_SHIFT 13 +#define PAGE_SHIFT CONFIG_PAGE_SHIFT #define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) #define PAGE_MASK (~(PAGE_SIZE-1)) diff 
--git a/arch/arm/Kconfig b/arch/arm/Kconfig index 0af6709570d1..9d52ba3a8ad1 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -116,6 +116,7 @@ config ARM select HAVE_MOD_ARCH_SPECIFIC select HAVE_NMI select HAVE_OPTPROBES if !THUMB2_KERNEL + select HAVE_PAGE_SIZE_4KB select HAVE_PCI if MMU select HAVE_PERF_EVENTS select HAVE_PERF_REGS diff --git a/arch/arm/include/asm/page.h b/arch/arm/include/asm/page.h index 119aa85d1feb..62af9f7f9e96 100644 --- a/arch/arm/include/asm/page.h +++ b/arch/arm/include/asm/page.h @@ -8,7 +8,7 @@ #define _ASMARM_PAGE_H /* PAGE_SHIFT determines the page size */ -#define PAGE_SHIFT 12 +#define PAGE_SHIFT CONFIG_PAGE_SHIFT #define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) #define PAGE_MASK (~((1 << PAGE_SHIFT) - 1)) diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig index cf2a6fd7dff8..9c2723ab1c94 100644 --- a/arch/csky/Kconfig +++ b/arch/csky/Kconfig @@ -89,6 +89,7 @@ config CSKY select HAVE_KPROBES if !CPU_CK610 select HAVE_KPROBES_ON_FTRACE if !CPU_CK610 select HAVE_KRETPROBES if !CPU_CK610 + select HAVE_PAGE_SIZE_4KB select HAVE_PERF_EVENTS select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP diff --git a/arch/csky/include/asm/page.h b/arch/csky/include/asm/page.h index 4a0502e324a6..f70f37402d75 100644 --- a/arch/csky/include/asm/page.h +++ b/arch/csky/include/asm/page.h @@ -10,7 +10,7 @@ /* * PAGE_SHIFT determines the page size: 4KB */ -#define PAGE_SHIFT 12 +#define PAGE_SHIFT CONFIG_PAGE_SHIFT #define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT) #define PAGE_MASK (~(PAGE_SIZE - 1)) #define THREAD_SIZE(PAGE_SIZE * 2) diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index 4b3e93cac723..7b709453d5e7 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -84,12 +84,15 @@ config MMU config MMU_MOTOROLA bool + select HAVE_PAGE_SIZE_4KB config MMU_COLDFIRE + select HAVE_PAGE_SIZE_8KB bool config MMU_SUN3 bool + select HAVE_PAGE_SIZE_8KB depends on MMU && !MMU_MOTOROLA && !MMU_COLDFIRE config ARCH_SUPPORTS_KEXEC diff --git 
a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu index 9dcf245c9cbf..c777a129768a 100644 --- a/arch/m68k/Kconfig.cpu +++ b/arch/m68k/Kconfig.cpu @@ -30,6 +30,7 @@ config COLDFIRE select GENERIC_CSUM select GPIOLIB select HAVE_LEGACY_CLK + select HAVE_PAGE_SIZE_8KB if !MMU end
[PATCH 2/4] arch: simplify architecture specific page size configuration
From: Arnd Bergmann arc, arm64, parisc and powerpc all have their own Kconfig symbols in place of the common CONFIG_PAGE_SIZE_4KB symbols. Change these so the common symbols are the ones that are actually used, while leaving the architecture specific ones as the user visible place for configuring it, to avoid breaking user configs. Signed-off-by: Arnd Bergmann --- arch/arc/Kconfig | 3 +++ arch/arc/include/uapi/asm/page.h | 6 ++ arch/arm64/Kconfig| 29 + arch/arm64/include/asm/page-def.h | 2 +- arch/parisc/Kconfig | 3 +++ arch/parisc/include/asm/page.h| 10 +- arch/powerpc/Kconfig | 31 ++- arch/powerpc/include/asm/page.h | 2 +- scripts/gdb/linux/constants.py.in | 2 +- scripts/gdb/linux/mm.py | 2 +- 10 files changed, 32 insertions(+), 58 deletions(-) diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 1b0483c51cc1..4092bec198be 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -284,14 +284,17 @@ choice config ARC_PAGE_SIZE_8K bool "8KB" + select HAVE_PAGE_SIZE_8KB help Choose between 8k vs 16k config ARC_PAGE_SIZE_16K + select HAVE_PAGE_SIZE_16KB bool "16KB" config ARC_PAGE_SIZE_4K bool "4KB" + select HAVE_PAGE_SIZE_4KB depends on ARC_MMU_V3 || ARC_MMU_V4 endchoice diff --git a/arch/arc/include/uapi/asm/page.h b/arch/arc/include/uapi/asm/page.h index 2a4ad619abfb..7fd9e741b527 100644 --- a/arch/arc/include/uapi/asm/page.h +++ b/arch/arc/include/uapi/asm/page.h @@ -13,10 +13,8 @@ #include /* PAGE_SHIFT determines the page size */ -#if defined(CONFIG_ARC_PAGE_SIZE_16K) -#define PAGE_SHIFT 14 -#elif defined(CONFIG_ARC_PAGE_SIZE_4K) -#define PAGE_SHIFT 12 +#ifdef __KERNEL__ +#define PAGE_SHIFT CONFIG_PAGE_SHIFT #else /* * Default 8k diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index aa7c1d435139..29290b8cb36d 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -277,27 +277,21 @@ config 64BIT config MMU def_bool y -config ARM64_PAGE_SHIFT - int - default 16 if ARM64_64K_PAGES - default 14 if ARM64_16K_PAGES - default 12 - config ARM64_CONT_PTE_SHIFT 
int - default 5 if ARM64_64K_PAGES - default 7 if ARM64_16K_PAGES + default 5 if PAGE_SIZE_64KB + default 7 if PAGE_SIZE_16KB default 4 config ARM64_CONT_PMD_SHIFT int - default 5 if ARM64_64K_PAGES - default 5 if ARM64_16K_PAGES + default 5 if PAGE_SIZE_64KB + default 5 if PAGE_SIZE_16KB default 4 config ARCH_MMAP_RND_BITS_MIN - default 14 if ARM64_64K_PAGES - default 16 if ARM64_16K_PAGES + default 14 if PAGE_SIZE_64KB + default 16 if PAGE_SIZE_16KB default 18 # max bits determined by the following formula: @@ -1259,11 +1253,13 @@ choice config ARM64_4K_PAGES bool "4KB" + select HAVE_PAGE_SIZE_4KB help This feature enables 4KB pages support. config ARM64_16K_PAGES bool "16KB" + select HAVE_PAGE_SIZE_16KB help The system will use 16KB pages support. AArch32 emulation requires applications compiled with 16K (or a multiple of 16K) @@ -1271,6 +1267,7 @@ config ARM64_16K_PAGES config ARM64_64K_PAGES bool "64KB" + select HAVE_PAGE_SIZE_64KB help This feature enables 64KB pages support (4KB by default) allowing only two levels of page tables and faster TLB @@ -1291,19 +1288,19 @@ choice config ARM64_VA_BITS_36 bool "36-bit" if EXPERT - depends on ARM64_16K_PAGES + depends on PAGE_SIZE_16KB config ARM64_VA_BITS_39 bool "39-bit" - depends on ARM64_4K_PAGES + depends on PAGE_SIZE_4KB config ARM64_VA_BITS_42 bool "42-bit" - depends on ARM64_64K_PAGES + depends on PAGE_SIZE_64KB config ARM64_VA_BITS_47 bool "47-bit" - depends on ARM64_16K_PAGES + depends on PAGE_SIZE_16KB config ARM64_VA_BITS_48 bool "48-bit" diff --git a/arch/arm64/include/asm/page-def.h b/arch/arm64/include/asm/page-def.h index 2403f7b4cdbf..792e9fe881dc 100644 --- a/arch/arm64/include/asm/page-def.h +++ b/arch/arm64/include/asm/page-def.h @@ -11,7 +11,7 @@ #include /* PAGE_SHIFT determines the page size */ -#define PAGE_SHIFT CONFIG_ARM64_PAGE_SHIFT +#define PAGE_SHIFT CONFIG_PAGE_SHIFT #define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT) #define PAGE_MASK (~(PAGE_SIZE-1)) diff --git a/arch/parisc/Kconfig 
b/arch/parisc/Kconfig index 5c845e8d59d9..b180e684fa0d 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -273,6 +273,7 @@ choice config PARISC_PAGE_SIZE_4KB
[PATCH 1/4] arch: consolidate existing CONFIG_PAGE_SIZE_*KB definitions
From: Arnd Bergmann These four architectures define the same Kconfig symbols for configuring the page size. Move the logic into a common place where it can be shared with all other architectures. Signed-off-by: Arnd Bergmann --- arch/Kconfig | 58 +-- arch/hexagon/Kconfig | 25 +++-- arch/hexagon/include/asm/page.h | 6 +--- arch/loongarch/Kconfig| 21 --- arch/loongarch/include/asm/page.h | 10 +- arch/mips/Kconfig | 58 +++ arch/mips/include/asm/page.h | 16 + arch/sh/include/asm/page.h| 13 +-- arch/sh/mm/Kconfig| 42 +++--- 9 files changed, 88 insertions(+), 161 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index a5af0edd3eb8..237cea01ed9b 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -1078,17 +1078,71 @@ config HAVE_ARCH_COMPAT_MMAP_BASES and vice-versa 32-bit applications to call 64-bit mmap(). Required for applications doing different bitness syscalls. +config HAVE_PAGE_SIZE_4KB + bool + +config HAVE_PAGE_SIZE_8KB + bool + +config HAVE_PAGE_SIZE_16KB + bool + +config HAVE_PAGE_SIZE_32KB + bool + +config HAVE_PAGE_SIZE_64KB + bool + +config HAVE_PAGE_SIZE_256KB + bool + +choice + prompt "MMU page size" + +config PAGE_SIZE_4KB + bool "4KB pages" + depends on HAVE_PAGE_SIZE_4KB + +config PAGE_SIZE_8KB + bool "8KB pages" + depends on HAVE_PAGE_SIZE_8KB + +config PAGE_SIZE_16KB + bool "16KB pages" + depends on HAVE_PAGE_SIZE_16KB + +config PAGE_SIZE_32KB + bool "32KB pages" + depends on HAVE_PAGE_SIZE_32KB + +config PAGE_SIZE_64KB + bool "64KB pages" + depends on HAVE_PAGE_SIZE_64KB + +config PAGE_SIZE_256KB + bool "256KB pages" + depends on HAVE_PAGE_SIZE_256KB + +endchoice + config PAGE_SIZE_LESS_THAN_64KB def_bool y - depends on !ARM64_64K_PAGES depends on !PAGE_SIZE_64KB - depends on !PARISC_PAGE_SIZE_64KB depends on PAGE_SIZE_LESS_THAN_256KB config PAGE_SIZE_LESS_THAN_256KB def_bool y depends on !PAGE_SIZE_256KB +config PAGE_SHIFT + int + default 12 if PAGE_SIZE_4KB + default 13 if PAGE_SIZE_8KB + default 14 if PAGE_SIZE_16KB + default 15 if PAGE_SIZE_32KB 
+ default 16 if PAGE_SIZE_64KB + default 18 if PAGE_SIZE_256KB + # This allows to use a set of generic functions to determine mmap base # address by giving priority to top-down scheme only if the process # is not in legacy mode (compat task, unlimited stack size or diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig index a880ee067d2e..aac46ee1a000 100644 --- a/arch/hexagon/Kconfig +++ b/arch/hexagon/Kconfig @@ -8,6 +8,11 @@ config HEXAGON select ARCH_HAS_SYNC_DMA_FOR_DEVICE select ARCH_NO_PREEMPT select DMA_GLOBAL_POOL + select FRAME_POINTER + select HAVE_PAGE_SIZE_4KB + select HAVE_PAGE_SIZE_16KB + select HAVE_PAGE_SIZE_64KB + select HAVE_PAGE_SIZE_256KB # Other pending projects/to-do items. # select HAVE_REGS_AND_STACK_ACCESS_API # select HAVE_HW_BREAKPOINT if PERF_EVENTS @@ -120,26 +125,6 @@ config NR_CPUS This is purely to save memory - each supported CPU adds approximately eight kilobytes to the kernel image. -choice - prompt "Kernel page size" - default PAGE_SIZE_4KB - help - Changes the default page size; use with caution. - -config PAGE_SIZE_4KB - bool "4KB" - -config PAGE_SIZE_16KB - bool "16KB" - -config PAGE_SIZE_64KB - bool "64KB" - -config PAGE_SIZE_256KB - bool "256KB" - -endchoice - source "kernel/Kconfig.hz" endmenu diff --git a/arch/hexagon/include/asm/page.h b/arch/hexagon/include/asm/page.h index 10f1bc07423c..65c9bac639fa 100644 --- a/arch/hexagon/include/asm/page.h +++ b/arch/hexagon/include/asm/page.h @@ -13,27 +13,22 @@ /* This is probably not the most graceful way to handle this. 
*/ #ifdef CONFIG_PAGE_SIZE_4KB -#define PAGE_SHIFT 12 #define HEXAGON_L1_PTE_SIZE __HVM_PDE_S_4KB #endif #ifdef CONFIG_PAGE_SIZE_16KB -#define PAGE_SHIFT 14 #define HEXAGON_L1_PTE_SIZE __HVM_PDE_S_16KB #endif #ifdef CONFIG_PAGE_SIZE_64KB -#define PAGE_SHIFT 16 #define HEXAGON_L1_PTE_SIZE __HVM_PDE_S_64KB #endif #ifdef CONFIG_PAGE_SIZE_256KB -#define PAGE_SHIFT 18 #define HEXAGON_L1_PTE_SIZE __HVM_PDE_S_256KB #endif #ifdef CONFIG_PAGE_SIZE_1MB -#define PAGE_SHIFT 20 #define HEXAGON_L1_PTE_SIZE __HVM_PDE_S_1MB #endif @@ -50,6 +45,7 @@ #define HVM_HUGEPAGE_SIZE 0x5 #endif +#define PAGE_SHIFT CONFIG_PAGE_SHIFT #define PAGE_SIZE (1UL << PAGE_SHIFT) #define PAGE_MASK (~((1 <<
[PATCH 0/4] arch: mm, vdso: consolidate PAGE_SIZE definition
From: Arnd Bergmann Naresh noticed that the newly added usage of the PAGE_SIZE macro in include/vdso/datapage.h introduced a build regression. I had an older patch that I revived to have this defined through Kconfig rather than through including asm/page.h, which is not allowed in vdso code. I rebased and tested on top of the tip/timers/core branch that introduced the regression. If these patches get added, the compat VDSOs all build again, but the changes are a bit invasive. Arnd Link: https://lore.kernel.org/lkml/ca+g9fytrxxm_ko9fnpz3xarxhv7ud_yqp-teupqrnrhu+_0...@mail.gmail.com/ Link: https://lore.kernel.org/all/65dc6c14.170a0220.f4a3f.9...@mx.google.com/ Arnd Bergmann (4): arch: consolidate existing CONFIG_PAGE_SIZE_*KB definitions arch: simplify architecture specific page size configuration arch: define CONFIG_PAGE_SIZE_*KB on all architectures vdso: avoid including asm/page.h arch/Kconfig | 58 -- arch/alpha/Kconfig | 1 + arch/alpha/include/asm/page.h | 2 +- arch/arc/Kconfig | 3 ++ arch/arc/include/uapi/asm/page.h | 6 ++-- arch/arm/Kconfig | 1 + arch/arm/include/asm/page.h| 2 +- arch/arm64/Kconfig | 29 +++ arch/arm64/include/asm/page-def.h | 2 +- arch/csky/Kconfig | 1 + arch/csky/include/asm/page.h | 2 +- arch/hexagon/Kconfig | 25 +++-- arch/hexagon/include/asm/page.h| 6 +--- arch/loongarch/Kconfig | 21 --- arch/loongarch/include/asm/page.h | 10 +- arch/m68k/Kconfig | 3 ++ arch/m68k/Kconfig.cpu | 2 ++ arch/m68k/include/asm/page.h | 6 +--- arch/microblaze/Kconfig| 1 + arch/microblaze/include/asm/page.h | 2 +- arch/mips/Kconfig | 58 +++--- arch/mips/include/asm/page.h | 16 + arch/nios2/Kconfig | 1 + arch/nios2/include/asm/page.h | 2 +- arch/openrisc/Kconfig | 1 + arch/openrisc/include/asm/page.h | 2 +- arch/parisc/Kconfig| 3 ++ arch/parisc/include/asm/page.h | 10 +- arch/powerpc/Kconfig | 31 arch/powerpc/include/asm/page.h| 2 +- arch/riscv/Kconfig | 1 + arch/riscv/include/asm/page.h | 2 +- arch/s390/Kconfig | 1 + arch/s390/include/asm/page.h | 2 +- 
arch/sh/include/asm/page.h | 13 +-- arch/sh/mm/Kconfig | 42 +++--- arch/sparc/Kconfig | 2 ++ arch/sparc/include/asm/page_32.h | 2 +- arch/sparc/include/asm/page_64.h | 3 +- arch/um/Kconfig| 1 + arch/um/include/asm/page.h | 2 +- arch/x86/Kconfig | 1 + arch/x86/include/asm/page_types.h | 2 +- arch/xtensa/Kconfig| 1 + arch/xtensa/include/asm/page.h | 2 +- include/vdso/datapage.h| 4 +-- scripts/gdb/linux/constants.py.in | 2 +- scripts/gdb/linux/mm.py| 2 +- 48 files changed, 153 insertions(+), 241 deletions(-) -- 2.39.2 To: Thomas Gleixner To: Vincenzo Frascino To: Kees Cook To: Anna-Maria Behnsen Cc: Matt Turner Cc: Vineet Gupta Cc: Russell King Cc: Catalin Marinas Cc: Guo Ren Cc: Brian Cain Cc: Huacai Chen Cc: Geert Uytterhoeven Cc: Michal Simek Cc: Thomas Bogendoerfer Cc: Helge Deller Cc: Michael Ellerman Cc: Christophe Leroy Cc: Palmer Dabbelt Cc: John Paul Adrian Glaubitz Cc: Andreas Larsson Cc: Richard Weinberger Cc: x...@kernel.org Cc: Max Filippov Cc: Andy Lutomirski Cc: Vincenzo Frascino Cc: Jan Kiszka Cc: Kieran Bingham Cc: Andrew Morton Cc: Arnd Bergmann Cc: linux-ker...@vger.kernel.org Cc: linux-alpha@vger.kernel.org Cc: linux-snps-...@lists.infradead.org Cc: linux-arm-ker...@lists.infradead.org Cc: linux-c...@vger.kernel.org Cc: linux-hexa...@vger.kernel.org Cc: loonga...@lists.linux.dev Cc: linux-m...@lists.linux-m68k.org Cc: linux-m...@vger.kernel.org Cc: linux-openr...@vger.kernel.org Cc: linux-par...@vger.kernel.org Cc: linuxppc-...@lists.ozlabs.org Cc: linux-ri...@lists.infradead.org Cc: linux-s...@vger.kernel.org Cc: linux...@vger.kernel.org Cc: sparcli...@vger.kernel.org Cc: linux...@lists.infradead.org
[PATCH 2/2] rtc/alpha: remove legacy rtc driver
The old drivers/char/rtc.c driver was originally the implementation for x86 PCs but got subsequently replaced by the rtc class driver on all architectures except alpha. Move alpha over to the portable driver and remove the old one for good. The CONFIG_JS_RTC option was only ever used on SPARC32 but has not been available for many years, this was used to build the same rtc driver with a different module name. Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Cc: linux-alpha@vger.kernel.org Cc: Paul Gortmaker Signed-off-by: Arnd Bergmann --- This was last discussed in early 2018 in https://lore.kernel.org/lkml/CAK8P3a0QZNY+K+V1HG056xCerz=_l2jh5ufz+2lwkdqkw5z...@mail.gmail.com/ Nobody ever replied there, so let's try this instead. If there is any reason to keep the driver after all, please let us know. --- arch/alpha/configs/defconfig |3 +- drivers/char/Kconfig | 56 -- drivers/char/Makefile|4 - drivers/char/rtc.c | 1311 -- 4 files changed, 2 insertions(+), 1372 deletions(-) delete mode 100644 drivers/char/rtc.c diff --git a/arch/alpha/configs/defconfig b/arch/alpha/configs/defconfig index f4ec420d7f2d..e10c1be3c0d1 100644 --- a/arch/alpha/configs/defconfig +++ b/arch/alpha/configs/defconfig @@ -53,7 +53,8 @@ CONFIG_NET_PCI=y CONFIG_YELLOWFIN=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y -CONFIG_RTC=y +CONFIG_RTC_CLASS=y +CONFIG_RTC_DRV_CMOS=y CONFIG_EXT2_FS=y CONFIG_REISERFS_FS=m CONFIG_ISO9660_FS=y diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index dabbf3f519c6..c2ac4f257c82 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -243,62 +243,6 @@ config NVRAM To compile this driver as a module, choose M here: the module will be called nvram. -# -# These legacy RTC drivers just cause too many conflicts with the generic -# RTC framework ... let's not even try to coexist any more. 
-# -if RTC_LIB=n - -config RTC - tristate "Enhanced Real Time Clock Support (legacy PC RTC driver)" - depends on ALPHA - ---help--- - If you say Y here and create a character special file /dev/rtc with - major number 10 and minor number 135 using mknod ("man mknod"), you - will get access to the real time clock (or hardware clock) built - into your computer. - - Every PC has such a clock built in. It can be used to generate - signals from as low as 1Hz up to 8192Hz, and can also be used - as a 24 hour alarm. It reports status information via the file - /proc/driver/rtc and its behaviour is set by various ioctls on - /dev/rtc. - - If you run Linux on a multiprocessor machine and said Y to - "Symmetric Multi Processing" above, you should say Y here to read - and set the RTC in an SMP compatible fashion. - - If you think you have a use for such a device (such as periodic data - sampling), then say Y here, and read - for details. - - To compile this driver as a module, choose M here: the - module will be called rtc. - -config JS_RTC - tristate "Enhanced Real Time Clock Support" - depends on SPARC32 && PCI - ---help--- - If you say Y here and create a character special file /dev/rtc with - major number 10 and minor number 135 using mknod ("man mknod"), you - will get access to the real time clock (or hardware clock) built - into your computer. - - Every PC has such a clock built in. It can be used to generate - signals from as low as 1Hz up to 8192Hz, and can also be used - as a 24 hour alarm. It reports status information via the file - /proc/driver/rtc and its behaviour is set by various ioctls on - /dev/rtc. - - If you think you have a use for such a device (such as periodic data - sampling), then say Y here, and read - for details. - - To compile this driver as a module, choose M here: the - module will be called js-rtc. 
- -endif # RTC_LIB - config DTLK tristate "Double Talk PC internal speech card support" depends on ISA diff --git a/drivers/char/Makefile b/drivers/char/Makefile index abe3138b1f5a..ffce287ef415 100644 --- a/drivers/char/Makefile +++ b/drivers/char/Makefile @@ -20,7 +20,6 @@ obj-$(CONFIG_APM_EMULATION) += apm-emulation.o obj-$(CONFIG_DTLK) += dtlk.o obj-$(CONFIG_APPLICOM) += applicom.o obj-$(CONFIG_SONYPI) += sonypi.o -obj-$(CONFIG_RTC) += rtc.o obj-$(CONFIG_HPET) += hpet.o obj-$(CONFIG_XILINX_HWICAP)+= xilinx_hwicap/ obj-$(CONFIG_NVRAM)+= nvram.o @@ -45,9 +44,6 @@ obj-$(CONFIG_TCG_TPM) += tpm/ obj-$(CONFIG_PS3_FLASH)+= ps3flash.o -obj-$(CONFIG_JS_RTC) += js-rtc.o -js-rtc-y = rtc.o - obj-$(
Re: [PATCH v9 08/10] open: openat2(2) syscall
On Thu, Jul 18, 2019 at 6:12 PM Aleksa Sarai wrote: > On 2019-07-18, Arnd Bergmann wrote: > > On Sat, Jul 6, 2019 at 5:00 PM Aleksa Sarai wrote: > > > > In fact, that seems similar enough to the existing openat() that I think > > you could also just add the fifth argument to the existing call when > > a newly defined flag is set, similarly to how we only use the 'mode' > > argument when O_CREAT or O_TMPFILE are set. > > I considered doing this (and even had a preliminary version of it), but > I discovered that I was not in favour of this idea -- once I started to > write tests using it -- for a few reasons: > > 1. It doesn't really allow for clean extension for a future 6th > argument (because you are using up O_* flags to signify "use the > next argument", and O_* flags don't give -EINVAL if they're > unknown). Now, yes you can do the on-start runtime check that > everyone does -- but I've never really liked having to do it. > > Having reserved padding for later extensions (that is actually > checked and gives -EINVAL) matches more modern syscall designs. > > 2. I really was hoping that the variadic openat(2) could be done away > using this union setup (Linus said he didn't like it, and suggested > using something like 'struct stat' as an argument for openat(2) -- > though personally I am not sure I would personally like to use an > interface like that). > > 3. In order to avoid wasting a syscall argument for mode/mask you need > to either have something like your suggested mode_mask (which makes > the syscall arguments less consistent) or have some sort of > mode-like argument that is treated specially (which is really awful > on multiple levels -- this one I also tried and even wrote my > original tests using). And in both cases, the shims for > open{,at}(2) are somewhat less clean. These are all good reasons, thanks for providing the background. > All of that being said, I'd be happy to switch to whatever you think > makes the most sense. 
As long as it's possible to get an O_PATH with > RESOLVE_IN_ROOT set, I'm happy. I don't feel I should be in charge of making the decision. I'd still prefer avoiding the indirect argument structure because 4. it's inconsistent with most other syscalls 5. you get the same problem with seccomp and strace that clone3() has -- these and others only track the register arguments by default. 6. copying the structure adds a small overhead compared to passing registers 7. the calling conventions may be inconvenient for a user space library, so you end up with different prototypes for the low-level syscall and the libc abstraction. I don't see any of the above seven points as a showstopper either way, so I hope someone else has a strong opinion and can make the decision easier for you. In the meantime just keep what you have, so you don't have to change it multiple times. Arnd
Re: [PATCH v9 08/10] open: openat2(2) syscall
On Sat, Jul 6, 2019 at 5:00 PM Aleksa Sarai wrote: > diff --git a/arch/alpha/kernel/syscalls/syscall.tbl > b/arch/alpha/kernel/syscalls/syscall.tbl > index 9e7704e44f6d..1703d048c141 100644 > --- a/arch/alpha/kernel/syscalls/syscall.tbl > +++ b/arch/alpha/kernel/syscalls/syscall.tbl > @@ -461,6 +461,7 @@ > 530common getegid sys_getegid > 531common geteuid sys_geteuid > 532common getppid sys_getppid > +533common openat2 sys_openat2 > # all other architectures have common numbers for new syscall, alpha > # is the exception. > 534common pidfd_send_signal sys_pidfd_send_signal My plan here was to add new syscalls in the same order as everywhere else, just with the number 110 higher. In the long run, I hope we can automate this. > diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl > index aaf479a9e92d..4ad262698396 100644 > --- a/arch/arm/tools/syscall.tbl > +++ b/arch/arm/tools/syscall.tbl > @@ -447,3 +447,4 @@ > 431common fsconfigsys_fsconfig > 432common fsmount sys_fsmount > 433common fspick sys_fspick > +434common openat2 sys_openat2 434 is already used in linux-next, I suggest you use 437 (Palmer just submitted fchmodat4, which could become 436). > +/** > + * Arguments for how openat2(2) should open the target path. If @extra is > zero, > + * then openat2(2) is identical to openat(2). > + * > + * @flags: O_* flags (unknown flags ignored). > + * @mode: O_CREAT file mode (ignored otherwise). > + * @upgrade_mask: restrict how the O_PATH may be re-opened (ignored > otherwise). > + * @resolve: RESOLVE_* flags (-EINVAL on unknown flags). > + * @reserved: reserved for future extensions, must be zeroed. 
> + */ > +struct open_how { > + __u32 flags; > + union { > + __u16 mode; > + __u16 upgrade_mask; > + }; > + __u16 resolve; > + __u64 reserved[7]; /* must be zeroed */ > +}; We can have system calls with up to six arguments on all architectures, so this could still be done more conventionally without the indirection: like long openat2(int dfd, const char __user * filename, int flags, mode_t mode_mask, __u16 resolve); In fact, that seems similar enough to the existing openat() that I think you could also just add the fifth argument to the existing call when a newly defined flag is set, similarly to how we only use the 'mode' argument when O_CREAT or O_TMPFILE are set. > --- a/include/linux/syscalls.h > +++ b/include/linux/syscalls.h This file seems to lack a declaration for the system call, which means it will cause a build failure on some architectures, e.g. arch/arc/kernel/sys.c: #define __SYSCALL(nr, call) [nr] = (call), void *sys_call_table[NR_syscalls] = { [0 ... NR_syscalls-1] = sys_ni_syscall, #include }; Arnd
Re: [PATCH v1 1/2] open: add close_range()
On Thu, May 23, 2019 at 6:33 PM Christian Brauner wrote: > On Thu, May 23, 2019 at 07:22:17PM +0300, Konstantin Khlebnikov wrote: > > On 22.05.2019 18:52, Christian Brauner wrote:> This adds the close_range() > > syscall. It allows to efficiently close a range > > > 22 files changed, 100 insertions(+), 9 deletions(-) > > > > > > > It would be better to split arch/ wiring into separate patch for better > > readability. > > Ok. You mean only do x86 - seems to be the standard - and then move the > others into a separate patch? Doesn't seem worth to have a patch > per-arch, I'd think. I think I would prefer the first patch to just add the call without wiring it up anywhere, and a second patch to add it on all architectures including x86. Arnd
Re: [PATCH v2 1/2] pid: add pidfd_open()
On Mon, May 20, 2019 at 4:48 PM Christian Brauner wrote: > > On Mon, May 20, 2019 at 04:37:03PM +0200, Arnd Bergmann wrote: > > On Mon, May 20, 2019 at 3:46 PM Christian Brauner > > wrote: > > > > > > In line with Arnd's recent changes to consolidate syscall numbers across > > > architectures, I have added the pidfd_open() syscall to all architectures > > > at the same time. > > > > Thanks! I've checked that the ones you have added are all > > done correctly. However, double-checking that you got all of them, > > I noticed that you missed mips-o32 and mips-n64. With those added: > > > > Acked-by: Arnd Bergmann > > Perfect, will plumb mips-o32 and mips-n64 and resend once more with your > ack added. > Sidenote: You plan on merging the common syscall tables or will there be > a script to do this work per-arch in the future? David Howells also asked about this. I think having a common table will be best in the long run, patches welcome. As you noticed, there are still a few minor differences between the files on mips, arm, x86, alpha and s390, and we are missing the .tbl files for arm-compat and asm-generic, as well as an architecture independent script. Once that is all taken care of, we can move the entries for syscall 403 and higher into a common file, and change the script to pick up the contents from there in addition to the architecture specific file. Arnd
Re: [PATCH v2 1/2] pid: add pidfd_open()
On Mon, May 20, 2019 at 3:46 PM Christian Brauner wrote: > > In line with Arnd's recent changes to consolidate syscall numbers across > architectures, I have added the pidfd_open() syscall to all architectures > at the same time. Thanks! I've checked that the ones you have added are all done correctly. However, double-checking that you got all of them, I noticed that you missed mips-o32 and mips-n64. With those added: Acked-by: Arnd Bergmann
Re: [GIT PULL 1/4] ARM: SoC platform updates
On Thu, May 16, 2019 at 5:34 PM Linus Torvalds wrote: > > On Wed, May 15, 2019 at 11:43 PM Olof Johansson wrote: > > > > SoC updates, mostly refactorings and cleanups of old legacy platforms. > > Major themes this release: > > Hmm. This brings in a new warning: > > drivers/clocksource/timer-ixp4xx.c:78:20: warning: > ‘ixp4xx_read_sched_clock’ defined but not used [-Wunused-function] > > because that drivers is enabled for build testing, but that function > is only used under > > #ifdef CONFIG_ARM > sched_clock_register(ixp4xx_read_sched_clock, 32, timer_freq); > #endif > > It's not clear why that #ifdef is there. This driver only builds > non-ARM when COMPILE_TEST is enabled, and that #ifdef actually breaks > that build test. > > I'm going to remove that #ifdef in my merge, because I do *not* want > to see new warnings, and it doesn't seem to make any sense. > > Maybe that's the wrong resolution, please holler and let me know if > you want something else. As far as I can tell, that is the best fix, thanks for the cleanup! Arnd
[PATCH net-next 3/3] net: socket: implement 64-bit timestamps
The 'timeval' and 'timespec' data structures used for socket timestamps are going to be redefined in user space based on 64-bit time_t in future versions of the C library to deal with the y2038 overflow problem, which breaks the ABI definition. Unlike many modern ioctl commands, SIOCGSTAMP and SIOCGSTAMPNS do not use the _IOR() macro to encode the size of the transferred data, so it remains ambiguous whether the application uses the old or new layout. The best workaround I could find is rather ugly: we redefine the command code based on the size of the respective data structure with a ternary operator. This lets it get evaluated as late as possible, hopefully after that structure is visible to the caller. We cannot use an #ifdef here, because linux/sockios.h might have been included before any libc header that could determine the size of time_t. The ioctl implementation now interprets the new command codes as always referring to the 64-bit structure on all architectures, while the old architecture specific command code still refers to the old architecture specific layout. The new command number is only used when they are actually different. 
Signed-off-by: Arnd Bergmann --- arch/alpha/include/uapi/asm/sockios.h | 4 ++-- arch/mips/include/uapi/asm/sockios.h | 4 ++-- arch/sh/include/uapi/asm/sockios.h | 5 +++-- arch/xtensa/include/uapi/asm/sockios.h | 4 ++-- include/uapi/asm-generic/sockios.h | 4 ++-- include/uapi/linux/sockios.h | 21 + net/socket.c | 24 ++-- 7 files changed, 50 insertions(+), 16 deletions(-) diff --git a/arch/alpha/include/uapi/asm/sockios.h b/arch/alpha/include/uapi/asm/sockios.h index ba287e4b01bf..af92bc27c3be 100644 --- a/arch/alpha/include/uapi/asm/sockios.h +++ b/arch/alpha/include/uapi/asm/sockios.h @@ -11,7 +11,7 @@ #define SIOCSPGRP _IOW('s', 8, pid_t) #define SIOCGPGRP _IOR('s', 9, pid_t) -#define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */ -#define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */ +#define SIOCGSTAMP_OLD 0x8906 /* Get stamp (timeval) */ +#define SIOCGSTAMPNS_OLD 0x8907/* Get stamp (timespec) */ #endif /* _ASM_ALPHA_SOCKIOS_H */ diff --git a/arch/mips/include/uapi/asm/sockios.h b/arch/mips/include/uapi/asm/sockios.h index 5b40a88593fa..66f60234f290 100644 --- a/arch/mips/include/uapi/asm/sockios.h +++ b/arch/mips/include/uapi/asm/sockios.h @@ -21,7 +21,7 @@ #define SIOCSPGRP _IOW('s', 8, pid_t) #define SIOCGPGRP _IOR('s', 9, pid_t) -#define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */ -#define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */ +#define SIOCGSTAMP_OLD 0x8906 /* Get stamp (timeval) */ +#define SIOCGSTAMPNS_OLD 0x8907/* Get stamp (timespec) */ #endif /* _ASM_SOCKIOS_H */ diff --git a/arch/sh/include/uapi/asm/sockios.h b/arch/sh/include/uapi/asm/sockios.h index 17313d2c3527..ef18a668456d 100644 --- a/arch/sh/include/uapi/asm/sockios.h +++ b/arch/sh/include/uapi/asm/sockios.h @@ -10,6 +10,7 @@ #define SIOCSPGRP _IOW('s', 8, pid_t) #define SIOCGPGRP _IOR('s', 9, pid_t) -#define SIOCGSTAMP _IOR('s', 100, struct timeval) /* Get stamp (timeval) */ -#define SIOCGSTAMPNS _IOR('s', 101, struct timespec) /* Get stamp (timespec) */ +#define SIOCGSTAMP_OLD _IOR('s', 
100, struct timeval) /* Get stamp (timeval) */ +#define SIOCGSTAMPNS_OLD _IOR('s', 101, struct timespec) /* Get stamp (timespec) */ + #endif /* __ASM_SH_SOCKIOS_H */ diff --git a/arch/xtensa/include/uapi/asm/sockios.h b/arch/xtensa/include/uapi/asm/sockios.h index fb8ac3607189..1a1f58f4b75a 100644 --- a/arch/xtensa/include/uapi/asm/sockios.h +++ b/arch/xtensa/include/uapi/asm/sockios.h @@ -26,7 +26,7 @@ #define SIOCSPGRP _IOW('s', 8, pid_t) #define SIOCGPGRP _IOR('s', 9, pid_t) -#define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */ -#define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */ +#define SIOCGSTAMP_OLD 0x8906 /* Get stamp (timeval) */ +#define SIOCGSTAMPNS_OLD 0x8907/* Get stamp (timespec) */ #endif /* _XTENSA_SOCKIOS_H */ diff --git a/include/uapi/asm-generic/sockios.h b/include/uapi/asm-generic/sockios.h index 64f658c7cec2..44fa3ed70483 100644 --- a/include/uapi/asm-generic/sockios.h +++ b/include/uapi/asm-generic/sockios.h @@ -8,7 +8,7 @@ #define FIOGETOWN 0x8903 #define SIOCGPGRP 0x8904 #define SIOCATMARK 0x8905 -#define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */ -#define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */ +#define SIOCGSTAMP_OLD 0x8906 /* Get stamp (timeval) */ +#define SIOCGSTAMPNS_OLD 0x8907/* Get stamp (timespec) */ #endif /* __ASM_GENERIC_SOCKIOS_H */ diff --git a/include/uapi/linux/sockios.h b/include/uapi/linux/sockios.h index d393e9ed3964..7d1bccbbef78 100644 --- a/include/uapi/linux
[PATCH] [v2] arch: add pidfd and io_uring syscalls everywhere
Add the io_uring and pidfd_send_signal system calls to all architectures. These system calls are designed to handle both native and compat tasks, so all entries are the same across architectures, only arm-compat and the generic table still use an old format. Acked-by: Michael Ellerman (powerpc) Acked-by: Heiko Carstens (s390) Acked-by: Geert Uytterhoeven Signed-off-by: Arnd Bergmann --- Changes since v1: - fix s390 table - use 'n64' tag in mips-n64 instead of common. --- arch/alpha/kernel/syscalls/syscall.tbl | 4 arch/arm/tools/syscall.tbl | 4 arch/arm64/include/asm/unistd.h | 2 +- arch/arm64/include/asm/unistd32.h | 8 arch/ia64/kernel/syscalls/syscall.tbl | 4 arch/m68k/kernel/syscalls/syscall.tbl | 4 arch/microblaze/kernel/syscalls/syscall.tbl | 4 arch/mips/kernel/syscalls/syscall_n32.tbl | 4 arch/mips/kernel/syscalls/syscall_n64.tbl | 4 arch/mips/kernel/syscalls/syscall_o32.tbl | 4 arch/parisc/kernel/syscalls/syscall.tbl | 4 arch/powerpc/kernel/syscalls/syscall.tbl| 4 arch/s390/kernel/syscalls/syscall.tbl | 4 arch/sh/kernel/syscalls/syscall.tbl | 4 arch/sparc/kernel/syscalls/syscall.tbl | 4 arch/xtensa/kernel/syscalls/syscall.tbl | 4 16 files changed, 65 insertions(+), 1 deletion(-) diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index 63ed39cbd3bd..165f268beafc 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -463,3 +463,7 @@ 532common getppid sys_getppid # all other architectures have common numbers for new syscall, alpha # is the exception. 
+534common pidfd_send_signal sys_pidfd_send_signal +535common io_uring_setup sys_io_uring_setup +536common io_uring_enter sys_io_uring_enter +537common io_uring_register sys_io_uring_register diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index 9016f4081bb9..0393917eaa57 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -437,3 +437,7 @@ 421common rt_sigtimedwait_time64 sys_rt_sigtimedwait 422common futex_time64sys_futex 423common sched_rr_get_interval_time64sys_sched_rr_get_interval +424common pidfd_send_signal sys_pidfd_send_signal +425common io_uring_setup sys_io_uring_setup +426common io_uring_enter sys_io_uring_enter +427common io_uring_register sys_io_uring_register diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index 310d8f1cae7a..c6946fe640e6 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -49,7 +49,7 @@ #define __ARM_NR_compat_set_tls(__ARM_NR_COMPAT_BASE + 5) #define __ARM_NR_COMPAT_END(__ARM_NR_COMPAT_BASE + 0x800) -#define __NR_compat_syscalls 424 +#define __NR_compat_syscalls 428 #endif #define __ARCH_WANT_SYS_CLONE diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 5590f2623690..23f1a44acada 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -866,6 +866,14 @@ __SYSCALL(__NR_rt_sigtimedwait_time64, compat_sys_rt_sigtimedwait_time64) __SYSCALL(__NR_futex_time64, sys_futex) #define __NR_sched_rr_get_interval_time64 423 __SYSCALL(__NR_sched_rr_get_interval_time64, sys_sched_rr_get_interval) +#define __NR_pidfd_send_signal 424 +__SYSCALL(__NR_pidfd_send_signal, sys_pidfd_send_signal) +#define __NR_io_uring_setup 425 +__SYSCALL(__NR_io_uring_setup, sys_io_uring_setup) +#define __NR_io_uring_enter 426 +__SYSCALL(__NR_io_uring_enter, sys_io_uring_enter) +#define __NR_io_uring_register 427 +__SYSCALL(__NR_io_uring_register, sys_io_uring_register) /* * Please add new 
compat syscalls above this comment and update diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl index ab9cda5f6136..56e3d0b685e1 100644 --- a/arch/ia64/kernel/syscalls/syscall.tbl +++ b/arch/ia64/kernel/syscalls/syscall.tbl @@ -344,3 +344,7 @@ 332common pkey_free sys_pkey_free 333common rseqsys_rseq # 334 through 423 are reserved to sync up with other architectures +424common pidfd_send_signal sys_pidfd_send_signal +425common io_uring_setup sys_io_uring_setup +426common io_uring_enter sys_io_uring_enter +427common io_uring_register sys_io_uring_register diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl index 125c14178979..df4ec3ec71d1 100644 --- a/arch/m68k/kernel/syscalls/syscall.tbl
Re: [PATCH 2/2] arch: add pidfd and io_uring syscalls everywhere
On Sun, Mar 31, 2019 at 5:47 PM Michael Ellerman wrote: > > Arnd Bergmann writes: > > Add the io_uring and pidfd_send_signal system calls to all architectures. > > > > These system calls are designed to handle both native and compat tasks, > > so all entries are the same across architectures, only arm-compat and > > the generic tale still use an old format. > > > > Signed-off-by: Arnd Bergmann > > --- > > arch/alpha/kernel/syscalls/syscall.tbl | 4 > > arch/arm/tools/syscall.tbl | 4 > > arch/arm64/include/asm/unistd.h | 2 +- > > arch/arm64/include/asm/unistd32.h | 8 > > arch/ia64/kernel/syscalls/syscall.tbl | 4 > > arch/m68k/kernel/syscalls/syscall.tbl | 4 > > arch/microblaze/kernel/syscalls/syscall.tbl | 4 > > arch/mips/kernel/syscalls/syscall_n32.tbl | 4 > > arch/mips/kernel/syscalls/syscall_n64.tbl | 4 > > arch/mips/kernel/syscalls/syscall_o32.tbl | 4 > > arch/parisc/kernel/syscalls/syscall.tbl | 4 > > arch/powerpc/kernel/syscalls/syscall.tbl| 4 > > Have you done any testing? > > I'd rather not wire up syscalls that have never been tested at all on > powerpc. No, I have not. I did review the system calls carefully and added the first patch to fix the bug on x86 compat mode before adding the same bug on the other compat architectures though ;-) Generally, my feeling is that adding system calls is not fundamentally different from adding other ABIs, and we should really do it at the same time across all architectures, rather than waiting for each maintainer to get around to reviewing and testing the new calls first. This is not a problem on powerpc, but a lot of other architectures are less active, which is how we have always ended up with different sets of system calls across architectures. The problem here is that this makes it harder for the C library to know when a system call is guaranteed to be available. 
glibc still needs a feature test for newly added syscalls to see if they are working (they might be backported to an older kernel, or disabled), but whenever the minimum kernel version is increased, it makes sense to drop those checks and assume non-optional system calls will work if they were part of that minimum version. In the future, I'd hope that any new system calls get added right away on all architectures when they land (it was a bit tricky this time, because I still did a bunch of reworks that conflicted with the new calls). Bugs will happen of course, but I think adding them sooner makes it more likely to catch those bugs early on so we have a chance to fix them properly, and need fewer arch specific workarounds (ideally none) for system calls. Arnd
Re: [PATCH 2/2] arch: add pidfd and io_uring syscalls everywhere
On Mon, Mar 25, 2019 at 6:37 PM Paul Burton wrote: > On Mon, Mar 25, 2019 at 03:47:37PM +0100, Arnd Bergmann wrote: > > Add the io_uring and pidfd_send_signal system calls to all architectures. > > > > These system calls are designed to handle both native and compat tasks, > > so all entries are the same across architectures, only arm-compat and > > the generic tale still use an old format. > > > > Signed-off-by: Arnd Bergmann > > --- > >% > > diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl > > b/arch/mips/kernel/syscalls/syscall_n64.tbl > > index c85502e67b44..c4a49f7d57bb 100644 > > --- a/arch/mips/kernel/syscalls/syscall_n64.tbl > > +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl > > @@ -338,3 +338,7 @@ > > 327 n64 rseqsys_rseq > > 328 n64 io_pgetevents sys_io_pgetevents > > # 329 through 423 are reserved to sync up with other architectures > > +424 common pidfd_send_signal sys_pidfd_send_signal > > +425 common io_uring_setup sys_io_uring_setup > > +426 common io_uring_enter sys_io_uring_enter > > +427 common io_uring_register sys_io_uring_register > > Shouldn't these declare the ABI as "n64"? > > I don't see anywhere that it would actually change the generated code, > but a comment at the top of the file says that every entry should use > "n64" and so far they all do. Did you have something else in mind here? You are right, the use of 'common' here is unintentional but harmless, and I should have used 'n64' here. We may decide to do things differently in the future, i.e. we could have just a single global file for newly added system calls once it turns out that the tables are consistent across all architectures, but I'd probably go on with the separate identical entries for a bit before changing that. Arnd
[PATCH 2/2] arch: add pidfd and io_uring syscalls everywhere
Add the io_uring and pidfd_send_signal system calls to all architectures. These system calls are designed to handle both native and compat tasks, so all entries are the same across architectures, only arm-compat and the generic tale still use an old format. Signed-off-by: Arnd Bergmann --- arch/alpha/kernel/syscalls/syscall.tbl | 4 arch/arm/tools/syscall.tbl | 4 arch/arm64/include/asm/unistd.h | 2 +- arch/arm64/include/asm/unistd32.h | 8 arch/ia64/kernel/syscalls/syscall.tbl | 4 arch/m68k/kernel/syscalls/syscall.tbl | 4 arch/microblaze/kernel/syscalls/syscall.tbl | 4 arch/mips/kernel/syscalls/syscall_n32.tbl | 4 arch/mips/kernel/syscalls/syscall_n64.tbl | 4 arch/mips/kernel/syscalls/syscall_o32.tbl | 4 arch/parisc/kernel/syscalls/syscall.tbl | 4 arch/powerpc/kernel/syscalls/syscall.tbl| 4 arch/s390/kernel/syscalls/syscall.tbl | 4 arch/sh/kernel/syscalls/syscall.tbl | 4 arch/sparc/kernel/syscalls/syscall.tbl | 4 arch/xtensa/kernel/syscalls/syscall.tbl | 4 16 files changed, 65 insertions(+), 1 deletion(-) diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index 63ed39cbd3bd..165f268beafc 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -463,3 +463,7 @@ 532common getppid sys_getppid # all other architectures have common numbers for new syscall, alpha # is the exception. 
+534common pidfd_send_signal sys_pidfd_send_signal +535common io_uring_setup sys_io_uring_setup +536common io_uring_enter sys_io_uring_enter +537common io_uring_register sys_io_uring_register diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index 9016f4081bb9..0393917eaa57 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -437,3 +437,7 @@ 421common rt_sigtimedwait_time64 sys_rt_sigtimedwait 422common futex_time64sys_futex 423common sched_rr_get_interval_time64sys_sched_rr_get_interval +424common pidfd_send_signal sys_pidfd_send_signal +425common io_uring_setup sys_io_uring_setup +426common io_uring_enter sys_io_uring_enter +427common io_uring_register sys_io_uring_register diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index 310d8f1cae7a..c6946fe640e6 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -49,7 +49,7 @@ #define __ARM_NR_compat_set_tls(__ARM_NR_COMPAT_BASE + 5) #define __ARM_NR_COMPAT_END(__ARM_NR_COMPAT_BASE + 0x800) -#define __NR_compat_syscalls 424 +#define __NR_compat_syscalls 428 #endif #define __ARCH_WANT_SYS_CLONE diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 5590f2623690..23f1a44acada 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -866,6 +866,14 @@ __SYSCALL(__NR_rt_sigtimedwait_time64, compat_sys_rt_sigtimedwait_time64) __SYSCALL(__NR_futex_time64, sys_futex) #define __NR_sched_rr_get_interval_time64 423 __SYSCALL(__NR_sched_rr_get_interval_time64, sys_sched_rr_get_interval) +#define __NR_pidfd_send_signal 424 +__SYSCALL(__NR_pidfd_send_signal, sys_pidfd_send_signal) +#define __NR_io_uring_setup 425 +__SYSCALL(__NR_io_uring_setup, sys_io_uring_setup) +#define __NR_io_uring_enter 426 +__SYSCALL(__NR_io_uring_enter, sys_io_uring_enter) +#define __NR_io_uring_register 427 +__SYSCALL(__NR_io_uring_register, sys_io_uring_register) /* * Please add new 
compat syscalls above this comment and update diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl index ab9cda5f6136..56e3d0b685e1 100644 --- a/arch/ia64/kernel/syscalls/syscall.tbl +++ b/arch/ia64/kernel/syscalls/syscall.tbl @@ -344,3 +344,7 @@ 332common pkey_free sys_pkey_free 333common rseqsys_rseq # 334 through 423 are reserved to sync up with other architectures +424common pidfd_send_signal sys_pidfd_send_signal +425common io_uring_setup sys_io_uring_setup +426common io_uring_enter sys_io_uring_enter +427common io_uring_register sys_io_uring_register diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl index 125c14178979..df4ec3ec71d1 100644 --- a/arch/m68k/kernel/syscalls/syscall.tbl +++ b/arch/m68k/kernel/syscalls/syscall.tbl @@ -423,3 +423,7 @@ 421common rt_sigtimedwait_time64 sys_rt_sigtimedwait 422common futex_time64sys_futex
Re: [PATCH] y2038: fix socket.h header inclusion
On Mon, Mar 18, 2019 at 2:12 PM Florian Weimer wrote: > > On Mon, Mar 18, 2019 at 10:25 AM Florian Weimer wrote: > >> > >> * Arnd Bergmann: > >> > >> > Should we just remove __kernel_fd_set from the exported headers and > >> > define the internal fd_set directly in include/linux/types.h? (Adding the > >> > folks from the old thread to Cc). > >> > >> The type is used in the sanitizers, but incorrectly. They assume that > >> FD_SETSIZE is always 1024. (The existence of __kernel_fd_set is > >> itself somewhat questionable because it leads to such bugs.) > >> Moving around the type could cause a build failure in the sanitizers, but > >> I'm > >> not entirely clear how the UAPI headers are included there. > > > > It looks like sanitizer_platform_limits_posix.cc includes > > linux/posix_types.h to ensure that __kernel_fd_set is the same > > size as __sanitizer___kernel_fd_set, and then it uses the > > latter afterwards. > > > > What I don't see here is what kind of operation is actually done > > on the data, I only see a cast to void. > > I think it is used to assert that the select family of system calls > writes to the 1024 bits for each of the passed pointers. Yes, that is what I expected to see in libsanitizer, I just couldn't find any code that actually does this check. > Which is not actually true—the write size is controlled by the > file descriptor count argument. Yes, of course. In fact, I see multiple possible problems: - kernel reading uninitialized data if 'FD_ZERO()' was used with a shorter size than the count argument. - kernel writing beyond the fd_set data on stack when the declaration had a shorter size than the count argument. Each one could happen either because __FD_SETSIZE is smaller than 'count', or because kernel and user space disagree on the element size (32 vs 64 bit on x32). 
> > If libsanitizer actually does > > anything interesting here, we should definitely fix it to use the > > correct size, especially since this is actually something that > > can trigger a buffer overflow in subtle ways when used carelessly. > > See for example [1], which we still have not addressed > > The footnote is missing. Sorry, I meant [1] https://patchwork.kernel.org/patch/10245053/ > > For this specific use (and probably others like it), renaming the > > fds_bits member to __kernel_fds_bits or something like that > > would keep user space still compiling. That would only break > > if someone was using __kernel_fd_set, and actually doing > > bit operations on it. glibc uses '__fds_bits' unless __USE_XOPEN > > is set, so maybe we should use use that name unconditionally. > > Please use something that is more obviously Linux-specific. Ok, so not '__fds_bits'. Is '__kernel_fds_bits' ok? I would prefer to keep at least the name __kernel_ namespace that we have for typedefs and the occasional struct tag. Arnd
Re: [PATCH] y2038: fix socket.h header inclusion
On Sun, Mar 17, 2019 at 7:20 PM Deepa Dinamani wrote: > On Fri, Mar 15, 2019 at 2:20 PM Florian Weimer wrote: > > > On Thu, Mar 14, 2019 at 7:41 PM Florian Weimer wrote: > > >> > diff --git a/arch/alpha/include/uapi/asm/socket.h > > >> > b/arch/alpha/include/uapi/asm/socket.h > > >> > index 0d0fddb7e738..976e89b116e5 100644 > > >> > --- a/arch/alpha/include/uapi/asm/socket.h > > >> > +++ b/arch/alpha/include/uapi/asm/socket.h > > >> > @@ -2,8 +2,8 @@ > > >> > #ifndef _UAPI_ASM_SOCKET_H > > >> > #define _UAPI_ASM_SOCKET_H > > >> > > > >> > +#include > > >> > #include > > >> > -#include > > >> > > >> This breaks POSIX conformance in glibc because the > > >> header is not namespace clean. It contains the > > >> identifiers fds_bits and val: > > >> > > >> unsigned long fds_bits[__FD_SETSIZE / (8 * sizeof(long))]; > > >> > > >> int val[2]; > > > > > > What is problematic about the struct members here? I had thought that > > > only the struct names have to be in a namespace to be usable here, > > > but not the members. > > > > According POSIX, a user can do this: > > > > #define fds_bits 1024 > > > > before including the header file. Similarly for val. > > > > Since glibc pulls in indirectly, the result is a parse > > error, even though the programmer did nothing wrong (fds_bits is not > > an identifier used by POSIX, nor is it in the implementation > > namespace, ans is a POSIX header). Ok, I see. Thanks for the explanation! > > > We could use asm/posix_types.h instead of linux/posix_types.h, > > > would that address your concern? > > > > It should fix the fds_bits case, I think. But > > still uses val, so that part of the issue > > remains. > > Would moving kernel namespace types(__kernel prefix) to a different > header file(kernel_types.h?) and then including this from > linux/posix_types.h. > And, for socket.h just including kernel_types.h make sense? 
I fear we have considered linux/posix_types.h to be something that can be included anywhere for a long time, so it may be better to ensure that this is actually the case, and avoid the problem with those two structures but leave the rest untouched. I think we can move __kernel_fsid_t into include/uapi/asm-generic/statfs.h, which is the only thing that needs it anyway. We have two definitions of it today, the non-generic one being for mips32, but incidentally there was a patch the other day to remove that and use the generic one instead. With that done, we can change asm/socket.h to just use asm/posix_types.h. I would still prefer to solve the problem for linux/posix_types.h as well, but I'm not sure even how __kernel_fd_set is used today in user space, if at all. Commit 8ded2bbc1845 ("posix_types.h: Cleanup stale __NFDBITS and related definitions") removed most of the fd_set definition after a long discussion [1], and since then it has been basically impossible to use 'struct fd_set' from the kernel in a meaningful way without including the libc headers or duplicating them. Should we just remove __kernel_fd_set from the exported headers and define the internal fd_set directly in include/linux/types.h? (Adding the folks from the old thread to Cc). Arnd [1] https://lore.kernel.org/lkml/20120724181209.ga10...@zod.bos.redhat.com/t/
Re: [PATCH] y2038: fix socket.h header inclusion
On Thu, Mar 14, 2019 at 7:41 PM Florian Weimer wrote: > > * Arnd Bergmann: > > > diff --git a/arch/alpha/include/uapi/asm/socket.h > > b/arch/alpha/include/uapi/asm/socket.h > > index 0d0fddb7e738..976e89b116e5 100644 > > --- a/arch/alpha/include/uapi/asm/socket.h > > +++ b/arch/alpha/include/uapi/asm/socket.h > > @@ -2,8 +2,8 @@ > > #ifndef _UAPI_ASM_SOCKET_H > > #define _UAPI_ASM_SOCKET_H > > > > +#include > > #include > > -#include > > This breaks POSIX conformance in glibc because the > header is not namespace clean. It contains the > identifiers fds_bits and val: > > unsigned long fds_bits[__FD_SETSIZE / (8 * sizeof(long))]; > > int val[2]; What is problematic about the struct members here? I had thought that only the struct names have to be in a namespace to be usable here, but not the members. The only part that might be problematic is #undef __FD_SETSIZE #define __FD_SETSIZE 1024 but we already get that from a number of other inclusions of linux/posix_types.h. Is this what you mean? > We could duplicate some of the SO_* constants for POSIX mode in glibc, > but it would be nice to avoid that. > > Is there a different way of fixing this on the kernel side that avoids > including ? We could use asm/posix_types.h instead of linux/posix_types.h, would that address your concern? Arnd
Re: [PATCH] x86: Deprecate a.out support
On Mon, Mar 11, 2019 at 8:47 PM Måns Rullgård wrote: > Linus Torvalds writes: > > On Mon, Mar 11, 2019 at 11:08 AM Måns Rullgård wrote: > >> > >> The latest version I have is 5.1, and that uses ECOFF. > > > > ECOFF _is_ a.out as far as Linux is concerned. > > > > So Linux basically treats ECOFF as "regular a.out with just some > > header extensions". > > > > We don't have any specific support for ECOFF. > > > > I _think_. Again, it's been years and years. > > Right, which is why killing a.out entirely would have the unfortunate > effect of also removing the OSF/1 compatibility on Alpha. > > If we are to support Alpha as an architecture at all, it makes sense to > support the things people actually use it for. > > Now, personally I can live without it. I just don't like to see > features removed without due consideration. The main historic use case I've heard of was running Netscape Navigator on Alpha Linux, before there was an open source version. Doing this today to connect to the open internet is probably a bit pointless, but there may be other use cases. Looking at the system call table in the kernel (arch/alpha/kernel/syscalls/syscall.tbl), we seem to support a specific subset that was required for a set of applications, and not much more. Old system calls (osf_old_open, osf_execve, osf_old_sigaction) are listed but not implemented, and the same is true for most of the later calls (osf_fuser, osf_sigsendset, osf_waitid, osf_signal, ...), just the ones in the middle are there. This would also indicate that it never really worked as a general-purpose emulation layer but was only there for a specific set of applications. Another data point I have is that osf1 emulation was broken between linux-4.13 and linux-4.16 without anyone noticing, see 47669fb6b595 ("alpha: osf_sys.c: fix put_tv32 regression"). Arnd
[PATCH] y2038: fix socket.h header inclusion
Referencing the __kernel_long_t type caused some user space applications to stop compiling when they had not already included linux/posix_types.h, e.g. s/multicast.c -o ext/sockets/multicast.lo In file included from /builddir/build/BUILD/php-7.3.3/main/php.h:468, from /builddir/build/BUILD/php-7.3.3/ext/sockets/sockets.c:27: /builddir/build/BUILD/php-7.3.3/ext/sockets/sockets.c: In function 'zm_startup_sockets': /builddir/build/BUILD/php-7.3.3/ext/sockets/sockets.c:776:40: error: '__kernel_long_t' undeclared (first use in this function) 776 | REGISTER_LONG_CONSTANT("SO_SNDTIMEO", SO_SNDTIMEO, CONST_CS | CONST_PERSISTENT); It is safe to include that header here, since it only contains kernel internal types that do not conflict with other user space types. It's still possible that some related build failures remain, but those are likely to be for code that is not already y2038 safe. Reported-by: Laura Abbott Fixes: a9beb86ae6e5 ("sock: Add SO_RCVTIMEO_NEW and SO_SNDTIMEO_NEW") Signed-off-by: Arnd Bergmann --- arch/alpha/include/uapi/asm/socket.h | 2 +- arch/mips/include/uapi/asm/socket.h | 2 +- arch/parisc/include/uapi/asm/socket.h | 2 +- arch/sparc/include/uapi/asm/socket.h | 2 +- include/uapi/asm-generic/socket.h | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h index 0d0fddb7e738..976e89b116e5 100644 --- a/arch/alpha/include/uapi/asm/socket.h +++ b/arch/alpha/include/uapi/asm/socket.h @@ -2,8 +2,8 @@ #ifndef _UAPI_ASM_SOCKET_H #define _UAPI_ASM_SOCKET_H +#include #include -#include /* For setsockopt(2) */ /* diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h index eb9f33f8a8b3..d41765cfbc6e 100644 --- a/arch/mips/include/uapi/asm/socket.h +++ b/arch/mips/include/uapi/asm/socket.h @@ -10,8 +10,8 @@ #ifndef _UAPI_ASM_SOCKET_H #define _UAPI_ASM_SOCKET_H +#include #include -#include /* * For setsockopt(2) diff --git 
a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h index 16e428f03526..66c5dd245ac7 100644 --- a/arch/parisc/include/uapi/asm/socket.h +++ b/arch/parisc/include/uapi/asm/socket.h @@ -2,8 +2,8 @@ #ifndef _UAPI_ASM_SOCKET_H #define _UAPI_ASM_SOCKET_H +#include #include -#include /* For setsockopt(2) */ #define SOL_SOCKET 0x diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h index 88fe4f978aca..9265a9eece15 100644 --- a/arch/sparc/include/uapi/asm/socket.h +++ b/arch/sparc/include/uapi/asm/socket.h @@ -2,8 +2,8 @@ #ifndef _ASM_SOCKET_H #define _ASM_SOCKET_H +#include #include -#include /* For setsockopt(2) */ #define SOL_SOCKET 0x diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index c8b430cb6dc4..8c1391c89171 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -2,8 +2,8 @@ #ifndef __ASM_GENERIC_SOCKET_H #define __ASM_GENERIC_SOCKET_H +#include #include -#include /* For setsockopt(2) */ #define SOL_SOCKET 1 -- 2.20.0
Re: [PATCH] x86: Deprecate a.out support
On Sun, Mar 10, 2019 at 11:46 PM Linus Torvalds wrote: > > On Sun, Mar 10, 2019 at 2:37 PM Matt Turner wrote: > > > > I'm not aware of a reason to keep a.out support on alpha. > > Hmm. I was looking at removing a.out support entirely, but it's > actually fairly incestuous on alpha. > > For example, arch/alpha/boot/tools/objstrip.c very much has some a.out > support in it. Maybe it can just be removed entirely. > > There's also an a.out.h include in arch/alpha/kernel/binfmt_loader.c. > > Finally, note that CONFIG_OSF4_COMPAT also no longer makes sense > without a.out support. > > So this attached patch does not compile on alpha, but it's been many > many years since I had an alpha to test with, so I'm stuck. > > Matt, can you fill in the details and complete this patch? I wonder if we could remove the osf time32 compat code as well, this was one of the areas that kept causing problems with the y2038 rework. (I think it's all good now, but it's never been tested as far as I can tell). For some syscalls (e.g. brk, mmap, getxuid, ...) we definitely need to keep the osf1 version, since it is the only supported ABI. I just looked up some really old source trees and found that glibc-2.1 was the first release to use 64-bit time_t the way we do it today, as implemented in [1], so all Debian and SuSE releases for alpha had it, but any ELF binaries built on Red Hat Linux 4.x and 5.x (released 1996 through 1998) or earlier would use 32-bit time_t osf1 syscalls. Red Hat 2.x and 3.x were a.out based on alpha. Arnd [1] https://repo.or.cz/glibc/history.git/commitdiff/64819b5c3a94e81e4
Re: [PATCH] add delay between port write and port read
On Fri, Mar 1, 2019 at 8:19 PM Linus Torvalds wrote: > > On Fri, Mar 1, 2019 at 11:13 AM Maciej W. Rozycki > wrote: > > > > What do we do WRT straight-through vs byte-swapping properties of these > > accessors? > > I think the whole point of __raw_xyz() is that it's the lowest level > model. It gives you relaxed ordering (together with the ioremap > model), and it gives you straight-through behavior. > > And yes, any driver using them needs to be aware of the byte ordering, > which may or may not be the same as regular memory, and may or may not > be the same as other devices. > > So __raw_xyz() is very much for low-level drivers that know what they > are doing. Caveat user. > > "If it breaks, you get to keep both pieces" I agree in principle, but I think we already have a lot of precedent for __raw_xyz() being relied on to have a specific behavior in architecture independent drivers, and I think it makes sense for architectures to provide that. Specifically, I think we need __raw_xyz() to do the same as xyz() on all little-endian kernels regarding byte ordering (not barriers), and I would expect it to provide the same ordering and addressing as swabX(xyz()) on big-endian kernels. Without that, using __raw_xyz() to copy between RAM and buffers in PCI memory space is broken, as you said, but the assumption would be broken on certain older machines that do a hardware endian swap by swizzling the address lines rather than swapping bytes on the data bus. The best idea I have for working around this is to never rely on __raw_xyz() to not do byte swapping in platform specific drivers with CPU-endian MMIO space, but to have a platform specific set of wrappers around the normal I/O functions, and make __raw_xyz() just do whatever we expect them to do on PCI devices. Arnd
Re: [PATCH] add delay between port write and port read
On Tue, Feb 19, 2019 at 2:44 PM Mikulas Patocka wrote: > On Tue, 19 Feb 2019, Mikulas Patocka wrote: > > > The patches cd0e00c106722eca40b38ebf11cf134c01901086 and > > 92d7223a74235054f2aa7227d207d9c57f84dca0 fix a theoretical issue where the > > code didn't follow the specification. Unfortunatelly, they also reduce > > timing when port write is followed by a port read. > > > > These reduced timing cause hang on boot on the Avanti platform when > > probing serial ports. This patch adds memory barrier after the outb, outw, > > outl functions, so that there is delay between port write and subsequent > > port read - just like before. > > > > Fixes: cd0e00c10672 ("alpha: io: reorder barriers to guarantee writeX() and > > iowriteX() ordering") > > Cc: sta...@vger.kernel.org# v4.17+ > > you can also add: > > Tested-by: Mikulas Patocka Acked-by: Arnd Bergmann but I notice you are missing Signed-off-by. We clearly need this patch, but I assumed the alpha maintainers would pick it up, not me. I merged the original changes since they were cross-architecture, but I don't normally take patches for a particular architecture through the asm-generic tree (or the soc tree for that matter). Arnd
Re: [PATCH v2 29/29] y2038: add 64-bit time_t syscalls to all 32-bit architectures
On Fri, Jan 18, 2019 at 7:50 PM Andy Lutomirski wrote: > On Fri, Jan 18, 2019 at 8:25 AM Arnd Bergmann wrote: > > I have a patch that I'll send soon to make x32 use its own table. As > far as I'm concerned, 547 is *it*. 548 is just a normal number and is > not special. But let's please not reuse 512..547 for other purposes > on x86 variants -- that way lies even more confusion, IMO. (trimming Cc list, as this is getting a little off-topic for most) Just so I understand: do you mean duplicating the .tbl file, or just the resulting table of entry points? Either way, how will that work with the new io_uring_setup() system call that will have to use the compat entry point? Are you planning to use the same syscall number as x86_64 but point it to the compat function, or do we still need a new syscall number for x32 in the regular range? Arnd
Re: [PATCH v2 29/29] y2038: add 64-bit time_t syscalls to all 32-bit architectures
On Mon, Jan 21, 2019 at 6:08 PM Arnd Bergmann wrote: > On Mon, Jan 21, 2019 at 9:19 AM Geert Uytterhoeven > wrote: > > Regardless, I'm wondering what to do with the holes marked "room for > > arch specific calls". > > When is a syscall really arch-specific, and can it be added there, and > > when does it turn out (later) that it isn't, breaking the > > synchronization again? > > We've had a bit of that already, with cacheflush(), which exists on > a couple of architectures, including some that use the first > 'arch specific' slot (244) of the asm-generic table. I think this > will be rare enough that we can figure out a solution when we > get there. > > > The pkey syscalls may be a bad example, as AFAIU they can be implemented > > on some architectures, but not on some others. Still, I had skipped them > > when adding new syscalls to m68k. > > > > Perhaps we should get rid of the notion of "arch-specific syscalls", and > > reserve a slot everywhere anyway? > > I don't mind calling the hole something else if that helps. Out of > principle I would already assume that anything we add for x86 > or the generic table should be added everywhere, but we can > make it broader than that. 
Applying this fixup below, Arnd diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index d9c2d2eea044..955ab6a3b61f 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -398,7 +398,7 @@ 384i386arch_prctl sys_arch_prctl __ia32_compat_sys_arch_prctl 385i386io_pgetevents sys_io_pgetevents_time32 __ia32_compat_sys_io_pgetevents 386i386rseqsys_rseq __ia32_sys_rseq -# room for arch specific syscalls +# don't use numbers 387 through 392, add new calls at the end 393i386semget sys_semget __ia32_sys_semget 394i386semctl sys_semctl __ia32_compat_sys_semctl 395i386shmget sys_shmget __ia32_sys_shmget diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 43a622aec07e..2ae92fddb6d5 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -343,6 +343,8 @@ 332common statx __x64_sys_statx 333common io_pgetevents __x64_sys_io_pgetevents 334common rseq__x64_sys_rseq +# don't use numbers 387 through 423, add new calls after the last +# 'common' entry # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 53831e4a4c86..acf9a07ab2ff 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -740,7 +740,7 @@ __SC_COMP_3264(__NR_io_pgetevents, sys_io_pgetevents_time32, sys_io_pgetevents, __SYSCALL(__NR_rseq, sys_rseq) #define __NR_kexec_file_load 294 __SYSCALL(__NR_kexec_file_load, sys_kexec_file_load) -/* 295 through 402 are unassigned to sync up with generic numbers */ +/* 295 through 402 are unassigned to sync up with generic numbers, don't use */ #if __BITS_PER_LONG == 32 #define __NR_clock_gettime64 403 __SYSCALL(__NR_clock_gettime64, sys_clock_gettime)
Re: [PATCH v2 14/29] arch: add pkey and rseq syscall numbers everywhere
On Mon, Jan 21, 2019 at 9:56 AM Geert Uytterhoeven wrote: > > Note that all architectures that already define pkey syscalls, list > pkey_mprotect first. It's easy enough to change, so I've reordered them for consistency now. > Regardless, for m68k: > Acked-by: Geert Uytterhoeven Thanks, Arnd
Re: [PATCH v2 29/29] y2038: add 64-bit time_t syscalls to all 32-bit architectures
On Mon, Jan 21, 2019 at 9:19 AM Geert Uytterhoeven wrote: > On Sat, Jan 19, 2019 at 3:29 PM Russell King - ARM Linux admin > wrote: > > On Fri, Jan 18, 2019 at 11:53:25AM -0800, Andy Lutomirski wrote: > > > On Fri, Jan 18, 2019 at 11:33 AM Arnd Bergmann wrote: > > > > On Fri, Jan 18, 2019 at 7:50 PM Andy Lutomirski wrote: > > > > > > Can we perhaps just start the consistent numbers above 547 or maybe > > > block out 512..547 in the new regime? > > > > I don't think you gain much with that kind of scheme - it won't take > > very long before an architecture misses having a syscall added, and > > then someone else adds their own. Been there with ARM - I was keeping > > the syscall table in the same order as x86 for new syscalls, but now > > Same for m68k, and probably other architectures. > > > that others have been adding syscalls to the table since I converted > > ARM to the tabular form, that's now gone out the window. > > > > So, I think it's completely pointless to do what you're suggesting. > > We'll just end up with a big hole in the middle of the syscall table > > and then revert back to random numbering of syscalls thereafter again. > > I believe the plan is to add future syscalls for all architectures in a > single commit, to keep everything in sync. Yes, that is the idea. This was not realistic before, since each one of the old architectures had its own way of describing the system call tables, and many needed a different set of quirks. Since (almost) everything is now converted to the syscall.tbl format, we have removed all obsolete architectures, and a lot of the quirks (x32, spu, s390-31) won't matter as much in the future, I think it is now possible to do it. We could even extend scripts/checksyscalls.sh to warn if a new syscall above 423 is not added to all 16 tables at the same time. > Regardless, I'm wondering what to do with the holes marked "room for > arch specific calls". 
> When is a syscall really arch-specific, and can it be added there, and > when does it turn out (later) that it isn't, breaking the > synchronization again? We've had a bit of that already, with cacheflush(), which exists on a couple of architectures, including some that use the first 'arch specific' slot (244) of the asm-generic table. I think this will be rare enough that we can figure out a solution when we get there. > The pkey syscalls may be a bad example, as AFAIU they can be implemented > on some architectures, but not on some others. Still, I had skipped them > when adding new syscalls to m68k. > > Perhaps we should get rid of the notion of "arch-specific syscalls", and > reserve a slot everywhere anyway? I don't mind calling the hole something else if that helps. Out of principle I would already assume that anything we add for x86 or the generic table should be added everywhere, but we can make it broader than that. Arnd
Re: [PATCH v2 29/29] y2038: add 64-bit time_t syscalls to all 32-bit architectures
On Fri, Jan 18, 2019 at 5:25 PM Arnd Bergmann wrote: > > This adds 21 new system calls on each ABI that has 32-bit time_t > today. All of these have the exact same semantics as their existing > counterparts, and the new ones all have macro names that end in 'time64' > for clarification. > > This gets us to the point of being able to safely use a C library > that has 64-bit time_t in user space. There are still a couple of > loose ends to tie up in various areas of the code, but this is the > big one, and should be entirely uncontroversial at this point. I've successfully tested this with musl and LTP now, using an i386 kernel. The musl port I used is at https://git.linaro.org/people/arnd.bergmann/musl-y2038.git/ This is just an updated version of what I used for testing last year, using the current syscall assignment, and going back to the time32 versions of getitimer/setitimer and wait4/waitid/getrusage. It's certainly not intended for merging like this, but a proper musl port is under discussion now, and this should be sufficient if anyone else wants to try out the new syscall ABI before we merge it. The LTP I have is heavily hacked, and has a number of failures resulting from differences between musl and glibc, or from the way we convert between the kernel types and the user space types. The testing found one minor bug in all the kernel syscall tables: > +418common mq_timedsend_time64 sys_mq_timedsend > +419common mq_timedreceiv_time64 sys_mq_timedreceive While this would have fit in with umount(), creat() and mknod(), it was unintentional, and I've changed it back to mq_timedreceive_time64 (with an added 'e'). Arnd
Re: [PATCH v2 29/29] y2038: add 64-bit time_t syscalls to all 32-bit architectures
On Fri, Jan 18, 2019 at 8:53 PM Andy Lutomirski wrote: > I think we have two issues if we reuse those numbers for new syscalls. > First, I'd really like to see new syscalls be numbered consistently > everywhere, or at least on all x86 variants, and we can't on x32 > because they mean something else. Perhaps more importantly, due to > what is arguably a rather severe bug, issuing a native x86_64 syscall > (x32 bit clear) with nr in the range 512..547 does *not* return > -ENOSYS on a kernel with x32 enabled. Instead it does something that > is somewhat arbitrary. With my patch applied, it will return -ENOSYS, > but old kernels will still exist, and this will break syscall probing. > > Can we perhaps just start the consistent numbers above 547 or maybe > block out 512..547 in the new regime? I'm definitely fine with not reusing them ever, and jumping from 511 to 548 when we get there on all architectures, if you think that helps. While we could also jump to 548 *now*, I think that would be a bit wasteful. Syscall numbers are fairly cheap, but not entirely free, especially when you consider architectures like mips that have an upper bound of 1000 syscalls before they have to get inventive. Arnd
Re: [PATCH v2 29/29] y2038: add 64-bit time_t syscalls to all 32-bit architectures
On Fri, Jan 18, 2019 at 7:50 PM Andy Lutomirski wrote: > On Fri, Jan 18, 2019 at 8:25 AM Arnd Bergmann wrote: > > - Once we get to 512, we clash with the x32 numbers (unless > > we remove x32 support first), and probably have to skip > > a few more. I also considered using the 512..547 space > > for 32-bit-only calls (which never clash with x32), but > > that also seems to add a bit of complexity. > > I have a patch that I'll send soon to make x32 use its own table. As > far as I'm concerned, 547 is *it*. 548 is just a normal number and is > not special. But let's please not reuse 512..547 for other purposes > on x86 variants -- that way lies even more confusion, IMO. Fair enough, the space for those numbers is cheap enough here. I take it you mean we also should not reuse that number space if we were to decide to remove x32 soon, but you are not worried about clashing with arch/alpha when everything else uses consistent numbers? Arnd
Re: [PATCH v2 13/29] arch: add split IPC system calls where needed
On Fri, Jan 18, 2019 at 6:20 PM Gabriel Paubert wrote: > > On Fri, Jan 18, 2019 at 05:18:19PM +0100, Arnd Bergmann wrote: > > The IPC system call handling is highly inconsistent across architectures, > > some use sys_ipc, some use separate calls, and some use both. We also > > have some architectures that require passing IPC_64 in the flags, and > > others that set it implicitly. > > > > For the additon of a y2083 safe semtimedop() system call, I chose to only > > It's not critical, but there are two typos in that line: > additon -> addition > 2083 -> 2038 Fixed both, thanks! Arnd
Re: [PATCH v2 00/29] y2038: add time64 syscalls
On Fri, Jan 18, 2019 at 5:57 PM Dennis Clarke wrote: > > On 1/18/19 11:18 AM, Arnd Bergmann wrote: > > This is a minor update of the patches I posted last week, I > > would like to add this into linux-next now, but would still do > > changes if there are concerns about the contents. The first > > version did not see a lot of replies, which could mean that > > either everyone is happy with it, or that it was largely ignored. > > > > See also the article at https://lwn.net/Articles/776435/. > > I would be happy to read "Approaching the kernel year-2038 end game" > however it is behind a pay wall. Perhaps it may be best to just > host interesting articles about open source idea elsewhere. It's a short summary of the current state. You can also find a video and slides from my ELC presentation online for a little more context. Generally speaking, I'd recommend paying for the subscription to lwn.net to anyone interested in the kernel, but it should become visible to everyone within the next day (a week after the initial publication). In the meantime, you can find the article at https://lwn.net/SubscriberLink/776435/a59d93d01d1addfc/. 
Finally, I've made a list of the remaining work that Deepa and I are planning to still continue (this should be mostly complete but may be missing a few things): syscalls - merge big series for 5.1, to allow time64 syscalls - waitid/wait4/getrusage should get a replacement based on __kernel_timespec - getitimer/setitimer should probably follow getrusage - vdso, waiting for consolidation series from Vincenzo Frascino before adding time64 entry points file systems - range checks on timestamps - xfs - NFS - hfs/hfsplus - coda - hostfs - relatime_need_update drivers - media - alsa - sockets - af_packet - ppp ioctl - rtc ioctl - omap3isp core kernel - fix ELF core files (elfcore.h) - syscall Audit code (kernel/audit.c, kernel/auditsc.c) - make all time32 code conditional - remove include/linux/timekeeping32.h - remove compat_time* from time32.h - remove timeval - remove timespec - remove time_t Arnd
[PATCH v2 06/29] ARM: add migrate_pages() system call
The migrate_pages system call has an assigned number on all architectures except ARM. When it got added initially in commit d80ade7b3231 ("ARM: Fix warning: #warning syscall migrate_pages not implemented"), it was intentionally left out based on the observation that there are no 32-bit ARM NUMA systems. However, there are now arm64 NUMA machines that can in theory run 32-bit kernels (actually enabling NUMA there would require additional work) as well as 32-bit user space on 64-bit kernels, so that argument is no longer very strong. Assigning the number lets us use the system call on 64-bit kernels as well as providing a more consistent set of syscalls across architectures. Signed-off-by: Arnd Bergmann --- arch/arm/include/asm/unistd.h | 1 - arch/arm/tools/syscall.tbl| 1 + arch/arm64/include/asm/unistd.h | 2 +- arch/arm64/include/asm/unistd32.h | 2 ++ 4 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h index 88ef2ce1f69a..d713587dfcf4 100644 --- a/arch/arm/include/asm/unistd.h +++ b/arch/arm/include/asm/unistd.h @@ -45,7 +45,6 @@ * Unimplemented (or alternatively implemented) syscalls */ #define __IGNORE_fadvise64_64 -#define __IGNORE_migrate_pages #ifdef __ARM_EABI__ /* diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index 8edf93b4490f..86de9eb34296 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -414,3 +414,4 @@ 397common statx sys_statx 398common rseqsys_rseq 399common io_pgetevents sys_io_pgetevents +400common migrate_pages sys_migrate_pages diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index a7b1fc58ffdf..261216c3336e 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -44,7 +44,7 @@ #define __ARM_NR_compat_set_tls(__ARM_NR_COMPAT_BASE + 5) #define __ARM_NR_COMPAT_END(__ARM_NR_COMPAT_BASE + 0x800) -#define __NR_compat_syscalls 400 +#define __NR_compat_syscalls 401 #endif 
#define __ARCH_WANT_SYS_CLONE diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 04ee190b90fe..f15bcbacb8f6 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -821,6 +821,8 @@ __SYSCALL(__NR_statx, sys_statx) __SYSCALL(__NR_rseq, sys_rseq) #define __NR_io_pgetevents 399 __SYSCALL(__NR_io_pgetevents, compat_sys_io_pgetevents) +#define __NR_migrate_pages 400 +__SYSCALL(__NR_migrate_pages, compat_sys_migrate_pages) /* * Please add new compat syscalls above this comment and update -- 2.20.0
[PATCH v2 28/29] y2038: rename old time and utime syscalls
The time, stime, utime, utimes, and futimesat system calls are only used on older architectures, and we do not provide y2038 safe variants of them, as they are replaced by clock_gettime64, clock_settime64, and utimensat_time64. However, for consistency it seems better to have the 32-bit architectures that still use them call the "time32" entry points (leaving the traditional handlers for the 64-bit architectures), like we do for system calls that now require two versions. Note: We used to always define __ARCH_WANT_SYS_TIME and __ARCH_WANT_SYS_UTIME and only set __ARCH_WANT_COMPAT_SYS_TIME and __ARCH_WANT_SYS_UTIME32 for compat mode on 64-bit kernels. Now this is reversed: only 64-bit architectures set __ARCH_WANT_SYS_TIME/UTIME, while we need __ARCH_WANT_SYS_TIME32/UTIME32 for 32-bit architectures and compat mode. The resulting asm/unistd.h changes look a bit counterintuitive. This is only a cleanup patch and it should not change any behavior. Signed-off-by: Arnd Bergmann --- arch/arm/include/asm/unistd.h | 4 ++-- arch/arm/tools/syscall.tbl | 10 +- arch/m68k/include/asm/unistd.h | 4 ++-- arch/m68k/kernel/syscalls/syscall.tbl | 10 +- arch/microblaze/include/asm/unistd.h| 4 ++-- arch/microblaze/kernel/syscalls/syscall.tbl | 10 +- arch/mips/include/asm/unistd.h | 4 ++-- arch/mips/kernel/syscalls/syscall_o32.tbl | 10 +- arch/parisc/include/asm/unistd.h| 9 ++--- arch/parisc/kernel/syscalls/syscall.tbl | 15 ++- arch/powerpc/include/asm/unistd.h | 8 arch/powerpc/kernel/syscalls/syscall.tbl| 19 ++- arch/s390/include/asm/unistd.h | 2 +- arch/sh/include/asm/unistd.h| 4 ++-- arch/sh/kernel/syscalls/syscall.tbl | 10 +- arch/sparc/include/asm/unistd.h | 8 arch/sparc/kernel/syscalls/syscall.tbl | 14 +- arch/x86/entry/syscalls/syscall_32.tbl | 10 +- arch/x86/include/asm/unistd.h | 8 arch/xtensa/include/asm/unistd.h| 2 +- arch/xtensa/kernel/syscalls/syscall.tbl | 6 +++--- kernel/time/time.c | 4 ++-- 22 files changed, 98 insertions(+), 77 deletions(-) diff --git 
a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h index d713587dfcf4..7a39e77984ef 100644 --- a/arch/arm/include/asm/unistd.h +++ b/arch/arm/include/asm/unistd.h @@ -26,10 +26,10 @@ #define __ARCH_WANT_SYS_SIGPROCMASK #define __ARCH_WANT_SYS_OLD_MMAP #define __ARCH_WANT_SYS_OLD_SELECT -#define __ARCH_WANT_SYS_UTIME +#define __ARCH_WANT_SYS_UTIME32 #if !defined(CONFIG_AEABI) || defined(CONFIG_OABI_COMPAT) -#define __ARCH_WANT_SYS_TIME +#define __ARCH_WANT_SYS_TIME32 #define __ARCH_WANT_SYS_IPC #define __ARCH_WANT_SYS_OLDUMOUNT #define __ARCH_WANT_SYS_ALARM diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index 200f4b878a46..a96d9b5ee04e 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -24,7 +24,7 @@ 10 common unlink sys_unlink 11 common execve sys_execve 12 common chdir sys_chdir -13 oabitimesys_time +13 oabitimesys_time32 14 common mknod sys_mknod 15 common chmod sys_chmod 16 common lchown sys_lchown16 @@ -36,12 +36,12 @@ 22 oabiumount sys_oldumount 23 common setuid sys_setuid16 24 common getuid sys_getuid16 -25 oabistime sys_stime +25 oabistime sys_stime32 26 common ptrace sys_ptrace 27 oabialarm sys_alarm # 28 was sys_fstat 29 common pause sys_pause -30 oabiutime sys_utime +30 oabiutime sys_utime32 # 31 was sys_stty # 32 was sys_gtty 33 common access sys_access @@ -283,7 +283,7 @@ 266common statfs64sys_statfs64_wrapper 267common fstatfs64 sys_fstatfs64_wrapper 268common tgkill sys_tgkill -269common utimes sys_utimes +269common utimes sys_utimes_time32 270common arm_fadvise64_64sys_arm_fadvise64_64 271common pciconfig_iobasesys_pciconfig_iobase 272common pciconfig_read sys_pciconfig_read @@ -340,7 +340,7 @@ 323common mkdirat sys_mkdirat 324common mknodat sys_mknodat 325common fchownatsys_fchownat -326common futimesat sys_futimesat +326common futimesat sys_futimesat_time32 327common
[PATCH v2 21/29] sparc64: add custom adjtimex/clock_adjtime functions
sparc64 is the only architecture on Linux that has a 'timeval' definition with a 32-bit tv_usec but a 64-bit tv_sec. This causes problems for sparc32 compat mode when we convert it to use the new __kernel_timex type that has the same layout as all other 64-bit architectures. To avoid adding sparc64 specific code into the generic adjtimex implementation, this adds a wrapper in the sparc64 system call handling that converts the sparc64 'timex' into the new '__kernel_timex'. At this point, the two structures are defined to be identical, but that will change in the next step once we convert sparc32. Signed-off-by: Arnd Bergmann --- arch/sparc/kernel/sys_sparc_64.c | 59 +- arch/sparc/kernel/syscalls/syscall.tbl | 6 ++- include/linux/timex.h | 2 + kernel/time/posix-timers.c | 24 +-- 4 files changed, 76 insertions(+), 15 deletions(-) diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index 1c079e7bab09..37de18a11207 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -28,8 +28,9 @@ #include #include #include - +#include #include + #include #include @@ -544,6 +545,62 @@ SYSCALL_DEFINE2(getdomainname, char __user *, name, int, len) return err; } +SYSCALL_DEFINE1(sparc_adjtimex, struct timex __user *, txc_p) +{ + struct timex txc; /* Local copy of parameter */ + struct timex *kt = (void *) + int ret; + + /* Copy the user data space into the kernel copy +* structure. But bear in mind that the structures +* may change +*/ + if (copy_from_user(, txc_p, sizeof(struct timex))) + return -EFAULT; + + /* +* override for sparc64 specific timeval type: tv_usec +* is 32 bit wide instead of 64-bit in __kernel_timex +*/ + kt->time.tv_usec = txc.time.tv_usec; + ret = do_adjtimex(kt); + txc.time.tv_usec = kt->time.tv_usec; + + return copy_to_user(txc_p, , sizeof(struct timex)) ? 
-EFAULT : ret; +} + +SYSCALL_DEFINE2(sparc_clock_adjtime, const clockid_t, which_clock,struct timex __user *, txc_p) +{ + struct timex txc; /* Local copy of parameter */ + struct timex *kt = (void *) + int ret; + + if (!IS_ENABLED(CONFIG_POSIX_TIMERS)) { + pr_err_once("process %d (%s) attempted a POSIX timer syscall " + "while CONFIG_POSIX_TIMERS is not set\n", + current->pid, current->comm); + + return -ENOSYS; + } + + /* Copy the user data space into the kernel copy +* structure. But bear in mind that the structures +* may change +*/ + if (copy_from_user(, txc_p, sizeof(struct timex))) + return -EFAULT; + + /* +* override for sparc64 specific timeval type: tv_usec +* is 32 bit wide instead of 64-bit in __kernel_timex +*/ + kt->time.tv_usec = txc.time.tv_usec; + ret = do_clock_adjtime(which_clock, kt); + txc.time.tv_usec = kt->time.tv_usec; + + return copy_to_user(txc_p, , sizeof(struct timex)) ? -EFAULT : ret; +} + SYSCALL_DEFINE5(utrap_install, utrap_entry_t, type, utrap_handler_t, new_p, utrap_handler_t, new_d, utrap_handler_t __user *, old_p, diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index 24ebef675184..e70110375399 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -258,7 +258,8 @@ 21664 sigreturn sys_nis_syscall 217common clone sys_clone 218common ioprio_get sys_ioprio_get -219common adjtimexsys_adjtimex compat_sys_adjtimex +21932 adjtimexsys_adjtimex compat_sys_adjtimex +21964 adjtimexsys_sparc_adjtimex 22032 sigprocmask sys_sigprocmask compat_sys_sigprocmask 22064 sigprocmask sys_nis_syscall 221common create_module sys_ni_syscall @@ -377,7 +378,8 @@ 331common prlimit64 sys_prlimit64 332common name_to_handle_at sys_name_to_handle_at 333common open_by_handle_at sys_open_by_handle_at compat_sys_open_by_handle_at -334common clock_adjtime sys_clock_adjtime compat_sys_clock_adjtime +33432 clock_adjtime sys_clock_adjtime compat_sys_clock_adjtime +33464 
clock_adjtime sys_sparc_clock_adjtime 335common syncfs sys_syncfs 336common sendmmsgsys_sendmmsg compat_sys_sendmmsg 337common setns sys_setns diff -
[PATCH v2 23/29] timex: change syscalls to use struct __kernel_timex
From: Deepa Dinamani struct timex is not y2038 safe. Switch all the syscall apis to use y2038 safe __kernel_timex. Note that sys_adjtimex() does not have a y2038 safe solution. C libraries can implement it by calling clock_adjtime(CLOCK_REALTIME, ...). Signed-off-by: Deepa Dinamani Signed-off-by: Arnd Bergmann --- include/linux/syscalls.h | 6 +++--- kernel/time/posix-timers.c | 2 +- kernel/time/time.c | 4 +++- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index baa4b70b02d3..09330d5bda0c 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -54,7 +54,7 @@ struct __sysctl_args; struct sysinfo; struct timespec; struct timeval; -struct timex; +struct __kernel_timex; struct timezone; struct tms; struct utimbuf; @@ -695,7 +695,7 @@ asmlinkage long sys_gettimeofday(struct timeval __user *tv, struct timezone __user *tz); asmlinkage long sys_settimeofday(struct timeval __user *tv, struct timezone __user *tz); -asmlinkage long sys_adjtimex(struct timex __user *txc_p); +asmlinkage long sys_adjtimex(struct __kernel_timex __user *txc_p); /* kernel/timer.c */ asmlinkage long sys_getpid(void); @@ -870,7 +870,7 @@ asmlinkage long sys_open_by_handle_at(int mountdirfd, struct file_handle __user *handle, int flags); asmlinkage long sys_clock_adjtime(clockid_t which_clock, - struct timex __user *tx); + struct __kernel_timex __user *tx); asmlinkage long sys_syncfs(int fd); asmlinkage long sys_setns(int fd, int nstype); asmlinkage long sys_sendmmsg(int fd, struct mmsghdr __user *msg, diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 2d84b3db1ade..de79f85ae14f 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -1060,7 +1060,7 @@ int do_clock_adjtime(const clockid_t which_clock, struct __kernel_timex * ktx) } SYSCALL_DEFINE2(clock_adjtime, const clockid_t, which_clock, - struct timex __user *, utx) + struct __kernel_timex __user *, utx) { struct 
__kernel_timex ktx; int err; diff --git a/kernel/time/time.c b/kernel/time/time.c index d179d33f639a..78b5c8f1495a 100644 --- a/kernel/time/time.c +++ b/kernel/time/time.c @@ -263,7 +263,8 @@ COMPAT_SYSCALL_DEFINE2(settimeofday, struct old_timeval32 __user *, tv, } #endif -SYSCALL_DEFINE1(adjtimex, struct timex __user *, txc_p) +#if !defined(CONFIG_64BIT_TIME) || defined(CONFIG_64BIT) +SYSCALL_DEFINE1(adjtimex, struct __kernel_timex __user *, txc_p) { struct __kernel_timex txc; /* Local copy of parameter */ int ret; @@ -277,6 +278,7 @@ SYSCALL_DEFINE1(adjtimex, struct timex __user *, txc_p) ret = do_adjtimex(); return copy_to_user(txc_p, , sizeof(struct __kernel_timex)) ? -EFAULT : ret; } +#endif #ifdef CONFIG_COMPAT_32BIT_TIME int get_old_timex32(struct __kernel_timex *txc, const struct old_timex32 __user *utp) -- 2.20.0
[PATCH v2 02/29] ia64: add statx and io_pgetevents syscalls
All architectures should implement these two, so assign numbers and hook them up on ia64. Signed-off-by: Arnd Bergmann --- arch/ia64/kernel/syscalls/syscall.tbl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl index e97caf51be42..52585281205b 100644 --- a/arch/ia64/kernel/syscalls/syscall.tbl +++ b/arch/ia64/kernel/syscalls/syscall.tbl @@ -335,3 +335,5 @@ 323common copy_file_range sys_copy_file_range 324common preadv2 sys_preadv2 325common pwritev2sys_pwritev2 +326common statx sys_statx +327common io_pgetevents sys_io_pgetevents -- 2.20.0
[PATCH v2 03/29] ia64: assign syscall numbers for perf and seccomp
Most architectures have assigned numbers for both seccomp and perf_event_open, even when they do not implement either. ia64 is an exception here, so for consistency let's add numbers for both of them. Unless CONFIG_PERF_EVENTS and CONFIG_SECCOMP are implemented, the system calls just return -ENOSYS. Signed-off-by: Arnd Bergmann --- arch/ia64/kernel/syscalls/syscall.tbl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl index 52585281205b..2e93dbdcdb80 100644 --- a/arch/ia64/kernel/syscalls/syscall.tbl +++ b/arch/ia64/kernel/syscalls/syscall.tbl @@ -337,3 +337,5 @@ 325common pwritev2sys_pwritev2 326common statx sys_statx 327common io_pgetevents sys_io_pgetevents +328common perf_event_open sys_perf_event_open +329common seccomp sys_seccomp -- 2.20.0
[PATCH v2 29/29] y2038: add 64-bit time_t syscalls to all 32-bit architectures
This adds 21 new system calls on each ABI that has 32-bit time_t today. All of these have the exact same semantics as their existing counterparts, and the new ones all have macro names that end in 'time64' for clarification. This gets us to the point of being able to safely use a C library that has 64-bit time_t in user space. There are still a couple of loose ends to tie up in various areas of the code, but this is the big one, and should be entirely uncontroversial at this point. In particular, there are four system calls (getitimer, setitimer, waitid, and getrusage) that don't have a 64-bit counterpart yet, but these can all be safely implemented in the C library by wrapping around the existing system calls because the 32-bit time_t they pass only counts elapsed time, not time since the epoch. They will be dealt with later. Signed-off-by: Arnd Bergmann --- The one point that still needs to be agreed on is the actual number assignment. Following the earlier patch that added the sysv IPC calls with common numbers where possible, I also tried the same here, using consistent numbers on all 32-bit architectures. There are a couple of minor issues with this: - On asm-generic, we now leave the numbers from 295 to 402 unassigned, which wastes a small amount of kernel .data segment. Originally I had asm-generic start at 300 and everyone else start at 400 here, which was also not perfect, and we have gone beyond 400 already, so I ended up just using the same numbers as the rest here. - Once we get to 512, we clash with the x32 numbers (unless we remove x32 support first), and probably have to skip a few more. I also considered using the 512..547 space for 32-bit-only calls (which never clash with x32), but that also seems to add a bit of complexity. - On alpha, we have already used up the space up to 527 (with a small hole between 261 and 299). We could sync up with that as well, but my feeling was that alpha syscalls are already special enough that I don't care. 
Let me know if you have other ideas. --- arch/alpha/kernel/syscalls/syscall.tbl | 2 + arch/arm/tools/syscall.tbl | 21 ++ arch/arm64/include/asm/unistd.h | 2 +- arch/arm64/include/asm/unistd32.h | 41 +++ arch/ia64/kernel/syscalls/syscall.tbl | 1 + arch/m68k/kernel/syscalls/syscall.tbl | 20 + arch/microblaze/kernel/syscalls/syscall.tbl | 21 ++ arch/mips/kernel/syscalls/syscall_n32.tbl | 21 ++ arch/mips/kernel/syscalls/syscall_n64.tbl | 1 + arch/mips/kernel/syscalls/syscall_o32.tbl | 20 + arch/parisc/kernel/syscalls/syscall.tbl | 21 ++ arch/powerpc/kernel/syscalls/syscall.tbl| 20 + arch/s390/kernel/syscalls/syscall.tbl | 20 + arch/sh/kernel/syscalls/syscall.tbl | 20 + arch/sparc/kernel/syscalls/syscall.tbl | 20 + arch/x86/entry/syscalls/syscall_32.tbl | 20 + arch/xtensa/kernel/syscalls/syscall.tbl | 21 ++ include/uapi/asm-generic/unistd.h | 45 - scripts/checksyscalls.sh| 40 ++ 19 files changed, 375 insertions(+), 2 deletions(-) diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index 337b8108771a..936a33fae3c9 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -461,3 +461,5 @@ 530common getegid sys_getegid 531common geteuid sys_geteuid 532common getppid sys_getppid +# all other architectures have common numbers for new syscall, alpha +# is the exception. 
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index a96d9b5ee04e..286afdc43283 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -416,3 +416,24 @@ 399common io_pgetevents sys_io_pgetevents_time32 400common migrate_pages sys_migrate_pages 401common kexec_file_load sys_kexec_file_load +# 402 is unused +403common clock_gettime64 sys_clock_gettime +404common clock_settime64 sys_clock_settime +405common clock_adjtime64 sys_clock_adjtime +406common clock_getres_time64 sys_clock_getres +407common clock_nanosleep_time64 sys_clock_nanosleep +408common timer_gettime64 sys_timer_gettime +409common timer_settime64 sys_timer_settime +410common timerfd_gettime64 sys_timerfd_gettime +411common timerfd_settime64 sys_timerfd_settime +412common utimensat_time64sys_utimensat +413common pselect6_time64 sys_pselect6 +414common ppoll_time64sys_ppoll +416common io_pgetevents_time64
[PATCH v2 04/29] alpha: wire up io_pgetevents system call
The io_pgetevents system call was added in linux-4.18 but has no entry for alpha: warning: #warning syscall io_pgetevents not implemented [-Wcpp] Assign the next system call number here. Cc: sta...@vger.kernel.org Signed-off-by: Arnd Bergmann --- arch/alpha/kernel/syscalls/syscall.tbl | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index 7b56a53be5e3..e09558edae73 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -451,3 +451,4 @@ 520common preadv2 sys_preadv2 521common pwritev2sys_pwritev2 522common statx sys_statx +523common io_pgetevents sys_io_pgetevents -- 2.20.0
[PATCH v2 07/29] ARM: add kexec_file_load system call number
A couple of architectures including arm64 already implement the kexec_file_load system call, on many others we have assigned a system call number for it, but not implemented it yet. Adding the number in arch/arm/ lets us use the system call on arm64 systems in compat mode, and also reduces the number of differences between architectures. If we want to implement kexec_file_load on ARM in the future, the number assignment means that kexec tools can already be built with the now current set of kernel headers. Signed-off-by: Arnd Bergmann --- arch/arm/tools/syscall.tbl| 1 + arch/arm64/include/asm/unistd.h | 2 +- arch/arm64/include/asm/unistd32.h | 2 ++ 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index 86de9eb34296..20ed7e026723 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -415,3 +415,4 @@ 398common rseqsys_rseq 399common io_pgetevents sys_io_pgetevents 400common migrate_pages sys_migrate_pages +401common kexec_file_load sys_kexec_file_load diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index 261216c3336e..2c30e6f145ff 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -44,7 +44,7 @@ #define __ARM_NR_compat_set_tls(__ARM_NR_COMPAT_BASE + 5) #define __ARM_NR_COMPAT_END(__ARM_NR_COMPAT_BASE + 0x800) -#define __NR_compat_syscalls 401 +#define __NR_compat_syscalls 402 #endif #define __ARCH_WANT_SYS_CLONE diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index f15bcbacb8f6..8ca1d4c304f4 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -823,6 +823,8 @@ __SYSCALL(__NR_rseq, sys_rseq) __SYSCALL(__NR_io_pgetevents, compat_sys_io_pgetevents) #define __NR_migrate_pages 400 __SYSCALL(__NR_migrate_pages, compat_sys_migrate_pages) +#define __NR_kexec_file_load 401 +__SYSCALL(__NR_kexec_file_load, sys_kexec_file_load) /* * Please add new compat 
syscalls above this comment and update -- 2.20.0
[PATCH v2 10/29] sh: add statx system call
statx is available on almost all other architectures but got missed on sh, so add it now. Signed-off-by: Arnd Bergmann --- arch/sh/kernel/syscalls/syscall.tbl | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl index 21ec75288562..a70db013dbc7 100644 --- a/arch/sh/kernel/syscalls/syscall.tbl +++ b/arch/sh/kernel/syscalls/syscall.tbl @@ -390,3 +390,4 @@ 380common copy_file_range sys_copy_file_range 381common preadv2 sys_preadv2 382common pwritev2sys_pwritev2 +383common statx sys_statx -- 2.20.0
[PATCH v2 01/29] ia64: add __NR_umount2 definition
Other architectures commonly use __NR_umount2 for sys_umount, only ia64 and alpha use __NR_umount here. In order to synchronize the generated tables, use umount2 like everyone else, and add back the old name from asm/unistd.h for compatibility. The __IGNORE_* lines are now all obsolete and can be removed as a side-effect. Signed-off-by: Arnd Bergmann --- arch/ia64/include/asm/unistd.h| 14 -- arch/ia64/include/uapi/asm/unistd.h | 2 ++ arch/ia64/kernel/syscalls/syscall.tbl | 2 +- 3 files changed, 3 insertions(+), 15 deletions(-) diff --git a/arch/ia64/include/asm/unistd.h b/arch/ia64/include/asm/unistd.h index 0b08ebd2dfde..9ba6110b10b9 100644 --- a/arch/ia64/include/asm/unistd.h +++ b/arch/ia64/include/asm/unistd.h @@ -12,20 +12,6 @@ #define NR_syscalls__NR_syscalls /* length of syscall table */ -/* - * The following defines stop scripts/checksyscalls.sh from complaining about - * unimplemented system calls. Glibc provides for each of these by using - * more modern equivalent system calls. - */ -#define __IGNORE_fork /* clone() */ -#define __IGNORE_time /* gettimeofday() */ -#define __IGNORE_alarm /* setitimer(ITIMER_REAL, ... 
*/ -#define __IGNORE_pause /* rt_sigprocmask(), rt_sigsuspend() */ -#define __IGNORE_utime /* utimes() */ -#define __IGNORE_getpgrp /* getpgid() */ -#define __IGNORE_vfork /* clone() */ -#define __IGNORE_umount2 /* umount() */ - #define __ARCH_WANT_NEW_STAT #define __ARCH_WANT_SYS_UTIME diff --git a/arch/ia64/include/uapi/asm/unistd.h b/arch/ia64/include/uapi/asm/unistd.h index b2513922dcb5..013e0bcacc39 100644 --- a/arch/ia64/include/uapi/asm/unistd.h +++ b/arch/ia64/include/uapi/asm/unistd.h @@ -15,6 +15,8 @@ #define __NR_Linux 1024 +#define __NR_umount __NR_umount2 + #include #endif /* _UAPI_ASM_IA64_UNISTD_H */ diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl index b22203b40bfe..e97caf51be42 100644 --- a/arch/ia64/kernel/syscalls/syscall.tbl +++ b/arch/ia64/kernel/syscalls/syscall.tbl @@ -29,7 +29,7 @@ 17 common getpid sys_getpid 18 common getppid sys_getppid 19 common mount sys_mount -20 common umount sys_umount +20 common umount2 sys_umount 21 common setuid sys_setuid 22 common getuid sys_getuid 23 common geteuid sys_geteuid -- 2.20.0
[PATCH v2 08/29] m68k: assign syscall number for seccomp
Most architectures have assigned a number for the seccomp syscall even when they do not implement it. m68k is an exception here, so for consistency let's add the number. Unless CONFIG_SECCOMP is implemented, the system call just returns -ENOSYS. Signed-off-by: Arnd Bergmann --- arch/m68k/kernel/syscalls/syscall.tbl | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl index 1a95c4a1bc0d..85779d6ef935 100644 --- a/arch/m68k/kernel/syscalls/syscall.tbl +++ b/arch/m68k/kernel/syscalls/syscall.tbl @@ -387,3 +387,4 @@ 377common preadv2 sys_preadv2 378common pwritev2sys_pwritev2 379common statx sys_statx +380common seccomp sys_seccomp -- 2.20.0
[PATCH v2 14/29] arch: add pkey and rseq syscall numbers everywhere
Most architectures define system call numbers for the rseq and pkey system calls, even when they don't support the features, and perhaps never will. Only a few architectures are missing these, so just define them anyway for consistency. If we decide to add them later to one of these, the system call numbers won't get out of sync then. Signed-off-by: Arnd Bergmann --- arch/alpha/include/asm/unistd.h | 4 arch/alpha/kernel/syscalls/syscall.tbl | 4 arch/ia64/kernel/syscalls/syscall.tbl | 4 arch/m68k/kernel/syscalls/syscall.tbl | 4 arch/parisc/include/asm/unistd.h| 3 --- arch/parisc/kernel/syscalls/syscall.tbl | 4 arch/s390/include/asm/unistd.h | 3 --- arch/s390/kernel/syscalls/syscall.tbl | 3 +++ arch/sh/kernel/syscalls/syscall.tbl | 4 arch/sparc/include/asm/unistd.h | 5 - arch/sparc/kernel/syscalls/syscall.tbl | 4 arch/xtensa/kernel/syscalls/syscall.tbl | 1 + 12 files changed, 28 insertions(+), 15 deletions(-) diff --git a/arch/alpha/include/asm/unistd.h b/arch/alpha/include/asm/unistd.h index 564ba87bdc38..31ad350b58a0 100644 --- a/arch/alpha/include/asm/unistd.h +++ b/arch/alpha/include/asm/unistd.h @@ -29,9 +29,5 @@ #define __IGNORE_getppid #define __IGNORE_getuid -/* Alpha doesn't have protection keys. 
*/ -#define __IGNORE_pkey_mprotect -#define __IGNORE_pkey_alloc -#define __IGNORE_pkey_free #endif /* _ALPHA_UNISTD_H */ diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index b0e247287908..25b4a7e76943 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -452,3 +452,7 @@ 521common pwritev2sys_pwritev2 522common statx sys_statx 523common io_pgetevents sys_io_pgetevents +524common pkey_alloc sys_pkey_alloc +525common pkey_free sys_pkey_free +526common pkey_mprotect sys_pkey_mprotect +527common rseqsys_rseq diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl index 2e93dbdcdb80..84e03de00177 100644 --- a/arch/ia64/kernel/syscalls/syscall.tbl +++ b/arch/ia64/kernel/syscalls/syscall.tbl @@ -339,3 +339,7 @@ 327common io_pgetevents sys_io_pgetevents 328common perf_event_open sys_perf_event_open 329common seccomp sys_seccomp +330common pkey_alloc sys_pkey_alloc +331common pkey_free sys_pkey_free +332common pkey_mprotect sys_pkey_mprotect +333common rseqsys_rseq diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl index 5354ba02eed2..ae88b85d068e 100644 --- a/arch/m68k/kernel/syscalls/syscall.tbl +++ b/arch/m68k/kernel/syscalls/syscall.tbl @@ -388,6 +388,10 @@ 378common pwritev2sys_pwritev2 379common statx sys_statx 380common seccomp sys_seccomp +381common pkey_alloc sys_pkey_alloc +382common pkey_free sys_pkey_free +383common pkey_mprotect sys_pkey_mprotect +384common rseqsys_rseq # room for arch specific calls 393common semget sys_semget 394common semctl sys_semctl diff --git a/arch/parisc/include/asm/unistd.h b/arch/parisc/include/asm/unistd.h index c2c2afb28941..9ec1026af877 100644 --- a/arch/parisc/include/asm/unistd.h +++ b/arch/parisc/include/asm/unistd.h @@ -12,9 +12,6 @@ #define __IGNORE_select/* newselect */ #define __IGNORE_fadvise64 /* fadvise64_64 */ -#define __IGNORE_pkey_mprotect -#define 
__IGNORE_pkey_alloc -#define __IGNORE_pkey_free #ifndef ASM_LINE_SEP # define ASM_LINE_SEP ; diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index 9bbd2f9f56c8..e07231de3597 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -367,3 +367,7 @@ 348common pwritev2sys_pwritev2 compat_sys_pwritev2 349common statx sys_statx 350common io_pgetevents sys_io_pgetevents compat_sys_io_pgetevents +351common pkey_alloc sys_pkey_alloc +352common pkey_free sys_pkey_free +353common pkey_mprotect sys_pkey_mprotect +354common rseqsys_rseq diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h index a1fbf15d53aa..ed08f114ee91 100644 --- a/arch/s390/include/asm/unistd.h +++ b/arch/s390/include/asm
Re: [PATCH v4 1/3] fs: hoist EFSCORRUPTED definition into uapi header
On Fri, Jan 18, 2019 at 5:15 PM Jann Horn wrote: > > Multiple filesystems can already return EFSCORRUPTED errors to userspace; > however, so far, definitions of EFSCORRUPTED were in filesystem-private > headers. > > I wanted to use EUCLEAN to indicate data corruption in the VFS layer; > Dave Chinner says that I should instead hoist the definitions of > EFSCORRUPTED into the UAPI header and then use EFSCORRUPTED. > > This patch is marked for stable backport because it is a prerequisite for > the following patch. > > Cc: sta...@vger.kernel.org > Suggested-by: Dave Chinner > Signed-off-by: Jann Horn > --- > fs/ext2/ext2.h | 1 - > fs/ext4/ext4.h | 1 - > fs/xfs/xfs_linux.h | 1 - > include/linux/jbd2.h | 1 - > include/uapi/asm-generic/errno.h | 1 + > 5 files changed, 1 insertion(+), 4 deletions(-) For asm-generic: Acked-by: Arnd Bergmann
[PATCH v2 13/29] arch: add split IPC system calls where needed
The IPC system call handling is highly inconsistent across architectures, some use sys_ipc, some use separate calls, and some use both. We also have some architectures that require passing IPC_64 in the flags, and others that set it implicitly. For the addition of a y2038 safe semtimedop() system call, I chose to only support the separate entry points, but that requires first supporting the regular ones with their own syscall numbers. The IPC_64 is now implied by the new semctl/shmctl/msgctl system calls even on the architectures that require passing it with the ipc() multiplexer. I'm not adding the new semtimedop() or semop() on 32-bit architectures, those will get implemented using the new semtimedop_time64() version that gets added along with the other time64 calls. Three 64-bit architectures (powerpc, s390 and sparc) get semtimedop(). Signed-off-by: Arnd Bergmann --- One aspect here that might be a bit controversial is the use of the same system call numbers across all architectures, synchronizing all of them with the x86-32 numbers. With the new syscall.tbl files, I hope we can just keep doing that in the future, and no longer require the architecture maintainers to assign a number. This is mainly useful for implementers of the C libraries: if we can add future system calls everywhere at the same time, using a particular version of the kernel headers also guarantees that the system call number macro is visible. 
--- arch/m68k/kernel/syscalls/syscall.tbl | 11 +++ arch/mips/kernel/syscalls/syscall_o32.tbl | 11 +++ arch/powerpc/kernel/syscalls/syscall.tbl | 13 + arch/s390/kernel/syscalls/syscall.tbl | 12 arch/sh/kernel/syscalls/syscall.tbl | 11 +++ arch/sparc/kernel/syscalls/syscall.tbl| 12 arch/x86/entry/syscalls/syscall_32.tbl| 11 +++ 7 files changed, 81 insertions(+) diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl index 85779d6ef935..5354ba02eed2 100644 --- a/arch/m68k/kernel/syscalls/syscall.tbl +++ b/arch/m68k/kernel/syscalls/syscall.tbl @@ -388,3 +388,14 @@ 378common pwritev2sys_pwritev2 379common statx sys_statx 380common seccomp sys_seccomp +# room for arch specific calls +393common semget sys_semget +394common semctl sys_semctl +395common shmget sys_shmget +396common shmctl sys_shmctl +397common shmat sys_shmat +398common shmdt sys_shmdt +399common msgget sys_msgget +400common msgsnd sys_msgsnd +401common msgrcv sys_msgrcv +402common msgctl sys_msgctl diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index 3d5a47b80d2b..fa47ea8cc6ef 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -380,3 +380,14 @@ 366o32 statx sys_statx 367o32 rseqsys_rseq 368o32 io_pgetevents sys_io_pgetevents compat_sys_io_pgetevents +# room for arch specific calls +393o32 semget sys_semget +394o32 semctl sys_semctl compat_sys_semctl +395o32 shmget sys_shmget +396o32 shmctl sys_shmctl compat_sys_shmctl +397o32 shmat sys_shmat compat_sys_shmat +398o32 shmdt sys_shmdt +399o32 msgget sys_msgget +400o32 msgsnd sys_msgsnd compat_sys_msgsnd +401o32 msgrcv sys_msgrcv compat_sys_msgrcv +402o32 msgctl sys_msgctl compat_sys_msgctl diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index db3bbb8744af..7555874ce39c 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ 
-414,6 +414,7 @@ 363spu switch_endian sys_ni_syscall 364common userfaultfd sys_userfaultfd 365common membarrier sys_membarrier +# 366-377 originally left for IPC, now unused 378nospu mlock2 sys_mlock2 379nospu copy_file_range sys_copy_file_range 380common preadv2 sys_preadv2
[PATCH v2 15/29] alpha: add standard statfs64/fstatfs64 syscalls
As Joseph Myers points out, alpha has never had a standard statfs64 interface and instead returns only 32-bit numbers here. While there is an old osf_statfs64 system call that returns additional data, this has some other quirks and does not get used in glibc. I considered making the stat64 structure layout compatible with the one used by the kernel on most other 64 bit architectures that implement it (ia64, parisc, powerpc, and sparc), but in the end decided to stay with the one that was traditionally defined in the alpha headers but not used, since this is also what glibc exposes to user space. Signed-off-by: Arnd Bergmann --- arch/alpha/kernel/syscalls/syscall.tbl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index 25b4a7e76943..0ebd59fdcb8b 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -456,3 +456,5 @@ 525common pkey_free sys_pkey_free 526common pkey_mprotect sys_pkey_mprotect 527common rseqsys_rseq +528common statfs64sys_statfs64 +529common fstatfs64 sys_fstatfs64 -- 2.20.0
[PATCH v2 09/29] sh: remove duplicate unistd_32.h file
When I merged this patch, the file was accidentally left intact instead of being removed, which means any changes to syscall.tbl have no effect. Fixes: 2b3c5a99d5f3 ("sh: generate uapi header and syscall table header files") Signed-off-by: Arnd Bergmann --- arch/sh/include/uapi/asm/unistd_32.h | 403 --- 1 file changed, 403 deletions(-) delete mode 100644 arch/sh/include/uapi/asm/unistd_32.h diff --git a/arch/sh/include/uapi/asm/unistd_32.h b/arch/sh/include/uapi/asm/unistd_32.h deleted file mode 100644 index 31c85aa251ab.. --- a/arch/sh/include/uapi/asm/unistd_32.h +++ /dev/null @@ -1,403 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef __ASM_SH_UNISTD_32_H -#define __ASM_SH_UNISTD_32_H - -/* - * Copyright (C) 1999 Niibe Yutaka - */ - -/* - * This file contains the system call numbers. - */ - -#define __NR_restart_syscall 0 -#define __NR_exit1 -#define __NR_fork2 -#define __NR_read3 -#define __NR_write 4 -#define __NR_open5 -#define __NR_close 6 -#define __NR_waitpid 7 -#define __NR_creat 8 -#define __NR_link9 -#define __NR_unlink 10 -#define __NR_execve 11 -#define __NR_chdir 12 -#define __NR_time 13 -#define __NR_mknod 14 -#define __NR_chmod 15 -#define __NR_lchown 16 -/* 17 was sys_break */ -#define __NR_oldstat18 -#define __NR_lseek 19 -#define __NR_getpid 20 -#define __NR_mount 21 -#define __NR_umount 22 -#define __NR_setuid 23 -#define __NR_getuid 24 -#define __NR_stime 25 -#define __NR_ptrace 26 -#define __NR_alarm 27 -#define __NR_oldfstat 28 -#define __NR_pause 29 -#define __NR_utime 30 -/* 31 was sys_stty */ -/* 32 was sys_gtty */ -#define __NR_access 33 -#define __NR_nice 34 -/* 35 was sys_ftime */ -#define __NR_sync 36 -#define __NR_kill 37 -#define __NR_rename 38 -#define __NR_mkdir 39 -#define __NR_rmdir 40 -#define __NR_dup41 -#define __NR_pipe 42 -#define __NR_times 43 -/* 44 was sys_prof */ -#define __NR_brk45 -#define __NR_setgid 46 -#define __NR_getgid 47 -#define __NR_signal 48 -#define __NR_geteuid49 -#define 
__NR_getegid50 -#define __NR_acct 51 -#define __NR_umount252 -/* 53 was sys_lock */ -#define __NR_ioctl 54 -#define __NR_fcntl 55 -/* 56 was sys_mpx */ -#define __NR_setpgid57 -/* 58 was sys_ulimit */ -/* 59 was sys_olduname */ -#define __NR_umask 60 -#define __NR_chroot 61 -#define __NR_ustat 62 -#define __NR_dup2 63 -#define __NR_getppid64 -#define __NR_getpgrp65 -#define __NR_setsid 66 -#define __NR_sigaction 67 -#define __NR_sgetmask 68 -#define __NR_ssetmask 69 -#define __NR_setreuid 70 -#define __NR_setregid 71 -#define __NR_sigsuspend 72 -#define __NR_sigpending 73 -#define __NR_sethostname74 -#define __NR_setrlimit 75 -#define __NR_getrlimit 76 /* Back compatible 2Gig limited rlimit */ -#define __NR_getrusage 77 -#define __NR_gettimeofday 78 -#define __NR_settimeofday 79 -#define __NR_getgroups 80 -#define __NR_setgroups 81 -/* 82 was sys_oldselect */ -#define __NR_symlink83 -#define __NR_oldlstat 84 -#define __NR_readlink 85 -#define __NR_uselib 86 -#define __NR_swapon 87 -#define __NR_reboot 88 -#define __NR_readdir89 -#define __NR_mmap 90 -#define __NR_munmap 91 -#define __NR_truncate 92 -#define __NR_ftruncate 93 -#define __NR_fchmod 94 -#define __NR_fchown 95 -#define __NR_getpriority96 -#define __NR_setpriority97 -/* 98 was sys_profil */ -#define __NR_statfs 99 -#define __NR_fstatfs 100 - /* 101 was sys_ioperm */ -#define __NR_socketcall102 -#define __NR_syslog103 -#define __NR_setitimer 104 -#define __NR
[PATCH v2 18/29] time: make adjtime compat handling available for 32 bit
We want to reuse the compat_timex handling on 32-bit architectures the same way we are using the compat handling for timespec when moving to 64-bit time_t. Move all definitions related to compat_timex out of the compat code into the normal timekeeping code, along with a rename to old_timex32, corresponding to the timespec/timeval structures, and make it controlled by CONFIG_COMPAT_32BIT_TIME, which 32-bit architectures will then select. Signed-off-by: Arnd Bergmann --- include/linux/compat.h | 35 ++- include/linux/time32.h | 32 - kernel/compat.c| 64 -- kernel/time/posix-timers.c | 14 ++-- kernel/time/time.c | 70 +++--- 5 files changed, 102 insertions(+), 113 deletions(-) diff --git a/include/linux/compat.h b/include/linux/compat.h index 056be0d03722..657ca6abd855 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -132,37 +132,6 @@ struct compat_tms { compat_clock_t tms_cstime; }; -struct compat_timex { - compat_uint_t modes; - compat_long_t offset; - compat_long_t freq; - compat_long_t maxerror; - compat_long_t esterror; - compat_int_t status; - compat_long_t constant; - compat_long_t precision; - compat_long_t tolerance; - struct old_timeval32 time; - compat_long_t tick; - compat_long_t ppsfreq; - compat_long_t jitter; - compat_int_t shift; - compat_long_t stabil; - compat_long_t jitcnt; - compat_long_t calcnt; - compat_long_t errcnt; - compat_long_t stbcnt; - compat_int_t tai; - - compat_int_t:32; compat_int_t:32; compat_int_t:32; compat_int_t:32; - compat_int_t:32; compat_int_t:32; compat_int_t:32; compat_int_t:32; - compat_int_t:32; compat_int_t:32; compat_int_t:32; -}; - -struct timex; -int compat_get_timex(struct timex *, const struct compat_timex __user *); -int compat_put_timex(struct compat_timex __user *, const struct timex *); - #define _COMPAT_NSIG_WORDS (_COMPAT_NSIG / _COMPAT_NSIG_BPW) typedef struct { @@ -808,7 +777,7 @@ asmlinkage long compat_sys_gettimeofday(struct old_timeval32 __user *tv, struct timezone __user *tz); asmlinkage 
long compat_sys_settimeofday(struct old_timeval32 __user *tv, struct timezone __user *tz); -asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp); +asmlinkage long compat_sys_adjtimex(struct old_timex32 __user *utp); /* kernel/timer.c */ asmlinkage long compat_sys_sysinfo(struct compat_sysinfo __user *info); @@ -911,7 +880,7 @@ asmlinkage long compat_sys_open_by_handle_at(int mountdirfd, struct file_handle __user *handle, int flags); asmlinkage long compat_sys_clock_adjtime(clockid_t which_clock, -struct compat_timex __user *tp); +struct old_timex32 __user *tp); asmlinkage long compat_sys_sendmmsg(int fd, struct compat_mmsghdr __user *mmsg, unsigned vlen, unsigned int flags); asmlinkage ssize_t compat_sys_process_vm_readv(compat_pid_t pid, diff --git a/include/linux/time32.h b/include/linux/time32.h index 118b9977080c..820a22e2b98b 100644 --- a/include/linux/time32.h +++ b/include/linux/time32.h @@ -10,6 +10,7 @@ */ #include +#include #define TIME_T_MAX (time_t)((1UL << ((sizeof(time_t) << 3) - 1)) - 1) @@ -35,13 +36,42 @@ struct old_utimbuf32 { old_time32_tmodtime; }; +struct old_timex32 { + u32 modes; + s32 offset; + s32 freq; + s32 maxerror; + s32 esterror; + s32 status; + s32 constant; + s32 precision; + s32 tolerance; + struct old_timeval32 time; + s32 tick; + s32 ppsfreq; + s32 jitter; + s32 shift; + s32 stabil; + s32 jitcnt; + s32 calcnt; + s32 errcnt; + s32 stbcnt; + s32 tai; + + s32:32; s32:32; s32:32; s32:32; + s32:32; s32:32; s32:32; s32:32; + s32:32; s32:32; s32:32; +}; + extern int get_old_timespec32(struct timespec64 *, const void __user *); extern int put_old_timespec32(const struct timespec64 *, void __user *); extern int get_old_itimerspec32(struct itimerspec64 *its, const struct old_itimerspec32 __user *uits); extern int put_old_itimerspec32(const struct itimerspec64 *its, struct old_itimerspec32 __user *uits); - +struct timex; +int get_old_timex32(struct timex *, const struct old_timex32 __user *); +int put_old_timex32(struct 
old_timex32 __user *, const struct timex *); #if __BITS_PER_LONG == 64 diff --git a/kernel/compat.c b/kernel/compat.c index f01affa17e22..d8a36c6ad7c9 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -20,7 +20,6
[PATCH v2 17/29] syscalls: remove obsolete __IGNORE_ macros
These are all for ignoring the lack of obsolete system calls, which have been marked the same way in scripts/checksyscalls.sh, so these can be removed. Signed-off-by: Arnd Bergmann --- arch/mips/include/asm/unistd.h | 16 arch/parisc/include/asm/unistd.h | 3 --- arch/s390/include/asm/unistd.h | 2 -- arch/xtensa/include/asm/unistd.h | 12 4 files changed, 33 deletions(-) diff --git a/arch/mips/include/asm/unistd.h b/arch/mips/include/asm/unistd.h index b23d74a601b3..5e9eeb83d8d4 100644 --- a/arch/mips/include/asm/unistd.h +++ b/arch/mips/include/asm/unistd.h @@ -53,22 +53,6 @@ #define __ARCH_WANT_SYS_FORK #define __ARCH_WANT_SYS_CLONE -/* whitelists for checksyscalls */ -#define __IGNORE_select -#define __IGNORE_vfork -#define __IGNORE_time -#define __IGNORE_uselib -#define __IGNORE_fadvise64_64 -#define __IGNORE_getdents64 -#if _MIPS_SIM == _MIPS_SIM_NABI32 -#define __IGNORE_truncate64 -#define __IGNORE_ftruncate64 -#define __IGNORE_stat64 -#define __IGNORE_lstat64 -#define __IGNORE_fstat64 -#define __IGNORE_fstatat64 -#endif - #endif /* !__ASSEMBLY__ */ #endif /* _ASM_UNISTD_H */ diff --git a/arch/parisc/include/asm/unistd.h b/arch/parisc/include/asm/unistd.h index 9ec1026af877..385eae49ed02 100644 --- a/arch/parisc/include/asm/unistd.h +++ b/arch/parisc/include/asm/unistd.h @@ -10,9 +10,6 @@ #define SYS_ify(syscall_name) __NR_##syscall_name -#define __IGNORE_select/* newselect */ -#define __IGNORE_fadvise64 /* fadvise64_64 */ - #ifndef ASM_LINE_SEP # define ASM_LINE_SEP ; #endif diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h index ed08f114ee91..59202ceea1f6 100644 --- a/arch/s390/include/asm/unistd.h +++ b/arch/s390/include/asm/unistd.h @@ -10,8 +10,6 @@ #include #include -#define __IGNORE_time - #define __ARCH_WANT_NEW_STAT #define __ARCH_WANT_OLD_READDIR #define __ARCH_WANT_SYS_ALARM diff --git a/arch/xtensa/include/asm/unistd.h b/arch/xtensa/include/asm/unistd.h index 0d34629dafc5..81cc52ea1bd5 100644 --- 
a/arch/xtensa/include/asm/unistd.h +++ b/arch/xtensa/include/asm/unistd.h @@ -10,18 +10,6 @@ #define __ARCH_WANT_SYS_UTIME #define __ARCH_WANT_SYS_GETPGRP -/* - * Ignore legacy system calls in the checksyscalls.sh script - */ - -#define __IGNORE_fork /* use clone */ -#define __IGNORE_time -#define __IGNORE_alarm /* use setitimer */ -#define __IGNORE_pause -#define __IGNORE_mmap /* use mmap2 */ -#define __IGNORE_vfork /* use clone */ -#define __IGNORE_fadvise64 /* use fadvise64_64 */ - #define NR_syscalls__NR_syscalls #endif /* _XTENSA_UNISTD_H */ -- 2.20.0
[PATCH v2 26/29] y2038: use time32 syscall names on 32-bit
This is the big flip, where all 32-bit architectures set COMPAT_32BIT_TIME and use the _time32 system calls from the former compat layer instead of the system calls that take __kernel_timespec and similar arguments. The temporary redirects for __kernel_timespec, __kernel_itimerspec and __kernel_timex can get removed with this. It would be easy to split this commit by architecture, but with the new generated system call tables, it's easy enough to do it all at once, which makes it a little easier to check that the changes are the same in each table. Signed-off-by: Arnd Bergmann --- arch/Kconfig| 2 +- arch/arm/kernel/sys_oabi-compat.c | 8 +- arch/arm/tools/syscall.tbl | 46 ++-- arch/m68k/kernel/syscalls/syscall.tbl | 42 +-- arch/microblaze/kernel/syscalls/syscall.tbl | 46 ++-- arch/mips/kernel/syscalls/syscall_o32.tbl | 44 +-- arch/parisc/kernel/syscalls/syscall.tbl | 69 +++-- arch/powerpc/kernel/syscalls/syscall.tbl| 82 +++-- arch/sh/kernel/syscalls/syscall.tbl | 42 +-- arch/sparc/kernel/syscalls/syscall.tbl | 64 ++-- arch/x86/entry/syscalls/syscall_32.tbl | 44 +-- arch/xtensa/kernel/syscalls/syscall.tbl | 44 +-- include/uapi/asm-generic/unistd.h | 56 +++--- 13 files changed, 335 insertions(+), 254 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index 4cfb6de48f79..46db715a7f42 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -759,7 +759,7 @@ config 64BIT_TIME handling. config COMPAT_32BIT_TIME - def_bool (!64BIT && 64BIT_TIME) || COMPAT + def_bool !64BIT || COMPAT help This enables 32 bit time_t support in addition to 64 bit time_t support. 
This is relevant on all 32-bit architectures, and 64-bit architectures diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c index 92ab36f38795..acd054a42ba2 100644 --- a/arch/arm/kernel/sys_oabi-compat.c +++ b/arch/arm/kernel/sys_oabi-compat.c @@ -317,10 +317,10 @@ struct oabi_sembuf { asmlinkage long sys_oabi_semtimedop(int semid, struct oabi_sembuf __user *tsops, unsigned nsops, - const struct timespec __user *timeout) + const struct old_timespec32 __user *timeout) { struct sembuf *sops; - struct timespec local_timeout; + struct old_timespec32 local_timeout; long err; int i; @@ -350,7 +350,7 @@ asmlinkage long sys_oabi_semtimedop(int semid, } else { mm_segment_t fs = get_fs(); set_fs(KERNEL_DS); - err = sys_semtimedop(semid, sops, nsops, timeout); + err = sys_semtimedop_time32(semid, sops, nsops, timeout); set_fs(fs); } kfree(sops); @@ -375,7 +375,7 @@ asmlinkage int sys_oabi_ipc(uint call, int first, int second, int third, return sys_oabi_semtimedop(first, (struct oabi_sembuf __user *)ptr, second, - (const struct timespec __user *)fifth); + (const struct old_timespec32 __user *)fifth); default: return sys_ipc(call, first, second, third, ptr, fifth); } diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index b54b7f2bc24a..200f4b878a46 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -137,7 +137,7 @@ 121common setdomainname sys_setdomainname 122common uname sys_newuname # 123 was sys_modify_ldt -124common adjtimexsys_adjtimex +124common adjtimexsys_adjtimex_time32 125common mprotectsys_mprotect 126common sigprocmask sys_sigprocmask # 127 was sys_create_module @@ -174,8 +174,8 @@ 158common sched_yield sys_sched_yield 159common sched_get_priority_max sys_sched_get_priority_max 160common sched_get_priority_min sys_sched_get_priority_min -161common sched_rr_get_interval sys_sched_rr_get_interval -162common nanosleep sys_nanosleep +161common sched_rr_get_interval sys_sched_rr_get_interval_time32 
+162common nanosleep sys_nanosleep_time32 163common mremap sys_mremap 164common setresuid sys_setresuid16 165common getresuid sys_getresuid16 @@ -190,7 +190,7 @@ 174common rt_sigactionsys_rt_sigaction 175common rt_sigprocmask sys_rt_sigprocmask 176common rt_sigpending sys_rt_sigpending -177
[PATCH v2 19/29] time: Add struct __kernel_timex
From: Deepa Dinamani struct timex uses struct timeval internally. struct timeval is not y2038 safe. Introduce a new UAPI type struct __kernel_timex that is y2038 safe. struct __kernel_timex uses a timeval type that is similar to struct __kernel_timespec which preserves the same structure size across 32 bit and 64 bit ABIs. struct __kernel_timex also restructures other members of the structure to make the structure the same on 64 bit and 32 bit architectures. Note that struct __kernel_timex is the same as struct timex on a 64 bit architecture. The above solution is similar to other new y2038 syscalls that are being introduced: both 32 bit and 64 bit ABIs have a common entry, and the compat entry supports the old 32 bit syscall interface. Alternatives considered were: 1. Add new time type to struct timex that makes use of padded bits. This time type could be based on the struct __kernel_timespec. modes will use a flag to notify which time structure should be used internally. This needs some application level changes on both 64 bit and 32 bit architectures. Although 64 bit machines could continue to use the older timeval structure without any changes. 2. Add a new u8 type to struct timex that makes use of padded bits. This can be used to save higher order tv_sec bits. modes will use a flag to notify presence of such a type. This will need some application level changes on 32 bit architectures. 3. Add a new compat_timex structure that differs in only the size of the time type; keep rest of struct timex the same. This requires extra syscalls to manage all 3 cases on 64 bit architectures. This will not need any application level changes but will add more complexity from kernel side. 
Signed-off-by: Deepa Dinamani --- include/linux/timex.h | 7 +++ include/uapi/linux/timex.h | 41 ++ 2 files changed, 48 insertions(+) diff --git a/include/linux/timex.h b/include/linux/timex.h index 39c25dbebfe8..7f40e9e42ecc 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -53,6 +53,13 @@ #ifndef _LINUX_TIMEX_H #define _LINUX_TIMEX_H +/* CONFIG_64BIT_TIME enables new 64 bit time_t syscalls in the compat path + * and 32-bit emulation. + */ +#ifndef CONFIG_64BIT_TIME +#define __kernel_timex timex +#endif + #include #define ADJ_ADJTIME0x8000 /* switch between adjtime/adjtimex modes */ diff --git a/include/uapi/linux/timex.h b/include/uapi/linux/timex.h index 92685d826444..a1c6b73016a5 100644 --- a/include/uapi/linux/timex.h +++ b/include/uapi/linux/timex.h @@ -92,6 +92,47 @@ struct timex { int :32; int :32; int :32; }; +struct __kernel_timex_timeval { + __kernel_time64_t tv_sec; + long long tv_usec; +}; + +#ifndef __kernel_timex +struct __kernel_timex { + unsigned int modes; /* mode selector */ + int :32;/* pad */ + long long offset; /* time offset (usec) */ + long long freq; /* frequency offset (scaled ppm) */ + long long maxerror;/* maximum error (usec) */ + long long esterror;/* estimated error (usec) */ + int status; /* clock command/status */ + int :32;/* pad */ + long long constant;/* pll time constant */ + long long precision;/* clock precision (usec) (read only) */ + long long tolerance;/* clock frequency tolerance (ppm) + * (read only) + */ + struct __kernel_timex_timeval time; /* (read only, except for ADJ_SETOFFSET) */ + long long tick; /* (modified) usecs between clock ticks */ + + long long ppsfreq;/* pps frequency (scaled ppm) (ro) */ + long long jitter; /* pps jitter (us) (ro) */ + int shift; /* interval duration (s) (shift) (ro) */ + int :32;/* pad */ + long long stabil;/* pps stability (scaled ppm) (ro) */ + long long jitcnt; /* jitter limit exceeded (ro) */ + long long calcnt; /* calibration intervals (ro) */ + long long errcnt; /* 
calibration errors (ro) */ + long long stbcnt; /* stability limit exceeded (ro) */ + + int tai;/* TAI offset (ro) */ + + int :32; int :32; int :32; int :32; + int :32; int :32; int :32; int :32; + int :32; int :32; int :32; +}; +#endif + /* * Mode codes (timex.mode) */ -- 2.20.0
[PATCH v2 12/29] ipc: rename old-style shmctl/semctl/msgctl syscalls
The behavior of these system calls is slightly different between architectures, as determined by the CONFIG_ARCH_WANT_IPC_PARSE_VERSION symbol. Most architectures that implement the split IPC syscalls don't set that symbol and only get the modern version, but alpha, arm, microblaze, mips-n32, mips-n64 and xtensa expect the caller to pass the IPC_64 flag. For the architectures that so far only implement sys_ipc(), i.e. m68k, mips-o32, powerpc, s390, sh, sparc, and x86-32, we want the new behavior when adding the split syscalls, so we need to distinguish between the two groups of architectures. The method I picked for this distinction is to have a separate system call entry point: sys_old_*ctl() now uses ipc_parse_version, while sys_*ctl() does not. The system call tables of the five architectures are changed accordingly. As an additional benefit, we no longer need the configuration specific definition for ipc_parse_version(), it always does the same thing now, but simply won't get called on architectures with the modern interface. A small downside is that on architectures that do set ARCH_WANT_IPC_PARSE_VERSION, we now have an extra set of entry points that are never called. They only add a few bytes of bloat, so it seems better to keep them compared to adding yet another Kconfig symbol. I considered adding new syscall numbers for the IPC_64 variants for consistency, but decided against that for now. 
Signed-off-by: Arnd Bergmann --- arch/alpha/kernel/syscalls/syscall.tbl | 6 ++-- arch/arm/tools/syscall.tbl | 6 ++-- arch/arm64/include/asm/unistd32.h | 6 ++-- arch/microblaze/kernel/syscalls/syscall.tbl | 6 ++-- arch/mips/kernel/syscalls/syscall_n32.tbl | 6 ++-- arch/mips/kernel/syscalls/syscall_n64.tbl | 6 ++-- arch/xtensa/kernel/syscalls/syscall.tbl | 6 ++-- include/linux/syscalls.h| 3 ++ ipc/msg.c | 39 ipc/sem.c | 39 ipc/shm.c | 40 + ipc/syscall.c | 12 +++ ipc/util.h | 21 --- kernel/sys_ni.c | 3 ++ 14 files changed, 137 insertions(+), 62 deletions(-) diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index f920b65e8c49..b0e247287908 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -174,17 +174,17 @@ 187common osf_alt_sigpending sys_ni_syscall 188common osf_alt_setsid sys_ni_syscall 199common osf_swapon sys_swapon -200common msgctl sys_msgctl +200common msgctl sys_old_msgctl 201common msgget sys_msgget 202common msgrcv sys_msgrcv 203common msgsnd sys_msgsnd -204common semctl sys_semctl +204common semctl sys_old_semctl 205common semget sys_semget 206common semop sys_semop 207common osf_utsname sys_osf_utsname 208common lchown sys_lchown 209common shmat sys_shmat -210common shmctl sys_shmctl +210common shmctl sys_old_shmctl 211common shmdt sys_shmdt 212common shmget sys_shmget 213common osf_mvalid sys_ni_syscall diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index 20ed7e026723..b54b7f2bc24a 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -314,15 +314,15 @@ 297common recvmsg sys_recvmsg 298common semop sys_semop sys_oabi_semop 299common semget sys_semget -300common semctl sys_semctl +300common semctl sys_old_semctl 301common msgsnd sys_msgsnd 302common msgrcv sys_msgrcv 303common msgget sys_msgget -304common msgctl sys_msgctl +304common msgctl sys_old_msgctl 305common shmat sys_shmat 306common shmdt sys_shmdt 307common 
shmget sys_shmget -308common shmctl sys_shmctl +308common shmctl sys_old_shmctl 309common add_key sys_add_key 310common request_key sys_request_key 311common keyctl sys_keyctl diff --git a/arch/arm64/include/asm/unistd32
[PATCH v2 05/29] alpha: update syscall macro definitions
Other architectures commonly use __NR_umount2 for sys_umount, only ia64 and alpha use __NR_umount here. In order to synchronize the generated tables, use umount2 like everyone else, and add back the old name from asm/unistd.h for compatibility. For shmat, alpha uses the osf_shmat name, we can do the same thing here, which means we don't have to add an entry in the __IGNORE list now that shmat is mandatory everywhere. alarm, creat, pause, time, and utime are optional everywhere these days, no need to list them here any more. I considered also adding the regular versions of the get*id system calls that have different names and calling conventions on alpha, which would further help unify the syscall ABI, but for now I decided against that. Signed-off-by: Arnd Bergmann --- arch/alpha/include/asm/unistd.h| 6 -- arch/alpha/include/uapi/asm/unistd.h | 5 + arch/alpha/kernel/syscalls/syscall.tbl | 4 ++-- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/arch/alpha/include/asm/unistd.h b/arch/alpha/include/asm/unistd.h index 21b706a5b772..564ba87bdc38 100644 --- a/arch/alpha/include/asm/unistd.h +++ b/arch/alpha/include/asm/unistd.h @@ -22,18 +22,12 @@ /* * Ignore legacy syscalls that we don't use. */ -#define __IGNORE_alarm -#define __IGNORE_creat #define __IGNORE_getegid #define __IGNORE_geteuid #define __IGNORE_getgid #define __IGNORE_getpid #define __IGNORE_getppid #define __IGNORE_getuid -#define __IGNORE_pause -#define __IGNORE_time -#define __IGNORE_utime -#define __IGNORE_umount2 /* Alpha doesn't have protection keys.
*/ #define __IGNORE_pkey_mprotect diff --git a/arch/alpha/include/uapi/asm/unistd.h b/arch/alpha/include/uapi/asm/unistd.h index 9ba724f116f1..4507071f995f 100644 --- a/arch/alpha/include/uapi/asm/unistd.h +++ b/arch/alpha/include/uapi/asm/unistd.h @@ -2,6 +2,11 @@ #ifndef _UAPI_ALPHA_UNISTD_H #define _UAPI_ALPHA_UNISTD_H +/* These are traditionally the names linux-alpha uses for + * the two otherwise generic system calls */ +#define __NR_umount__NR_umount2 +#define __NR_osf_shmat __NR_shmat + #include #endif /* _UAPI_ALPHA_UNISTD_H */ diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index e09558edae73..f920b65e8c49 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -29,7 +29,7 @@ 19 common lseek sys_lseek 20 common getxpid sys_getxpid 21 common osf_mount sys_osf_mount -22 common umount sys_umount +22 common umount2 sys_umount 23 common setuid sys_setuid 24 common getxuid sys_getxuid 25 common exec_with_loadersys_ni_syscall @@ -183,7 +183,7 @@ 206common semop sys_semop 207common osf_utsname sys_osf_utsname 208common lchown sys_lchown -209common osf_shmat sys_shmat +209common shmat sys_shmat 210common shmctl sys_shmctl 211common shmdt sys_shmdt 212common shmget sys_shmget -- 2.20.0
[PATCH v2 20/29] time: fix sys_timer_settime prototype
A small typo has crept into the y2038 conversion of the timer_settime system call. So far this was completely harmless, but once we start using the new version, this has to be fixed. Fixes: 6ff847350702 ("time: Change types to new y2038 safe __kernel_itimerspec") Signed-off-by: Arnd Bergmann --- include/linux/syscalls.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 938d8908b9e0..baa4b70b02d3 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -591,7 +591,7 @@ asmlinkage long sys_timer_gettime(timer_t timer_id, asmlinkage long sys_timer_getoverrun(timer_t timer_id); asmlinkage long sys_timer_settime(timer_t timer_id, int flags, const struct __kernel_itimerspec __user *new_setting, - struct itimerspec __user *old_setting); + struct __kernel_itimerspec __user *old_setting); asmlinkage long sys_timer_delete(timer_t timer_id); asmlinkage long sys_clock_settime(clockid_t which_clock, const struct __kernel_timespec __user *tp); -- 2.20.0
[PATCH v2 22/29] timex: use __kernel_timex internally
From: Deepa Dinamani struct timex is not y2038 safe. Replace all uses of timex with y2038 safe __kernel_timex. Note that struct __kernel_timex is an ABI interface definition. We could define a new structure based on __kernel_timex that is only available internally instead. Right now, there isn't a strong motivation for this as the structure is isolated to a few defined struct timex interfaces and such a structure would be exactly the same as struct timex. The patch was generated by the following coccinelle script: virtual patch @depends on patch forall@ identifier ts; expression e; @@ ( - struct timex ts; + struct __kernel_timex ts; | - struct timex ts = {}; + struct __kernel_timex ts = {}; | - struct timex ts = e; + struct __kernel_timex ts = e; | - struct timex *ts; + struct __kernel_timex *ts; | (memset \| copy_from_user \| copy_to_user \)(..., - sizeof(struct timex)) + sizeof(struct __kernel_timex)) ) @depends on patch forall@ identifier ts; identifier fn; @@ fn(..., - struct timex *ts, + struct __kernel_timex *ts, ...) { ... } @depends on patch forall@ identifier ts; identifier fn; @@ fn(..., - struct timex *ts) { + struct __kernel_timex *ts) { ... 
} Signed-off-by: Deepa Dinamani Cc: linux-alpha@vger.kernel.org Cc: net...@vger.kernel.org --- arch/alpha/kernel/osf_sys.c | 5 +++-- arch/sparc/kernel/sys_sparc_64.c | 4 ++-- drivers/ptp/ptp_clock.c | 2 +- include/linux/posix-clock.h | 2 +- include/linux/time32.h | 6 +++--- include/linux/timex.h| 4 ++-- kernel/time/ntp.c| 18 ++ kernel/time/ntp_internal.h | 2 +- kernel/time/posix-clock.c| 2 +- kernel/time/posix-timers.c | 8 kernel/time/posix-timers.h | 2 +- kernel/time/time.c | 14 +++--- kernel/time/timekeeping.c| 4 ++-- 13 files changed, 38 insertions(+), 35 deletions(-) diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index 792586038808..bf497b8b0ec6 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -1253,7 +1253,7 @@ struct timex32 { SYSCALL_DEFINE1(old_adjtimex, struct timex32 __user *, txc_p) { -struct timex txc; + struct __kernel_timex txc; int ret; /* copy relevant bits of struct timex. */ @@ -1270,7 +1270,8 @@ SYSCALL_DEFINE1(old_adjtimex, struct timex32 __user *, txc_p) if (copy_to_user(txc_p, , offsetof(struct timex32, time)) || (copy_to_user(_p->tick, , sizeof(struct timex32) - offsetof(struct timex32, tick))) || - (put_tv_to_tv32(_p->time, ))) + (put_user(txc.time.tv_sec, _p->time.tv_sec)) || + (put_user(txc.time.tv_usec, _p->time.tv_usec))) return -EFAULT; return ret; diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index 37de18a11207..9825ca6a6020 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -548,7 +548,7 @@ SYSCALL_DEFINE2(getdomainname, char __user *, name, int, len) SYSCALL_DEFINE1(sparc_adjtimex, struct timex __user *, txc_p) { struct timex txc; /* Local copy of parameter */ - struct timex *kt = (void *) + struct __kernel_timex *kt = (void *) int ret; /* Copy the user data space into the kernel copy @@ -572,7 +572,7 @@ SYSCALL_DEFINE1(sparc_adjtimex, struct timex __user *, txc_p) SYSCALL_DEFINE2(sparc_clock_adjtime, const 
clockid_t, which_clock,struct timex __user *, txc_p) { struct timex txc; /* Local copy of parameter */ - struct timex *kt = (void *) + struct __kernel_timex *kt = (void *) int ret; if (!IS_ENABLED(CONFIG_POSIX_TIMERS)) { diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c index 48f3594a7458..79bd102c9bbc 100644 --- a/drivers/ptp/ptp_clock.c +++ b/drivers/ptp/ptp_clock.c @@ -124,7 +124,7 @@ static int ptp_clock_gettime(struct posix_clock *pc, struct timespec64 *tp) return err; } -static int ptp_clock_adjtime(struct posix_clock *pc, struct timex *tx) +static int ptp_clock_adjtime(struct posix_clock *pc, struct __kernel_timex *tx) { struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock); struct ptp_clock_info *ops; diff --git a/include/linux/posix-clock.h b/include/linux/posix-clock.h index 3a3bc71017d5..18674d7d5b1c 100644 --- a/include/linux/posix-clock.h +++ b/include/linux/posix-clock.h @@ -51,7 +51,7 @@ struct posix_clock; struct posix_clock_operations { struct module *owner; - int (*clock_adjtime)(struct posix_clock *pc, struct timex *tx); + int (*clock_adjtime)(struct posix_clock *pc, struct __kernel_timex *tx); int (*clock_gettime)(struct posix_clock *pc, struct timespec64 *ts); diff --git a/include/linux/time32.h b/include/linux/time32.h index 820a22e2b98b..0a1f302a1753 100644 ---
[PATCH v2 00/29] y2038: add time64 syscalls
This is a minor update of the patches I posted last week, I would like to add this into linux-next now, but would still do changes if there are concerns about the contents. The first version did not see a lot of replies, which could mean that either everyone is happy with it, or that it was largely ignored. See also the article at https://lwn.net/Articles/776435/. Changes since v1: - posting as a combined series for simplicity - dropped one mips patch that was merged as a 5.0 fix - reworked s390 compat syscall handling (posted separately) and rebased on top of that series - minor fixes for arm64 and powerpc - added alpha statfs64 interfaces - added alpha get{eg,eu,g,p,u,pp}id() Arnd v1 description for cleanup: The system call tables have diverged a bit over the years, and a number of the recent additions never made it into all architectures, for one reason or another. This is an attempt to clean it up as far as we can without breaking compatibility, doing a number of steps: - Add system calls that have not yet been integrated into all architectures but that we definitely want there. - Add the separate ipc syscalls on all architectures that traditionally only had sys_ipc(). This version is done without support for IPC_OLD that we have in sys_ipc. The new semtimedop_time64 syscall will only be added here, not in sys_ipc - Add syscall numbers for a couple of syscalls that we probably don't need everywhere, in particular pkey_* and rseq, for the purpose of symmetry: if it's in asm-generic/unistd.h, it makes sense to have it everywhere. - Prepare for having the same system call numbers for any future calls. In combination with the generated tables, this hopefully makes it easier to add new calls across all architectures together. Most of the contents of this series are unrelated to the actual y2038 work, but for the moment, that second series is based on this one. If there are any concerns about changes here, I can drop or rewrite any individual patch in this series.
My plan is to merge any patches in this series that are found to be good together with the y2038 patches for linux-5.1, so please review and provide Acks for merging through my tree, or pick them up for 5.0 if they seem urgent enough. v1 description for y2038 patches: This series finally gets us to the point of having system calls with 64-bit time_t on all architectures, after a long time of incremental preparation patches. There was actually one conversion that I missed during the summer, i.e. Deepa's timex series, which I now updated based on the 5.0-rc1 changes and review comments. I hope that the actual conversion should be uncontroversial by now, even if some of the patches are rather large. The one area that may need a little discussion is for the system call numbers assigned in the final patch: Can we get consensus on whether the idea of using the same numbers on all architectures, as well as my choice of numbers makes sense here? So far, I have done a lot of build testing across most architectures, which has found a number of bugs. I have also done an LTP run on arm32 with existing user space, but not on the other architectures. I did LTP tests with a modified musl libc[2] last summer on an older version of this series to make sure that the new 64-bit time_t interfaces work. The version there will need updates for testing with this new kernel patch series; I plan to do that next. For testing, the series plus the preparatory patches is available at [3]. Once there is a general agreement on this series and I have done more tests for the new system calls, I plan to add this to linux-next through my asm-generic tree or Thomas' timers tree. Please review and test!
Arnd [1] https://lore.kernel.org/lkml/20190110162435.309262-1-a...@arndb.de/T/ [2] https://git.linaro.org/people/arnd/musl-y2038.git/ [3] https://git.kernel.org/pub/scm/linux/kernel/git/arnd/playground.git y2038-5.0-rc1 Arnd Bergmann (26): ia64: add __NR_umount2 definition ia64: add statx and io_pgetevents syscalls ia64: assign syscall numbers for perf and seccomp alpha: wire up io_pgetevents system call alpha: update syscall macro definitions ARM: add migrate_pages() system call ARM: add kexec_file_load system call number m68k: assign syscall number for seccomp sh: remove duplicate unistd_32.h file sh: add statx system call sparc64: fix sparc_ipc type conversion ipc: rename old-style shmctl/semctl/msgctl syscalls arch: add split IPC system calls where needed arch: add pkey and rseq syscall numbers everywhere alpha: add standard statfs64/fstatfs64 syscalls alpha: add generic get{eg,eu,g,p,u,pp}id() syscalls syscalls: remove obsolete __IGNORE_ macros time: make adjtime compat handling available for 32 bit time: fix sys_timer_settime prototype sparc64: add custom adjtimex/clock_adjtime functions x86/x32: use time64 versions of sigtimedwait and recvmmsg y2038: syscalls: rename
[PATCH v2 24/29] x86/x32: use time64 versions of sigtimedwait and recvmmsg
x32 has always followed the time64 calling conventions of these syscalls, which required a special hack in compat_get_timespec aka get_old_timespec32 to continue working. Since we now have the time64 syscalls, use those explicitly. Signed-off-by: Arnd Bergmann --- arch/x86/entry/syscalls/syscall_64.tbl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index f0b1709a5ffb..43a622aec07e 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -361,7 +361,7 @@ 520x32 execve __x32_compat_sys_execve/ptregs 521x32 ptrace __x32_compat_sys_ptrace 522x32 rt_sigpending __x32_compat_sys_rt_sigpending -523x32 rt_sigtimedwait __x32_compat_sys_rt_sigtimedwait +523x32 rt_sigtimedwait __x32_compat_sys_rt_sigtimedwait_time64 524x32 rt_sigqueueinfo __x32_compat_sys_rt_sigqueueinfo 525x32 sigaltstack __x32_compat_sys_sigaltstack 526x32 timer_create__x32_compat_sys_timer_create @@ -375,7 +375,7 @@ 534x32 preadv __x32_compat_sys_preadv64 535x32 pwritev __x32_compat_sys_pwritev64 536x32 rt_tgsigqueueinfo __x32_compat_sys_rt_tgsigqueueinfo -537x32 recvmmsg__x32_compat_sys_recvmmsg +537x32 recvmmsg__x32_compat_sys_recvmmsg_time64 538x32 sendmmsg__x32_compat_sys_sendmmsg 539x32 process_vm_readv__x32_compat_sys_process_vm_readv 540x32 process_vm_writev __x32_compat_sys_process_vm_writev -- 2.20.0
[PATCH v2 11/29] sparc64: fix sparc_ipc type conversion
__kernel_timespec and timespec are currently the same type, but once they are different, the type cast has to be changed here. Signed-off-by: Arnd Bergmann --- arch/sparc/kernel/sys_sparc_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index 274ed0b9b3e0..1c079e7bab09 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -344,7 +344,7 @@ SYSCALL_DEFINE6(sparc_ipc, unsigned int, call, int, first, unsigned long, second goto out; case SEMTIMEDOP: err = sys_semtimedop(first, ptr, (unsigned int)second, - (const struct timespec __user *) + (const struct __kernel_timespec __user *) (unsigned long) fifth); goto out; case SEMGET: -- 2.20.0
Re: [PATCH 14/15] arch: add split IPC system calls where needed
On Tue, Jan 15, 2019 at 5:36 PM Geert Uytterhoeven wrote: > On Tue, Jan 15, 2019 at 4:19 PM Arnd Bergmann wrote: > > On Tue, Jan 15, 2019 at 4:01 PM Arnd Bergmann wrote: > > > On Mon, Jan 14, 2019 at 4:59 AM Michael Ellerman > > > wrote: > > > > Arnd Bergmann writes: > > > > > arch/m68k/kernel/syscalls/syscall.tbl | 11 +++ > > > > > arch/mips/kernel/syscalls/syscall_o32.tbl | 11 +++ > > > > > arch/powerpc/kernel/syscalls/syscall.tbl | 12 > > > > > > > > I have some changes I'd like to make to our syscall table that will > > > > clash with this. > > > > > > > > I'll try and send them today. > > > > > > Ok. Are those for 5.0 or 5.1? If they are intended for 5.0, it would be > > > nice for me to have a branch based on 5.0-rc1 that I can put > > > the other patches on top of. > > > > There is also another change that I considered: > > > > At the end of my series, we have a lot of entries like > > > > 245 32 clock_settime sys_clock_settime32 > > 245 64 clock_settime sys_clock_settime > > 245 spu clock_settime sys_clock_settime > > > > which could be folded into > > > > 245 32 clock_settime sys_clock_settime32 > > 245 spu64 clock_settime sys_clock_settime > > > > if we just add another option to the ABI field. Any thoughts on > > that? > > So "spu64" would mean "spu + 64"? > That makes it more difficult to read, and to grep. > What about allowing multiple ABIs, separated by commas? > So that line would become: > > 245 spu,64 clock_settime sys_clock_settime I agree that would be a nice representation, but doing this would again require changing the script, which then in turn clashes with Firoz' patches to unify it under the scripts/ directory. Arnd
Re: [PATCH 14/15] arch: add split IPC system calls where needed
On Tue, Jan 15, 2019 at 4:01 PM Arnd Bergmann wrote: > > On Mon, Jan 14, 2019 at 4:59 AM Michael Ellerman wrote: > > Arnd Bergmann writes: > > > arch/m68k/kernel/syscalls/syscall.tbl | 11 +++ > > > arch/mips/kernel/syscalls/syscall_o32.tbl | 11 +++ > > > arch/powerpc/kernel/syscalls/syscall.tbl | 12 > > > > I have some changes I'd like to make to our syscall table that will > > clash with this. > > > > I'll try and send them today. > > Ok. Are those for 5.0 or 5.1? If they are intended for 5.0, it would be > nice for me to have a branch based on 5.0-rc1 that I can put > the other patches on top of. There is also another change that I considered: At the end of my series, we have a lot of entries like 245 32 clock_settime sys_clock_settime32 245 64 clock_settime sys_clock_settime 245 spu clock_settime sys_clock_settime which could be folded into 245 32 clock_settime sys_clock_settime32 245 spu64 clock_settime sys_clock_settime if we just add another option to the ABI field. Any thoughts on that? Arnd
Re: [PATCH 14/15] arch: add split IPC system calls where needed
On Mon, Jan 14, 2019 at 4:59 AM Michael Ellerman wrote: > Arnd Bergmann writes: > > arch/m68k/kernel/syscalls/syscall.tbl | 11 +++ > > arch/mips/kernel/syscalls/syscall_o32.tbl | 11 +++ > > arch/powerpc/kernel/syscalls/syscall.tbl | 12 > > I have some changes I'd like to make to our syscall table that will > clash with this. > > I'll try and send them today. Ok. Are those for 5.0 or 5.1? If they are intended for 5.0, it would be nice for me to have a branch based on 5.0-rc1 that I can put the other patches on top of. > > diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl > > b/arch/powerpc/kernel/syscalls/syscall.tbl > > index db3bbb8744af..1bffab54ff35 100644 > > --- a/arch/powerpc/kernel/syscalls/syscall.tbl > > +++ b/arch/powerpc/kernel/syscalls/syscall.tbl > > @@ -425,3 +425,15 @@ > > 386 nospu pkey_mprotect sys_pkey_mprotect > > 387 nospu rseqsys_rseq > > 388 nospu io_pgetevents sys_io_pgetevents > > compat_sys_io_pgetevents > > +# room for arch specific syscalls > > +392 64 semtimedop sys_semtimedop > > +393 common semget sys_semget > > +394 common semctl sys_semctl > > compat_sys_semctl > > +395 common shmget sys_shmget > > +396 common shmctl sys_shmctl > > compat_sys_shmctl > > +397 common shmat sys_shmat > > compat_sys_shmat > > +398 common shmdt sys_shmdt > > +399 common msgget sys_msgget > > +400 common msgsnd sys_msgsnd > > compat_sys_msgsnd > > +401 common msgrcv sys_msgrcv > > compat_sys_msgrcv > > +402 common msgctl sys_msgctl > > compat_sys_msgctl > > We already have a gap at 366-377 from when we tried to add the split IPC > calls a few years back. > > I guess I don't mind leaving that gap and using the common numbers as > you've done here. > > But it would be good to add a comment pointing out that we have room > at 366 for more arch specific syscalls as well. Ah, I missed that. 
I've added this to my patch now: index 5c0936d862fc..2ddfba536d5f 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -460,6 +460,7 @@ 363spu switch_endian sys_ni_syscall 364common userfaultfd sys_userfaultfd 365common membarrier sys_membarrier +# 366-377 originally left for IPC, now unused 378nospu mlock2 sys_mlock2 379nospu copy_file_range sys_copy_file_range 380common preadv2 sys_preadv2 compat_sys_preadv2 Arnd
Re: [PATCH 15/15] arch: add pkey and rseq syscall numbers everywhere
On Tue, Jan 15, 2019 at 12:52 PM Russell King - ARM Linux admin wrote: > > On Thu, Jan 10, 2019 at 05:24:35PM +0100, Arnd Bergmann wrote: > > Most architectures define system call numbers for the rseq and pkey system > > calls, even when they don't support the features, and perhaps never will. > > > > Only a few architectures are missing these, so just define them anyway > > for consistency. If we decide to add them later to one of these, the > > system call numbers won't get out of sync then. > > I was lambasted for adding the pkey syscalls for 32-bit ARM in 2016, > which will probably never support it. Why has the attitude towards > this kind of thing now apparently become acceptable? I was (and still am) a bit unsure about this one. A number of architectures that won't ever support them added the numbers, but I wasn't sure if any of those that didn't add them might need it later. I tried to just go by the rule that anything that we list in asm-generic/unistd.h is probably important enough that we want to list it everywhere, even if that includes a couple that end up being rather architecture specific. I'm happy to drop this patch if you or others feel that we're better off without it though. Arnd
Re: [PATCH 14/15] arch: add split IPC system calls where needed
On Thu, Jan 10, 2019 at 9:33 PM Heiko Carstens wrote: > On Thu, Jan 10, 2019 at 05:24:34PM +0100, Arnd Bergmann wrote: > > diff --git a/arch/s390/kernel/syscalls/syscall.tbl > > b/arch/s390/kernel/syscalls/syscall.tbl > > index 022fc099b628..428cf512a757 100644 > > --- a/arch/s390/kernel/syscalls/syscall.tbl > > +++ b/arch/s390/kernel/syscalls/syscall.tbl > > @@ -391,3 +391,15 @@ > > 381 common kexec_file_load sys_kexec_file_load > > compat_sys_kexec_file_load > > 382 common io_pgetevents sys_io_pgetevents > > compat_sys_io_pgetevents > > 383 common rseqsys_rseq > > compat_sys_rseq > > +# room for arch specific syscalls > > +392 64 semtimedop sys_semtimedop - > > +393 common semget sys_semget > > sys_semget > ... > > +395 common shmget sys_shmget > > sys_shmget > ... > > +398 common shmdt sys_shmdt > > sys_shmdt > > +399 common msgget sys_msgget > > sys_msgget > > These four need compat system call wrappers, unfortunately... (well, > actually only shmget and shmdt require them, but let's add them for > all four). See arch/s390/kernel/compat_wrapper.c > > I'm afraid this compat special handling will be even more annoying in > the future, since s390 will be the only architecture which requires > this special handling. > > _Maybe_ it would make sense to automatically generate a weak compat > system call wrapper for s390 with the SYSCALL_DEFINE macros, but that > probably won't work in all cases. For some reason I was under the impression that s390 already did that. However, it seems that x86 does, so I'll try to convert the x86 version for s390, and see if I can get rid of all the wrappers that way. It would certainly be safer to have the wrappers always present, especially if we expect future system calls to be added to the s390 table by whoever implements the syscall itself. Arnd
Re: [PATCH 15/15] arch: add pkey and rseq syscall numbers everywhere
On Thu, Jan 10, 2019 at 9:36 PM Heiko Carstens wrote: > On Thu, Jan 10, 2019 at 05:24:35PM +0100, Arnd Bergmann wrote: > Since you only need/want the system call numbers, could you please > change these lines to: > > > +384 common pkey_alloc - - > > +385 common pkey_free - - > > +386 common pkey_mprotect - - > > Otherwise it _looks_ like we would need compat wrappers here as well, > even though all of them would just jump to sys_ni_syscall() in this > case. Making this explicit seems to better. Ok, fair enough. I considered doing this originally and then decided against it for consistency with the asm-generic file, but I don't care much either way. Is this something you may want to add later? I'm not sure exactly how pkey compares to s390 storage keys, or if this is something completely unrelated. Arnd
Re: [PATCH 00/15] arch: synchronize syscall tables in preparation for y2038
On Thu, Jan 10, 2019 at 7:11 PM Geert Uytterhoeven wrote: > On Thu, Jan 10, 2019 at 6:06 PM Arnd Bergmann wrote: > > On Thu, Jan 10, 2019 at 5:59 PM Geert Uytterhoeven > > wrote: > > > On Thu, Jan 10, 2019 at 5:26 PM Arnd Bergmann wrote: > > > > The system call tables have diverged a bit over the years, and a number > > > > of the recent additions never made it into all architectures, for one > > > > reason or another. > > > > > > > > This is an attempt to clean it up as far as we can without breaking > > > > compatibility, doing a number of steps: > > > > > > Thanks a lot! > > > > > > > - Add system calls that have not yet been integrated into all > > > > architectures but that we definitely want there. > > > > > > It looks like you missed wiring up io_pgetevents() on m68k. > > > Is that intentional? > > > > Yes, I thought I had described that somewhere but maybe I > > forgot: semtimedop() and io_pgetevents() get replaced with > > time64 versions in the follow-up, so I only added them in > > 64-bit architectures. If you think we should have both > > io_pgetevents() and io_pgetevents_time32() on all 32-bit > > architectures, I can add that as well. > > Thanks, sounds fine to me. Just to be sure, you mean it's fine to not add it, not that we should add it? Arnd
Re: [PATCH 00/15] arch: synchronize syscall tables in preparation for y2038
On Thu, Jan 10, 2019 at 7:10 PM Joseph Myers wrote: > > On Thu, 10 Jan 2019, Arnd Bergmann wrote: > > > - Add system calls that have not yet been integrated into all > > architectures but that we definitely want there. > > glibc has a note that alpha lacks statfs64, any plans for that? Good catch, I missed that because all other 64-bit architectures have a statfs() call with 64-bit fields. I see that it also has an osf_statfs64 structure and system call with lots of padding and some oddly sized fields: f_type, f_flags and f_namemax are only 16 bits wide, the rest is all 64-bit. Adding the regular statfs64() should be easy enough, we just need to decide which layout to use: a) use the currently unused 'struct statfs64' as provided by the alpha uapi headers, which has a 32-bit __statfs_word but 64-bit f_blocks, f_bfree, f_bavail, f_files, and f_ffree. b) copy asm-generic/statfs.h to the alpha asm/statfs.h and change statfs64 to have the regular layout that we use on all other 64-bit architectures, using all 64-bit fields. The other open question for alpha (as mentioned in one of the patches I sent) would be whether to add get{eg,eu,g,p,pp,u}id() with the regular calling conventions. Arnd
[PATCH 00/11] y2038: add time64 syscalls
This series finally gets us to the point of having system calls with 64-bit time_t on all architectures, after a long time of incremental preparation patches. There was actually one conversion that I missed during the summer, i.e. Deepa's timex series, which I now updated based on the 5.0-rc1 changes and review comments. I hope that the actual conversion should be uncontroversial by now, even if some of the patches are rather large. The one area that may need a little discussion is for the system call numbers assigned in the final patch: Can we get consensus on whether the idea of using the same numbers on all architectures, as well as my choice of numbers makes sense here? So far, I have done a lot of build testing across most architectures, which has found a number of bugs. I have also done an LTP run on arm32 with existing user space, but not on the other architectures. I did LTP tests with a modified musl libc[2] last summer on an older version of this series to make sure that the new 64-bit time_t interfaces work. The version there will need updates for testing with this new kernel patch series; I plan to do that next. For testing, the series plus the preparatory patches is available at [3]. Once there is a general agreement on this series and I have done more tests for the new system calls, I plan to add this to linux-next through my asm-generic tree or Thomas' timers tree. Please review and test!
Arnd [1] https://lore.kernel.org/lkml/20190110162435.309262-1-a...@arndb.de/T/ [2] https://git.linaro.org/people/arnd/musl-y2038.git/ [3] https://git.kernel.org/pub/scm/linux/kernel/git/arnd/playground.git y2038-5.0-rc1 Arnd Bergmann (8): time: make adjtime compat handling available for 32 bit time: fix sys_timer_settime prototype sparc64: add custom adjtimex/clock_adjtime functions y2038: syscalls: rename y2038 compat syscalls y2038: use time32 syscall names on 32-bit y2038: remove struct definition redirects y2038: rename old time and utime syscalls y2038: add 64-bit time_t syscalls to all 32-bit architectures Deepa Dinamani (3): time: Add struct __kernel_timex timex: use __kernel_timex internally timex: change syscalls to use struct __kernel_timex arch/Kconfig| 2 +- arch/alpha/kernel/osf_sys.c | 5 +- arch/alpha/kernel/syscalls/syscall.tbl | 2 + arch/arm/include/asm/unistd.h | 4 +- arch/arm/kernel/sys_oabi-compat.c | 8 +- arch/arm/tools/syscall.tbl | 77 - arch/arm64/include/asm/unistd.h | 2 +- arch/arm64/include/asm/unistd32.h | 89 ++ arch/ia64/kernel/syscalls/syscall.tbl | 1 + arch/m68k/include/asm/unistd.h | 4 +- arch/m68k/kernel/syscalls/syscall.tbl | 72 +++- arch/microblaze/include/asm/unistd.h| 4 +- arch/microblaze/kernel/syscalls/syscall.tbl | 77 - arch/mips/include/asm/unistd.h | 4 +- arch/mips/kernel/syscalls/syscall_n32.tbl | 71 arch/mips/kernel/syscalls/syscall_n64.tbl | 1 + arch/mips/kernel/syscalls/syscall_o32.tbl | 74 +++- arch/parisc/include/asm/unistd.h| 9 +- arch/parisc/kernel/syscalls/syscall.tbl | 105 - arch/powerpc/include/asm/unistd.h | 8 +- arch/powerpc/kernel/syscalls/syscall.tbl| 121 +++- arch/s390/include/asm/unistd.h | 2 +- arch/s390/kernel/syscalls/syscall.tbl | 72 +++- arch/sh/include/asm/unistd.h| 4 +- arch/sh/kernel/syscalls/syscall.tbl | 72 +++- arch/sparc/include/asm/unistd.h | 8 +- arch/sparc/kernel/sys_sparc_64.c| 59 +- arch/sparc/kernel/syscalls/syscall.tbl | 100 +++- arch/x86/entry/syscalls/syscall_32.tbl | 74 +++- 
arch/x86/entry/syscalls/syscall_64.tbl | 4 +- arch/x86/include/asm/unistd.h | 8 +- arch/xtensa/include/asm/unistd.h| 2 +- arch/xtensa/kernel/syscalls/syscall.tbl | 71 drivers/ptp/ptp_clock.c | 2 +- fs/aio.c| 10 +- fs/select.c | 4 +- fs/timerfd.c| 4 +- fs/utimes.c | 10 +- include/linux/compat.h | 104 + include/linux/posix-clock.h | 2 +- include/linux/syscalls.h| 65 ++- include/linux/time32.h | 32 +- include/linux/time64.h | 8 -- include/linux/timex.h | 4 +- include/uapi/asm-generic/unistd.h | 103 - include/uapi/linux/time.h | 4 - include/uapi/linux/timex.h | 39 +++ ipc/mqueue.c
[PATCH 10/11] y2038: rename old time and utime syscalls
The time, stime, utime, utimes, and futimesat system calls are only used on older architectures, and we do not provide y2038 safe variants of them, as they are replaced by clock_gettime64, clock_settime64, and utimensat_time64. However, for consistency it seems better to have the 32-bit architectures that still use them call the "time32" entry points (leaving the traditional handlers for the 64-bit architectures), like we do for system calls that now require two versions. Note: We used to always define __ARCH_WANT_SYS_TIME and __ARCH_WANT_SYS_UTIME and only set __ARCH_WANT_COMPAT_SYS_TIME and __ARCH_WANT_SYS_UTIME32 for compat mode on 64-bit kernels. Now this is reversed: only 64-bit architectures set __ARCH_WANT_SYS_TIME/UTIME, while we need __ARCH_WANT_SYS_TIME32/UTIME32 for 32-bit architectures and compat mode. The resulting asm/unistd.h changes look a bit counterintuitive. This is only a cleanup patch and it should not change any behavior. Signed-off-by: Arnd Bergmann --- arch/arm/include/asm/unistd.h | 4 ++-- arch/arm/tools/syscall.tbl | 10 +- arch/m68k/include/asm/unistd.h | 4 ++-- arch/m68k/kernel/syscalls/syscall.tbl | 10 +- arch/microblaze/include/asm/unistd.h| 4 ++-- arch/microblaze/kernel/syscalls/syscall.tbl | 10 +- arch/mips/include/asm/unistd.h | 4 ++-- arch/mips/kernel/syscalls/syscall_o32.tbl | 10 +- arch/parisc/include/asm/unistd.h| 9 ++--- arch/parisc/kernel/syscalls/syscall.tbl | 15 ++- arch/powerpc/include/asm/unistd.h | 8 arch/powerpc/kernel/syscalls/syscall.tbl| 19 ++- arch/s390/include/asm/unistd.h | 2 +- arch/sh/include/asm/unistd.h| 4 ++-- arch/sh/kernel/syscalls/syscall.tbl | 10 +- arch/sparc/include/asm/unistd.h | 8 arch/sparc/kernel/syscalls/syscall.tbl | 14 +- arch/x86/entry/syscalls/syscall_32.tbl | 10 +- arch/x86/include/asm/unistd.h | 8 arch/xtensa/include/asm/unistd.h| 2 +- arch/xtensa/kernel/syscalls/syscall.tbl | 6 +++--- kernel/time/time.c | 4 ++-- 22 files changed, 98 insertions(+), 77 deletions(-) diff --git 
a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h index d713587dfcf4..7a39e77984ef 100644 --- a/arch/arm/include/asm/unistd.h +++ b/arch/arm/include/asm/unistd.h @@ -26,10 +26,10 @@ #define __ARCH_WANT_SYS_SIGPROCMASK #define __ARCH_WANT_SYS_OLD_MMAP #define __ARCH_WANT_SYS_OLD_SELECT -#define __ARCH_WANT_SYS_UTIME +#define __ARCH_WANT_SYS_UTIME32 #if !defined(CONFIG_AEABI) || defined(CONFIG_OABI_COMPAT) -#define __ARCH_WANT_SYS_TIME +#define __ARCH_WANT_SYS_TIME32 #define __ARCH_WANT_SYS_IPC #define __ARCH_WANT_SYS_OLDUMOUNT #define __ARCH_WANT_SYS_ALARM diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index 200f4b878a46..a96d9b5ee04e 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -24,7 +24,7 @@ 10 common unlink sys_unlink 11 common execve sys_execve 12 common chdir sys_chdir -13 oabitimesys_time +13 oabitimesys_time32 14 common mknod sys_mknod 15 common chmod sys_chmod 16 common lchown sys_lchown16 @@ -36,12 +36,12 @@ 22 oabiumount sys_oldumount 23 common setuid sys_setuid16 24 common getuid sys_getuid16 -25 oabistime sys_stime +25 oabistime sys_stime32 26 common ptrace sys_ptrace 27 oabialarm sys_alarm # 28 was sys_fstat 29 common pause sys_pause -30 oabiutime sys_utime +30 oabiutime sys_utime32 # 31 was sys_stty # 32 was sys_gtty 33 common access sys_access @@ -283,7 +283,7 @@ 266common statfs64sys_statfs64_wrapper 267common fstatfs64 sys_fstatfs64_wrapper 268common tgkill sys_tgkill -269common utimes sys_utimes +269common utimes sys_utimes_time32 270common arm_fadvise64_64sys_arm_fadvise64_64 271common pciconfig_iobasesys_pciconfig_iobase 272common pciconfig_read sys_pciconfig_read @@ -340,7 +340,7 @@ 323common mkdirat sys_mkdirat 324common mknodat sys_mknodat 325common fchownatsys_fchownat -326common futimesat sys_futimesat +326common futimesat sys_futimesat_time32 327common
[PATCH 06/11] timex: change syscalls to use struct __kernel_timex
From: Deepa Dinamani struct timex is not y2038 safe. Switch all the syscall apis to use y2038 safe __kernel_timex. Note that sys_adjtimex() does not have a y2038 safe solution. C libraries can implement it by calling clock_adjtime(CLOCK_REALTIME, ...). Signed-off-by: Deepa Dinamani Signed-off-by: Arnd Bergmann --- include/linux/syscalls.h | 6 +++--- kernel/time/posix-timers.c | 2 +- kernel/time/time.c | 4 +++- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 8e86d9623d4e..394e8db7e57e 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -54,7 +54,7 @@ struct __sysctl_args; struct sysinfo; struct timespec; struct timeval; -struct timex; +struct __kernel_timex; struct timezone; struct tms; struct utimbuf; @@ -695,7 +695,7 @@ asmlinkage long sys_gettimeofday(struct timeval __user *tv, struct timezone __user *tz); asmlinkage long sys_settimeofday(struct timeval __user *tv, struct timezone __user *tz); -asmlinkage long sys_adjtimex(struct timex __user *txc_p); +asmlinkage long sys_adjtimex(struct __kernel_timex __user *txc_p); /* kernel/timer.c */ asmlinkage long sys_getpid(void); @@ -870,7 +870,7 @@ asmlinkage long sys_open_by_handle_at(int mountdirfd, struct file_handle __user *handle, int flags); asmlinkage long sys_clock_adjtime(clockid_t which_clock, - struct timex __user *tx); + struct __kernel_timex __user *tx); asmlinkage long sys_syncfs(int fd); asmlinkage long sys_setns(int fd, int nstype); asmlinkage long sys_sendmmsg(int fd, struct mmsghdr __user *msg, diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 2d84b3db1ade..de79f85ae14f 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -1060,7 +1060,7 @@ int do_clock_adjtime(const clockid_t which_clock, struct __kernel_timex * ktx) } SYSCALL_DEFINE2(clock_adjtime, const clockid_t, which_clock, - struct timex __user *, utx) + struct __kernel_timex __user *, utx) { struct 
__kernel_timex ktx; int err; diff --git a/kernel/time/time.c b/kernel/time/time.c index d179d33f639a..78b5c8f1495a 100644 --- a/kernel/time/time.c +++ b/kernel/time/time.c @@ -263,7 +263,8 @@ COMPAT_SYSCALL_DEFINE2(settimeofday, struct old_timeval32 __user *, tv, } #endif -SYSCALL_DEFINE1(adjtimex, struct timex __user *, txc_p) +#if !defined(CONFIG_64BIT_TIME) || defined(CONFIG_64BIT) +SYSCALL_DEFINE1(adjtimex, struct __kernel_timex __user *, txc_p) { struct __kernel_timex txc; /* Local copy of parameter */ int ret; @@ -277,6 +278,7 @@ SYSCALL_DEFINE1(adjtimex, struct timex __user *, txc_p) ret = do_adjtimex(); return copy_to_user(txc_p, , sizeof(struct __kernel_timex)) ? -EFAULT : ret; } +#endif #ifdef CONFIG_COMPAT_32BIT_TIME int get_old_timex32(struct __kernel_timex *txc, const struct old_timex32 __user *utp) -- 2.20.0
[PATCH 11/11] y2038: add 64-bit time_t syscalls to all 32-bit architectures
This adds 21 new system calls on each ABI that has 32-bit time_t today. All of these have the exact same semantics as their existing counterparts, and the new ones all have macro names that end in 'time64' for clarification. This gets us to the point of being able to safely use a C library that has 64-bit time_t in user space. There are still a couple of loose ends to tie up in various areas of the code, but this is the big one, and should be entirely uncontroversial at this point. In particular, there are four system calls (getitimer, setitimer, waitid, and getrusage) that don't have a 64-bit counterpart yet, but these can all be safely implemented in the C library by wrapping around the existing system calls because the 32-bit time_t they pass only counts elapsed time, not time since the epoch. They will be dealt with later. Signed-off-by: Arnd Bergmann --- The one point that still needs to be agreed on is the actual number assignment. Following the earlier patch that added the sysv IPC calls with common numbers where possible, I also tried the same here, using consistent numbers on all 32-bit architectures. There are a couple of minor issues with this: - On asm-generic, we now leave the numbers from 295 to 402 unassigned, which wastes a small amount of kernel .data segment. Originally I had asm-generic start at 300 and everyone else start at 400 here, which was also not perfect, and we have gone beyond 400 already, so I ended up just using the same numbers as the rest here. - Once we get to 512, we clash with the x32 numbers (unless we remove x32 support first), and probably have to skip a few more. I also considered using the 512..547 space for 32-bit-only calls (which never clash with x32), but that also seems to add a bit of complexity. - On alpha, we have already used up the space up to 527 (with a small hole between 261 and 299). We could sync up with that as well, but my feeling was that alpha syscalls are already special enough that I don't care. 
Let me know if you have other ideas. --- arch/alpha/kernel/syscalls/syscall.tbl | 2 + arch/arm/tools/syscall.tbl | 21 ++ arch/arm64/include/asm/unistd.h | 2 +- arch/arm64/include/asm/unistd32.h | 41 +++ arch/ia64/kernel/syscalls/syscall.tbl | 1 + arch/m68k/kernel/syscalls/syscall.tbl | 20 + arch/microblaze/kernel/syscalls/syscall.tbl | 21 ++ arch/mips/kernel/syscalls/syscall_n32.tbl | 21 ++ arch/mips/kernel/syscalls/syscall_n64.tbl | 1 + arch/mips/kernel/syscalls/syscall_o32.tbl | 20 + arch/parisc/kernel/syscalls/syscall.tbl | 21 ++ arch/powerpc/kernel/syscalls/syscall.tbl| 20 + arch/s390/kernel/syscalls/syscall.tbl | 20 + arch/sh/kernel/syscalls/syscall.tbl | 20 + arch/sparc/kernel/syscalls/syscall.tbl | 20 + arch/x86/entry/syscalls/syscall_32.tbl | 20 + arch/xtensa/kernel/syscalls/syscall.tbl | 21 ++ include/uapi/asm-generic/unistd.h | 45 - scripts/checksyscalls.sh| 40 ++ 19 files changed, 375 insertions(+), 2 deletions(-) diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index 25b4a7e76943..04d96d042180 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -456,3 +456,5 @@ 525common pkey_free sys_pkey_free 526common pkey_mprotect sys_pkey_mprotect 527common rseqsys_rseq +# all other architectures have common numbers for new syscall, alpha +# is the exception. 
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index a96d9b5ee04e..286afdc43283 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -416,3 +416,24 @@ 399common io_pgetevents sys_io_pgetevents_time32 400common migrate_pages sys_migrate_pages 401common kexec_file_load sys_kexec_file_load +# 402 is unused +403common clock_gettime64 sys_clock_gettime +404common clock_settime64 sys_clock_settime +405common clock_adjtime64 sys_clock_adjtime +406common clock_getres_time64 sys_clock_getres +407common clock_nanosleep_time64 sys_clock_nanosleep +408common timer_gettime64 sys_timer_gettime +409common timer_settime64 sys_timer_settime +410common timerfd_gettime64 sys_timerfd_gettime +411common timerfd_settime64 sys_timerfd_settime +412common utimensat_time64sys_utimensat +413common pselect6_time64 sys_pselect6 +414common ppoll_time64sys_ppoll +416common io_pgetevents_time64
[PATCH 09/11] y2038: remove struct definition redirects
We now use 64-bit time_t on all architectures, so the __kernel_timex, __kernel_timeval and __kernel_timespec redirects can be removed after having served their purpose. This makes it all much less confusing, as the __kernel_* types now always refer to the same layout based on 64-bit time_t across all 32-bit and 64-bit architectures. Signed-off-by: Arnd Bergmann --- include/linux/time64.h | 8 include/linux/timex.h | 7 --- include/uapi/linux/time.h | 4 include/uapi/linux/timex.h | 2 -- 4 files changed, 21 deletions(-) diff --git a/include/linux/time64.h b/include/linux/time64.h index 05634afba0db..f38d382ffec1 100644 --- a/include/linux/time64.h +++ b/include/linux/time64.h @@ -7,14 +7,6 @@ typedef __s64 time64_t; typedef __u64 timeu64_t; -/* CONFIG_64BIT_TIME enables new 64 bit time_t syscalls in the compat path - * and 32-bit emulation. - */ -#ifndef CONFIG_64BIT_TIME -#define __kernel_timespec timespec -#define __kernel_itimerspec itimerspec -#endif - #include struct timespec64 { diff --git a/include/linux/timex.h b/include/linux/timex.h index 4aff9f0d1367..ce0859763670 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -53,13 +53,6 @@ #ifndef _LINUX_TIMEX_H #define _LINUX_TIMEX_H -/* CONFIG_64BIT_TIME enables new 64 bit time_t syscalls in the compat path - * and 32-bit emulation. 
- */ -#ifndef CONFIG_64BIT_TIME -#define __kernel_timex timex -#endif - #include #define ADJ_ADJTIME0x8000 /* switch between adjtime/adjtimex modes */ diff --git a/include/uapi/linux/time.h b/include/uapi/linux/time.h index 6b56a2208be7..b03f8717c312 100644 --- a/include/uapi/linux/time.h +++ b/include/uapi/linux/time.h @@ -42,19 +42,15 @@ struct itimerval { struct timeval it_value;/* current value */ }; -#ifndef __kernel_timespec struct __kernel_timespec { __kernel_time64_t tv_sec; /* seconds */ long long tv_nsec;/* nanoseconds */ }; -#endif -#ifndef __kernel_itimerspec struct __kernel_itimerspec { struct __kernel_timespec it_interval;/* timer period */ struct __kernel_timespec it_value; /* timer expiration */ }; -#endif /* * legacy timeval structure, only embedded in structures that diff --git a/include/uapi/linux/timex.h b/include/uapi/linux/timex.h index a1c6b73016a5..9f517f9010bb 100644 --- a/include/uapi/linux/timex.h +++ b/include/uapi/linux/timex.h @@ -97,7 +97,6 @@ struct __kernel_timex_timeval { long long tv_usec; }; -#ifndef __kernel_timex struct __kernel_timex { unsigned int modes; /* mode selector */ int :32;/* pad */ @@ -131,7 +130,6 @@ struct __kernel_timex { int :32; int :32; int :32; int :32; int :32; int :32; int :32; }; -#endif /* * Mode codes (timex.mode) -- 2.20.0
[PATCH 08/11] y2038: use time32 syscall names on 32-bit
This is the big flip, where all 32-bit architectures set COMPAT_32BIT_TIME and use the _time32 system calls from the former compat layer instead of the system calls that take __kernel_timespec and similar arguments. The temporary redirects for __kernel_timespec, __kernel_itimerspec and __kernel_timex can get removed with this. It would be easy to split this commit by architecture, but with the new generated system call tables, it's easy enough to do it all at once, which makes it a little easier to check that the changes are the same in each table. Signed-off-by: Arnd Bergmann --- arch/Kconfig| 2 +- arch/arm/kernel/sys_oabi-compat.c | 8 +- arch/arm/tools/syscall.tbl | 46 ++-- arch/m68k/kernel/syscalls/syscall.tbl | 42 +-- arch/microblaze/kernel/syscalls/syscall.tbl | 46 ++-- arch/mips/kernel/syscalls/syscall_o32.tbl | 44 +-- arch/parisc/kernel/syscalls/syscall.tbl | 69 +++-- arch/powerpc/kernel/syscalls/syscall.tbl| 82 +++-- arch/sh/kernel/syscalls/syscall.tbl | 42 +-- arch/sparc/kernel/syscalls/syscall.tbl | 64 ++-- arch/x86/entry/syscalls/syscall_32.tbl | 44 +-- arch/xtensa/kernel/syscalls/syscall.tbl | 44 +-- include/uapi/asm-generic/unistd.h | 56 +++--- 13 files changed, 335 insertions(+), 254 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index 4cfb6de48f79..46db715a7f42 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -759,7 +759,7 @@ config 64BIT_TIME handling. config COMPAT_32BIT_TIME - def_bool (!64BIT && 64BIT_TIME) || COMPAT + def_bool !64BIT || COMPAT help This enables 32 bit time_t support in addition to 64 bit time_t support. 
This is relevant on all 32-bit architectures, and 64-bit architectures diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c index 92ab36f38795..acd054a42ba2 100644 --- a/arch/arm/kernel/sys_oabi-compat.c +++ b/arch/arm/kernel/sys_oabi-compat.c @@ -317,10 +317,10 @@ struct oabi_sembuf { asmlinkage long sys_oabi_semtimedop(int semid, struct oabi_sembuf __user *tsops, unsigned nsops, - const struct timespec __user *timeout) + const struct old_timespec32 __user *timeout) { struct sembuf *sops; - struct timespec local_timeout; + struct old_timespec32 local_timeout; long err; int i; @@ -350,7 +350,7 @@ asmlinkage long sys_oabi_semtimedop(int semid, } else { mm_segment_t fs = get_fs(); set_fs(KERNEL_DS); - err = sys_semtimedop(semid, sops, nsops, timeout); + err = sys_semtimedop_time32(semid, sops, nsops, timeout); set_fs(fs); } kfree(sops); @@ -375,7 +375,7 @@ asmlinkage int sys_oabi_ipc(uint call, int first, int second, int third, return sys_oabi_semtimedop(first, (struct oabi_sembuf __user *)ptr, second, - (const struct timespec __user *)fifth); + (const struct old_timespec32 __user *)fifth); default: return sys_ipc(call, first, second, third, ptr, fifth); } diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index b54b7f2bc24a..200f4b878a46 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -137,7 +137,7 @@ 121common setdomainname sys_setdomainname 122common uname sys_newuname # 123 was sys_modify_ldt -124common adjtimexsys_adjtimex +124common adjtimexsys_adjtimex_time32 125common mprotectsys_mprotect 126common sigprocmask sys_sigprocmask # 127 was sys_create_module @@ -174,8 +174,8 @@ 158common sched_yield sys_sched_yield 159common sched_get_priority_max sys_sched_get_priority_max 160common sched_get_priority_min sys_sched_get_priority_min -161common sched_rr_get_interval sys_sched_rr_get_interval -162common nanosleep sys_nanosleep +161common sched_rr_get_interval sys_sched_rr_get_interval_time32 
+162common nanosleep sys_nanosleep_time32 163common mremap sys_mremap 164common setresuid sys_setresuid16 165common getresuid sys_getresuid16 @@ -190,7 +190,7 @@ 174common rt_sigactionsys_rt_sigaction 175common rt_sigprocmask sys_rt_sigprocmask 176common rt_sigpending sys_rt_sigpending -177
[PATCH 02/11] time: Add struct __kernel_timex
From: Deepa Dinamani struct timex uses struct timeval internally. struct timeval is not y2038 safe. Introduce a new UAPI type struct __kernel_timex that is y2038 safe. struct __kernel_timex uses a timeval type that is similar to struct __kernel_timespec which preserves the same structure size across 32 bit and 64 bit ABIs. struct __kernel_timex also restructures other members of the structure to make the structure the same on 64 bit and 32 bit architectures. Note that struct __kernel_timex is the same as struct timex on a 64 bit architecture. The above solution is similar to other new y2038 syscalls that are being introduced: both 32 bit and 64 bit ABIs have a common entry, and the compat entry supports the old 32 bit syscall interface. Alternatives considered were: 1. Add new time type to struct timex that makes use of padded bits. This time type could be based on the struct __kernel_timespec. modes will use a flag to notify which time structure should be used internally. This needs some application level changes on both 64 bit and 32 bit architectures, although 64 bit machines could continue to use the older timeval structure without any changes. 2. Add a new u8 type to struct timex that makes use of padded bits. This can be used to save higher order tv_sec bits. modes will use a flag to notify presence of such a type. This will need some application level changes on 32 bit architectures. 3. Add a new compat_timex structure that differs in only the size of the time type; keep rest of struct timex the same. This requires extra syscalls to manage all 3 cases on 64 bit architectures. This will not need any application level changes but will add more complexity from kernel side. 
Signed-off-by: Deepa Dinamani --- include/linux/timex.h | 7 +++ include/uapi/linux/timex.h | 41 ++ 2 files changed, 48 insertions(+) diff --git a/include/linux/timex.h b/include/linux/timex.h index 39c25dbebfe8..7f40e9e42ecc 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -53,6 +53,13 @@ #ifndef _LINUX_TIMEX_H #define _LINUX_TIMEX_H +/* CONFIG_64BIT_TIME enables new 64 bit time_t syscalls in the compat path + * and 32-bit emulation. + */ +#ifndef CONFIG_64BIT_TIME +#define __kernel_timex timex +#endif + #include #define ADJ_ADJTIME0x8000 /* switch between adjtime/adjtimex modes */ diff --git a/include/uapi/linux/timex.h b/include/uapi/linux/timex.h index 92685d826444..a1c6b73016a5 100644 --- a/include/uapi/linux/timex.h +++ b/include/uapi/linux/timex.h @@ -92,6 +92,47 @@ struct timex { int :32; int :32; int :32; }; +struct __kernel_timex_timeval { + __kernel_time64_t tv_sec; + long long tv_usec; +}; + +#ifndef __kernel_timex +struct __kernel_timex { + unsigned int modes; /* mode selector */ + int :32;/* pad */ + long long offset; /* time offset (usec) */ + long long freq; /* frequency offset (scaled ppm) */ + long long maxerror;/* maximum error (usec) */ + long long esterror;/* estimated error (usec) */ + int status; /* clock command/status */ + int :32;/* pad */ + long long constant;/* pll time constant */ + long long precision;/* clock precision (usec) (read only) */ + long long tolerance;/* clock frequency tolerance (ppm) + * (read only) + */ + struct __kernel_timex_timeval time; /* (read only, except for ADJ_SETOFFSET) */ + long long tick; /* (modified) usecs between clock ticks */ + + long long ppsfreq;/* pps frequency (scaled ppm) (ro) */ + long long jitter; /* pps jitter (us) (ro) */ + int shift; /* interval duration (s) (shift) (ro) */ + int :32;/* pad */ + long long stabil;/* pps stability (scaled ppm) (ro) */ + long long jitcnt; /* jitter limit exceeded (ro) */ + long long calcnt; /* calibration intervals (ro) */ + long long errcnt; /* 
calibration errors (ro) */ + long long stbcnt; /* stability limit exceeded (ro) */ + + int tai;/* TAI offset (ro) */ + + int :32; int :32; int :32; int :32; + int :32; int :32; int :32; int :32; + int :32; int :32; int :32; +}; +#endif + /* * Mode codes (timex.mode) */ -- 2.20.0
[PATCH 03/11] time: fix sys_timer_settime prototype
A small typo has crept into the y2038 conversion of the timer_settime system call. So far this was completely harmless, but once we start using the new version, this has to be fixed. Fixes: 6ff847350702 ("time: Change types to new y2038 safe __kernel_itimerspec") Signed-off-by: Arnd Bergmann --- include/linux/syscalls.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 0296772e8fe5..8e86d9623d4e 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -591,7 +591,7 @@ asmlinkage long sys_timer_gettime(timer_t timer_id, asmlinkage long sys_timer_getoverrun(timer_t timer_id); asmlinkage long sys_timer_settime(timer_t timer_id, int flags, const struct __kernel_itimerspec __user *new_setting, - struct itimerspec __user *old_setting); + struct __kernel_itimerspec __user *old_setting); asmlinkage long sys_timer_delete(timer_t timer_id); asmlinkage long sys_clock_settime(clockid_t which_clock, const struct __kernel_timespec __user *tp); -- 2.20.0
[PATCH 04/11] sparc64: add custom adjtimex/clock_adjtime functions
sparc64 is the only architecture on Linux that has a 'timeval' definition with a 32-bit tv_usec but a 64-bit tv_sec. This causes problems for sparc32 compat mode when we convert it to use the new __kernel_timex type that has the same layout as all other 64-bit architectures. To avoid adding sparc64 specific code into the generic adjtimex implementation, this adds a wrapper in the sparc64 system call handling that converts the sparc64 'timex' into the new '__kernel_timex'. At this point, the two structures are defined to be identical, but that will change in the next step once we convert sparc32. Signed-off-by: Arnd Bergmann --- arch/sparc/kernel/sys_sparc_64.c | 59 +- arch/sparc/kernel/syscalls/syscall.tbl | 6 ++- include/linux/timex.h | 2 + kernel/time/posix-timers.c | 24 +-- 4 files changed, 76 insertions(+), 15 deletions(-) diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index 1c079e7bab09..37de18a11207 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -28,8 +28,9 @@ #include #include #include - +#include #include + #include #include @@ -544,6 +545,62 @@ SYSCALL_DEFINE2(getdomainname, char __user *, name, int, len) return err; } +SYSCALL_DEFINE1(sparc_adjtimex, struct timex __user *, txc_p) +{ + struct timex txc; /* Local copy of parameter */ + struct timex *kt = (void *) + int ret; + + /* Copy the user data space into the kernel copy +* structure. But bear in mind that the structures +* may change +*/ + if (copy_from_user(, txc_p, sizeof(struct timex))) + return -EFAULT; + + /* +* override for sparc64 specific timeval type: tv_usec +* is 32 bit wide instead of 64-bit in __kernel_timex +*/ + kt->time.tv_usec = txc.time.tv_usec; + ret = do_adjtimex(kt); + txc.time.tv_usec = kt->time.tv_usec; + + return copy_to_user(txc_p, , sizeof(struct timex)) ? 
-EFAULT : ret; +} + +SYSCALL_DEFINE2(sparc_clock_adjtime, const clockid_t, which_clock,struct timex __user *, txc_p) +{ + struct timex txc; /* Local copy of parameter */ + struct timex *kt = (void *) + int ret; + + if (!IS_ENABLED(CONFIG_POSIX_TIMERS)) { + pr_err_once("process %d (%s) attempted a POSIX timer syscall " + "while CONFIG_POSIX_TIMERS is not set\n", + current->pid, current->comm); + + return -ENOSYS; + } + + /* Copy the user data space into the kernel copy +* structure. But bear in mind that the structures +* may change +*/ + if (copy_from_user(, txc_p, sizeof(struct timex))) + return -EFAULT; + + /* +* override for sparc64 specific timeval type: tv_usec +* is 32 bit wide instead of 64-bit in __kernel_timex +*/ + kt->time.tv_usec = txc.time.tv_usec; + ret = do_clock_adjtime(which_clock, kt); + txc.time.tv_usec = kt->time.tv_usec; + + return copy_to_user(txc_p, , sizeof(struct timex)) ? -EFAULT : ret; +} + SYSCALL_DEFINE5(utrap_install, utrap_entry_t, type, utrap_handler_t, new_p, utrap_handler_t, new_d, utrap_handler_t __user *, old_p, diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index 24ebef675184..e70110375399 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -258,7 +258,8 @@ 21664 sigreturn sys_nis_syscall 217common clone sys_clone 218common ioprio_get sys_ioprio_get -219common adjtimexsys_adjtimex compat_sys_adjtimex +21932 adjtimexsys_adjtimex compat_sys_adjtimex +21964 adjtimexsys_sparc_adjtimex 22032 sigprocmask sys_sigprocmask compat_sys_sigprocmask 22064 sigprocmask sys_nis_syscall 221common create_module sys_ni_syscall @@ -377,7 +378,8 @@ 331common prlimit64 sys_prlimit64 332common name_to_handle_at sys_name_to_handle_at 333common open_by_handle_at sys_open_by_handle_at compat_sys_open_by_handle_at -334common clock_adjtime sys_clock_adjtime compat_sys_clock_adjtime +33432 clock_adjtime sys_clock_adjtime compat_sys_clock_adjtime +33464 
clock_adjtime sys_sparc_clock_adjtime 335common syncfs sys_syncfs 336common sendmmsgsys_sendmmsg compat_sys_sendmmsg 337common setns sys_setns diff -
[PATCH 01/11] time: make adjtime compat handling available for 32 bit
We want to reuse the compat_timex handling on 32-bit architectures the same way we are using the compat handling for timespec when moving to 64-bit time_t. Move all definitions related to compat_timex out of the compat code into the normal timekeeping code, along with a rename to old_timex32, corresponding to the timespec/timeval structures, and make it controlled by CONFIG_COMPAT_32BIT_TIME, which 32-bit architectures will then select. Signed-off-by: Arnd Bergmann --- include/linux/compat.h | 35 ++- include/linux/time32.h | 32 - kernel/compat.c| 64 -- kernel/time/posix-timers.c | 14 ++-- kernel/time/time.c | 70 +++--- 5 files changed, 102 insertions(+), 113 deletions(-) diff --git a/include/linux/compat.h b/include/linux/compat.h index 056be0d03722..657ca6abd855 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -132,37 +132,6 @@ struct compat_tms { compat_clock_t tms_cstime; }; -struct compat_timex { - compat_uint_t modes; - compat_long_t offset; - compat_long_t freq; - compat_long_t maxerror; - compat_long_t esterror; - compat_int_t status; - compat_long_t constant; - compat_long_t precision; - compat_long_t tolerance; - struct old_timeval32 time; - compat_long_t tick; - compat_long_t ppsfreq; - compat_long_t jitter; - compat_int_t shift; - compat_long_t stabil; - compat_long_t jitcnt; - compat_long_t calcnt; - compat_long_t errcnt; - compat_long_t stbcnt; - compat_int_t tai; - - compat_int_t:32; compat_int_t:32; compat_int_t:32; compat_int_t:32; - compat_int_t:32; compat_int_t:32; compat_int_t:32; compat_int_t:32; - compat_int_t:32; compat_int_t:32; compat_int_t:32; -}; - -struct timex; -int compat_get_timex(struct timex *, const struct compat_timex __user *); -int compat_put_timex(struct compat_timex __user *, const struct timex *); - #define _COMPAT_NSIG_WORDS (_COMPAT_NSIG / _COMPAT_NSIG_BPW) typedef struct { @@ -808,7 +777,7 @@ asmlinkage long compat_sys_gettimeofday(struct old_timeval32 __user *tv, struct timezone __user *tz); asmlinkage 
long compat_sys_settimeofday(struct old_timeval32 __user *tv, struct timezone __user *tz); -asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp); +asmlinkage long compat_sys_adjtimex(struct old_timex32 __user *utp); /* kernel/timer.c */ asmlinkage long compat_sys_sysinfo(struct compat_sysinfo __user *info); @@ -911,7 +880,7 @@ asmlinkage long compat_sys_open_by_handle_at(int mountdirfd, struct file_handle __user *handle, int flags); asmlinkage long compat_sys_clock_adjtime(clockid_t which_clock, -struct compat_timex __user *tp); +struct old_timex32 __user *tp); asmlinkage long compat_sys_sendmmsg(int fd, struct compat_mmsghdr __user *mmsg, unsigned vlen, unsigned int flags); asmlinkage ssize_t compat_sys_process_vm_readv(compat_pid_t pid, diff --git a/include/linux/time32.h b/include/linux/time32.h index 118b9977080c..820a22e2b98b 100644 --- a/include/linux/time32.h +++ b/include/linux/time32.h @@ -10,6 +10,7 @@ */ #include +#include #define TIME_T_MAX (time_t)((1UL << ((sizeof(time_t) << 3) - 1)) - 1) @@ -35,13 +36,42 @@ struct old_utimbuf32 { old_time32_tmodtime; }; +struct old_timex32 { + u32 modes; + s32 offset; + s32 freq; + s32 maxerror; + s32 esterror; + s32 status; + s32 constant; + s32 precision; + s32 tolerance; + struct old_timeval32 time; + s32 tick; + s32 ppsfreq; + s32 jitter; + s32 shift; + s32 stabil; + s32 jitcnt; + s32 calcnt; + s32 errcnt; + s32 stbcnt; + s32 tai; + + s32:32; s32:32; s32:32; s32:32; + s32:32; s32:32; s32:32; s32:32; + s32:32; s32:32; s32:32; +}; + extern int get_old_timespec32(struct timespec64 *, const void __user *); extern int put_old_timespec32(const struct timespec64 *, void __user *); extern int get_old_itimerspec32(struct itimerspec64 *its, const struct old_itimerspec32 __user *uits); extern int put_old_itimerspec32(const struct itimerspec64 *its, struct old_itimerspec32 __user *uits); - +struct timex; +int get_old_timex32(struct timex *, const struct old_timex32 __user *); +int put_old_timex32(struct 
old_timex32 __user *, const struct timex *); #if __BITS_PER_LONG == 64 diff --git a/kernel/compat.c b/kernel/compat.c index f01affa17e22..d8a36c6ad7c9 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -20,7 +20,6
[PATCH 05/11] timex: use __kernel_timex internally
From: Deepa Dinamani struct timex is not y2038 safe. Replace all uses of timex with y2038 safe __kernel_timex. Note that struct __kernel_timex is an ABI interface definition. We could define a new structure based on __kernel_timex that is only available internally instead. Right now, there isn't a strong motivation for this as the structure is isolated to a few defined struct timex interfaces and such a structure would be exactly the same as struct timex. The patch was generated by the following coccinelle script: virtual patch @depends on patch forall@ identifier ts; expression e; @@ ( - struct timex ts; + struct __kernel_timex ts; | - struct timex ts = {}; + struct __kernel_timex ts = {}; | - struct timex ts = e; + struct __kernel_timex ts = e; | - struct timex *ts; + struct __kernel_timex *ts; | (memset \| copy_from_user \| copy_to_user \)(..., - sizeof(struct timex)) + sizeof(struct __kernel_timex)) ) @depends on patch forall@ identifier ts; identifier fn; @@ fn(..., - struct timex *ts, + struct __kernel_timex *ts, ...) { ... } @depends on patch forall@ identifier ts; identifier fn; @@ fn(..., - struct timex *ts) { + struct __kernel_timex *ts) { ... 
} Signed-off-by: Deepa Dinamani Cc: linux-alpha@vger.kernel.org Cc: netdev@vger.kernel.org --- arch/alpha/kernel/osf_sys.c | 5 +++-- arch/sparc/kernel/sys_sparc_64.c | 4 ++-- drivers/ptp/ptp_clock.c | 2 +- include/linux/posix-clock.h | 2 +- include/linux/time32.h | 6 +++--- include/linux/timex.h| 4 ++-- kernel/time/ntp.c| 18 ++ kernel/time/ntp_internal.h | 2 +- kernel/time/posix-clock.c| 2 +- kernel/time/posix-timers.c | 8 kernel/time/posix-timers.h | 2 +- kernel/time/time.c | 14 +++--- kernel/time/timekeeping.c| 4 ++-- 13 files changed, 38 insertions(+), 35 deletions(-) diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index 792586038808..bf497b8b0ec6 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -1253,7 +1253,7 @@ struct timex32 { SYSCALL_DEFINE1(old_adjtimex, struct timex32 __user *, txc_p) { -struct timex txc; + struct __kernel_timex txc; int ret; /* copy relevant bits of struct timex. */ @@ -1270,7 +1270,8 @@ SYSCALL_DEFINE1(old_adjtimex, struct timex32 __user *, txc_p) if (copy_to_user(txc_p, &txc, offsetof(struct timex32, time)) || (copy_to_user(&txc_p->tick, &txc.tick, sizeof(struct timex32) - offsetof(struct timex32, tick))) || - (put_tv_to_tv32(&txc_p->time, &txc.time))) + (put_user(txc.time.tv_sec, &txc_p->time.tv_sec)) || + (put_user(txc.time.tv_usec, &txc_p->time.tv_usec))) return -EFAULT; return ret; diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index 37de18a11207..9825ca6a6020 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -548,7 +548,7 @@ SYSCALL_DEFINE2(getdomainname, char __user *, name, int, len) SYSCALL_DEFINE1(sparc_adjtimex, struct timex __user *, txc_p) { struct timex txc; /* Local copy of parameter */ - struct timex *kt = (void *)&txc; + struct __kernel_timex *kt = (void *)&txc; int ret; /* Copy the user data space into the kernel copy @@ -572,7 +572,7 @@ SYSCALL_DEFINE1(sparc_adjtimex, struct timex __user *, txc_p) SYSCALL_DEFINE2(sparc_clock_adjtime, const 
clockid_t, which_clock,struct timex __user *, txc_p) { struct timex txc; /* Local copy of parameter */ - struct timex *kt = (void *)&txc; + struct __kernel_timex *kt = (void *)&txc; int ret; if (!IS_ENABLED(CONFIG_POSIX_TIMERS)) { diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c index 48f3594a7458..79bd102c9bbc 100644 --- a/drivers/ptp/ptp_clock.c +++ b/drivers/ptp/ptp_clock.c @@ -124,7 +124,7 @@ static int ptp_clock_gettime(struct posix_clock *pc, struct timespec64 *tp) return err; } -static int ptp_clock_adjtime(struct posix_clock *pc, struct timex *tx) +static int ptp_clock_adjtime(struct posix_clock *pc, struct __kernel_timex *tx) { struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock); struct ptp_clock_info *ops; diff --git a/include/linux/posix-clock.h b/include/linux/posix-clock.h index 3a3bc71017d5..18674d7d5b1c 100644 --- a/include/linux/posix-clock.h +++ b/include/linux/posix-clock.h @@ -51,7 +51,7 @@ struct posix_clock; struct posix_clock_operations { struct module *owner; - int (*clock_adjtime)(struct posix_clock *pc, struct timex *tx); + int (*clock_adjtime)(struct posix_clock *pc, struct __kernel_timex *tx); int (*clock_gettime)(struct posix_clock *pc, struct timespec64 *ts); diff --git a/include/linux/time32.h b/include/linux/time32.h index 820a22e2b98b..0a1f302a1753 100644 ---
Re: [PATCH 07/15] ARM: add kexec_file_load system call number
On Thu, Jan 10, 2019 at 5:39 PM Will Deacon wrote: > > > diff --git a/arch/arm64/include/asm/unistd32.h > > b/arch/arm64/include/asm/unistd32.h > > index 355fe2bc035b..19f3f58b6146 100644 > > --- a/arch/arm64/include/asm/unistd32.h > > +++ b/arch/arm64/include/asm/unistd32.h > > @@ -823,6 +823,8 @@ __SYSCALL(__NR_rseq, sys_rseq) > > __SYSCALL(__NR_io_pgetevents, compat_sys_io_pgetevents) > > #define __NR_migrate_pages 400 > > __SYSCALL(__NR_migrate_pages, sys_migrate_pages) > > +#define __NR_kexec_file_load 401 > > +__SYSCALL(__NR_kexec_file_load, sys_kexec_file_load) > > Hmm, I wonder if we need a compat wrapper for this, or are we assuming > that the early entry code has already zero-extended the long and pointer > arguments? Yes, that is generally the assumption for compat syscalls. s390 needs some extra magic to do a 31-to-64 extension on pointer arguments, and I think sometimes we need a special wrapper to do sign-extension of 32-bit arguments into 64-bit, but the arguments here should not need that. Arnd
Re: [PATCH 06/15] ARM: add migrate_pages() system call
On Thu, Jan 10, 2019 at 5:32 PM Will Deacon wrote: > > diff --git a/arch/arm64/include/asm/unistd32.h > > b/arch/arm64/include/asm/unistd32.h > > index 04ee190b90fe..355fe2bc035b 100644 > > --- a/arch/arm64/include/asm/unistd32.h > > +++ b/arch/arm64/include/asm/unistd32.h > > @@ -821,6 +821,8 @@ __SYSCALL(__NR_statx, sys_statx) > > __SYSCALL(__NR_rseq, sys_rseq) > > #define __NR_io_pgetevents 399 > > __SYSCALL(__NR_io_pgetevents, compat_sys_io_pgetevents) > > +#define __NR_migrate_pages 400 > > +__SYSCALL(__NR_migrate_pages, sys_migrate_pages) > > Should be compat_sys_migrate_pages instead? Yes, good catch! Fixed now. Thanks, Arnd
Re: [PATCH 00/15] arch: synchronize syscall tables in preparation for y2038
On Thu, Jan 10, 2019 at 5:59 PM Geert Uytterhoeven wrote: > > Hi Arnd, > > On Thu, Jan 10, 2019 at 5:26 PM Arnd Bergmann wrote: > > The system call tables have diverged a bit over the years, and a number > > of the recent additions never made it into all architectures, for one > > reason or another. > > > > This is an attempt to clean it up as far as we can without breaking > > compatibility, doing a number of steps: > > Thanks a lot! > > > - Add system calls that have not yet been integrated into all > > architectures but that we definitely want there. > > It looks like you missed wiring up io_pgetevents() on m68k. > Is that intentional? Yes, I thought I had described that somewhere, but maybe I forgot: semtimedop() and io_pgetevents() get replaced with time64 versions in the follow-up, so I only added them on 64-bit architectures. If you think we should have both io_pgetevents() and io_pgetevents_time32() on all 32-bit architectures, I can add that as well. Arnd
[PATCH 11/15] mips: fix n32 compat_ipc_parse_version
While reading through the sysvipc implementation, I noticed that the n32 semctl/shmctl/msgctl system calls behave differently based on whether o32 support is enabled or not: Without o32, the IPC_64 flag passed by user space is rejected but calls without that flag get IPC_64 behavior. As far as I can tell, this was inadvertently changed by a cleanup patch but never noticed by anyone; possibly nobody has tried using sysvipc on n32 after linux-3.19. Change it back to the old behavior now. Fixes: 78aaf956ba3a ("MIPS: Compat: Fix build error if CONFIG_MIPS32_COMPAT but no compat ABI.") Cc: stable@vger.kernel.org Signed-off-by: Arnd Bergmann --- As stated above, this was only found by inspection; the patch is not tested. Please review accordingly. --- arch/mips/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 787290781b8c..0d14f51d0002 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -3155,6 +3155,7 @@ config MIPS32_O32 config MIPS32_N32 bool "Kernel support for n32 binaries" depends on 64BIT + select ARCH_WANT_COMPAT_IPC_PARSE_VERSION select COMPAT select MIPS32_COMPAT select SYSVIPC_COMPAT if SYSVIPC -- 2.20.0
[PATCH 12/15] sparc64: fix sparc_ipc type conversion
__kernel_timespec and timespec are currently the same type, but once they are different, the type cast has to be changed here. Signed-off-by: Arnd Bergmann --- arch/sparc/kernel/sys_sparc_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index 274ed0b9b3e0..1c079e7bab09 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -344,7 +344,7 @@ SYSCALL_DEFINE6(sparc_ipc, unsigned int, call, int, first, unsigned long, second goto out; case SEMTIMEDOP: err = sys_semtimedop(first, ptr, (unsigned int)second, - (const struct timespec __user *) + (const struct __kernel_timespec __user *) (unsigned long) fifth); goto out; case SEMGET: -- 2.20.0
[PATCH 05/15] alpha: update syscall macro definitions
Other architectures commonly use __NR_umount2 for sys_umount; only ia64 and alpha use __NR_umount here. In order to synchronize the generated tables, use umount2 like everyone else, and add back the old name from asm/unistd.h for compatibility. For shmat, alpha uses the osf_shmat name; we can do the same thing here, which means we don't have to add an entry in the __IGNORE list now that shmat is mandatory everywhere. The alarm, creat, pause, time, and utime system calls are optional everywhere these days, so there is no need to list them here any more. I considered also adding the regular versions of the get*id system calls that have different names and calling conventions on alpha, which would further help unify the syscall ABI, but for now I decided against that. Signed-off-by: Arnd Bergmann --- arch/alpha/include/asm/unistd.h| 6 -- arch/alpha/include/uapi/asm/unistd.h | 5 + arch/alpha/kernel/syscalls/syscall.tbl | 4 ++-- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/arch/alpha/include/asm/unistd.h b/arch/alpha/include/asm/unistd.h index 21b706a5b772..564ba87bdc38 100644 --- a/arch/alpha/include/asm/unistd.h +++ b/arch/alpha/include/asm/unistd.h @@ -22,18 +22,12 @@ /* * Ignore legacy syscalls that we don't use. */ -#define __IGNORE_alarm -#define __IGNORE_creat #define __IGNORE_getegid #define __IGNORE_geteuid #define __IGNORE_getgid #define __IGNORE_getpid #define __IGNORE_getppid #define __IGNORE_getuid -#define __IGNORE_pause -#define __IGNORE_time -#define __IGNORE_utime -#define __IGNORE_umount2 /* Alpha doesn't have protection keys. 
*/ #define __IGNORE_pkey_mprotect diff --git a/arch/alpha/include/uapi/asm/unistd.h b/arch/alpha/include/uapi/asm/unistd.h index 9ba724f116f1..4507071f995f 100644 --- a/arch/alpha/include/uapi/asm/unistd.h +++ b/arch/alpha/include/uapi/asm/unistd.h @@ -2,6 +2,11 @@ #ifndef _UAPI_ALPHA_UNISTD_H #define _UAPI_ALPHA_UNISTD_H +/* These are traditionally the names linux-alpha uses for + * the two otherwise generic system calls */ +#define __NR_umount__NR_umount2 +#define __NR_osf_shmat __NR_shmat + #include #endif /* _UAPI_ALPHA_UNISTD_H */ diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index e09558edae73..f920b65e8c49 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -29,7 +29,7 @@ 19 common lseek sys_lseek 20 common getxpid sys_getxpid 21 common osf_mount sys_osf_mount -22 common umount sys_umount +22 common umount2 sys_umount 23 common setuid sys_setuid 24 common getxuid sys_getxuid 25 common exec_with_loadersys_ni_syscall @@ -183,7 +183,7 @@ 206common semop sys_semop 207common osf_utsname sys_osf_utsname 208common lchown sys_lchown -209common osf_shmat sys_shmat +209common shmat sys_shmat 210common shmctl sys_shmctl 211common shmdt sys_shmdt 212common shmget sys_shmget -- 2.20.0
[PATCH 02/15] ia64: add statx and io_pgetevents syscalls
All architectures should implement these two, so assign numbers and hook them up on ia64. Signed-off-by: Arnd Bergmann --- arch/ia64/kernel/syscalls/syscall.tbl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl index e97caf51be42..52585281205b 100644 --- a/arch/ia64/kernel/syscalls/syscall.tbl +++ b/arch/ia64/kernel/syscalls/syscall.tbl @@ -335,3 +335,5 @@ 323common copy_file_range sys_copy_file_range 324common preadv2 sys_preadv2 325common pwritev2sys_pwritev2 +326common statx sys_statx +327common io_pgetevents sys_io_pgetevents -- 2.20.0
[PATCH 03/15] ia64: assign syscall numbers for perf and seccomp
Most architectures have assigned numbers for both seccomp and perf_event_open, even when they do not implement either. ia64 is an exception here, so for consistency let's add numbers for both of them. Unless CONFIG_PERF_EVENTS and CONFIG_SECCOMP are implemented, the system calls just return -ENOSYS. Signed-off-by: Arnd Bergmann --- arch/ia64/kernel/syscalls/syscall.tbl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl index 52585281205b..2e93dbdcdb80 100644 --- a/arch/ia64/kernel/syscalls/syscall.tbl +++ b/arch/ia64/kernel/syscalls/syscall.tbl @@ -337,3 +337,5 @@ 325common pwritev2sys_pwritev2 326common statx sys_statx 327common io_pgetevents sys_io_pgetevents +328common perf_event_open sys_perf_event_open +329common seccomp sys_seccomp -- 2.20.0
[PATCH 06/15] ARM: add migrate_pages() system call
The migrate_pages system call has an assigned number on all architectures except ARM. When it got added initially in commit d80ade7b3231 ("ARM: Fix warning: #warning syscall migrate_pages not implemented"), it was intentionally left out based on the observation that there are no 32-bit ARM NUMA systems. However, there are now arm64 NUMA machines that can in theory run 32-bit kernels (actually enabling NUMA there would require additional work) as well as 32-bit user space on 64-bit kernels, so that argument is no longer very strong. Assigning the number lets us use the system call on 64-bit kernels as well as providing a more consistent set of syscalls across architectures. Signed-off-by: Arnd Bergmann --- arch/arm/include/asm/unistd.h | 1 - arch/arm/tools/syscall.tbl| 1 + arch/arm64/include/asm/unistd.h | 2 +- arch/arm64/include/asm/unistd32.h | 2 ++ 4 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h index 88ef2ce1f69a..d713587dfcf4 100644 --- a/arch/arm/include/asm/unistd.h +++ b/arch/arm/include/asm/unistd.h @@ -45,7 +45,6 @@ * Unimplemented (or alternatively implemented) syscalls */ #define __IGNORE_fadvise64_64 -#define __IGNORE_migrate_pages #ifdef __ARM_EABI__ /* diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index 8edf93b4490f..86de9eb34296 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -414,3 +414,4 @@ 397common statx sys_statx 398common rseqsys_rseq 399common io_pgetevents sys_io_pgetevents +400common migrate_pages sys_migrate_pages diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index a7b1fc58ffdf..261216c3336e 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -44,7 +44,7 @@ #define __ARM_NR_compat_set_tls(__ARM_NR_COMPAT_BASE + 5) #define __ARM_NR_COMPAT_END(__ARM_NR_COMPAT_BASE + 0x800) -#define __NR_compat_syscalls 400 +#define __NR_compat_syscalls 401 #endif 
#define __ARCH_WANT_SYS_CLONE diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 04ee190b90fe..355fe2bc035b 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -821,6 +821,8 @@ __SYSCALL(__NR_statx, sys_statx) __SYSCALL(__NR_rseq, sys_rseq) #define __NR_io_pgetevents 399 __SYSCALL(__NR_io_pgetevents, compat_sys_io_pgetevents) +#define __NR_migrate_pages 400 +__SYSCALL(__NR_migrate_pages, sys_migrate_pages) /* * Please add new compat syscalls above this comment and update -- 2.20.0