[PATCH v5 3/6] x86/arch_prctl Add a new do_arch_prctl
Add a new do_arch_prctl to handle arch_prctls that are not specific to 64 bits. Call it from the syscall entry point, but not any of the other callsites in the kernel, which all want one of the existing 64 bit only arch_prctls. Signed-off-by: Kyle Huey --- arch/x86/include/asm/proto.h | 1 + arch/x86/kernel/process.c| 5 + arch/x86/kernel/process_64.c | 8 +++- 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h index 95c3e51..94a57cc 100644 --- a/arch/x86/include/asm/proto.h +++ b/arch/x86/include/asm/proto.h @@ -30,6 +30,7 @@ void x86_report_nx(void); extern int reboot_force; +long do_arch_prctl(struct task_struct *task, int code, unsigned long arg2); #ifdef CONFIG_X86_64 long do_arch_prctl_64(struct task_struct *task, int code, unsigned long arg2); #endif diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 62c0b0e..97aa104 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -567,3 +567,8 @@ unsigned long get_wchan(struct task_struct *p) } while (count++ < 16 && p->state != TASK_RUNNING); return 0; } + +long do_arch_prctl(struct task_struct *task, int code, unsigned long arg2) +{ + return -EINVAL; +} diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 292ce48..5c60e2c 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -590,7 +590,13 @@ long do_arch_prctl_64(struct task_struct *task, int code, unsigned long arg2) SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, arg2) { - return do_arch_prctl_64(current, code, arg2); + long ret; + + ret = do_arch_prctl_64(current, code, arg2); + if (ret == -EINVAL) + ret = do_arch_prctl(current, code, arg2); + + return ret; } unsigned long KSTK_ESP(struct task_struct *task) -- 2.9.3
[PATCH v5 4/6] x86/syscalls/32 Wire up arch_prctl on x86-32
Hook up arch_prctl to call do_arch_prctl on x86-32, and in 32 bit compat mode on x86-64. This allows us to have arch_prctls that are not specific to 64 bits. On UML, simply stub out this syscall. Signed-off-by: Kyle Huey <kh...@kylehuey.com> --- arch/x86/entry/syscalls/syscall_32.tbl | 1 + arch/x86/kernel/process_32.c | 7 +++ arch/x86/kernel/process_64.c | 7 +++ arch/x86/um/Makefile | 2 +- arch/x86/um/syscalls_32.c | 7 +++ include/linux/compat.h | 2 ++ 6 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 arch/x86/um/syscalls_32.c diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index f848572..300fdf8 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -386,3 +386,4 @@ 377i386copy_file_range sys_copy_file_range 378i386preadv2 sys_preadv2 compat_sys_preadv2 379i386pwritev2sys_pwritev2 compat_sys_pwritev2 +380i386arch_prctl sys_arch_prctl compat_sys_arch_prctl diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index d86be29..71770a4 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -54,6 +55,7 @@ #include #include #include +#include asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); asmlinkage void ret_from_kernel_thread(void) __asm__("ret_from_kernel_thread"); @@ -316,3 +318,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) return prev_p; } + +SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, arg2) +{ + return do_arch_prctl(current, code, arg2); +} diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 5c60e2c..aa2b99a 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -599,6 +599,13 @@ SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, arg2) return ret; } +#ifdef CONFIG_IA32_EMULATION +COMPAT_SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, 
arg2) +{ + return do_arch_prctl(current, code, arg2); +} +#endif + unsigned long KSTK_ESP(struct task_struct *task) { return task_pt_regs(task)->sp; diff --git a/arch/x86/um/Makefile b/arch/x86/um/Makefile index 3ee2bb6..5e039d6 100644 --- a/arch/x86/um/Makefile +++ b/arch/x86/um/Makefile @@ -16,7 +16,7 @@ obj-y = bug.o bugs_$(BITS).o delay.o fault.o ksyms.o ldt.o \ ifeq ($(CONFIG_X86_32),y) -obj-y += checksum_32.o +obj-y += checksum_32.o syscalls_32.o obj-$(CONFIG_ELF_CORE) += elfcore.o subarch-y = ../lib/string_32.o ../lib/atomic64_32.o ../lib/atomic64_cx8_32.o diff --git a/arch/x86/um/syscalls_32.c b/arch/x86/um/syscalls_32.c new file mode 100644 index 000..ccf0598 --- /dev/null +++ b/arch/x86/um/syscalls_32.c @@ -0,0 +1,7 @@ +#include +#include + +SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, arg2) +{ + return -EINVAL; +} diff --git a/include/linux/compat.h b/include/linux/compat.h index f964ef7..0039d53 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -722,6 +722,8 @@ asmlinkage long compat_sys_sched_rr_get_interval(compat_pid_t pid, asmlinkage long compat_sys_fanotify_mark(int, unsigned int, __u32, __u32, int, const char __user *); +asmlinkage long compat_sys_arch_prctl(int, unsigned long); + /* * For most but not all architectures, "am I in a compat syscall?" and * "am I a compat task?" are the same question. For architectures on which -- 2.9.3
[PATCH v5 5/6] x86/cpufeature Detect CPUID faulting support
Intel supports faulting on the CPUID instruction beginning with Ivy Bridge. When enabled, the processor will fault on attempts to execute the CPUID instruction with CPL>0. This will allow a ptracer to emulate the CPUID instruction. Bit 31 of MSR_PLATFORM_INFO advertises support for this feature. It is documented in detail in Section 2.3.2 of http://www.intel.com/content/dam/www/public/us/en/documents/application-notes/virtualization-technology-flexmigration-application-note.pdf Detect support for this feature and expose it as X86_FEATURE_CPUID_FAULT. Signed-off-by: Kyle Huey <kh...@kylehuey.com> Reviewed-by: Andy Lutomirski <l...@kernel.org> --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/msr-index.h | 1 + arch/x86/kernel/cpu/scattered.c| 13 + 3 files changed, 15 insertions(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 92a8308..78b9d06 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -190,6 +190,7 @@ #define X86_FEATURE_CPB( 7*32+ 2) /* AMD Core Performance Boost */ #define X86_FEATURE_EPB( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ +#define X86_FEATURE_CPUID_FAULT ( 7*32+ 4) /* Intel CPUID faulting */ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 56f4c66..39aa563 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -41,6 +41,7 @@ #define MSR_IA32_PERFCTR1 0x00c2 #define MSR_FSB_FREQ 0x00cd #define MSR_PLATFORM_INFO 0x00ce +#define PLATINFO_CPUID_FAULT (1UL << 31) #define MSR_NHM_SNB_PKG_CST_CFG_CTL0x00e2 #define NHM_C3_AUTO_DEMOTE (1UL << 25) diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 8cb57df..7901481 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -24,6 +24,16 @@ 
enum cpuid_regs { CR_EBX }; +static bool supports_cpuid_faulting(void) +{ + unsigned int lo, hi; + + if (rdmsr_safe(MSR_PLATFORM_INFO, &lo, &hi)) + return false; + + return lo & PLATINFO_CPUID_FAULT; +} + void init_scattered_cpuid_features(struct cpuinfo_x86 *c) { u32 max_level; @@ -54,4 +64,7 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c) if (regs[cb->reg] & (1 << cb->bit)) set_cpu_cap(c, cb->feature); } + + if (supports_cpuid_faulting()) + set_cpu_cap(c, X86_FEATURE_CPUID_FAULT); } -- 2.9.3
[PATCH v5 0/6] x86/arch_prctl Add ARCH_[GET|SET]_CPUID for controlling the CPUID instruction
rr (http://rr-project.org/), a userspace record-and-replay reverse-execution debugger, would like to trap and emulate the CPUID instruction. This would allow us to a) mask away certain hardware features that rr does not support (e.g. RDRAND) and b) enable trace portability across machines by providing constant results. Newer Intel CPUs (Ivy Bridge and later) can fault when CPUID is executed at CPL > 0. Expose this capability to userspace as a new pair of arch_prctls, ARCH_GET_CPUID and ARCH_SET_CPUID, with two values, ARCH_CPUID_ENABLE and ARCH_CPUID_SIGSEGV. The following changes have been made since v4: Patch 1: - Fix missing include on 64bit UML. Patch 6: - Fix comment in the test that still referred to an earlier design of the API. The following changes have been made since v3: Patch 1 was split into patches 1-4, patches 2 and 3 became patches 5 and 6, respectively. Patch 1: - Use SYSCALL_DEFINE in UML. Patch 2: - More descriptive commit message. Patch 3: - More descriptive commit message. - Name the common arch_prctl function do_arch_prctl instead of do_arch_prctl_common Patch 4: - Move the 32-bit syscall entry point to process_32.c, place the compat entry point in process_64.c Patch 5 (previously Patch 2): - More descriptive commit message. - Prefix the #define for the cpuid faulting bit with PLATINFO - supports_cpuid_faulting returns bool - Rearrange supports_cpuid_faulting to avoid linebreaks Patch 6 (previously Patch 3): - ARCH_GET_CPUID now takes 0 for the second argument, and returns the result directly. - arch_post_exec is now a #define, called from setup_new_exec - The test now uses errx - The test now checks that ARCH_GET_CPUID returns ARCH_CPUID_SIGSEGV after fork()
[PATCH v5 4/6] x86/syscalls/32 Wire up arch_prctl on x86-32
Hook up arch_prctl to call do_arch_prctl on x86-32, and in 32 bit compat mode on x86-64. This allows us to have arch_prctls that are not specific to 64 bits. On UML, simply stub out this syscall. Signed-off-by: Kyle Huey --- arch/x86/entry/syscalls/syscall_32.tbl | 1 + arch/x86/kernel/process_32.c | 7 +++ arch/x86/kernel/process_64.c | 7 +++ arch/x86/um/Makefile | 2 +- arch/x86/um/syscalls_32.c | 7 +++ include/linux/compat.h | 2 ++ 6 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 arch/x86/um/syscalls_32.c diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index f848572..300fdf8 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -386,3 +386,4 @@ 377i386copy_file_range sys_copy_file_range 378i386preadv2 sys_preadv2 compat_sys_preadv2 379i386pwritev2sys_pwritev2 compat_sys_pwritev2 +380i386arch_prctl sys_arch_prctl compat_sys_arch_prctl diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index d86be29..71770a4 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -54,6 +55,7 @@ #include #include #include +#include asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); asmlinkage void ret_from_kernel_thread(void) __asm__("ret_from_kernel_thread"); @@ -316,3 +318,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) return prev_p; } + +SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, arg2) +{ + return do_arch_prctl(current, code, arg2); +} diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 5c60e2c..aa2b99a 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -599,6 +599,13 @@ SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, arg2) return ret; } +#ifdef CONFIG_IA32_EMULATION +COMPAT_SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, arg2) +{ + return 
do_arch_prctl(current, code, arg2); +} +#endif + unsigned long KSTK_ESP(struct task_struct *task) { return task_pt_regs(task)->sp; diff --git a/arch/x86/um/Makefile b/arch/x86/um/Makefile index 3ee2bb6..5e039d6 100644 --- a/arch/x86/um/Makefile +++ b/arch/x86/um/Makefile @@ -16,7 +16,7 @@ obj-y = bug.o bugs_$(BITS).o delay.o fault.o ksyms.o ldt.o \ ifeq ($(CONFIG_X86_32),y) -obj-y += checksum_32.o +obj-y += checksum_32.o syscalls_32.o obj-$(CONFIG_ELF_CORE) += elfcore.o subarch-y = ../lib/string_32.o ../lib/atomic64_32.o ../lib/atomic64_cx8_32.o diff --git a/arch/x86/um/syscalls_32.c b/arch/x86/um/syscalls_32.c new file mode 100644 index 000..ccf0598 --- /dev/null +++ b/arch/x86/um/syscalls_32.c @@ -0,0 +1,7 @@ +#include +#include + +SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, arg2) +{ + return -EINVAL; +} diff --git a/include/linux/compat.h b/include/linux/compat.h index f964ef7..0039d53 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -722,6 +722,8 @@ asmlinkage long compat_sys_sched_rr_get_interval(compat_pid_t pid, asmlinkage long compat_sys_fanotify_mark(int, unsigned int, __u32, __u32, int, const char __user *); +asmlinkage long compat_sys_arch_prctl(int, unsigned long); + /* * For most but not all architectures, "am I in a compat syscall?" and * "am I a compat task?" are the same question. For architectures on which -- 2.9.3
[PATCH v5 5/6] x86/cpufeature Detect CPUID faulting support
Intel supports faulting on the CPUID instruction beginning with Ivy Bridge. When enabled, the processor will fault on attempts to execute the CPUID instruction with CPL>0. This will allow a ptracer to emulate the CPUID instruction. Bit 31 of MSR_PLATFORM_INFO advertises support for this feature. It is documented in detail in Section 2.3.2 of http://www.intel.com/content/dam/www/public/us/en/documents/application-notes/virtualization-technology-flexmigration-application-note.pdf Detect support for this feature and expose it as X86_FEATURE_CPUID_FAULT. Signed-off-by: Kyle Huey Reviewed-by: Andy Lutomirski --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/msr-index.h | 1 + arch/x86/kernel/cpu/scattered.c| 13 + 3 files changed, 15 insertions(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 92a8308..78b9d06 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -190,6 +190,7 @@ #define X86_FEATURE_CPB( 7*32+ 2) /* AMD Core Performance Boost */ #define X86_FEATURE_EPB( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ +#define X86_FEATURE_CPUID_FAULT ( 7*32+ 4) /* Intel CPUID faulting */ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 56f4c66..39aa563 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -41,6 +41,7 @@ #define MSR_IA32_PERFCTR1 0x00c2 #define MSR_FSB_FREQ 0x00cd #define MSR_PLATFORM_INFO 0x00ce +#define PLATINFO_CPUID_FAULT (1UL << 31) #define MSR_NHM_SNB_PKG_CST_CFG_CTL0x00e2 #define NHM_C3_AUTO_DEMOTE (1UL << 25) diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 8cb57df..7901481 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -24,6 +24,16 @@ enum cpuid_regs { CR_EBX }; +static bool 
supports_cpuid_faulting(void) +{ + unsigned int lo, hi; + + if (rdmsr_safe(MSR_PLATFORM_INFO, &lo, &hi)) + return false; + + return lo & PLATINFO_CPUID_FAULT; +} + void init_scattered_cpuid_features(struct cpuinfo_x86 *c) { u32 max_level; @@ -54,4 +64,7 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c) if (regs[cb->reg] & (1 << cb->bit)) set_cpu_cap(c, cb->feature); } + + if (supports_cpuid_faulting()) + set_cpu_cap(c, X86_FEATURE_CPUID_FAULT); } -- 2.9.3
[PATCH v5 0/6] x86/arch_prctl Add ARCH_[GET|SET]_CPUID for controlling the CPUID instruction
rr (http://rr-project.org/), a userspace record-and-replay reverse-execution debugger, would like to trap and emulate the CPUID instruction. This would allow us to a) mask away certain hardware features that rr does not support (e.g. RDRAND) and b) enable trace portability across machines by providing constant results. Newer Intel CPUs (Ivy Bridge and later) can fault when CPUID is executed at CPL > 0. Expose this capability to userspace as a new pair of arch_prctls, ARCH_GET_CPUID and ARCH_SET_CPUID, with two values, ARCH_CPUID_ENABLE and ARCH_CPUID_SIGSEGV. The following changes have been made since v4: Patch 1: - Fix missing include on 64bit UML. Patch 6: - Fix comment in the test that still referred to an earlier design of the API. The following changes have been made since v3: Patch 1 was split into patches 1-4, patches 2 and 3 became patches 5 and 6, respectively. Patch 1: - Use SYSCALL_DEFINE in UML. Patch 2: - More descriptive commit message. Patch 3: - More descriptive commit message. - Name the common arch_prctl function do_arch_prctl instead of do_arch_prctl_common Patch 4: - Move the 32-bit syscall entry point to process_32.c, place the compat entry point in process_64.c Patch 5 (previously Patch 2): - More descriptive commit message. - Prefix the #define for the cpuid faulting bit with PLATINFO - supports_cpuid_faulting returns bool - Rearrange supports_cpuid_faulting to avoid linebreaks Patch 6 (previously Patch 3): - ARCH_GET_CPUID now takes 0 for the second argument, and returns the result directly. - arch_post_exec is now a #define, called from setup_new_exec - The test now uses errx - The test now checks that ARCH_GET_CPUID returns ARCH_CPUID_SIGSEGV after fork()
[PATCH v4 6/6] x86/arch_prctl Add ARCH_[GET|SET]_CPUID
Intel supports faulting on the CPUID instruction beginning with Ivy Bridge. When enabled, the processor will fault on attempts to execute the CPUID instruction with CPL>0. Exposing this feature to userspace will allow a ptracer to trap and emulate the CPUID instruction. When supported, this feature is controlled by toggling bit 0 of MSR_MISC_FEATURES_ENABLES. It is documented in detail in Section 2.3.2 of http://www.intel.com/content/dam/www/public/us/en/documents/application-notes/virtualization-technology-flexmigration-application-note.pdf Implement a new pair of arch_prctls, available on both x86-32 and x86-64. ARCH_GET_CPUID: Returns the current CPUID faulting state, either ARCH_CPUID_ENABLE or ARCH_CPUID_SIGSEGV. arg2 must be 0. ARCH_SET_CPUID: Set the CPUID faulting state to arg2, which must be either ARCH_CPUID_ENABLE or ARCH_CPUID_SIGSEGV. Returns EINVAL if arg2 is another value or CPUID faulting is not supported on this system. The state of the CPUID faulting flag is propagated across forks, but reset upon exec. 
Signed-off-by: Kyle Huey <kh...@kylehuey.com> --- arch/x86/include/asm/msr-index.h | 1 + arch/x86/include/asm/thread_info.h| 6 +- arch/x86/include/uapi/asm/prctl.h | 6 + arch/x86/kernel/process.c | 94 +++- fs/exec.c | 1 + include/linux/thread_info.h | 4 + tools/testing/selftests/x86/Makefile | 2 +- tools/testing/selftests/x86/cpuid-fault.c | 231 ++ 8 files changed, 342 insertions(+), 3 deletions(-) create mode 100644 tools/testing/selftests/x86/cpuid-fault.c diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 39aa563..cddefdd 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -53,6 +53,7 @@ #define MSR_MTRRcap0x00fe #define MSR_IA32_BBL_CR_CTL0x0119 #define MSR_IA32_BBL_CR_CTL3 0x011e +#define MSR_MISC_FEATURES_ENABLES 0x0140 #define MSR_IA32_SYSENTER_CS 0x0174 #define MSR_IA32_SYSENTER_ESP 0x0175 diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 8b7c8d8..1bc79bc 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -93,6 +93,7 @@ struct thread_info { #define TIF_SECCOMP8 /* secure computing */ #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ #define TIF_UPROBE 12 /* breakpointed or singlestepping */ +#define TIF_NOCPUID15 /* CPUID is not accessible in userland */ #define TIF_NOTSC 16 /* TSC is not accessible in userland */ #define TIF_IA32 17 /* IA32 compatibility process */ #define TIF_FORK 18 /* ret_from_fork */ @@ -117,6 +118,7 @@ struct thread_info { #define _TIF_SECCOMP (1 << TIF_SECCOMP) #define _TIF_USER_RETURN_NOTIFY(1 << TIF_USER_RETURN_NOTIFY) #define _TIF_UPROBE(1 << TIF_UPROBE) +#define _TIF_NOCPUID (1 << TIF_NOCPUID) #define _TIF_NOTSC (1 << TIF_NOTSC) #define _TIF_IA32 (1 << TIF_IA32) #define _TIF_FORK (1 << TIF_FORK) @@ -146,7 +148,7 @@ struct thread_info { /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW \ - (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP) + 
(_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP) #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) @@ -293,6 +295,8 @@ static inline bool in_ia32_syscall(void) extern void arch_task_cache_init(void); extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); extern void arch_release_task_struct(struct task_struct *tsk); +extern void arch_post_exec(void); +#define arch_post_exec arch_post_exec #endif /* !__ASSEMBLY__ */ #endif /* _ASM_X86_THREAD_INFO_H */ diff --git a/arch/x86/include/uapi/asm/prctl.h b/arch/x86/include/uapi/asm/prctl.h index 3ac5032..c087e55 100644 --- a/arch/x86/include/uapi/asm/prctl.h +++ b/arch/x86/include/uapi/asm/prctl.h @@ -6,4 +6,10 @@ #define ARCH_GET_FS 0x1003 #define ARCH_GET_GS 0x1004 +/* Get/set the process' ability to use the CPUID instruction */ +#define ARCH_GET_CPUID 0x1005 +#define ARCH_SET_CPUID 0x1006 +# define ARCH_CPUID_ENABLE 1 /* allow the use of the CPUID instruction */ +# define ARCH_CPUID_SIGSEGV2 /* throw a SIGSEGV instead of reading the CPUID */ + #endif /* _ASM_X86_PRCTL_H */ diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 97aa104..3ac90eb 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -32,6 +32,7 @@ #include
[PATCH v4 6/6] x86/arch_prctl Add ARCH_[GET|SET]_CPUID
Intel supports faulting on the CPUID instruction beginning with Ivy Bridge. When enabled, the processor will fault on attempts to execute the CPUID instruction with CPL>0. Exposing this feature to userspace will allow a ptracer to trap and emulate the CPUID instruction. When supported, this feature is controlled by toggling bit 0 of MSR_MISC_FEATURES_ENABLES. It is documented in detail in Section 2.3.2 of http://www.intel.com/content/dam/www/public/us/en/documents/application-notes/virtualization-technology-flexmigration-application-note.pdf Implement a new pair of arch_prctls, available on both x86-32 and x86-64. ARCH_GET_CPUID: Returns the current CPUID faulting state, either ARCH_CPUID_ENABLE or ARCH_CPUID_SIGSEGV. arg2 must be 0. ARCH_SET_CPUID: Set the CPUID faulting state to arg2, which must be either ARCH_CPUID_ENABLE or ARCH_CPUID_SIGSEGV. Returns EINVAL if arg2 is another value or CPUID faulting is not supported on this system. The state of the CPUID faulting flag is propagated across forks, but reset upon exec. 
Signed-off-by: Kyle Huey --- arch/x86/include/asm/msr-index.h | 1 + arch/x86/include/asm/thread_info.h| 6 +- arch/x86/include/uapi/asm/prctl.h | 6 + arch/x86/kernel/process.c | 94 +++- fs/exec.c | 1 + include/linux/thread_info.h | 4 + tools/testing/selftests/x86/Makefile | 2 +- tools/testing/selftests/x86/cpuid-fault.c | 231 ++ 8 files changed, 342 insertions(+), 3 deletions(-) create mode 100644 tools/testing/selftests/x86/cpuid-fault.c diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 39aa563..cddefdd 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -53,6 +53,7 @@ #define MSR_MTRRcap0x00fe #define MSR_IA32_BBL_CR_CTL0x0119 #define MSR_IA32_BBL_CR_CTL3 0x011e +#define MSR_MISC_FEATURES_ENABLES 0x0140 #define MSR_IA32_SYSENTER_CS 0x0174 #define MSR_IA32_SYSENTER_ESP 0x0175 diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 8b7c8d8..1bc79bc 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -93,6 +93,7 @@ struct thread_info { #define TIF_SECCOMP8 /* secure computing */ #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ #define TIF_UPROBE 12 /* breakpointed or singlestepping */ +#define TIF_NOCPUID15 /* CPUID is not accessible in userland */ #define TIF_NOTSC 16 /* TSC is not accessible in userland */ #define TIF_IA32 17 /* IA32 compatibility process */ #define TIF_FORK 18 /* ret_from_fork */ @@ -117,6 +118,7 @@ struct thread_info { #define _TIF_SECCOMP (1 << TIF_SECCOMP) #define _TIF_USER_RETURN_NOTIFY(1 << TIF_USER_RETURN_NOTIFY) #define _TIF_UPROBE(1 << TIF_UPROBE) +#define _TIF_NOCPUID (1 << TIF_NOCPUID) #define _TIF_NOTSC (1 << TIF_NOTSC) #define _TIF_IA32 (1 << TIF_IA32) #define _TIF_FORK (1 << TIF_FORK) @@ -146,7 +148,7 @@ struct thread_info { /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW \ - (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP) + 
(_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP) #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) @@ -293,6 +295,8 @@ static inline bool in_ia32_syscall(void) extern void arch_task_cache_init(void); extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); extern void arch_release_task_struct(struct task_struct *tsk); +extern void arch_post_exec(void); +#define arch_post_exec arch_post_exec #endif /* !__ASSEMBLY__ */ #endif /* _ASM_X86_THREAD_INFO_H */ diff --git a/arch/x86/include/uapi/asm/prctl.h b/arch/x86/include/uapi/asm/prctl.h index 3ac5032..c087e55 100644 --- a/arch/x86/include/uapi/asm/prctl.h +++ b/arch/x86/include/uapi/asm/prctl.h @@ -6,4 +6,10 @@ #define ARCH_GET_FS 0x1003 #define ARCH_GET_GS 0x1004 +/* Get/set the process' ability to use the CPUID instruction */ +#define ARCH_GET_CPUID 0x1005 +#define ARCH_SET_CPUID 0x1006 +# define ARCH_CPUID_ENABLE 1 /* allow the use of the CPUID instruction */ +# define ARCH_CPUID_SIGSEGV2 /* throw a SIGSEGV instead of reading the CPUID */ + #endif /* _ASM_X86_PRCTL_H */ diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 97aa104..3ac90eb 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -32,6 +32,7 @@ #include #include #inclu
[PATCH v4 4/6] x86/syscalls/32 Wire up arch_prctl on x86-32
Hook up arch_prctl to call do_arch_prctl on x86-32, and in 32 bit compat mode on x86-64. This allows us to have arch_prctls that are not specific to 64 bits. On UML, simply stub out this syscall. Signed-off-by: Kyle Huey <kh...@kylehuey.com> --- arch/x86/entry/syscalls/syscall_32.tbl | 1 + arch/x86/kernel/process_32.c | 7 +++ arch/x86/kernel/process_64.c | 7 +++ arch/x86/um/Makefile | 2 +- arch/x86/um/syscalls_32.c | 7 +++ include/linux/compat.h | 2 ++ 6 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 arch/x86/um/syscalls_32.c diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index f848572..300fdf8 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -386,3 +386,4 @@ 377i386copy_file_range sys_copy_file_range 378i386preadv2 sys_preadv2 compat_sys_preadv2 379i386pwritev2sys_pwritev2 compat_sys_pwritev2 +380i386arch_prctl sys_arch_prctl compat_sys_arch_prctl diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index d86be29..71770a4 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -54,6 +55,7 @@ #include #include #include +#include asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); asmlinkage void ret_from_kernel_thread(void) __asm__("ret_from_kernel_thread"); @@ -316,3 +318,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) return prev_p; } + +SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, arg2) +{ + return do_arch_prctl(current, code, arg2); +} diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 5c60e2c..aa2b99a 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -599,6 +599,13 @@ SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, arg2) return ret; } +#ifdef CONFIG_IA32_EMULATION +COMPAT_SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, 
arg2) +{ + return do_arch_prctl(current, code, arg2); +} +#endif + unsigned long KSTK_ESP(struct task_struct *task) { return task_pt_regs(task)->sp; diff --git a/arch/x86/um/Makefile b/arch/x86/um/Makefile index 3ee2bb6..5e039d6 100644 --- a/arch/x86/um/Makefile +++ b/arch/x86/um/Makefile @@ -16,7 +16,7 @@ obj-y = bug.o bugs_$(BITS).o delay.o fault.o ksyms.o ldt.o \ ifeq ($(CONFIG_X86_32),y) -obj-y += checksum_32.o +obj-y += checksum_32.o syscalls_32.o obj-$(CONFIG_ELF_CORE) += elfcore.o subarch-y = ../lib/string_32.o ../lib/atomic64_32.o ../lib/atomic64_cx8_32.o diff --git a/arch/x86/um/syscalls_32.c b/arch/x86/um/syscalls_32.c new file mode 100644 index 000..ccf0598 --- /dev/null +++ b/arch/x86/um/syscalls_32.c @@ -0,0 +1,7 @@ +#include +#include + +SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, arg2) +{ + return -EINVAL; +} diff --git a/include/linux/compat.h b/include/linux/compat.h index f964ef7..0039d53 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -722,6 +722,8 @@ asmlinkage long compat_sys_sched_rr_get_interval(compat_pid_t pid, asmlinkage long compat_sys_fanotify_mark(int, unsigned int, __u32, __u32, int, const char __user *); +asmlinkage long compat_sys_arch_prctl(int, unsigned long); + /* * For most but not all architectures, "am I in a compat syscall?" and * "am I a compat task?" are the same question. For architectures on which -- 2.9.3
[PATCH v4 3/6] x86/arch_prctl Add a new do_arch_prctl
Add a new do_arch_prctl to handle arch_prctls that are not specific to 64 bits. Call it from the syscall entry point, but not any of the other callsites in the kernel, which all want one of the existing 64 bit only arch_prctls. Signed-off-by: Kyle Huey <kh...@kylehuey.com> --- arch/x86/include/asm/proto.h | 1 + arch/x86/kernel/process.c| 5 + arch/x86/kernel/process_64.c | 8 +++- 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h index 95c3e51..94a57cc 100644 --- a/arch/x86/include/asm/proto.h +++ b/arch/x86/include/asm/proto.h @@ -30,6 +30,7 @@ void x86_report_nx(void); extern int reboot_force; +long do_arch_prctl(struct task_struct *task, int code, unsigned long arg2); #ifdef CONFIG_X86_64 long do_arch_prctl_64(struct task_struct *task, int code, unsigned long arg2); #endif diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 62c0b0e..97aa104 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -567,3 +567,8 @@ unsigned long get_wchan(struct task_struct *p) } while (count++ < 16 && p->state != TASK_RUNNING); return 0; } + +long do_arch_prctl(struct task_struct *task, int code, unsigned long arg2) +{ + return -EINVAL; +} diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 292ce48..5c60e2c 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -590,7 +590,13 @@ long do_arch_prctl_64(struct task_struct *task, int code, unsigned long arg2) SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, arg2) { - return do_arch_prctl_64(current, code, arg2); + long ret; + + ret = do_arch_prctl_64(current, code, arg2); + if (ret == -EINVAL) + ret = do_arch_prctl(current, code, arg2); + + return ret; } unsigned long KSTK_ESP(struct task_struct *task) -- 2.9.3
[PATCH v4 4/6] x86/syscalls/32 Wire up arch_prctl on x86-32
Hook up arch_prctl to call do_arch_prctl on x86-32, and in 32 bit compat mode on x86-64. This allows us to have arch_prctls that are not specific to 64 bits. On UML, simply stub out this syscall. Signed-off-by: Kyle Huey --- arch/x86/entry/syscalls/syscall_32.tbl | 1 + arch/x86/kernel/process_32.c | 7 +++ arch/x86/kernel/process_64.c | 7 +++ arch/x86/um/Makefile | 2 +- arch/x86/um/syscalls_32.c | 7 +++ include/linux/compat.h | 2 ++ 6 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 arch/x86/um/syscalls_32.c diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index f848572..300fdf8 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -386,3 +386,4 @@ 377i386copy_file_range sys_copy_file_range 378i386preadv2 sys_preadv2 compat_sys_preadv2 379i386pwritev2sys_pwritev2 compat_sys_pwritev2 +380i386arch_prctl sys_arch_prctl compat_sys_arch_prctl diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index d86be29..71770a4 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -54,6 +55,7 @@ #include #include #include +#include asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); asmlinkage void ret_from_kernel_thread(void) __asm__("ret_from_kernel_thread"); @@ -316,3 +318,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) return prev_p; } + +SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, arg2) +{ + return do_arch_prctl(current, code, arg2); +} diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 5c60e2c..aa2b99a 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -599,6 +599,13 @@ SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, arg2) return ret; } +#ifdef CONFIG_IA32_EMULATION +COMPAT_SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, arg2) +{ + return 
do_arch_prctl(current, code, arg2); +} +#endif + unsigned long KSTK_ESP(struct task_struct *task) { return task_pt_regs(task)->sp; diff --git a/arch/x86/um/Makefile b/arch/x86/um/Makefile index 3ee2bb6..5e039d6 100644 --- a/arch/x86/um/Makefile +++ b/arch/x86/um/Makefile @@ -16,7 +16,7 @@ obj-y = bug.o bugs_$(BITS).o delay.o fault.o ksyms.o ldt.o \ ifeq ($(CONFIG_X86_32),y) -obj-y += checksum_32.o +obj-y += checksum_32.o syscalls_32.o obj-$(CONFIG_ELF_CORE) += elfcore.o subarch-y = ../lib/string_32.o ../lib/atomic64_32.o ../lib/atomic64_cx8_32.o diff --git a/arch/x86/um/syscalls_32.c b/arch/x86/um/syscalls_32.c new file mode 100644 index 000..ccf0598 --- /dev/null +++ b/arch/x86/um/syscalls_32.c @@ -0,0 +1,7 @@ +#include +#include + +SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, arg2) +{ + return -EINVAL; +} diff --git a/include/linux/compat.h b/include/linux/compat.h index f964ef7..0039d53 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -722,6 +722,8 @@ asmlinkage long compat_sys_sched_rr_get_interval(compat_pid_t pid, asmlinkage long compat_sys_fanotify_mark(int, unsigned int, __u32, __u32, int, const char __user *); +asmlinkage long compat_sys_arch_prctl(int, unsigned long); + /* * For most but not all architectures, "am I in a compat syscall?" and * "am I a compat task?" are the same question. For architectures on which -- 2.9.3
[PATCH v4 3/6] x86/arch_prctl: Add a new do_arch_prctl
Add a new do_arch_prctl to handle arch_prctls that are not specific to 64 bits. Call it from the syscall entry point, but not any of the other callsites in the kernel, which all want one of the existing 64 bit only arch_prctls. Signed-off-by: Kyle Huey --- arch/x86/include/asm/proto.h | 1 + arch/x86/kernel/process.c| 5 + arch/x86/kernel/process_64.c | 8 +++- 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h index 95c3e51..94a57cc 100644 --- a/arch/x86/include/asm/proto.h +++ b/arch/x86/include/asm/proto.h @@ -30,6 +30,7 @@ void x86_report_nx(void); extern int reboot_force; +long do_arch_prctl(struct task_struct *task, int code, unsigned long arg2); #ifdef CONFIG_X86_64 long do_arch_prctl_64(struct task_struct *task, int code, unsigned long arg2); #endif diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 62c0b0e..97aa104 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -567,3 +567,8 @@ unsigned long get_wchan(struct task_struct *p) } while (count++ < 16 && p->state != TASK_RUNNING); return 0; } + +long do_arch_prctl(struct task_struct *task, int code, unsigned long arg2) +{ + return -EINVAL; +} diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 292ce48..5c60e2c 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -590,7 +590,13 @@ long do_arch_prctl_64(struct task_struct *task, int code, unsigned long arg2) SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, arg2) { - return do_arch_prctl_64(current, code, arg2); + long ret; + + ret = do_arch_prctl_64(current, code, arg2); + if (ret == -EINVAL) + ret = do_arch_prctl(current, code, arg2); + + return ret; } unsigned long KSTK_ESP(struct task_struct *task) -- 2.9.3
[PATCH v4 2/6] x86/arch_prctl/64: Rename do_arch_prctl to do_arch_prctl_64
In order to introduce new arch_prctls that are not 64 bit only, rename the existing 64 bit implementation to do_arch_prctl_64. Also rename the second argument to arch_prctl, which will no longer always be an address. Signed-off-by: Kyle Huey <kh...@kylehuey.com> --- arch/x86/include/asm/proto.h | 4 +++- arch/x86/kernel/process_64.c | 26 ++ arch/x86/kernel/ptrace.c | 8 arch/x86/um/syscalls_64.c| 4 ++-- 4 files changed, 23 insertions(+), 19 deletions(-) diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h index 9b9b30b..95c3e51 100644 --- a/arch/x86/include/asm/proto.h +++ b/arch/x86/include/asm/proto.h @@ -30,6 +30,8 @@ void x86_report_nx(void); extern int reboot_force; -long do_arch_prctl(struct task_struct *task, int code, unsigned long addr); +#ifdef CONFIG_X86_64 +long do_arch_prctl_64(struct task_struct *task, int code, unsigned long arg2); +#endif #endif /* _ASM_X86_PROTO_H */ diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 4d6363c..292ce48 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -197,7 +197,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, (struct user_desc __user *)tls, 0); else #endif - err = do_arch_prctl(p, ARCH_SET_FS, tls); + err = do_arch_prctl_64(p, ARCH_SET_FS, tls); if (err) goto out; } @@ -525,7 +525,7 @@ void set_personality_ia32(bool x32) } EXPORT_SYMBOL_GPL(set_personality_ia32); -long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) +long do_arch_prctl_64(struct task_struct *task, int code, unsigned long arg2) { int ret = 0; int doit = task == current; @@ -533,48 +533,50 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) switch (code) { case ARCH_SET_GS: - if (addr >= TASK_SIZE_MAX) + if (arg2 >= TASK_SIZE_MAX) return -EPERM; cpu = get_cpu(); task->thread.gsindex = 0; - task->thread.gsbase = addr; + task->thread.gsbase = arg2; if (doit) { load_gs_index(0); - ret = 
wrmsrl_safe(MSR_KERNEL_GS_BASE, addr); + ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, arg2); } put_cpu(); break; case ARCH_SET_FS: /* Not strictly needed for fs, but do it for symmetry with gs */ - if (addr >= TASK_SIZE_MAX) + if (arg2 >= TASK_SIZE_MAX) return -EPERM; cpu = get_cpu(); task->thread.fsindex = 0; - task->thread.fsbase = addr; + task->thread.fsbase = arg2; if (doit) { /* set the selector to 0 to not confuse __switch_to */ loadsegment(fs, 0); - ret = wrmsrl_safe(MSR_FS_BASE, addr); + ret = wrmsrl_safe(MSR_FS_BASE, arg2); } put_cpu(); break; case ARCH_GET_FS: { unsigned long base; + if (doit) rdmsrl(MSR_FS_BASE, base); else base = task->thread.fsbase; - ret = put_user(base, (unsigned long __user *)addr); + ret = put_user(base, (unsigned long __user *)arg2); break; } case ARCH_GET_GS: { unsigned long base; + if (doit) rdmsrl(MSR_KERNEL_GS_BASE, base); else base = task->thread.gsbase; - ret = put_user(base, (unsigned long __user *)addr); + ret = put_user(base, (unsigned long __user *)arg2); break; } @@ -586,9 +588,9 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) return ret; } -SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, addr) +SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, arg2) { - return do_arch_prctl(current, code, addr); + return do_arch_prctl_64(current, code, arg2); } unsigned long KSTK_ESP(struct task_struct *task) diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index f79576a..030cbc5 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -395,12 +395,12 @@ static int putreg(struct task_struct *child, if (value >= TASK_SIZE_MAX) return -EIO; /* -* When changing the segment base, use do_arch_prctl +* When changing the segment base, u
[PATCH v4 5/6] x86/cpufeature: Detect CPUID faulting support
Intel supports faulting on the CPUID instruction beginning with Ivy Bridge. When enabled, the processor will fault on attempts to execute the CPUID instruction with CPL>0. This will allow a ptracer to emulate the CPUID instruction. Bit 31 of MSR_PLATFORM_INFO advertises support for this feature. It is documented in detail in Section 2.3.2 of http://www.intel.com/content/dam/www/public/us/en/documents/application-notes/virtualization-technology-flexmigration-application-note.pdf Detect support for this feature and expose it as X86_FEATURE_CPUID_FAULT. Signed-off-by: Kyle Huey <kh...@kylehuey.com> Reviewed-by: Andy Lutomirski <l...@kernel.org> --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/msr-index.h | 1 + arch/x86/kernel/cpu/scattered.c| 13 + 3 files changed, 15 insertions(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 92a8308..78b9d06 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -190,6 +190,7 @@ #define X86_FEATURE_CPB( 7*32+ 2) /* AMD Core Performance Boost */ #define X86_FEATURE_EPB( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ +#define X86_FEATURE_CPUID_FAULT ( 7*32+ 4) /* Intel CPUID faulting */ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 56f4c66..39aa563 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -41,6 +41,7 @@ #define MSR_IA32_PERFCTR1 0x00c2 #define MSR_FSB_FREQ 0x00cd #define MSR_PLATFORM_INFO 0x00ce +#define PLATINFO_CPUID_FAULT (1UL << 31) #define MSR_NHM_SNB_PKG_CST_CFG_CTL0x00e2 #define NHM_C3_AUTO_DEMOTE (1UL << 25) diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 8cb57df..7901481 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -24,6 +24,16 @@ 
enum cpuid_regs { CR_EBX }; +static bool supports_cpuid_faulting(void) +{ + unsigned int lo, hi; + + if (rdmsr_safe(MSR_PLATFORM_INFO, , )) + return false; + + return lo & PLATINFO_CPUID_FAULT; +} + void init_scattered_cpuid_features(struct cpuinfo_x86 *c) { u32 max_level; @@ -54,4 +64,7 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c) if (regs[cb->reg] & (1 << cb->bit)) set_cpu_cap(c, cb->feature); } + + if (supports_cpuid_faulting()) + set_cpu_cap(c, X86_FEATURE_CPUID_FAULT); } -- 2.9.3
[PATCH v4 5/6] x86/cpufeature: Detect CPUID faulting support
Intel supports faulting on the CPUID instruction beginning with Ivy Bridge. When enabled, the processor will fault on attempts to execute the CPUID instruction with CPL>0. This will allow a ptracer to emulate the CPUID instruction. Bit 31 of MSR_PLATFORM_INFO advertises support for this feature. It is documented in detail in Section 2.3.2 of http://www.intel.com/content/dam/www/public/us/en/documents/application-notes/virtualization-technology-flexmigration-application-note.pdf Detect support for this feature and expose it as X86_FEATURE_CPUID_FAULT. Signed-off-by: Kyle Huey Reviewed-by: Andy Lutomirski --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/msr-index.h | 1 + arch/x86/kernel/cpu/scattered.c| 13 + 3 files changed, 15 insertions(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 92a8308..78b9d06 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -190,6 +190,7 @@ #define X86_FEATURE_CPB( 7*32+ 2) /* AMD Core Performance Boost */ #define X86_FEATURE_EPB( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ +#define X86_FEATURE_CPUID_FAULT ( 7*32+ 4) /* Intel CPUID faulting */ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 56f4c66..39aa563 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -41,6 +41,7 @@ #define MSR_IA32_PERFCTR1 0x00c2 #define MSR_FSB_FREQ 0x00cd #define MSR_PLATFORM_INFO 0x00ce +#define PLATINFO_CPUID_FAULT (1UL << 31) #define MSR_NHM_SNB_PKG_CST_CFG_CTL0x00e2 #define NHM_C3_AUTO_DEMOTE (1UL << 25) diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 8cb57df..7901481 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -24,6 +24,16 @@ enum cpuid_regs { CR_EBX }; +static bool 
supports_cpuid_faulting(void) +{ + unsigned int lo, hi; + + if (rdmsr_safe(MSR_PLATFORM_INFO, , )) + return false; + + return lo & PLATINFO_CPUID_FAULT; +} + void init_scattered_cpuid_features(struct cpuinfo_x86 *c) { u32 max_level; @@ -54,4 +64,7 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c) if (regs[cb->reg] & (1 << cb->bit)) set_cpu_cap(c, cb->feature); } + + if (supports_cpuid_faulting()) + set_cpu_cap(c, X86_FEATURE_CPUID_FAULT); } -- 2.9.3
[PATCH v4 2/6] x86/arch_prctl/64: Rename do_arch_prctl to do_arch_prctl_64
In order to introduce new arch_prctls that are not 64 bit only, rename the existing 64 bit implementation to do_arch_prctl_64. Also rename the second argument to arch_prctl, which will no longer always be an address. Signed-off-by: Kyle Huey --- arch/x86/include/asm/proto.h | 4 +++- arch/x86/kernel/process_64.c | 26 ++ arch/x86/kernel/ptrace.c | 8 arch/x86/um/syscalls_64.c| 4 ++-- 4 files changed, 23 insertions(+), 19 deletions(-) diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h index 9b9b30b..95c3e51 100644 --- a/arch/x86/include/asm/proto.h +++ b/arch/x86/include/asm/proto.h @@ -30,6 +30,8 @@ void x86_report_nx(void); extern int reboot_force; -long do_arch_prctl(struct task_struct *task, int code, unsigned long addr); +#ifdef CONFIG_X86_64 +long do_arch_prctl_64(struct task_struct *task, int code, unsigned long arg2); +#endif #endif /* _ASM_X86_PROTO_H */ diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 4d6363c..292ce48 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -197,7 +197,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, (struct user_desc __user *)tls, 0); else #endif - err = do_arch_prctl(p, ARCH_SET_FS, tls); + err = do_arch_prctl_64(p, ARCH_SET_FS, tls); if (err) goto out; } @@ -525,7 +525,7 @@ void set_personality_ia32(bool x32) } EXPORT_SYMBOL_GPL(set_personality_ia32); -long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) +long do_arch_prctl_64(struct task_struct *task, int code, unsigned long arg2) { int ret = 0; int doit = task == current; @@ -533,48 +533,50 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) switch (code) { case ARCH_SET_GS: - if (addr >= TASK_SIZE_MAX) + if (arg2 >= TASK_SIZE_MAX) return -EPERM; cpu = get_cpu(); task->thread.gsindex = 0; - task->thread.gsbase = addr; + task->thread.gsbase = arg2; if (doit) { load_gs_index(0); - ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr); + ret 
= wrmsrl_safe(MSR_KERNEL_GS_BASE, arg2); } put_cpu(); break; case ARCH_SET_FS: /* Not strictly needed for fs, but do it for symmetry with gs */ - if (addr >= TASK_SIZE_MAX) + if (arg2 >= TASK_SIZE_MAX) return -EPERM; cpu = get_cpu(); task->thread.fsindex = 0; - task->thread.fsbase = addr; + task->thread.fsbase = arg2; if (doit) { /* set the selector to 0 to not confuse __switch_to */ loadsegment(fs, 0); - ret = wrmsrl_safe(MSR_FS_BASE, addr); + ret = wrmsrl_safe(MSR_FS_BASE, arg2); } put_cpu(); break; case ARCH_GET_FS: { unsigned long base; + if (doit) rdmsrl(MSR_FS_BASE, base); else base = task->thread.fsbase; - ret = put_user(base, (unsigned long __user *)addr); + ret = put_user(base, (unsigned long __user *)arg2); break; } case ARCH_GET_GS: { unsigned long base; + if (doit) rdmsrl(MSR_KERNEL_GS_BASE, base); else base = task->thread.gsbase; - ret = put_user(base, (unsigned long __user *)addr); + ret = put_user(base, (unsigned long __user *)arg2); break; } @@ -586,9 +588,9 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) return ret; } -SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, addr) +SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, arg2) { - return do_arch_prctl(current, code, addr); + return do_arch_prctl_64(current, code, arg2); } unsigned long KSTK_ESP(struct task_struct *task) diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index f79576a..030cbc5 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -395,12 +395,12 @@ static int putreg(struct task_struct *child, if (value >= TASK_SIZE_MAX) return -EIO; /* -* When changing the segment base, use do_arch_prctl +* When changing the segment base, use do_arch_prctl_64
[PATCH v4 1/6] x86/arch_prctl/64: Use SYSCALL_DEFINE2 to define sys_arch_prctl
Signed-off-by: Kyle Huey <kh...@kylehuey.com> --- arch/x86/kernel/process_64.c | 3 ++- arch/x86/um/syscalls_64.c| 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 63236d8..4d6363c 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -585,7 +586,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) return ret; } -long sys_arch_prctl(int code, unsigned long addr) +SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, addr) { return do_arch_prctl(current, code, addr); } diff --git a/arch/x86/um/syscalls_64.c b/arch/x86/um/syscalls_64.c index e655227..3282066 100644 --- a/arch/x86/um/syscalls_64.c +++ b/arch/x86/um/syscalls_64.c @@ -72,7 +72,7 @@ long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr) return ret; } -long sys_arch_prctl(int code, unsigned long addr) +SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, addr) { return arch_prctl(current, code, (unsigned long __user *) addr); } -- 2.9.3 base-commit: 024c7e3756d8a42fc41fe8a9488488b9b09d1dcc
[PATCH v4 1/6] x86/arch_prctl/64: Use SYSCALL_DEFINE2 to define sys_arch_prctl
Signed-off-by: Kyle Huey --- arch/x86/kernel/process_64.c | 3 ++- arch/x86/um/syscalls_64.c| 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 63236d8..4d6363c 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -585,7 +586,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) return ret; } -long sys_arch_prctl(int code, unsigned long addr) +SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, addr) { return do_arch_prctl(current, code, addr); } diff --git a/arch/x86/um/syscalls_64.c b/arch/x86/um/syscalls_64.c index e655227..3282066 100644 --- a/arch/x86/um/syscalls_64.c +++ b/arch/x86/um/syscalls_64.c @@ -72,7 +72,7 @@ long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr) return ret; } -long sys_arch_prctl(int code, unsigned long addr) +SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, addr) { return arch_prctl(current, code, (unsigned long __user *) addr); } -- 2.9.3 base-commit: 024c7e3756d8a42fc41fe8a9488488b9b09d1dcc
[PATCH v4 0/6] x86/arch_prctl: Add ARCH_[GET|SET]_CPUID for controlling the CPUID instruction
rr (http://rr-project.org/), a userspace record-and-replay reverse- execution debugger, would like to trap and emulate the CPUID instruction. This would allow us to a) mask away certain hardware features that rr does not support (e.g. RDRAND) and b) enable trace portability across machines by providing constant results. Newer Intel CPUs (Ivy Bridge and later) can fault when CPUID is executed at CPL > 0. Expose this capability to userspace as a new pair of arch_prctls, ARCH_GET_CPUID and ARCH_SET_CPUID, with two values, ARCH_CPUID_ENABLE and ARCH_CPUID_SIGSEGV. The following changes have been made since v3: Patch 1 was split into patches 1-4, patches 2 and 3 became patches 5 and 6, respectively. Patch 1: - Use SYSCALL_DEFINE in UML. Patch 2: - More descriptive commit message. Patch 3: - More decriptive commit message. - Name the common arch_prctl function do_arch_prctl instead of do_arch_prctl_common Patch 4: - Move the 32-bit syscall entry point to process_32.c, place the compat entry point in process_64.c Patch 5 (previously Patch 2): - More descriptive commit message. - Prefix the #define for the cpuid faulting bit with PLATINFO - supports_cpuid_faulting returns bool - Rearrange supports_cpuid_faulting to avoid linebreaks Patch 6 (previously Patch 3): - ARCH_GET_CPUID now takes 0 for the second argument, and returns the result directly. - arch_post_exec is now a #define, called from setup_new_exec - The test now uses errx - The test now checks that ARCH_GET_CPUID returns ARCH_CPUID_SIGSEGV after fork()
[PATCH v4 0/6] x86/arch_prctl: Add ARCH_[GET|SET]_CPUID for controlling the CPUID instruction
rr (http://rr-project.org/), a userspace record-and-replay reverse- execution debugger, would like to trap and emulate the CPUID instruction. This would allow us to a) mask away certain hardware features that rr does not support (e.g. RDRAND) and b) enable trace portability across machines by providing constant results. Newer Intel CPUs (Ivy Bridge and later) can fault when CPUID is executed at CPL > 0. Expose this capability to userspace as a new pair of arch_prctls, ARCH_GET_CPUID and ARCH_SET_CPUID, with two values, ARCH_CPUID_ENABLE and ARCH_CPUID_SIGSEGV. The following changes have been made since v3: Patch 1 was split into patches 1-4, patches 2 and 3 became patches 5 and 6, respectively. Patch 1: - Use SYSCALL_DEFINE in UML. Patch 2: - More descriptive commit message. Patch 3: - More decriptive commit message. - Name the common arch_prctl function do_arch_prctl instead of do_arch_prctl_common Patch 4: - Move the 32-bit syscall entry point to process_32.c, place the compat entry point in process_64.c Patch 5 (previously Patch 2): - More descriptive commit message. - Prefix the #define for the cpuid faulting bit with PLATINFO - supports_cpuid_faulting returns bool - Rearrange supports_cpuid_faulting to avoid linebreaks Patch 6 (previously Patch 3): - ARCH_GET_CPUID now takes 0 for the second argument, and returns the result directly. - arch_post_exec is now a #define, called from setup_new_exec - The test now uses errx - The test now checks that ARCH_GET_CPUID returns ARCH_CPUID_SIGSEGV after fork()
Re: [PATCH v3 1/3] syscalls,x86: Expose arch_prctl on x86-32.
On Fri, Sep 16, 2016 at 12:50 AM, Thomas Gleixner <t...@linutronix.de> wrote: > On Thu, 15 Sep 2016, Kyle Huey wrote: > > First of all, please add a cover letter [PATCH 0/N] to your patch series > and send it with something which provides proper mail threading. > See: git-send-email, quilt I did ... seems like using git-send-email with --cc-cmd=scripts/get_maintainer.pl is not a good idea since people get CCd to some parts of the thread and not others. https://lkml.org/lkml/2016/9/15/811 >> arch_prctl is currently 64-bit only. Wire it up for 32-bits, as a no-op for >> now. Rename the second arg to a more generic name. > > This changelog is useless. > > - it does not provide any rationale for this change, i.e. why this is > required. Just because its 64bit only is not a reason. > > - "Rename the second arg to a more generic name" does not give > any useful information. > > Misleading information is worse than no information. > > Further your patch does 5 things at once. It wants to be split into parts: > > 1) Rename do_arch_prctl() and change the argument name, > >> -long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) >> +long do_arch_prctl_64(struct task_struct *task, int code, unsigned long >> arg2) > > 2) Provide do_arch_prctl_common() and hook it up to the arch_prctl syscall > >> -long sys_arch_prctl(int code, unsigned long addr) >> +SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, arg2) >> { >> - return do_arch_prctl(current, code, addr); >> + long ret; >> + >> + ret = do_arch_prctl_64(current, code, arg2); >> + if (ret == -EINVAL) >> + ret = do_arch_prctl_common(current, code, arg2); >> + >> + return ret; >> } > > 3) Implement the compat version Ok. - Kyle
Re: [PATCH v3 1/3] syscalls,x86: Expose arch_prctl on x86-32.
On Fri, Sep 16, 2016 at 12:50 AM, Thomas Gleixner wrote: > On Thu, 15 Sep 2016, Kyle Huey wrote: > > First of all, please add a cover letter [PATCH 0/N] to your patch series > and send it with something which provides proper mail threading. > See: git-send-email, quilt I did ... seems like using git-send-email with --cc-cmd=scripts/get_maintainer.pl is not a good idea since people get CCd to some parts of the thread and not others. https://lkml.org/lkml/2016/9/15/811 >> arch_prctl is currently 64-bit only. Wire it up for 32-bits, as a no-op for >> now. Rename the second arg to a more generic name. > > This changelog is useless. > > - it does not provide any rationale for this change, i.e. why this is > required. Just because its 64bit only is not a reason. > > - "Rename the second arg to a more generic name" does not give > any useful information. > > Misleading information is worse than no information. > > Further your patch does 5 things at once. It wants to be split into parts: > > 1) Rename do_arch_prctl() and change the argument name, > >> -long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) >> +long do_arch_prctl_64(struct task_struct *task, int code, unsigned long >> arg2) > > 2) Provide do_arch_prctl_common() and hook it up to the arch_prctl syscall > >> -long sys_arch_prctl(int code, unsigned long addr) >> +SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, arg2) >> { >> - return do_arch_prctl(current, code, addr); >> + long ret; >> + >> + ret = do_arch_prctl_64(current, code, arg2); >> + if (ret == -EINVAL) >> + ret = do_arch_prctl_common(current, code, arg2); >> + >> + return ret; >> } > > 3) Implement the compat version Ok. - Kyle
Re: [PATCH v3 3/3] x86,arch_prctl: Add ARCH_[GET|SET]_CPUID for controlling the CPUID instruction
On Thu, Sep 15, 2016 at 5:07 PM, Andy Lutomirski <l...@amacapital.net> wrote: > On Thu, Sep 15, 2016 at 4:33 PM, Kyle Huey <m...@kylehuey.com> wrote: >> +int get_cpuid_mode(unsigned long adr) >> +{ >> + unsigned int val; >> + >> + if (test_thread_flag(TIF_NOCPUID)) >> + val = ARCH_CPUID_SIGSEGV; >> + else >> + val = ARCH_CPUID_ENABLE; >> + >> + return put_user(val, (unsigned int __user *)adr); >> +} > > Can we just do: > > if (arg2 != 0) > return -EINVAL; > else > return test_thread_flag(TIF_NOCPUID) ? ARCH_CPUID_SIGSEGBV : > ARCH_CPUID_ENABLE; We could. I copied the pattern of PR_GET_TSC here, but I don't feel strongly about it. >> diff --git a/tools/testing/selftests/x86/cpuid-fault.c >> b/tools/testing/selftests/x86/cpuid-fault.c >> new file mode 100644 >> index 000..a9f3f68 >> --- /dev/null >> +++ b/tools/testing/selftests/x86/cpuid-fault.c >> @@ -0,0 +1,234 @@ >> + >> +/* >> + * Tests for arch_prctl(ARCH_GET_CPUID, ...) / prctl(ARCH_SET_CPUID, ...) >> + * >> + * Basic test to test behaviour of ARCH_GET_CPUID and ARCH_SET_CPUID >> + */ >> + >> +#include >> +#include >> +#include >> +#include >> +#include >> +#include >> +#include >> +#include >> + >> +#include >> +#include >> + >> +const char *cpuid_names[] = { >> + [0] = "[not set]", > > Is 0 even possible? Only if the call fails. - Kyle
Re: [PATCH v3 3/3] x86,arch_prctl: Add ARCH_[GET|SET]_CPUID for controlling the CPUID instruction
On Thu, Sep 15, 2016 at 5:07 PM, Andy Lutomirski wrote: > On Thu, Sep 15, 2016 at 4:33 PM, Kyle Huey wrote: >> +int get_cpuid_mode(unsigned long adr) >> +{ >> + unsigned int val; >> + >> + if (test_thread_flag(TIF_NOCPUID)) >> + val = ARCH_CPUID_SIGSEGV; >> + else >> + val = ARCH_CPUID_ENABLE; >> + >> + return put_user(val, (unsigned int __user *)adr); >> +} > > Can we just do: > > if (arg2 != 0) > return -EINVAL; > else > return test_thread_flag(TIF_NOCPUID) ? ARCH_CPUID_SIGSEGBV : > ARCH_CPUID_ENABLE; We could. I copied the pattern of PR_GET_TSC here, but I don't feel strongly about it. >> diff --git a/tools/testing/selftests/x86/cpuid-fault.c >> b/tools/testing/selftests/x86/cpuid-fault.c >> new file mode 100644 >> index 000..a9f3f68 >> --- /dev/null >> +++ b/tools/testing/selftests/x86/cpuid-fault.c >> @@ -0,0 +1,234 @@ >> + >> +/* >> + * Tests for arch_prctl(ARCH_GET_CPUID, ...) / prctl(ARCH_SET_CPUID, ...) >> + * >> + * Basic test to test behaviour of ARCH_GET_CPUID and ARCH_SET_CPUID >> + */ >> + >> +#include >> +#include >> +#include >> +#include >> +#include >> +#include >> +#include >> +#include >> + >> +#include >> +#include >> + >> +const char *cpuid_names[] = { >> + [0] = "[not set]", > > Is 0 even possible? Only if the call fails. - Kyle
Re: [Xen-devel] [PATCH v2 2/3] x86: Test and expose CPUID faulting capabilities in /proc/cpuinfo
On Thu, Sep 15, 2016 at 12:37 PM, Andy Lutomirski <l...@amacapital.net> wrote: > On Thu, Sep 15, 2016 at 12:11 PM, Kyle Huey <m...@kylehuey.com> wrote: >> On Thu, Sep 15, 2016 at 3:25 AM, Jan Beulich <jbeul...@suse.com> wrote: >>>>>> On 15.09.16 at 12:05, <david.vra...@citrix.com> wrote: >>>> On 14/09/16 22:01, Kyle Huey wrote: >>>>> Xen advertises the underlying support for CPUID faulting but not does pass >>>>> through writes to the relevant MSR, nor does it virtualize it, so it does >>>>> not actually work. For now mask off the relevant bit on MSR_PLATFORM_INFO. >>>> >>>> Could you clarify in the commit message that it is PV guests that are >>>> affected. >>> >>> What makes you think HVM ones aren't? >> >> Testing on EC2, HVM guests are affected as well. Not sure what to do >> about that. >> > > It's kind of nasty, but it shouldn't be *too* hard to probe for this > thing during early boot. Allocate a page somewhere that has the user > bit set, put something like this in it: > > cpuid > inc %eax /* return 1 */ > movw %ax, %ss /* force %GP to get out of here */ > > Call it like this from asm (real asm, not inline): > > FRAME_BEGIN > pushq %rbx > > xorl %eax, %eax > > /* Push return frame */ > pushq %ss > pushq %rsp > addq $8, (%rsp) > pushfq > pushq %cs > pushq $end_of_cpuid_faulting_test > > /* Call it! */ > pushq $__USER_DS > pushq $0 > pushq $X86_EFLAGS_FIXED /* leave IF off when running the CPL3 stub */ > pushq $__USER_CS > pushq [address of userspace stub] > INTERRUPT_RETURN > > end_of_cpuid_faulting_test: > pop %rbx > > FRAME_END > > Run this after the main GDT is loaded but while the #GP vector is > temporarily pointing to: > > movq SS-RIP(%rsp), %rsp /* pop the real return frame */ > INTERRUPT_RETURN > > and with interrupts off. The function should return 0 if CPUID > faulting works and 1 if it doesn't. > > Yeah, this is gross, but it should work. I'm not sure how okay I am > with putting this crap in the kernel... 
This is rather heroic :) I think it's more trouble than it's worth though. The latest series I submitted doesn't try to handle this. Instead I'll patch Xen to fix the bug. - Kyle
Re: [Xen-devel] [PATCH v2 2/3] x86: Test and expose CPUID faulting capabilities in /proc/cpuinfo
On Thu, Sep 15, 2016 at 12:37 PM, Andy Lutomirski wrote: > On Thu, Sep 15, 2016 at 12:11 PM, Kyle Huey wrote: >> On Thu, Sep 15, 2016 at 3:25 AM, Jan Beulich wrote: >>>>>> On 15.09.16 at 12:05, wrote: >>>> On 14/09/16 22:01, Kyle Huey wrote: >>>>> Xen advertises the underlying support for CPUID faulting but not does pass >>>>> through writes to the relevant MSR, nor does it virtualize it, so it does >>>>> not actually work. For now mask off the relevant bit on MSR_PLATFORM_INFO. >>>> >>>> Could you clarify in the commit message that it is PV guests that are >>>> affected. >>> >>> What makes you think HVM ones aren't? >> >> Testing on EC2, HVM guests are affected as well. Not sure what to do >> about that. >> > > It's kind of nasty, but it shouldn't be *too* hard to probe for this > thing during early boot. Allocate a page somewhere that has the user > bit set, put something like this in it: > > cpuid > inc %eax /* return 1 */ > movw %ax, %ss /* force %GP to get out of here */ > > Call it like this from asm (real asm, not inline): > > FRAME_BEGIN > pushq %rbx > > xorl %eax, %eax > > /* Push return frame */ > pushq %ss > pushq %rsp > addq $8, (%rsp) > pushfq > pushq %cs > pushq $end_of_cpuid_faulting_test > > /* Call it! */ > pushq $__USER_DS > pushq $0 > pushq $X86_EFLAGS_FIXED /* leave IF off when running the CPL3 stub */ > pushq $__USER_CS > pushq [address of userspace stub] > INTERRUPT_RETURN > > end_of_cpuid_faulting_test: > pop %rbx > > FRAME_END > > Run this after the main GDT is loaded but while the #GP vector is > temporarily pointing to: > > movq SS-RIP(%rsp), %rsp /* pop the real return frame */ > INTERRUPT_RETURN > > and with interrupts off. The function should return 0 if CPUID > faulting works and 1 if it doesn't. > > Yeah, this is gross, but it should work. I'm not sure how okay I am > with putting this crap in the kernel... This is rather heroic :) I think it's more trouble than it's worth though. 
The latest series I submitted doesn't try to handle this. Instead I'll patch Xen to fix the bug. - Kyle
[PATCH v3 3/3] x86,arch_prctl Add ARCH_[GET|SET]_CPUID for controlling the CPUID instruction
Intel supports faulting on the CPUID instruction in newer processors. Bit 31 of MSR_PLATFORM_INFO advertises support for this feature. It is documented in detail in Section 2.3.2 of http://www.intel.com/content/dam/www/public/us/en/documents/application-notes/virtualization-technology-flexmigration-application-note.pdf Support for this is implemented as a new pair of arch_prctls, available on both x86-32 and x86-64. The structure mirrors PR_[GET|SET]_TSC. Like the TSC flag, CPUID faulting is propagated across forks. Unlike the TSC flag, it is reset (to CPUID enabled) on exec. Signed-off-by: Kyle Huey <kh...@kylehuey.com> --- arch/x86/include/asm/msr-index.h | 1 + arch/x86/include/asm/thread_info.h| 5 +- arch/x86/include/uapi/asm/prctl.h | 6 + arch/x86/kernel/process.c | 98 - fs/exec.c | 6 + tools/testing/selftests/x86/Makefile | 2 +- tools/testing/selftests/x86/cpuid-fault.c | 234 ++ 7 files changed, 349 insertions(+), 3 deletions(-) create mode 100644 tools/testing/selftests/x86/cpuid-fault.c diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 83908d5..4aebec2 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -53,6 +53,7 @@ #define MSR_MTRRcap0x00fe #define MSR_IA32_BBL_CR_CTL0x0119 #define MSR_IA32_BBL_CR_CTL3 0x011e +#define MSR_MISC_FEATURES_ENABLES 0x0140 #define MSR_IA32_SYSENTER_CS 0x0174 #define MSR_IA32_SYSENTER_ESP 0x0175 diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 8b7c8d8..e3c40c6 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -93,6 +93,7 @@ struct thread_info { #define TIF_SECCOMP8 /* secure computing */ #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ #define TIF_UPROBE 12 /* breakpointed or singlestepping */ +#define TIF_NOCPUID15 /* CPUID is not accessible in userland */ #define TIF_NOTSC 16 /* TSC is not accessible in userland */ #define TIF_IA32 17 /* IA32 
compatibility process */ #define TIF_FORK 18 /* ret_from_fork */ @@ -117,6 +118,7 @@ struct thread_info { #define _TIF_SECCOMP (1 << TIF_SECCOMP) #define _TIF_USER_RETURN_NOTIFY(1 << TIF_USER_RETURN_NOTIFY) #define _TIF_UPROBE(1 << TIF_UPROBE) +#define _TIF_NOCPUID (1 << TIF_NOCPUID) #define _TIF_NOTSC (1 << TIF_NOTSC) #define _TIF_IA32 (1 << TIF_IA32) #define _TIF_FORK (1 << TIF_FORK) @@ -146,7 +148,7 @@ struct thread_info { /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW \ - (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP) + (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP) #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) @@ -293,6 +295,7 @@ static inline bool in_ia32_syscall(void) extern void arch_task_cache_init(void); extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); extern void arch_release_task_struct(struct task_struct *tsk); +extern void arch_post_exec(void); #endif /* !__ASSEMBLY__ */ #endif /* _ASM_X86_THREAD_INFO_H */ diff --git a/arch/x86/include/uapi/asm/prctl.h b/arch/x86/include/uapi/asm/prctl.h index 3ac5032..c087e55 100644 --- a/arch/x86/include/uapi/asm/prctl.h +++ b/arch/x86/include/uapi/asm/prctl.h @@ -6,4 +6,10 @@ #define ARCH_GET_FS 0x1003 #define ARCH_GET_GS 0x1004 +/* Get/set the process' ability to use the CPUID instruction */ +#define ARCH_GET_CPUID 0x1005 +#define ARCH_SET_CPUID 0x1006 +# define ARCH_CPUID_ENABLE 1 /* allow the use of the CPUID instruction */ +# define ARCH_CPUID_SIGSEGV2 /* throw a SIGSEGV instead of reading the CPUID */ + #endif /* _ASM_X86_PRCTL_H */ diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 1421451..f307d5c 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -32,6 +32,7 @@ #include #include #include +#include /* * per-CPU TSS segments. 
Threads are completely 'soft' on Linux, @@ -191,6 +192,75 @@ int set_tsc_mode(unsigned int val) return 0; } +static void switch_cpuid_faulting(bool on) +{ + if (on) + msr_set_bit(MSR_MISC_FEATURES_ENABLES, 0); + else + msr_clear_bit(MSR_MISC_FEATURES_ENABLES, 0); +} + +static void disable_cpuid(void) +{ + preempt_disable(); + if (!test_and_set_thread_flag(TIF_NOCPUID)) + /* +
[PATCH v3 3/3] x86,arch_prctl Add ARCH_[GET|SET]_CPUID for controlling the CPUID instruction
Intel supports faulting on the CPUID instruction in newer processors. Bit 31 of MSR_PLATFORM_INFO advertises support for this feature. It is documented in detail in Section 2.3.2 of http://www.intel.com/content/dam/www/public/us/en/documents/application-notes/virtualization-technology-flexmigration-application-note.pdf Support for this is implemented as a new pair of arch_prctls, available on both x86-32 and x86-64. The structure mirrors PR_[GET|SET]_TSC. Like the TSC flag, CPUID faulting is propagated across forks. Unlike the TSC flag, it is reset (to CPUID enabled) on exec. Signed-off-by: Kyle Huey --- arch/x86/include/asm/msr-index.h | 1 + arch/x86/include/asm/thread_info.h| 5 +- arch/x86/include/uapi/asm/prctl.h | 6 + arch/x86/kernel/process.c | 98 - fs/exec.c | 6 + tools/testing/selftests/x86/Makefile | 2 +- tools/testing/selftests/x86/cpuid-fault.c | 234 ++ 7 files changed, 349 insertions(+), 3 deletions(-) create mode 100644 tools/testing/selftests/x86/cpuid-fault.c diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 83908d5..4aebec2 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -53,6 +53,7 @@ #define MSR_MTRRcap0x00fe #define MSR_IA32_BBL_CR_CTL0x0119 #define MSR_IA32_BBL_CR_CTL3 0x011e +#define MSR_MISC_FEATURES_ENABLES 0x0140 #define MSR_IA32_SYSENTER_CS 0x0174 #define MSR_IA32_SYSENTER_ESP 0x0175 diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 8b7c8d8..e3c40c6 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -93,6 +93,7 @@ struct thread_info { #define TIF_SECCOMP8 /* secure computing */ #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ #define TIF_UPROBE 12 /* breakpointed or singlestepping */ +#define TIF_NOCPUID15 /* CPUID is not accessible in userland */ #define TIF_NOTSC 16 /* TSC is not accessible in userland */ #define TIF_IA32 17 /* IA32 compatibility process */ 
#define TIF_FORK 18 /* ret_from_fork */ @@ -117,6 +118,7 @@ struct thread_info { #define _TIF_SECCOMP (1 << TIF_SECCOMP) #define _TIF_USER_RETURN_NOTIFY(1 << TIF_USER_RETURN_NOTIFY) #define _TIF_UPROBE(1 << TIF_UPROBE) +#define _TIF_NOCPUID (1 << TIF_NOCPUID) #define _TIF_NOTSC (1 << TIF_NOTSC) #define _TIF_IA32 (1 << TIF_IA32) #define _TIF_FORK (1 << TIF_FORK) @@ -146,7 +148,7 @@ struct thread_info { /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW \ - (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP) + (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP) #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) @@ -293,6 +295,7 @@ static inline bool in_ia32_syscall(void) extern void arch_task_cache_init(void); extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); extern void arch_release_task_struct(struct task_struct *tsk); +extern void arch_post_exec(void); #endif /* !__ASSEMBLY__ */ #endif /* _ASM_X86_THREAD_INFO_H */ diff --git a/arch/x86/include/uapi/asm/prctl.h b/arch/x86/include/uapi/asm/prctl.h index 3ac5032..c087e55 100644 --- a/arch/x86/include/uapi/asm/prctl.h +++ b/arch/x86/include/uapi/asm/prctl.h @@ -6,4 +6,10 @@ #define ARCH_GET_FS 0x1003 #define ARCH_GET_GS 0x1004 +/* Get/set the process' ability to use the CPUID instruction */ +#define ARCH_GET_CPUID 0x1005 +#define ARCH_SET_CPUID 0x1006 +# define ARCH_CPUID_ENABLE 1 /* allow the use of the CPUID instruction */ +# define ARCH_CPUID_SIGSEGV2 /* throw a SIGSEGV instead of reading the CPUID */ + #endif /* _ASM_X86_PRCTL_H */ diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 1421451..f307d5c 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -32,6 +32,7 @@ #include #include #include +#include /* * per-CPU TSS segments. 
Threads are completely 'soft' on Linux, @@ -191,6 +192,75 @@ int set_tsc_mode(unsigned int val) return 0; } +static void switch_cpuid_faulting(bool on) +{ + if (on) + msr_set_bit(MSR_MISC_FEATURES_ENABLES, 0); + else + msr_clear_bit(MSR_MISC_FEATURES_ENABLES, 0); +} + +static void disable_cpuid(void) +{ + preempt_disable(); + if (!test_and_set_thread_flag(TIF_NOCPUID)) + /* +* Must flip the CPU state synchro
[PATCH v3 2/3] x86 Test and expose CPUID faulting capabilities in /proc/cpuinfo
Signed-off-by: Kyle Huey <kh...@kylehuey.com> --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/msr-index.h | 1 + arch/x86/kernel/cpu/scattered.c| 14 ++ 3 files changed, 16 insertions(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 92a8308..78b9d06 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -190,6 +190,7 @@ #define X86_FEATURE_CPB( 7*32+ 2) /* AMD Core Performance Boost */ #define X86_FEATURE_EPB( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ +#define X86_FEATURE_CPUID_FAULT ( 7*32+ 4) /* Intel CPUID faulting */ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 56f4c66..83908d5 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -41,6 +41,7 @@ #define MSR_IA32_PERFCTR1 0x00c2 #define MSR_FSB_FREQ 0x00cd #define MSR_PLATFORM_INFO 0x00ce +#define CPUID_FAULTING_SUPPORT (1UL << 31) #define MSR_NHM_SNB_PKG_CST_CFG_CTL0x00e2 #define NHM_C3_AUTO_DEMOTE (1UL << 25) diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 8cb57df..d502da1 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -24,6 +24,17 @@ enum cpuid_regs { CR_EBX }; +static int supports_cpuid_faulting(void) +{ + unsigned int lo, hi; + + if (rdmsr_safe(MSR_PLATFORM_INFO, , ) == 0 && + (lo & CPUID_FAULTING_SUPPORT)) + return 1; + else + return 0; +} + void init_scattered_cpuid_features(struct cpuinfo_x86 *c) { u32 max_level; @@ -54,4 +65,7 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c) if (regs[cb->reg] & (1 << cb->bit)) set_cpu_cap(c, cb->feature); } + + if (supports_cpuid_faulting()) + set_cpu_cap(c, X86_FEATURE_CPUID_FAULT); } -- 2.9.3
[PATCH v3 1/3] syscalls,x86 Expose arch_prctl on x86-32.
arch_prctl is currently 64-bit only. Wire it up for 32-bits, as a no-op for now. Rename the second arg to a more generic name. Signed-off-by: Kyle Huey <kh...@kylehuey.com> --- arch/x86/entry/syscalls/syscall_32.tbl | 1 + arch/x86/include/asm/proto.h | 5 - arch/x86/kernel/process.c | 10 ++ arch/x86/kernel/process_64.c | 33 + arch/x86/kernel/ptrace.c | 8 arch/x86/um/Makefile | 2 +- arch/x86/um/syscalls_32.c | 7 +++ arch/x86/um/syscalls_64.c | 4 ++-- include/linux/compat.h | 2 ++ 9 files changed, 52 insertions(+), 20 deletions(-) create mode 100644 arch/x86/um/syscalls_32.c diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index f848572..666fa61 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -386,3 +386,4 @@ 377i386copy_file_range sys_copy_file_range 378i386preadv2 sys_preadv2 compat_sys_preadv2 379i386pwritev2sys_pwritev2 compat_sys_pwritev2 +380i386arch_prctl compat_sys_arch_prctl compat_sys_arch_prctl diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h index 9b9b30b..f0e86aa 100644 --- a/arch/x86/include/asm/proto.h +++ b/arch/x86/include/asm/proto.h @@ -30,6 +30,9 @@ void x86_report_nx(void); extern int reboot_force; -long do_arch_prctl(struct task_struct *task, int code, unsigned long addr); +long do_arch_prctl_common(struct task_struct *task, int code, unsigned long addr); +#ifdef CONFIG_X86_64 +long do_arch_prctl_64(struct task_struct *task, int code, unsigned long addr); +#endif #endif /* _ASM_X86_PROTO_H */ diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 62c0b0e..1421451 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -567,3 +567,13 @@ unsigned long get_wchan(struct task_struct *p) } while (count++ < 16 && p->state != TASK_RUNNING); return 0; } + +long do_arch_prctl_common(struct task_struct *task, int code, unsigned long arg2) +{ + return -EINVAL; +} + +asmlinkage long 
compat_sys_arch_prctl(int code, unsigned long arg2) +{ + return do_arch_prctl_common(current, code, arg2); +} diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 63236d8..0e44608 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -196,7 +197,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, (struct user_desc __user *)tls, 0); else #endif - err = do_arch_prctl(p, ARCH_SET_FS, tls); + err = do_arch_prctl_64(p, ARCH_SET_FS, tls); if (err) goto out; } @@ -524,7 +525,7 @@ void set_personality_ia32(bool x32) } EXPORT_SYMBOL_GPL(set_personality_ia32); -long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) +long do_arch_prctl_64(struct task_struct *task, int code, unsigned long arg2) { int ret = 0; int doit = task == current; @@ -532,48 +533,50 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) switch (code) { case ARCH_SET_GS: - if (addr >= TASK_SIZE_MAX) + if (arg2 >= TASK_SIZE_MAX) return -EPERM; cpu = get_cpu(); task->thread.gsindex = 0; - task->thread.gsbase = addr; + task->thread.gsbase = arg2; if (doit) { load_gs_index(0); - ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr); + ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, arg2); } put_cpu(); break; case ARCH_SET_FS: /* Not strictly needed for fs, but do it for symmetry with gs */ - if (addr >= TASK_SIZE_MAX) + if (arg2 >= TASK_SIZE_MAX) return -EPERM; cpu = get_cpu(); task->thread.fsindex = 0; - task->thread.fsbase = addr; + task->thread.fsbase = arg2; if (doit) { /* set the selector to 0 to not confuse __switch_to */ loadsegment(fs, 0); - ret = wrmsrl_safe(MSR_FS_BASE, addr); + ret = wrmsrl_safe(MSR_FS_BASE, arg2); } put_cpu(); break; case ARCH_GET_FS: {
[PATCH v3 2/3] x86 Test and expose CPUID faulting capabilities in /proc/cpuinfo
Signed-off-by: Kyle Huey --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/msr-index.h | 1 + arch/x86/kernel/cpu/scattered.c| 14 ++ 3 files changed, 16 insertions(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 92a8308..78b9d06 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -190,6 +190,7 @@ #define X86_FEATURE_CPB( 7*32+ 2) /* AMD Core Performance Boost */ #define X86_FEATURE_EPB( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ +#define X86_FEATURE_CPUID_FAULT ( 7*32+ 4) /* Intel CPUID faulting */ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 56f4c66..83908d5 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -41,6 +41,7 @@ #define MSR_IA32_PERFCTR1 0x00c2 #define MSR_FSB_FREQ 0x00cd #define MSR_PLATFORM_INFO 0x00ce +#define CPUID_FAULTING_SUPPORT (1UL << 31) #define MSR_NHM_SNB_PKG_CST_CFG_CTL0x00e2 #define NHM_C3_AUTO_DEMOTE (1UL << 25) diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 8cb57df..d502da1 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -24,6 +24,17 @@ enum cpuid_regs { CR_EBX }; +static int supports_cpuid_faulting(void) +{ + unsigned int lo, hi; + + if (rdmsr_safe(MSR_PLATFORM_INFO, , ) == 0 && + (lo & CPUID_FAULTING_SUPPORT)) + return 1; + else + return 0; +} + void init_scattered_cpuid_features(struct cpuinfo_x86 *c) { u32 max_level; @@ -54,4 +65,7 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c) if (regs[cb->reg] & (1 << cb->bit)) set_cpu_cap(c, cb->feature); } + + if (supports_cpuid_faulting()) + set_cpu_cap(c, X86_FEATURE_CPUID_FAULT); } -- 2.9.3
[PATCH v3 1/3] syscalls,x86 Expose arch_prctl on x86-32.
arch_prctl is currently 64-bit only. Wire it up for 32-bits, as a no-op for now. Rename the second arg to a more generic name. Signed-off-by: Kyle Huey --- arch/x86/entry/syscalls/syscall_32.tbl | 1 + arch/x86/include/asm/proto.h | 5 - arch/x86/kernel/process.c | 10 ++ arch/x86/kernel/process_64.c | 33 + arch/x86/kernel/ptrace.c | 8 arch/x86/um/Makefile | 2 +- arch/x86/um/syscalls_32.c | 7 +++ arch/x86/um/syscalls_64.c | 4 ++-- include/linux/compat.h | 2 ++ 9 files changed, 52 insertions(+), 20 deletions(-) create mode 100644 arch/x86/um/syscalls_32.c diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index f848572..666fa61 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -386,3 +386,4 @@ 377i386copy_file_range sys_copy_file_range 378i386preadv2 sys_preadv2 compat_sys_preadv2 379i386pwritev2sys_pwritev2 compat_sys_pwritev2 +380i386arch_prctl compat_sys_arch_prctl compat_sys_arch_prctl diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h index 9b9b30b..f0e86aa 100644 --- a/arch/x86/include/asm/proto.h +++ b/arch/x86/include/asm/proto.h @@ -30,6 +30,9 @@ void x86_report_nx(void); extern int reboot_force; -long do_arch_prctl(struct task_struct *task, int code, unsigned long addr); +long do_arch_prctl_common(struct task_struct *task, int code, unsigned long addr); +#ifdef CONFIG_X86_64 +long do_arch_prctl_64(struct task_struct *task, int code, unsigned long addr); +#endif #endif /* _ASM_X86_PROTO_H */ diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 62c0b0e..1421451 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -567,3 +567,13 @@ unsigned long get_wchan(struct task_struct *p) } while (count++ < 16 && p->state != TASK_RUNNING); return 0; } + +long do_arch_prctl_common(struct task_struct *task, int code, unsigned long arg2) +{ + return -EINVAL; +} + +asmlinkage long compat_sys_arch_prctl(int code, unsigned 
long arg2) +{ + return do_arch_prctl_common(current, code, arg2); +} diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 63236d8..0e44608 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -196,7 +197,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, (struct user_desc __user *)tls, 0); else #endif - err = do_arch_prctl(p, ARCH_SET_FS, tls); + err = do_arch_prctl_64(p, ARCH_SET_FS, tls); if (err) goto out; } @@ -524,7 +525,7 @@ void set_personality_ia32(bool x32) } EXPORT_SYMBOL_GPL(set_personality_ia32); -long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) +long do_arch_prctl_64(struct task_struct *task, int code, unsigned long arg2) { int ret = 0; int doit = task == current; @@ -532,48 +533,50 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) switch (code) { case ARCH_SET_GS: - if (addr >= TASK_SIZE_MAX) + if (arg2 >= TASK_SIZE_MAX) return -EPERM; cpu = get_cpu(); task->thread.gsindex = 0; - task->thread.gsbase = addr; + task->thread.gsbase = arg2; if (doit) { load_gs_index(0); - ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr); + ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, arg2); } put_cpu(); break; case ARCH_SET_FS: /* Not strictly needed for fs, but do it for symmetry with gs */ - if (addr >= TASK_SIZE_MAX) + if (arg2 >= TASK_SIZE_MAX) return -EPERM; cpu = get_cpu(); task->thread.fsindex = 0; - task->thread.fsbase = addr; + task->thread.fsbase = arg2; if (doit) { /* set the selector to 0 to not confuse __switch_to */ loadsegment(fs, 0); - ret = wrmsrl_safe(MSR_FS_BASE, addr); + ret = wrmsrl_safe(MSR_FS_BASE, arg2); } put_cpu(); break; case ARCH_GET_FS: { unsigned long base; +
[PATCH v3] arch_prctl,x86 Add ARCH_[GET|SET]_CPUID for controlling the CPUID instruction
rr (http://rr-project.org/), a userspace record-and-replay reverse-execution debugger, would like to trap and emulate the CPUID instruction. This would allow us to a) mask away certain hardware features that rr does not support (e.g. RDRAND) and b) enable trace portability across machines by providing constant results. The following changes have been made since v2. Patch 1: - Use of compat_sys_arch_prctl and separate do_arch_prctl_[common|64] functions to separate generic and 64-bit only arch_prctls. Patch 2: - The hack to suppress the mistakenly advertised CPUID faulting support in Xen guests is removed. Doing this for both PV and HVM guests is quite tricky, and likely more trouble than it's worth. Instead I'll submit a patch to Xen. Patch 3: - TIF_NOCPUID is now dropped on exec. I added the arch_post_exec hook as I didn't see any existing place to run arch-specific code during exec. The test is updated for the new exec behavior.
[PATCH v3] arch_prctl,x86 Add ARCH_[GET|SET]_CPUID for controlling the CPUID instruction
rr (http://rr-project.org/), a userspace record-and-replay reverse-execution debugger, would like to trap and emulate the CPUID instruction. This would allow us to a) mask away certain hardware features that rr does not support (e.g. RDRAND) and b) enable trace portability across machines by providing constant results. The following changes have been made since v2. Patch 1: - Use of compat_sys_arch_prctl and separate do_arch_prctl_[common|64] functions to separate generic and 64-bit only arch_prctls. Patch 2: - The hack to suppress the mistakenly advertised CPUID faulting support in Xen guests is removed. Doing this for both PV and HVM guests is quite tricky, and likely more trouble than it's worth. Instead I'll submit a patch to Xen. Patch 3: - TIF_NOCPUID is now dropped on exec. I added the arch_post_exec hook as I didn't see any existing place to run arch-specific code during exec. The test is updated for the new exec behavior.
Re: [Xen-devel] [PATCH v2 2/3] x86 Test and expose CPUID faulting capabilities in /proc/cpuinfo
On Thu, Sep 15, 2016 at 3:25 AM, Jan Beulich <jbeul...@suse.com> wrote: >>>> On 15.09.16 at 12:05, <david.vra...@citrix.com> wrote: >> On 14/09/16 22:01, Kyle Huey wrote: >>> Xen advertises the underlying support for CPUID faulting but does not pass >>> through writes to the relevant MSR, nor does it virtualize it, so it does >>> not actually work. For now mask off the relevant bit on MSR_PLATFORM_INFO. >> >> Could you clarify in the commit message that it is PV guests that are >> affected. > > What makes you think HVM ones aren't? Testing on EC2, HVM guests are affected as well. Not sure what to do about that. - Kyle
Re: [Xen-devel] [PATCH v2 2/3] x86 Test and expose CPUID faulting capabilities in /proc/cpuinfo
On Thu, Sep 15, 2016 at 3:25 AM, Jan Beulich wrote: >>>> On 15.09.16 at 12:05, wrote: >> On 14/09/16 22:01, Kyle Huey wrote: >>> Xen advertises the underlying support for CPUID faulting but does not pass >>> through writes to the relevant MSR, nor does it virtualize it, so it does >>> not actually work. For now mask off the relevant bit on MSR_PLATFORM_INFO. >> >> Could you clarify in the commit message that it is PV guests that are >> affected. > > What makes you think HVM ones aren't? Testing on EC2, HVM guests are affected as well. Not sure what to do about that. - Kyle
Re: [PATCH v2 2/3] x86 Test and expose CPUID faulting capabilities in /proc/cpuinfo
On Wed, Sep 14, 2016 at 6:17 PM, Andy Lutomirski <l...@amacapital.net> wrote: > On Wed, Sep 14, 2016 at 3:03 PM, Kyle Huey <m...@kylehuey.com> wrote: >> On Wed, Sep 14, 2016 at 2:35 PM, Dave Hansen >> <dave.han...@linux.intel.com> wrote: >>> On 09/14/2016 02:01 PM, Kyle Huey wrote: > >>> Is any of this useful to optimize away at compile-time? We have config >>> options for when we're running as a guest, and this seems like a feature >>> that isn't available when running on bare metal. >> >> On the contrary, this is only available when we're on bare metal. >> Neither Xen nor KVM virtualize CPUID faulting (although KVM correctly >> suppresses MSR_PLATFORM_INFO's report of support for it). > > KVM could easily support this. If rr starts using it, I think KVM > *should* add support, possibly even for older CPUs that don't support > the feature in hardware. > > It's too bad that x86 doesn't give us the instruction bytes on a > fault. Otherwise we could lazily switch this feature. We are *very* interested in having KVM and Xen support virtualization of this feature. I am planning to work on KVM after I get this series of patches in :) - Kyle
Re: [PATCH v2 2/3] x86 Test and expose CPUID faulting capabilities in /proc/cpuinfo
On Wed, Sep 14, 2016 at 6:17 PM, Andy Lutomirski wrote: > On Wed, Sep 14, 2016 at 3:03 PM, Kyle Huey wrote: >> On Wed, Sep 14, 2016 at 2:35 PM, Dave Hansen >> wrote: >>> On 09/14/2016 02:01 PM, Kyle Huey wrote: > >>> Is any of this useful to optimize away at compile-time? We have config >>> options for when we're running as a guest, and this seems like a feature >>> that isn't available when running on bare metal. >> >> On the contrary, this is only available when we're on bare metal. >> Neither Xen nor KVM virtualize CPUID faulting (although KVM correctly >> suppresses MSR_PLATFORM_INFO's report of support for it). > > KVM could easily support this. If rr starts using it, I think KVM > *should* add support, possibly even for older CPUs that don't support > the feature in hardware. > > It's too bad that x86 doesn't give us the instruction bytes on a > fault. Otherwise we could lazily switch this feature. We are *very* interested in having KVM and Xen support virtualization of this feature. I am planning to work on KVM after I get this series of patches in :) - Kyle
Re: [PATCH v2 3/3] x86,arch_prctl Add ARCH_[GET|SET]_CPUID for controlling the CPUID instruction
On Wed, Sep 14, 2016 at 6:54 PM, Andy Lutomirski <l...@amacapital.net> wrote: > On Wed, Sep 14, 2016 at 6:47 PM, Kyle Huey <m...@kylehuey.com> wrote: >> On Wed, Sep 14, 2016 at 6:29 PM, Andy Lutomirski <l...@amacapital.net> wrote: >>> On Wed, Sep 14, 2016 at 2:01 PM, Kyle Huey <m...@kylehuey.com> wrote: > >>>> + >>>> +int set_cpuid_mode(struct task_struct *task, unsigned long val) >>>> +{ >>>> + /* Only disable/enable_cpuid() if it is supported on this >>>> hardware. */ >>>> + bool cpuid_fault_supported = >>>> static_cpu_has(X86_FEATURE_CPUID_FAULT); >>>> + >>>> + if (val == ARCH_CPUID_ENABLE && cpuid_fault_supported) { >>>> + if (task_no_new_privs(task) && >>>> test_thread_flag(TIF_NOCPUID)) >>>> + return -EACCES; >>> >>> This check seems confused. If this flag were preserved on execve, >>> it's the SIGSEGV mode that would need the check. >> >> Not sure I follow this one. no_new_privs should block transitions >> from SIGSEGV to ENABLE, right? That's what this check does. > > It's the other way around entirely: if you make a change to your > process context such that a subseqently execve()'d setuid program > might malfunction, you've just done something dangerous. This is only > okay, at least in newly-supported instances, if you are either > privileged or if you have no_new_privs set. Having privilege makes it > okay: unprivileged programs can't use it to subvert setuid programs. > no_new_privs makes it safe as well: if no_new_privs is set, you can't > gain privilege via execve(), so there's no attack surface. So, if you > have execve() keep ARCH_CPUID_SIGSEGV set, then setting it that way in > the first place should require privilege or no_new_privs. > > I personally favor resetting to ARCH_CPUID_ENABLE on execve() and not > worrying about no_new_privs. > > Does that make sense? Yes, ok. Robert and I agree that resetting does make the most sense. 
Using this usefully requires a ptrace supervisor (to catch the traps), which can easily inject a call to arch_prctl to reenable ARCH_CPUID_SIGSEGV when desired. - Kyle
Re: [PATCH v2 3/3] x86,arch_prctl Add ARCH_[GET|SET]_CPUID for controlling the CPUID instruction
On Wed, Sep 14, 2016 at 6:54 PM, Andy Lutomirski wrote: > On Wed, Sep 14, 2016 at 6:47 PM, Kyle Huey wrote: >> On Wed, Sep 14, 2016 at 6:29 PM, Andy Lutomirski wrote: >>> On Wed, Sep 14, 2016 at 2:01 PM, Kyle Huey wrote: > >>>> + >>>> +int set_cpuid_mode(struct task_struct *task, unsigned long val) >>>> +{ >>>> + /* Only disable/enable_cpuid() if it is supported on this >>>> hardware. */ >>>> + bool cpuid_fault_supported = >>>> static_cpu_has(X86_FEATURE_CPUID_FAULT); >>>> + >>>> + if (val == ARCH_CPUID_ENABLE && cpuid_fault_supported) { >>>> + if (task_no_new_privs(task) && >>>> test_thread_flag(TIF_NOCPUID)) >>>> + return -EACCES; >>> >>> This check seems confused. If this flag were preserved on execve, >>> it's the SIGSEGV mode that would need the check. >> >> Not sure I follow this one. no_new_privs should block transitions >> from SIGSEGV to ENABLE, right? That's what this check does. > > It's the other way around entirely: if you make a change to your > process context such that a subseqently execve()'d setuid program > might malfunction, you've just done something dangerous. This is only > okay, at least in newly-supported instances, if you are either > privileged or if you have no_new_privs set. Having privilege makes it > okay: unprivileged programs can't use it to subvert setuid programs. > no_new_privs makes it safe as well: if no_new_privs is set, you can't > gain privilege via execve(), so there's no attack surface. So, if you > have execve() keep ARCH_CPUID_SIGSEGV set, then setting it that way in > the first place should require privilege or no_new_privs. > > I personally favor resetting to ARCH_CPUID_ENABLE on execve() and not > worrying about no_new_privs. > > Does that make sense? Yes, ok. Robert and I agree that resetting does make the most sense. Using this usefully requires a ptrace supervisor (to catch the traps), which can easily inject a call to arch_prctl to reenable ARCH_CPUID_SIGSEGV when desired. - Kyle
Re: [PATCH v2 3/3] x86,arch_prctl Add ARCH_[GET|SET]_CPUID for controlling the CPUID instruction
On Wed, Sep 14, 2016 at 6:29 PM, Andy Lutomirski <l...@amacapital.net> wrote: > On Wed, Sep 14, 2016 at 2:01 PM, Kyle Huey <m...@kylehuey.com> wrote: >> Intel supports faulting on the CPUID instruction in newer processors. Bit >> 31 of MSR_PLATFORM_INFO advertises support for this feature. It is >> documented in detail in Section 2.3.2 of >> http://www.intel.com/content/dam/www/public/us/en/documents/application-notes/virtualization-technology-flexmigration-application-note.pdf >> >> Signed-off-by: Kyle Huey <kh...@kylehuey.com> >> --- >> arch/x86/include/asm/msr-index.h | 1 + >> arch/x86/include/asm/thread_info.h| 4 +- >> arch/x86/include/uapi/asm/prctl.h | 6 + >> arch/x86/kernel/process.c | 81 +++ >> tools/testing/selftests/x86/Makefile | 2 +- >> tools/testing/selftests/x86/cpuid-fault.c | 223 >> ++ >> 6 files changed, 315 insertions(+), 2 deletions(-) >> create mode 100644 tools/testing/selftests/x86/cpuid-fault.c >> >> diff --git a/arch/x86/include/asm/msr-index.h >> b/arch/x86/include/asm/msr-index.h >> index 83908d5..4aebec2 100644 >> --- a/arch/x86/include/asm/msr-index.h >> +++ b/arch/x86/include/asm/msr-index.h >> @@ -53,6 +53,7 @@ >> #define MSR_MTRRcap0x00fe >> #define MSR_IA32_BBL_CR_CTL0x0119 >> #define MSR_IA32_BBL_CR_CTL3 0x011e >> +#define MSR_MISC_FEATURES_ENABLES 0x0140 >> >> #define MSR_IA32_SYSENTER_CS 0x0174 >> #define MSR_IA32_SYSENTER_ESP 0x0175 >> diff --git a/arch/x86/include/asm/thread_info.h >> b/arch/x86/include/asm/thread_info.h >> index 8b7c8d8..ec93976 100644 >> --- a/arch/x86/include/asm/thread_info.h >> +++ b/arch/x86/include/asm/thread_info.h >> @@ -93,6 +93,7 @@ struct thread_info { >> #define TIF_SECCOMP8 /* secure computing */ >> #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return >> */ >> #define TIF_UPROBE 12 /* breakpointed or singlestepping */ >> +#define TIF_NOCPUID15 /* CPUID is not accessible in >> userland */ >> #define TIF_NOTSC 16 /* TSC is not accessible in userland >> */ >> #define TIF_IA32 17 /* 
IA32 compatibility process */ >> #define TIF_FORK 18 /* ret_from_fork */ >> @@ -117,6 +118,7 @@ struct thread_info { >> #define _TIF_SECCOMP (1 << TIF_SECCOMP) >> #define _TIF_USER_RETURN_NOTIFY(1 << TIF_USER_RETURN_NOTIFY) >> #define _TIF_UPROBE(1 << TIF_UPROBE) >> +#define _TIF_NOCPUID (1 << TIF_NOCPUID) >> #define _TIF_NOTSC (1 << TIF_NOTSC) >> #define _TIF_IA32 (1 << TIF_IA32) >> #define _TIF_FORK (1 << TIF_FORK) >> @@ -146,7 +148,7 @@ struct thread_info { >> >> /* flags to check in __switch_to() */ >> #define _TIF_WORK_CTXSW >>\ >> - (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP) >> + (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP) >> >> #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) >> #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) >> diff --git a/arch/x86/include/uapi/asm/prctl.h >> b/arch/x86/include/uapi/asm/prctl.h >> index 3ac5032..c087e55 100644 >> --- a/arch/x86/include/uapi/asm/prctl.h >> +++ b/arch/x86/include/uapi/asm/prctl.h >> @@ -6,4 +6,10 @@ >> #define ARCH_GET_FS 0x1003 >> #define ARCH_GET_GS 0x1004 >> >> +/* Get/set the process' ability to use the CPUID instruction */ >> +#define ARCH_GET_CPUID 0x1005 >> +#define ARCH_SET_CPUID 0x1006 >> +# define ARCH_CPUID_ENABLE 1 /* allow the use of the >> CPUID instruction */ >> +# define ARCH_CPUID_SIGSEGV2 /* throw a SIGSEGV instead >> of reading the CPUID */ >> + >> #endif /* _ASM_X86_PRCTL_H */ >> diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c >> index 0f857c3..5fc8e9d 100644 >> --- a/arch/x86/kernel/process.c >> +++ b/arch/x86/kernel/process.c >> @@ -193,6 +193,69 @@ int set_tsc_mode(unsigned int val) >> return 0; >> } >> >> +static void switch_cpuid_faulting(bool on) >> +{ >> + if (on) >> + msr_set_bit(MSR_MISC_FEATURES_ENABLES, 0); >> + else >> + ms
Re: [PATCH v2 3/3] x86,arch_prctl Add ARCH_[GET|SET]_CPUID for controlling the CPUID instruction
On Wed, Sep 14, 2016 at 6:29 PM, Andy Lutomirski wrote: > On Wed, Sep 14, 2016 at 2:01 PM, Kyle Huey wrote: >> Intel supports faulting on the CPUID instruction in newer processors. Bit >> 31 of MSR_PLATFORM_INFO advertises support for this feature. It is >> documented in detail in Section 2.3.2 of >> http://www.intel.com/content/dam/www/public/us/en/documents/application-notes/virtualization-technology-flexmigration-application-note.pdf >> >> Signed-off-by: Kyle Huey >> --- >> arch/x86/include/asm/msr-index.h | 1 + >> arch/x86/include/asm/thread_info.h| 4 +- >> arch/x86/include/uapi/asm/prctl.h | 6 + >> arch/x86/kernel/process.c | 81 +++ >> tools/testing/selftests/x86/Makefile | 2 +- >> tools/testing/selftests/x86/cpuid-fault.c | 223 >> ++ >> 6 files changed, 315 insertions(+), 2 deletions(-) >> create mode 100644 tools/testing/selftests/x86/cpuid-fault.c >> >> diff --git a/arch/x86/include/asm/msr-index.h >> b/arch/x86/include/asm/msr-index.h >> index 83908d5..4aebec2 100644 >> --- a/arch/x86/include/asm/msr-index.h >> +++ b/arch/x86/include/asm/msr-index.h >> @@ -53,6 +53,7 @@ >> #define MSR_MTRRcap0x00fe >> #define MSR_IA32_BBL_CR_CTL0x0119 >> #define MSR_IA32_BBL_CR_CTL3 0x011e >> +#define MSR_MISC_FEATURES_ENABLES 0x0140 >> >> #define MSR_IA32_SYSENTER_CS 0x0174 >> #define MSR_IA32_SYSENTER_ESP 0x0175 >> diff --git a/arch/x86/include/asm/thread_info.h >> b/arch/x86/include/asm/thread_info.h >> index 8b7c8d8..ec93976 100644 >> --- a/arch/x86/include/asm/thread_info.h >> +++ b/arch/x86/include/asm/thread_info.h >> @@ -93,6 +93,7 @@ struct thread_info { >> #define TIF_SECCOMP8 /* secure computing */ >> #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return >> */ >> #define TIF_UPROBE 12 /* breakpointed or singlestepping */ >> +#define TIF_NOCPUID15 /* CPUID is not accessible in >> userland */ >> #define TIF_NOTSC 16 /* TSC is not accessible in userland >> */ >> #define TIF_IA32 17 /* IA32 compatibility process */ >> #define TIF_FORK 18 /* 
ret_from_fork */ >> @@ -117,6 +118,7 @@ struct thread_info { >> #define _TIF_SECCOMP (1 << TIF_SECCOMP) >> #define _TIF_USER_RETURN_NOTIFY(1 << TIF_USER_RETURN_NOTIFY) >> #define _TIF_UPROBE(1 << TIF_UPROBE) >> +#define _TIF_NOCPUID (1 << TIF_NOCPUID) >> #define _TIF_NOTSC (1 << TIF_NOTSC) >> #define _TIF_IA32 (1 << TIF_IA32) >> #define _TIF_FORK (1 << TIF_FORK) >> @@ -146,7 +148,7 @@ struct thread_info { >> >> /* flags to check in __switch_to() */ >> #define _TIF_WORK_CTXSW >>\ >> - (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP) >> + (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP) >> >> #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) >> #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) >> diff --git a/arch/x86/include/uapi/asm/prctl.h >> b/arch/x86/include/uapi/asm/prctl.h >> index 3ac5032..c087e55 100644 >> --- a/arch/x86/include/uapi/asm/prctl.h >> +++ b/arch/x86/include/uapi/asm/prctl.h >> @@ -6,4 +6,10 @@ >> #define ARCH_GET_FS 0x1003 >> #define ARCH_GET_GS 0x1004 >> >> +/* Get/set the process' ability to use the CPUID instruction */ >> +#define ARCH_GET_CPUID 0x1005 >> +#define ARCH_SET_CPUID 0x1006 >> +# define ARCH_CPUID_ENABLE 1 /* allow the use of the >> CPUID instruction */ >> +# define ARCH_CPUID_SIGSEGV2 /* throw a SIGSEGV instead >> of reading the CPUID */ >> + >> #endif /* _ASM_X86_PRCTL_H */ >> diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c >> index 0f857c3..5fc8e9d 100644 >> --- a/arch/x86/kernel/process.c >> +++ b/arch/x86/kernel/process.c >> @@ -193,6 +193,69 @@ int set_tsc_mode(unsigned int val) >> return 0; >> } >> >> +static void switch_cpuid_faulting(bool on) >> +{ >> + if (on) >> + msr_set_bit(MSR_MISC_FEATURES_ENABLES, 0); >> + else >> + msr_clear_bit(MSR_MISC_FEATURES_ENABLES, 0); >> +} &
Re: [RESEND][PATCH v2 1/3] syscalls,x86 Expose arch_prctl on x86-32.
On Wed, Sep 14, 2016 at 3:29 PM, Dmitry Safonov <0x7f454...@gmail.com> wrote: > 2016-09-15 1:08 GMT+03:00 Kyle Huey <m...@kylehuey.com>: >> On Wed, Sep 14, 2016 at 2:59 PM, Dmitry Safonov <0x7f454...@gmail.com> wrote: >>> 2016-09-15 0:08 GMT+03:00 Kyle Huey <m...@kylehuey.com>: >>>> Signed-off-by: Kyle Huey <kh...@kylehuey.com> >>>> --- >>>> arch/x86/entry/syscalls/syscall_32.tbl | 1 + >>>> arch/x86/kernel/process.c | 80 >>>> ++ >>>> arch/x86/kernel/process_64.c | 66 >>>> 3 files changed, 81 insertions(+), 66 deletions(-) >>>> >>>> diff --git a/arch/x86/entry/syscalls/syscall_32.tbl >>>> b/arch/x86/entry/syscalls/syscall_32.tbl >>>> index f848572..3b6965b 100644 >>>> --- a/arch/x86/entry/syscalls/syscall_32.tbl >>>> +++ b/arch/x86/entry/syscalls/syscall_32.tbl >>>> @@ -386,3 +386,4 @@ >>>> 377i386copy_file_range sys_copy_file_range >>>> 378i386preadv2 sys_preadv2 >>>> compat_sys_preadv2 >>>> 379i386pwritev2sys_pwritev2 >>>> compat_sys_pwritev2 >>>> +380i386arch_prctl sys_arch_prctl >>> >>> Why not define it as other 32-bit syscalls with compat_sys_ prefix >>> with the help of COMPAT_SYSCALL_DEFINE() macro? >>> Then you could omit code moving, drop is_32 helper. >>> I miss something obvious? >> >> The code will have to move regardless, because right now do_arch_prctl >> is in process-64.c which is only compiled on a 64 bit kernel. > > Why? This code will not work anyway for 32-bit in your patches > by obscuring it with is_32. > >> As I told Dave Hansen in the non-RESEND thread (not sure why >> git-send-email didn't put him in this one ...) I considered doing a >> compat_sys_arch_prctl that would reject the relevant arch_prctls that >> don't apply on 32 bit but I didn't see any prior art for it (in my >> admittedly non-exhaustive search). > > Well, you could just add to 64-bit do_arch_prctl() new cases for your > prctls - that would be just a two-lines for each new prctl. 
> Also add compat_sys_ and define *only* what's needed there for you, > do not add there ARCH_{SET,GET}_{FS,GS}. > Does this make sense? Yeah, I should have spoken more clearly. We'll need some implementation of the syscall outside of process_64.c. But we could leave the 64 bit specific stuff behind in it. Dave Hansen suggested something similar (though without the compat_sys_ bit) > FWIW, I don't think it would be horrible to leave the existing > do_arch_prctl() code in process_64.h and call it > do_64_bit_only_something_arch_prctl(), and only call in to it from the > generic do_arch_prctl(). You really have one reason for all the "if > (is_32)"'s and it would be nice to document why in one single place. - Kyle
Re: [RESEND][PATCH v2 1/3] syscalls,x86 Expose arch_prctl on x86-32.
On Wed, Sep 14, 2016 at 3:29 PM, Dmitry Safonov <0x7f454...@gmail.com> wrote: > 2016-09-15 1:08 GMT+03:00 Kyle Huey : >> On Wed, Sep 14, 2016 at 2:59 PM, Dmitry Safonov <0x7f454...@gmail.com> wrote: >>> 2016-09-15 0:08 GMT+03:00 Kyle Huey : >>>> Signed-off-by: Kyle Huey >>>> --- >>>> arch/x86/entry/syscalls/syscall_32.tbl | 1 + >>>> arch/x86/kernel/process.c | 80 >>>> ++ >>>> arch/x86/kernel/process_64.c | 66 >>>> 3 files changed, 81 insertions(+), 66 deletions(-) >>>> >>>> diff --git a/arch/x86/entry/syscalls/syscall_32.tbl >>>> b/arch/x86/entry/syscalls/syscall_32.tbl >>>> index f848572..3b6965b 100644 >>>> --- a/arch/x86/entry/syscalls/syscall_32.tbl >>>> +++ b/arch/x86/entry/syscalls/syscall_32.tbl >>>> @@ -386,3 +386,4 @@ >>>> 377i386copy_file_range sys_copy_file_range >>>> 378i386preadv2 sys_preadv2 >>>> compat_sys_preadv2 >>>> 379i386pwritev2sys_pwritev2 >>>> compat_sys_pwritev2 >>>> +380i386arch_prctl sys_arch_prctl >>> >>> Why not define it as other 32-bit syscalls with compat_sys_ prefix >>> with the help of COMPAT_SYSCALL_DEFINE() macro? >>> Then you could omit code moving, drop is_32 helper. >>> I miss something obvious? >> >> The code will have to move regardless, because right now do_arch_prctl >> is in process-64.c which is only compiled on a 64 bit kernel. > > Why? This code will not work anyway for 32-bit in your patches > by obscuring it with is_32. > >> As I told Dave Hansen in the non-RESEND thread (not sure why >> git-send-email didn't put him in this one ...) I considered doing a >> compat_sys_arch_prctl that would reject the relevant arch_prctls that >> don't apply on 32 bit but I didn't see any prior art for it (in my >> admittedly non-exhaustive search). > > Well, you could just add to 64-bit do_arch_prctl() new cases for your > prctls - that would be just a two-lines for each new prctl. > Also add compat_sys_ and define *only* what's needed there for you, > do not add there ARCH_{SET,GET}_{FS,GS}. > Does this make sense? 
Yeah, I should have spoken more clearly. We'll need some implementation of the syscall outside of process_64.c. But we could leave the 64 bit specific stuff behind in it. Dave Hansen suggested something similar (though without the compat_sys_ bit) > FWIW, I don't think it would be horrible to leave the existing > do_arch_prctl() code in process_64.h and call it > do_64_bit_only_something_arch_prctl(), and only call in to it from the > generic do_arch_prctl(). You really have one reason for all the "if > (is_32)"'s and it would be nice to document why in one single place. - Kyle
Re: [RESEND][PATCH v2 1/3] syscalls,x86 Expose arch_prctl on x86-32.
On Wed, Sep 14, 2016 at 2:59 PM, Dmitry Safonov <0x7f454...@gmail.com> wrote: > 2016-09-15 0:08 GMT+03:00 Kyle Huey <m...@kylehuey.com>: >> Signed-off-by: Kyle Huey <kh...@kylehuey.com> >> --- >> arch/x86/entry/syscalls/syscall_32.tbl | 1 + >> arch/x86/kernel/process.c | 80 >> ++ >> arch/x86/kernel/process_64.c | 66 >> 3 files changed, 81 insertions(+), 66 deletions(-) >> >> diff --git a/arch/x86/entry/syscalls/syscall_32.tbl >> b/arch/x86/entry/syscalls/syscall_32.tbl >> index f848572..3b6965b 100644 >> --- a/arch/x86/entry/syscalls/syscall_32.tbl >> +++ b/arch/x86/entry/syscalls/syscall_32.tbl >> @@ -386,3 +386,4 @@ >> 377i386copy_file_range sys_copy_file_range >> 378i386preadv2 sys_preadv2 >> compat_sys_preadv2 >> 379i386pwritev2sys_pwritev2 >> compat_sys_pwritev2 >> +380i386arch_prctl sys_arch_prctl > > Why not define it as other 32-bit syscalls with compat_sys_ prefix > with the help of COMPAT_SYSCALL_DEFINE() macro? > Then you could omit code moving, drop is_32 helper. > I miss something obvious? The code will have to move regardless, because right now do_arch_prctl is in process-64.c which is only compiled on a 64 bit kernel. As I told Dave Hansen in the non-RESEND thread (not sure why git-send-email didn't put him in this one ...) I considered doing a compat_sys_arch_prctl that would reject the relevant arch_prctls that don't apply on 32 bit but I didn't see any prior art for it (in my admittedly non-exhaustive search). - Kyle
Re: [RESEND][PATCH v2 1/3] syscalls,x86 Expose arch_prctl on x86-32.
On Wed, Sep 14, 2016 at 2:59 PM, Dmitry Safonov <0x7f454...@gmail.com> wrote: > 2016-09-15 0:08 GMT+03:00 Kyle Huey : >> Signed-off-by: Kyle Huey >> --- >> arch/x86/entry/syscalls/syscall_32.tbl | 1 + >> arch/x86/kernel/process.c | 80 >> ++ >> arch/x86/kernel/process_64.c | 66 >> 3 files changed, 81 insertions(+), 66 deletions(-) >> >> diff --git a/arch/x86/entry/syscalls/syscall_32.tbl >> b/arch/x86/entry/syscalls/syscall_32.tbl >> index f848572..3b6965b 100644 >> --- a/arch/x86/entry/syscalls/syscall_32.tbl >> +++ b/arch/x86/entry/syscalls/syscall_32.tbl >> @@ -386,3 +386,4 @@ >> 377i386copy_file_range sys_copy_file_range >> 378i386preadv2 sys_preadv2 >> compat_sys_preadv2 >> 379i386pwritev2sys_pwritev2 >> compat_sys_pwritev2 >> +380i386arch_prctl sys_arch_prctl > > Why not define it as other 32-bit syscalls with compat_sys_ prefix > with the help of COMPAT_SYSCALL_DEFINE() macro? > Then you could omit code moving, drop is_32 helper. > I miss something obvious? The code will have to move regardless, because right now do_arch_prctl is in process-64.c which is only compiled on a 64 bit kernel. As I told Dave Hansen in the non-RESEND thread (not sure why git-send-email didn't put him in this one ...) I considered doing a compat_sys_arch_prctl that would reject the relevant arch_prctls that don't apply on 32 bit but I didn't see any prior art for it (in my admittedly non-exhaustive search). - Kyle
Re: [PATCH v2 2/3] x86 Test and expose CPUID faulting capabilities in /proc/cpuinfo
On Wed, Sep 14, 2016 at 2:35 PM, Dave Hansen <dave.han...@linux.intel.com> wrote: > On 09/14/2016 02:01 PM, Kyle Huey wrote: >> Xen advertises the underlying support for CPUID faulting but does not pass >> through writes to the relevant MSR, nor does it virtualize it, so it does >> not actually work. For now mask off the relevant bit on MSR_PLATFORM_INFO. > > That needs to make it into a comment, please. > > That *is* a Xen bug, right? Yes. Xen needs to either not advertise the feature or actually support it. This came up in the prior thread ("[PATCH] prctl,x86 Add PR_[GET|SET]_CPUID for controlling the CPUID instruction."). >> Signed-off-by: Kyle Huey <kh...@kylehuey.com> >> --- >> arch/x86/include/asm/cpufeatures.h | 1 + >> arch/x86/include/asm/msr-index.h | 1 + >> arch/x86/kernel/cpu/scattered.c| 14 ++ >> arch/x86/xen/enlighten.c | 3 +++ >> 4 files changed, 19 insertions(+) >> >> diff --git a/arch/x86/include/asm/cpufeatures.h >> b/arch/x86/include/asm/cpufeatures.h >> index 92a8308..78b9d06 100644 >> --- a/arch/x86/include/asm/cpufeatures.h >> +++ b/arch/x86/include/asm/cpufeatures.h >> @@ -190,6 +190,7 @@ >> >> #define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance >> Boost */ >> #define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS >> support */ >> +#define X86_FEATURE_CPUID_FAULT ( 7*32+ 4) /* Intel CPUID faulting */ >> >> #define X86_FEATURE_HW_PSTATE( 7*32+ 8) /* AMD HW-PState */ >> #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ >> diff --git a/arch/x86/include/asm/msr-index.h >> b/arch/x86/include/asm/msr-index.h >> index 56f4c66..83908d5 100644 >> --- a/arch/x86/include/asm/msr-index.h >> +++ b/arch/x86/include/asm/msr-index.h >> @@ -41,6 +41,7 @@ >> #define MSR_IA32_PERFCTR10x00c2 >> #define MSR_FSB_FREQ 0x00cd >> #define MSR_PLATFORM_INFO0x00ce >> +#define CPUID_FAULTING_SUPPORT (1UL << 31) >> >> #define MSR_NHM_SNB_PKG_CST_CFG_CTL 0x00e2 >> #define NHM_C3_AUTO_DEMOTE (1UL << 25) >> diff --git 
a/arch/x86/kernel/cpu/scattered.c >> b/arch/x86/kernel/cpu/scattered.c >> index 8cb57df..d502da1 100644 >> --- a/arch/x86/kernel/cpu/scattered.c >> +++ b/arch/x86/kernel/cpu/scattered.c >> @@ -24,6 +24,17 @@ enum cpuid_regs { >> CR_EBX >> }; >> >> +static int supports_cpuid_faulting(void) >> +{ >> + unsigned int lo, hi; >> + >> + if (rdmsr_safe(MSR_PLATFORM_INFO, , ) == 0 && >> + (lo & CPUID_FAULTING_SUPPORT)) >> + return 1; >> + else >> + return 0; >> +} > > Is any of this useful to optimize away at compile-time? We have config > options for when we're running as a guest, and this seems like a feature > that isn't available when running on bare metal. On the contrary, this is only available when we're on bare metal. Neither Xen nor KVM virtualize CPUID faulting (although KVM correctly suppresses MSR_PLATFORM_INFO's report of support for it). >> diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c >> index b86ebb1..2c47f0c 100644 >> --- a/arch/x86/xen/enlighten.c >> +++ b/arch/x86/xen/enlighten.c >> @@ -1050,6 +1050,9 @@ static u64 xen_read_msr_safe(unsigned int msr, int >> *err) >> #endif >> val &= ~X2APIC_ENABLE; >> break; >> + case MSR_PLATFORM_INFO: >> + val &= ~CPUID_FAULTING_SUPPORT; >> + break; >> } >> return val; >> } > > Does this mean that Xen guests effectively can't take advantage of this > feature? Yes. - Kyle
Re: [PATCH v2 2/3] x86 Test and expose CPUID faulting capabilities in /proc/cpuinfo
On Wed, Sep 14, 2016 at 2:35 PM, Dave Hansen wrote: > On 09/14/2016 02:01 PM, Kyle Huey wrote: >> Xen advertises the underlying support for CPUID faulting but does not pass >> through writes to the relevant MSR, nor does it virtualize it, so it does >> not actually work. For now mask off the relevant bit on MSR_PLATFORM_INFO. > > That needs to make it into a comment, please. > > That *is* a Xen bug, right? Yes. Xen needs to either not advertise the feature or actually support it. This came up in the prior thread ("[PATCH] prctl,x86 Add PR_[GET|SET]_CPUID for controlling the CPUID instruction."). >> Signed-off-by: Kyle Huey >> --- >> arch/x86/include/asm/cpufeatures.h | 1 + >> arch/x86/include/asm/msr-index.h | 1 + >> arch/x86/kernel/cpu/scattered.c| 14 ++ >> arch/x86/xen/enlighten.c | 3 +++ >> 4 files changed, 19 insertions(+) >> >> diff --git a/arch/x86/include/asm/cpufeatures.h >> b/arch/x86/include/asm/cpufeatures.h >> index 92a8308..78b9d06 100644 >> --- a/arch/x86/include/asm/cpufeatures.h >> +++ b/arch/x86/include/asm/cpufeatures.h >> @@ -190,6 +190,7 @@ >> >> #define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance >> Boost */ >> #define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS >> support */ >> +#define X86_FEATURE_CPUID_FAULT ( 7*32+ 4) /* Intel CPUID faulting */ >> >> #define X86_FEATURE_HW_PSTATE( 7*32+ 8) /* AMD HW-PState */ >> #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ >> diff --git a/arch/x86/include/asm/msr-index.h >> b/arch/x86/include/asm/msr-index.h >> index 56f4c66..83908d5 100644 >> --- a/arch/x86/include/asm/msr-index.h >> +++ b/arch/x86/include/asm/msr-index.h >> @@ -41,6 +41,7 @@ >> #define MSR_IA32_PERFCTR10x00c2 >> #define MSR_FSB_FREQ 0x00cd >> #define MSR_PLATFORM_INFO0x00ce >> +#define CPUID_FAULTING_SUPPORT (1UL << 31) >> >> #define MSR_NHM_SNB_PKG_CST_CFG_CTL 0x00e2 >> #define NHM_C3_AUTO_DEMOTE (1UL << 25) >> diff --git a/arch/x86/kernel/cpu/scattered.c >> b/arch/x86/kernel/cpu/scattered.c 
>> index 8cb57df..d502da1 100644 >> --- a/arch/x86/kernel/cpu/scattered.c >> +++ b/arch/x86/kernel/cpu/scattered.c >> @@ -24,6 +24,17 @@ enum cpuid_regs { >> CR_EBX >> }; >> >> +static int supports_cpuid_faulting(void) >> +{ >> + unsigned int lo, hi; >> + >> + if (rdmsr_safe(MSR_PLATFORM_INFO, , ) == 0 && >> + (lo & CPUID_FAULTING_SUPPORT)) >> + return 1; >> + else >> + return 0; >> +} > > Is any of this useful to optimize away at compile-time? We have config > options for when we're running as a guest, and this seems like a feature > that isn't available when running on bare metal. On the contrary, this is only available when we're on bare metal. Neither Xen nor KVM virtualize CPUID faulting (although KVM correctly suppresses MSR_PLATFORM_INFO's report of support for it). >> diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c >> index b86ebb1..2c47f0c 100644 >> --- a/arch/x86/xen/enlighten.c >> +++ b/arch/x86/xen/enlighten.c >> @@ -1050,6 +1050,9 @@ static u64 xen_read_msr_safe(unsigned int msr, int >> *err) >> #endif >> val &= ~X2APIC_ENABLE; >> break; >> + case MSR_PLATFORM_INFO: >> + val &= ~CPUID_FAULTING_SUPPORT; >> + break; >> } >> return val; >> } > > Does this mean that Xen guests effectively can't take advantage of this > feature? Yes. - Kyle
Re: [PATCH v2 1/3] syscalls,x86 Expose arch_prctl on x86-32.
On Wed, Sep 14, 2016 at 2:46 PM, Dave Hansen <dave.han...@linux.intel.com> wrote: > On 09/14/2016 02:35 PM, Kyle Huey wrote: >> It's not quite a plain move. To leave the existing arch_prctls only >> accessible to 64 bit callers, I added the is_32 bit and the four early >> returns for each existing ARCH_BLAH. These cases are now >> conditionally compiled out in a 32 bit kernel, so we only have to >> handle the 32 bit process on a 64 bit kernel case at runtime. > > I think it would make a lot of sense to do the move and the modification > in two patches. Ok. > Oh, and arch_prctl() really *is* 64-bit only. I didn't realize that. > That would have been nice to call out in the changelog, too. It's > totally non-obvious. Ok. > You're going to owe some manpage updates after this too, I guess. It > says: "arch_prctl() is supported only on Linux/x86-64 for 64-bit > programs currently." Indeed. There's a patch at the end of the series (sent to LKML, but you're not directly CCd on it) with a suggested manpage patch. > FWIW, I don't think it would be horrible to leave the existing > do_arch_prctl() code in process_64.h and call it > do_64_bit_only_something_arch_prctl(), and only call in to it from the > generic do_arch_prctl(). You really have one reason for all the "if > (is_32)"'s and it would be nice to document why in one single place. Yeah, that seems like a good idea. - Kyle
Re: [PATCH v2 1/3] syscalls,x86 Expose arch_prctl on x86-32.
On Wed, Sep 14, 2016 at 2:46 PM, Dave Hansen wrote: > On 09/14/2016 02:35 PM, Kyle Huey wrote: >> It's not quite a plain move. To leave the existing arch_prctls only >> accessible to 64 bit callers, I added the is_32 bit and the four early >> returns for each existing ARCH_BLAH. These cases are now >> conditionally compiled out in a 32 bit kernel, so we only have to >> handle the 32 bit process on a 64 bit kernel case at runtime. > > I think it would make a lot of sense to do the move and the modification > in two patches. Ok. > Oh, and arch_prctl() really *is* 64-bit only. I didn't realize that. > That would have been nice to call out in the changelog, too. It's > totally non-obvious. Ok. > You're going to owe some manpage updates after this too, I guess. It > says: "arch_prctl() is supported only on Linux/x86-64 for 64-bit > programs currently." Indeed. There's a patch at the end of the series (sent to LKML, but you're not directly CCd on it) with a suggested manpage patch. > FWIW, I don't think it would be horrible to leave the existing > do_arch_prctl() code in process_64.h and call it > do_64_bit_only_something_arch_prctl(), and only call in to it from the > generic do_arch_prctl(). You really have one reason for all the "if > (is_32)"'s and it would be nice to document why in one single place. Yeah, that seems like a good idea. - Kyle
Re: [PATCH v2 1/3] syscalls,x86 Expose arch_prctl on x86-32.
On Wed, Sep 14, 2016 at 2:29 PM, Dave Hansen <dave.han...@linux.intel.com> wrote: > On 09/14/2016 02:01 PM, Kyle Huey wrote: >> Signed-off-by: Kyle Huey <kh...@kylehuey.com> >> --- >> arch/x86/entry/syscalls/syscall_32.tbl | 1 + >> arch/x86/kernel/process.c | 80 >> ++ >> arch/x86/kernel/process_64.c | 66 >> 3 files changed, 81 insertions(+), 66 deletions(-) > > Could you explain a bit about what is going on here? Is it just a plain > old code move, _why_ you had to do it this way, etc...? Sure. In the subsequent patches in this series I add an arch_prctl that is available for both 64 and 32 bit programs/kernels. Since process_64.c is only built for 64 bit kernels, this syscall can't stay there anymore. It's not quite a plain move. To leave the existing arch_prctls only accessible to 64 bit callers, I added the is_32 bit and the four early returns for each existing ARCH_BLAH. These cases are now conditionally compiled out in a 32 bit kernel, so we only have to handle the 32 bit process on a 64 bit kernel case at runtime. I considered doing this instead with a compat wrapper for the syscall on 32 bit systems that would filter these arch_prctls before getting to do_arch_prctl. I didn't see any prior art for it, so decided not to proceed that way. - Kyle
Re: [PATCH v2 1/3] syscalls,x86 Expose arch_prctl on x86-32.
On Wed, Sep 14, 2016 at 2:29 PM, Dave Hansen wrote: > On 09/14/2016 02:01 PM, Kyle Huey wrote: >> Signed-off-by: Kyle Huey >> --- >> arch/x86/entry/syscalls/syscall_32.tbl | 1 + >> arch/x86/kernel/process.c | 80 >> ++ >> arch/x86/kernel/process_64.c | 66 >> 3 files changed, 81 insertions(+), 66 deletions(-) > > Could you explain a bit about what is going on here? Is it just a plain > old code move, _why_ you had to do it this way, etc...? Sure. In the subsequent patches in this series I add an arch_prctl that is available for both 64 and 32 bit programs/kernels. Since process_64.c is only built for 64 bit kernels, this syscall can't stay there anymore. It's not quite a plain move. To leave the existing arch_prctls only accessible to 64 bit callers, I added the is_32 bit and the four early returns for each existing ARCH_BLAH. These cases are now conditionally compiled out in a 32 bit kernel, so we only have to handle the 32 bit process on a 64 bit kernel case at runtime. I considered doing this instead with a compat wrapper for the syscall on 32 bit systems that would filter these arch_prctls before getting to do_arch_prctl. I didn't see any prior art for it, so decided not to proceed that way. - Kyle
[PATCH (man-pages)] arch_prctl.2: Note new support on x86-32, ARCH_[GET|SET]_CPUID.
Signed-off-by: Kyle Huey <kh...@kylehuey.com> --- man2/arch_prctl.2 | 73 +-- 1 file changed, 60 insertions(+), 13 deletions(-) diff --git a/man2/arch_prctl.2 b/man2/arch_prctl.2 index 989d369..c388797 100644 --- a/man2/arch_prctl.2 +++ b/man2/arch_prctl.2 @@ -22,7 +22,7 @@ .\" the source, must acknowledge the copyright and authors of this work. .\" %%%LICENSE_END .\" -.TH ARCH_PRCTL 2 2015-02-21 "Linux" "Linux Programmer's Manual" +.TH ARCH_PRCTL 2 2016-09-14 "Linux" "Linux Programmer's Manual" .SH NAME arch_prctl \- set architecture-specific thread state .SH SYNOPSIS @@ -31,8 +31,8 @@ arch_prctl \- set architecture-specific thread state .br .B #include .sp -.BI "int arch_prctl(int " code ", unsigned long " addr ); -.BI "int arch_prctl(int " code ", unsigned long *" addr ); +.BI "int arch_prctl(int " code ", unsigned long " arg2 ); +.BI "int arch_prctl(int " code ", unsigned long *" arg2 ); .fi .SH DESCRIPTION The @@ -41,22 +41,47 @@ function sets architecture-specific process or thread state. .I code selects a subfunction and passes argument -.I addr +.I arg2 to it; -.I addr +.I arg2 is interpreted as either an .I "unsigned long" for the "set" operations, or as an .IR "unsigned long\ *" , for the "get" operations. .LP +Subfunctions for both x86-64 and x86-32 are: +.TP +.B ARCH_GET_CPUID " (since Linux 4.X)" +Return the state of the flag determining whether the +.I cpuid +instruction can be executed by the process, in the +.I unsigned long +pointed to by +.IR arg2 . +.TP +.B ARCH_SET_CPUID " (since Linux 4.X)" +Set the state of the flag determining whether the +.I cpuid +instruction can be executed by the process. Pass +.B ARCH_CPUID_ENABLE +in +.I arg2 +to allow it to be executed, or +.B ARCH_CPUID_SIGSEGV +to generate a +.B SIGSEGV +when the process tries to execute the +.I cpuid +instruction. This flag is propagated across fork and exec. +.LP Subfunctions for x86-64 are: .TP .B ARCH_SET_FS Set the 64-bit base for the .I FS register to -.IR addr . +.IR arg2 . 
.TP .B ARCH_GET_FS Return the 64-bit base value for the @@ -64,13 +89,13 @@ Return the 64-bit base value for the register of the current thread in the .I unsigned long pointed to by -.IR addr . +.IR arg2 . .TP .B ARCH_SET_GS Set the 64-bit base for the .I GS register to -.IR addr . +.IR arg2 . .TP .B ARCH_GET_GS Return the 64-bit base value for the @@ -78,7 +103,7 @@ Return the 64-bit base value for the register of the current thread in the .I unsigned long pointed to by -.IR addr . +.IR arg2 . .SH RETURN VALUE On success, .BR arch_prctl () @@ -87,26 +112,48 @@ returns 0; on error, \-1 is returned, and is set to indicate the error. .SH ERRORS .TP +.B EACCES +.I code +is +.B ARCH_SET_CPUID +and +.I arg2 +is +.B ARCH_CPUID_ENABLE +and cpuid was previously disabled with +.B ARCH_CPUID_SIGSEGV +and the +.I no_new_privs +bit is set on this thread. +.TP .B EFAULT -.I addr +.I arg2 points to an unmapped address or is outside the process address space. .TP .B EINVAL .I code is not a valid subcommand. .TP +.B EINVAL +.I code +is +.B ARCH_SET_CPUID +and +.I cpuid +faulting is not supported on this machine. +.TP .B EPERM -.I addr +.I arg2 is outside the process address space. .\" .SH AUTHOR .\" Man page written by Andi Kleen. .SH CONFORMING TO .BR arch_prctl () -is a Linux/x86-64 extension and should not be used in programs intended +is a Linux/x86 extension and should not be used in programs intended to be portable. .SH NOTES .BR arch_prctl () -is supported only on Linux/x86-64 for 64-bit programs currently. +is supported only on Linux/x86 currently. The 64-bit base changes when a new 32-bit segment selector is loaded. -- 2.7.4
[RESEND][PATCH v2 3/3] x86,arch_prctl Add ARCH_[GET|SET]_CPUID for controlling the CPUID instruction
Intel supports faulting on the CPUID instruction in newer processors. Bit 31 of MSR_PLATFORM_INFO advertises support for this feature. It is documented in detail in Section 2.3.2 of http://www.intel.com/content/dam/www/public/us/en/documents/application-notes/virtualization-technology-flexmigration-application-note.pdf Signed-off-by: Kyle Huey <kh...@kylehuey.com> --- arch/x86/include/asm/msr-index.h | 1 + arch/x86/include/asm/thread_info.h| 4 +- arch/x86/include/uapi/asm/prctl.h | 6 + arch/x86/kernel/process.c | 81 +++ tools/testing/selftests/x86/Makefile | 2 +- tools/testing/selftests/x86/cpuid-fault.c | 223 ++ 6 files changed, 315 insertions(+), 2 deletions(-) create mode 100644 tools/testing/selftests/x86/cpuid-fault.c diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 83908d5..4aebec2 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -53,6 +53,7 @@ #define MSR_MTRRcap0x00fe #define MSR_IA32_BBL_CR_CTL0x0119 #define MSR_IA32_BBL_CR_CTL3 0x011e +#define MSR_MISC_FEATURES_ENABLES 0x0140 #define MSR_IA32_SYSENTER_CS 0x0174 #define MSR_IA32_SYSENTER_ESP 0x0175 diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 8b7c8d8..ec93976 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -93,6 +93,7 @@ struct thread_info { #define TIF_SECCOMP8 /* secure computing */ #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ #define TIF_UPROBE 12 /* breakpointed or singlestepping */ +#define TIF_NOCPUID15 /* CPUID is not accessible in userland */ #define TIF_NOTSC 16 /* TSC is not accessible in userland */ #define TIF_IA32 17 /* IA32 compatibility process */ #define TIF_FORK 18 /* ret_from_fork */ @@ -117,6 +118,7 @@ struct thread_info { #define _TIF_SECCOMP (1 << TIF_SECCOMP) #define _TIF_USER_RETURN_NOTIFY(1 << TIF_USER_RETURN_NOTIFY) #define _TIF_UPROBE(1 << TIF_UPROBE) +#define _TIF_NOCPUID (1 << 
TIF_NOCPUID) #define _TIF_NOTSC (1 << TIF_NOTSC) #define _TIF_IA32 (1 << TIF_IA32) #define _TIF_FORK (1 << TIF_FORK) @@ -146,7 +148,7 @@ struct thread_info { /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW \ - (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP) + (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP) #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) diff --git a/arch/x86/include/uapi/asm/prctl.h b/arch/x86/include/uapi/asm/prctl.h index 3ac5032..c087e55 100644 --- a/arch/x86/include/uapi/asm/prctl.h +++ b/arch/x86/include/uapi/asm/prctl.h @@ -6,4 +6,10 @@ #define ARCH_GET_FS 0x1003 #define ARCH_GET_GS 0x1004 +/* Get/set the process' ability to use the CPUID instruction */ +#define ARCH_GET_CPUID 0x1005 +#define ARCH_SET_CPUID 0x1006 +# define ARCH_CPUID_ENABLE 1 /* allow the use of the CPUID instruction */ +# define ARCH_CPUID_SIGSEGV2 /* throw a SIGSEGV instead of reading the CPUID */ + #endif /* _ASM_X86_PRCTL_H */ diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 0f857c3..5fc8e9d 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -193,6 +193,69 @@ int set_tsc_mode(unsigned int val) return 0; } +static void switch_cpuid_faulting(bool on) +{ + if (on) + msr_set_bit(MSR_MISC_FEATURES_ENABLES, 0); + else + msr_clear_bit(MSR_MISC_FEATURES_ENABLES, 0); +} + +static void disable_cpuid(void) +{ + preempt_disable(); + if (!test_and_set_thread_flag(TIF_NOCPUID)) + /* +* Must flip the CPU state synchronously with +* TIF_NOCPUID in the current running context. +*/ + switch_cpuid_faulting(true); + preempt_enable(); +} + +static void enable_cpuid(void) +{ + preempt_disable(); + if (test_and_clear_thread_flag(TIF_NOCPUID)) + /* +* Must flip the CPU state synchronously with +* TIF_NOCPUID in the current running context. 
+*/ + switch_cpuid_faulting(false); + preempt_enable(); +} + +int get_cpuid_mode(unsigned long adr) +{ + unsigned int val; + + if (test_thread_flag(TIF_NOCPUID)) + val = ARCH_CPUID_SIGSEGV; + else + val = ARCH_CPUID_ENABLE; + + return put_user(val, (unsigned in
[PATCH (man-pages)] arch_prctl.2: Note new support on x86-32, ARCH_[GET|SET]_CPUID.
Signed-off-by: Kyle Huey --- man2/arch_prctl.2 | 73 +-- 1 file changed, 60 insertions(+), 13 deletions(-) diff --git a/man2/arch_prctl.2 b/man2/arch_prctl.2 index 989d369..c388797 100644 --- a/man2/arch_prctl.2 +++ b/man2/arch_prctl.2 @@ -22,7 +22,7 @@ .\" the source, must acknowledge the copyright and authors of this work. .\" %%%LICENSE_END .\" -.TH ARCH_PRCTL 2 2015-02-21 "Linux" "Linux Programmer's Manual" +.TH ARCH_PRCTL 2 2016-09-14 "Linux" "Linux Programmer's Manual" .SH NAME arch_prctl \- set architecture-specific thread state .SH SYNOPSIS @@ -31,8 +31,8 @@ arch_prctl \- set architecture-specific thread state .br .B #include .sp -.BI "int arch_prctl(int " code ", unsigned long " addr ); -.BI "int arch_prctl(int " code ", unsigned long *" addr ); +.BI "int arch_prctl(int " code ", unsigned long " arg2 ); +.BI "int arch_prctl(int " code ", unsigned long *" arg2 ); .fi .SH DESCRIPTION The @@ -41,22 +41,47 @@ function sets architecture-specific process or thread state. .I code selects a subfunction and passes argument -.I addr +.I arg2 to it; -.I addr +.I arg2 is interpreted as either an .I "unsigned long" for the "set" operations, or as an .IR "unsigned long\ *" , for the "get" operations. .LP +Subfunctions for both x86-64 and x86-32 are: +.TP +.B ARCH_GET_CPUID " (since Linux 4.X)" +Return the state of the flag determining whether the +.I cpuid +instruction can be executed by the process, in the +.I unsigned long +pointed to by +.IR arg2 . +.TP +.B ARCH_SET_CPUID " (since Linux 4.X)" +Set the state of the flag determining whether the +.I cpuid +instruction can be executed by the process. Pass +.B ARCH_CPUID_ENABLE +in +.I arg2 +to allow it to be executed, or +.B ARCH_CPUID_SIGSEGV +to generate a +.B SIGSEGV +when the process tries to execute the +.I cpuid +instruction. This flag is propagated across fork and exec. +.LP Subfunctions for x86-64 are: .TP .B ARCH_SET_FS Set the 64-bit base for the .I FS register to -.IR addr . +.IR arg2 . 
.TP .B ARCH_GET_FS Return the 64-bit base value for the @@ -64,13 +89,13 @@ Return the 64-bit base value for the register of the current thread in the .I unsigned long pointed to by -.IR addr . +.IR arg2 . .TP .B ARCH_SET_GS Set the 64-bit base for the .I GS register to -.IR addr . +.IR arg2 . .TP .B ARCH_GET_GS Return the 64-bit base value for the @@ -78,7 +103,7 @@ Return the 64-bit base value for the register of the current thread in the .I unsigned long pointed to by -.IR addr . +.IR arg2 . .SH RETURN VALUE On success, .BR arch_prctl () @@ -87,26 +112,48 @@ returns 0; on error, \-1 is returned, and is set to indicate the error. .SH ERRORS .TP +.B EACCES +.I code +is +.B ARCH_SET_CPUID +and +.I arg2 +is +.B ARCH_CPUID_ENABLE +and cpuid was previously disabled with +.B ARCH_CPUID_SIGSEGV +and the +.I no_new_privs +bit is set on this thread. +.TP .B EFAULT -.I addr +.I arg2 points to an unmapped address or is outside the process address space. .TP .B EINVAL .I code is not a valid subcommand. .TP +.B EINVAL +.I code +is +.B ARCH_SET_CPUID +and +.I cpuid +faulting is not supported on this machine. +.TP .B EPERM -.I addr +.I arg2 is outside the process address space. .\" .SH AUTHOR .\" Man page written by Andi Kleen. .SH CONFORMING TO .BR arch_prctl () -is a Linux/x86-64 extension and should not be used in programs intended +is a Linux/x86 extension and should not be used in programs intended to be portable. .SH NOTES .BR arch_prctl () -is supported only on Linux/x86-64 for 64-bit programs currently. +is supported only on Linux/x86 currently. The 64-bit base changes when a new 32-bit segment selector is loaded. -- 2.7.4
[RESEND][PATCH v2 3/3] x86,arch_prctl Add ARCH_[GET|SET]_CPUID for controlling the CPUID instruction
Intel supports faulting on the CPUID instruction in newer processors. Bit 31 of MSR_PLATFORM_INFO advertises support for this feature. It is documented in detail in Section 2.3.2 of http://www.intel.com/content/dam/www/public/us/en/documents/application-notes/virtualization-technology-flexmigration-application-note.pdf Signed-off-by: Kyle Huey --- arch/x86/include/asm/msr-index.h | 1 + arch/x86/include/asm/thread_info.h| 4 +- arch/x86/include/uapi/asm/prctl.h | 6 + arch/x86/kernel/process.c | 81 +++ tools/testing/selftests/x86/Makefile | 2 +- tools/testing/selftests/x86/cpuid-fault.c | 223 ++ 6 files changed, 315 insertions(+), 2 deletions(-) create mode 100644 tools/testing/selftests/x86/cpuid-fault.c diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 83908d5..4aebec2 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -53,6 +53,7 @@ #define MSR_MTRRcap0x00fe #define MSR_IA32_BBL_CR_CTL0x0119 #define MSR_IA32_BBL_CR_CTL3 0x011e +#define MSR_MISC_FEATURES_ENABLES 0x0140 #define MSR_IA32_SYSENTER_CS 0x0174 #define MSR_IA32_SYSENTER_ESP 0x0175 diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 8b7c8d8..ec93976 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -93,6 +93,7 @@ struct thread_info { #define TIF_SECCOMP8 /* secure computing */ #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ #define TIF_UPROBE 12 /* breakpointed or singlestepping */ +#define TIF_NOCPUID15 /* CPUID is not accessible in userland */ #define TIF_NOTSC 16 /* TSC is not accessible in userland */ #define TIF_IA32 17 /* IA32 compatibility process */ #define TIF_FORK 18 /* ret_from_fork */ @@ -117,6 +118,7 @@ struct thread_info { #define _TIF_SECCOMP (1 << TIF_SECCOMP) #define _TIF_USER_RETURN_NOTIFY(1 << TIF_USER_RETURN_NOTIFY) #define _TIF_UPROBE(1 << TIF_UPROBE) +#define _TIF_NOCPUID (1 << TIF_NOCPUID) #define 
_TIF_NOTSC (1 << TIF_NOTSC) #define _TIF_IA32 (1 << TIF_IA32) #define _TIF_FORK (1 << TIF_FORK) @@ -146,7 +148,7 @@ struct thread_info { /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW \ - (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP) + (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP) #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) diff --git a/arch/x86/include/uapi/asm/prctl.h b/arch/x86/include/uapi/asm/prctl.h index 3ac5032..c087e55 100644 --- a/arch/x86/include/uapi/asm/prctl.h +++ b/arch/x86/include/uapi/asm/prctl.h @@ -6,4 +6,10 @@ #define ARCH_GET_FS 0x1003 #define ARCH_GET_GS 0x1004 +/* Get/set the process' ability to use the CPUID instruction */ +#define ARCH_GET_CPUID 0x1005 +#define ARCH_SET_CPUID 0x1006 +# define ARCH_CPUID_ENABLE 1 /* allow the use of the CPUID instruction */ +# define ARCH_CPUID_SIGSEGV2 /* throw a SIGSEGV instead of reading the CPUID */ + #endif /* _ASM_X86_PRCTL_H */ diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 0f857c3..5fc8e9d 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -193,6 +193,69 @@ int set_tsc_mode(unsigned int val) return 0; } +static void switch_cpuid_faulting(bool on) +{ + if (on) + msr_set_bit(MSR_MISC_FEATURES_ENABLES, 0); + else + msr_clear_bit(MSR_MISC_FEATURES_ENABLES, 0); +} + +static void disable_cpuid(void) +{ + preempt_disable(); + if (!test_and_set_thread_flag(TIF_NOCPUID)) + /* +* Must flip the CPU state synchronously with +* TIF_NOCPUID in the current running context. +*/ + switch_cpuid_faulting(true); + preempt_enable(); +} + +static void enable_cpuid(void) +{ + preempt_disable(); + if (test_and_clear_thread_flag(TIF_NOCPUID)) + /* +* Must flip the CPU state synchronously with +* TIF_NOCPUID in the current running context. 
+*/ + switch_cpuid_faulting(false); + preempt_enable(); +} + +int get_cpuid_mode(unsigned long adr) +{ + unsigned int val; + + if (test_thread_flag(TIF_NOCPUID)) + val = ARCH_CPUID_SIGSEGV; + else + val = ARCH_CPUID_ENABLE; + + return put_user(val, (unsigned int __user *)adr); +} + +int set_cpu
[RESEND][PATCH v2 2/3] x86 Test and expose CPUID faulting capabilities in /proc/cpuinfo
Xen advertises the underlying support for CPUID faulting but does not pass through writes to the relevant MSR, nor does it virtualize it, so it does not actually work. For now mask off the relevant bit on MSR_PLATFORM_INFO. Signed-off-by: Kyle Huey <kh...@kylehuey.com> --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/msr-index.h | 1 + arch/x86/kernel/cpu/scattered.c| 14 ++ arch/x86/xen/enlighten.c | 3 +++ 4 files changed, 19 insertions(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 92a8308..78b9d06 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -190,6 +190,7 @@ #define X86_FEATURE_CPB( 7*32+ 2) /* AMD Core Performance Boost */ #define X86_FEATURE_EPB( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ +#define X86_FEATURE_CPUID_FAULT ( 7*32+ 4) /* Intel CPUID faulting */ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 56f4c66..83908d5 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -41,6 +41,7 @@ #define MSR_IA32_PERFCTR1 0x00c2 #define MSR_FSB_FREQ 0x00cd #define MSR_PLATFORM_INFO 0x00ce +#define CPUID_FAULTING_SUPPORT (1UL << 31) #define MSR_NHM_SNB_PKG_CST_CFG_CTL0x00e2 #define NHM_C3_AUTO_DEMOTE (1UL << 25) diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 8cb57df..d502da1 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -24,6 +24,17 @@ enum cpuid_regs { CR_EBX }; +static int supports_cpuid_faulting(void) +{ + unsigned int lo, hi; + + if (rdmsr_safe(MSR_PLATFORM_INFO, , ) == 0 && + (lo & CPUID_FAULTING_SUPPORT)) + return 1; + else + return 0; +} + void init_scattered_cpuid_features(struct cpuinfo_x86 *c) { u32 max_level; @@ -54,4 +65,7 @@ void 
init_scattered_cpuid_features(struct cpuinfo_x86 *c) if (regs[cb->reg] & (1 << cb->bit)) set_cpu_cap(c, cb->feature); } + + if (supports_cpuid_faulting()) + set_cpu_cap(c, X86_FEATURE_CPUID_FAULT); } diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index b86ebb1..2c47f0c 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1050,6 +1050,9 @@ static u64 xen_read_msr_safe(unsigned int msr, int *err) #endif val &= ~X2APIC_ENABLE; break; + case MSR_PLATFORM_INFO: + val &= ~CPUID_FAULTING_SUPPORT; + break; } return val; } -- 2.7.4
[RESEND][PATCH v2 2/3] x86 Test and expose CPUID faulting capabilities in /proc/cpuinfo
Xen advertises the underlying support for CPUID faulting but does not pass through writes to the relevant MSR, nor does it virtualize it, so it does not actually work. For now mask off the relevant bit on MSR_PLATFORM_INFO. Signed-off-by: Kyle Huey --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/msr-index.h | 1 + arch/x86/kernel/cpu/scattered.c| 14 ++ arch/x86/xen/enlighten.c | 3 +++ 4 files changed, 19 insertions(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 92a8308..78b9d06 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -190,6 +190,7 @@ #define X86_FEATURE_CPB( 7*32+ 2) /* AMD Core Performance Boost */ #define X86_FEATURE_EPB( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ +#define X86_FEATURE_CPUID_FAULT ( 7*32+ 4) /* Intel CPUID faulting */ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 56f4c66..83908d5 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -41,6 +41,7 @@ #define MSR_IA32_PERFCTR1 0x00c2 #define MSR_FSB_FREQ 0x00cd #define MSR_PLATFORM_INFO 0x00ce +#define CPUID_FAULTING_SUPPORT (1UL << 31) #define MSR_NHM_SNB_PKG_CST_CFG_CTL0x00e2 #define NHM_C3_AUTO_DEMOTE (1UL << 25) diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 8cb57df..d502da1 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -24,6 +24,17 @@ enum cpuid_regs { CR_EBX }; +static int supports_cpuid_faulting(void) +{ + unsigned int lo, hi; + + if (rdmsr_safe(MSR_PLATFORM_INFO, , ) == 0 && + (lo & CPUID_FAULTING_SUPPORT)) + return 1; + else + return 0; +} + void init_scattered_cpuid_features(struct cpuinfo_x86 *c) { u32 max_level; @@ -54,4 +65,7 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c) 
if (regs[cb->reg] & (1 << cb->bit)) set_cpu_cap(c, cb->feature); } + + if (supports_cpuid_faulting()) + set_cpu_cap(c, X86_FEATURE_CPUID_FAULT); } diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index b86ebb1..2c47f0c 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1050,6 +1050,9 @@ static u64 xen_read_msr_safe(unsigned int msr, int *err) #endif val &= ~X2APIC_ENABLE; break; + case MSR_PLATFORM_INFO: + val &= ~CPUID_FAULTING_SUPPORT; + break; } return val; } -- 2.7.4
[RESEND][PATCH v2 1/3] syscalls,x86 Expose arch_prctl on x86-32.
Signed-off-by: Kyle Huey <kh...@kylehuey.com> --- arch/x86/entry/syscalls/syscall_32.tbl | 1 + arch/x86/kernel/process.c | 80 ++ arch/x86/kernel/process_64.c | 66 3 files changed, 81 insertions(+), 66 deletions(-) diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index f848572..3b6965b 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -386,3 +386,4 @@ 377i386copy_file_range sys_copy_file_range 378i386preadv2 sys_preadv2 compat_sys_preadv2 379i386pwritev2sys_pwritev2 compat_sys_pwritev2 +380i386arch_prctl sys_arch_prctl diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 62c0b0e..0f857c3 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -32,6 +33,7 @@ #include #include #include +#include /* * per-CPU TSS segments. Threads are completely 'soft' on Linux, @@ -567,3 +569,81 @@ unsigned long get_wchan(struct task_struct *p) } while (count++ < 16 && p->state != TASK_RUNNING); return 0; } + +long do_arch_prctl(struct task_struct *task, int code, unsigned long arg2) +{ + int ret = 0; + int doit = task == current; + int is_32 = IS_ENABLED(CONFIG_IA32_EMULATION) && test_thread_flag(TIF_IA32); + int cpu; + + switch (code) { +#ifdef CONFIG_X86_64 + case ARCH_SET_GS: + if (is_32) + return -EINVAL; + if (arg2 >= TASK_SIZE_MAX) + return -EPERM; + cpu = get_cpu(); + task->thread.gsindex = 0; + task->thread.gsbase = arg2; + if (doit) { + load_gs_index(0); + ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, arg2); + } + put_cpu(); + break; + case ARCH_SET_FS: + if (is_32) + return -EINVAL; + /* Not strictly needed for fs, but do it for symmetry + with gs */ + if (arg2 >= TASK_SIZE_MAX) + return -EPERM; + cpu = get_cpu(); + task->thread.fsindex = 0; + task->thread.fsbase = arg2; + if (doit) { + /* set the selector to 0 to not confuse __switch_to */ + loadsegment(fs, 0); + ret = 
wrmsrl_safe(MSR_FS_BASE, arg2); + } + put_cpu(); + break; + case ARCH_GET_FS: { + unsigned long base; + + if (is_32) + return -EINVAL; + if (doit) + rdmsrl(MSR_FS_BASE, base); + else + base = task->thread.fsbase; + ret = put_user(base, (unsigned long __user *)arg2); + break; + } + case ARCH_GET_GS: { + unsigned long base; + + if (is_32) + return -EINVAL; + if (doit) + rdmsrl(MSR_KERNEL_GS_BASE, base); + else + base = task->thread.gsbase; + ret = put_user(base, (unsigned long __user *)arg2); + break; + } +#endif + default: + ret = -EINVAL; + break; + } + + return ret; +} + +SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, arg2) +{ + return do_arch_prctl(current, code, arg2); +} diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 63236d8..e8c6302 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -524,72 +524,6 @@ void set_personality_ia32(bool x32) } EXPORT_SYMBOL_GPL(set_personality_ia32); -long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) -{ - int ret = 0; - int doit = task == current; - int cpu; - - switch (code) { - case ARCH_SET_GS: - if (addr >= TASK_SIZE_MAX) - return -EPERM; - cpu = get_cpu(); - task->thread.gsindex = 0; - task->thread.gsbase = addr; - if (doit) { - load_gs_index(0); - ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr); - } - put_cpu(); - break; - case ARCH_SET_FS: - /* Not strictly needed for fs, but do it for symmetry - with gs */ - if (addr >= TASK_SIZE_MAX) -
[RESEND][PATCH v2 1/3] syscalls,x86 Expose arch_prctl on x86-32.
Signed-off-by: Kyle Huey --- arch/x86/entry/syscalls/syscall_32.tbl | 1 + arch/x86/kernel/process.c | 80 ++ arch/x86/kernel/process_64.c | 66 3 files changed, 81 insertions(+), 66 deletions(-) diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index f848572..3b6965b 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -386,3 +386,4 @@ 377i386copy_file_range sys_copy_file_range 378i386preadv2 sys_preadv2 compat_sys_preadv2 379i386pwritev2sys_pwritev2 compat_sys_pwritev2 +380i386arch_prctl sys_arch_prctl diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 62c0b0e..0f857c3 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -32,6 +33,7 @@ #include #include #include +#include /* * per-CPU TSS segments. Threads are completely 'soft' on Linux, @@ -567,3 +569,81 @@ unsigned long get_wchan(struct task_struct *p) } while (count++ < 16 && p->state != TASK_RUNNING); return 0; } + +long do_arch_prctl(struct task_struct *task, int code, unsigned long arg2) +{ + int ret = 0; + int doit = task == current; + int is_32 = IS_ENABLED(CONFIG_IA32_EMULATION) && test_thread_flag(TIF_IA32); + int cpu; + + switch (code) { +#ifdef CONFIG_X86_64 + case ARCH_SET_GS: + if (is_32) + return -EINVAL; + if (arg2 >= TASK_SIZE_MAX) + return -EPERM; + cpu = get_cpu(); + task->thread.gsindex = 0; + task->thread.gsbase = arg2; + if (doit) { + load_gs_index(0); + ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, arg2); + } + put_cpu(); + break; + case ARCH_SET_FS: + if (is_32) + return -EINVAL; + /* Not strictly needed for fs, but do it for symmetry + with gs */ + if (arg2 >= TASK_SIZE_MAX) + return -EPERM; + cpu = get_cpu(); + task->thread.fsindex = 0; + task->thread.fsbase = arg2; + if (doit) { + /* set the selector to 0 to not confuse __switch_to */ + loadsegment(fs, 0); + ret = 
wrmsrl_safe(MSR_FS_BASE, arg2); + } + put_cpu(); + break; + case ARCH_GET_FS: { + unsigned long base; + + if (is_32) + return -EINVAL; + if (doit) + rdmsrl(MSR_FS_BASE, base); + else + base = task->thread.fsbase; + ret = put_user(base, (unsigned long __user *)arg2); + break; + } + case ARCH_GET_GS: { + unsigned long base; + + if (is_32) + return -EINVAL; + if (doit) + rdmsrl(MSR_KERNEL_GS_BASE, base); + else + base = task->thread.gsbase; + ret = put_user(base, (unsigned long __user *)arg2); + break; + } +#endif + default: + ret = -EINVAL; + break; + } + + return ret; +} + +SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, arg2) +{ + return do_arch_prctl(current, code, arg2); +} diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 63236d8..e8c6302 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -524,72 +524,6 @@ void set_personality_ia32(bool x32) } EXPORT_SYMBOL_GPL(set_personality_ia32); -long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) -{ - int ret = 0; - int doit = task == current; - int cpu; - - switch (code) { - case ARCH_SET_GS: - if (addr >= TASK_SIZE_MAX) - return -EPERM; - cpu = get_cpu(); - task->thread.gsindex = 0; - task->thread.gsbase = addr; - if (doit) { - load_gs_index(0); - ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr); - } - put_cpu(); - break; - case ARCH_SET_FS: - /* Not strictly needed for fs, but do it for symmetry - with gs */ - if (addr >= TASK_SIZE_MAX) - return -EPERM; -
[RESEND][PATCH v2] arch_prctl,x86 Add ARCH_[GET|SET]_CPUID for controlling the CPUID instruction
(Resending because I screwed up the cover email, sorry about that.) rr (http://rr-project.org/), a userspace record-and-replay reverse- execution debugger, would like to trap and emulate the CPUID instruction. This would allow us to a) mask away certain hardware features that rr does not support (e.g. RDRAND) and b) enable trace portability across machines by providing constant results. 4 patches follow, the first 3 to the kernel, and the final patch to man-pages. The following changes have been made since v1: Suggested by Borislav Petkov: - Uses arch_prctl instead of prctl. - Uses rdmsr_safe. - Added sample man-pages patch. - Various functions are renamed, style fixes. Suggested by Andy Lutomirski: - Added a cpufeature bit to show up in /proc/cpuinfo. - Added sane behavior in Xen, by masking away the MSR_PLATFORM_INFO bit showing support for this feature for now. - Added a selftest, clarifying the bit is preserved on fork/exec. The following issues were raised and are not addressed: Use of cpuid within interrupt handlers: as Linus pointed out, CPUID only faults at cpl>0, so this is not a concern. Use a static_key instead of a TIF: I don't believe this solves anything. There are currently 8 free TIF bits (after this patch), and it's always possible to move this (or others) later if they are needed. Even if we were to use a static_key we would still need to maintain state about which tasks are subject to CPUID faulting and which are not somewhere else.
[RESEND][PATCH v2] arch_prctl,x86 Add ARCH_[GET|SET]_CPUID for controlling the CPUID instruction
(Resending because I screwed up the cover email, sorry about that.) rr (http://rr-project.org/), a userspace record-and-replay reverse- execution debugger, would like to trap and emulate the CPUID instruction. This would allow us to a) mask away certain hardware features that rr does not support (e.g. RDRAND) and b) enable trace portability across machines by providing constant results. 4 patches follow, the first 3 to the kernel, and the final patch to man-pages. The following changes have been made since v1: Suggested by Borislav Petkov: - Uses arch_prctl instead of prctl. - Uses rdmsr_safe. - Added sample man-pages patch. - Various functions are renamed, style fixes. Suggested by Andy Lutomirski: - Added a cpufeature bit to show up in /proc/cpuinfo. - Added sane behavior in Xen, by masking away the MSR_PLATFORM_INFO bit showing support for this feature for now. - Added a selftest, clarifying the bit is preserved on fork/exec. The following issues were raised and are not addressed: Use of cpuid within interrupt handlers: as Linus pointed out, CPUID only faults at cpl>0, so this is not a concern. Use a static_key instead of a TIF: I don't believe this solves anything. There are currently 8 free TIF bits (after this patch), and it's always possible to move this (or others) later if they are needed. Even if we were to use a static_key we would still need to maintain state about which tasks are subject to CPUID faulting and which are not somewhere else.
[PATCH v2 3/3] x86,arch_prctl Add ARCH_[GET|SET]_CPUID for controlling the CPUID instruction
Intel supports faulting on the CPUID instruction in newer processors. Bit 31 of MSR_PLATFORM_INFO advertises support for this feature. It is documented in detail in Section 2.3.2 of http://www.intel.com/content/dam/www/public/us/en/documents/application-notes/virtualization-technology-flexmigration-application-note.pdf Signed-off-by: Kyle Huey <kh...@kylehuey.com> --- arch/x86/include/asm/msr-index.h | 1 + arch/x86/include/asm/thread_info.h| 4 +- arch/x86/include/uapi/asm/prctl.h | 6 + arch/x86/kernel/process.c | 81 +++ tools/testing/selftests/x86/Makefile | 2 +- tools/testing/selftests/x86/cpuid-fault.c | 223 ++ 6 files changed, 315 insertions(+), 2 deletions(-) create mode 100644 tools/testing/selftests/x86/cpuid-fault.c diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 83908d5..4aebec2 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -53,6 +53,7 @@ #define MSR_MTRRcap0x00fe #define MSR_IA32_BBL_CR_CTL0x0119 #define MSR_IA32_BBL_CR_CTL3 0x011e +#define MSR_MISC_FEATURES_ENABLES 0x0140 #define MSR_IA32_SYSENTER_CS 0x0174 #define MSR_IA32_SYSENTER_ESP 0x0175 diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 8b7c8d8..ec93976 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -93,6 +93,7 @@ struct thread_info { #define TIF_SECCOMP8 /* secure computing */ #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ #define TIF_UPROBE 12 /* breakpointed or singlestepping */ +#define TIF_NOCPUID15 /* CPUID is not accessible in userland */ #define TIF_NOTSC 16 /* TSC is not accessible in userland */ #define TIF_IA32 17 /* IA32 compatibility process */ #define TIF_FORK 18 /* ret_from_fork */ @@ -117,6 +118,7 @@ struct thread_info { #define _TIF_SECCOMP (1 << TIF_SECCOMP) #define _TIF_USER_RETURN_NOTIFY(1 << TIF_USER_RETURN_NOTIFY) #define _TIF_UPROBE(1 << TIF_UPROBE) +#define _TIF_NOCPUID (1 << 
TIF_NOCPUID) #define _TIF_NOTSC (1 << TIF_NOTSC) #define _TIF_IA32 (1 << TIF_IA32) #define _TIF_FORK (1 << TIF_FORK) @@ -146,7 +148,7 @@ struct thread_info { /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW \ - (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP) + (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP) #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) diff --git a/arch/x86/include/uapi/asm/prctl.h b/arch/x86/include/uapi/asm/prctl.h index 3ac5032..c087e55 100644 --- a/arch/x86/include/uapi/asm/prctl.h +++ b/arch/x86/include/uapi/asm/prctl.h @@ -6,4 +6,10 @@ #define ARCH_GET_FS 0x1003 #define ARCH_GET_GS 0x1004 +/* Get/set the process' ability to use the CPUID instruction */ +#define ARCH_GET_CPUID 0x1005 +#define ARCH_SET_CPUID 0x1006 +# define ARCH_CPUID_ENABLE 1 /* allow the use of the CPUID instruction */ +# define ARCH_CPUID_SIGSEGV2 /* throw a SIGSEGV instead of reading the CPUID */ + #endif /* _ASM_X86_PRCTL_H */ diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 0f857c3..5fc8e9d 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -193,6 +193,69 @@ int set_tsc_mode(unsigned int val) return 0; } +static void switch_cpuid_faulting(bool on) +{ + if (on) + msr_set_bit(MSR_MISC_FEATURES_ENABLES, 0); + else + msr_clear_bit(MSR_MISC_FEATURES_ENABLES, 0); +} + +static void disable_cpuid(void) +{ + preempt_disable(); + if (!test_and_set_thread_flag(TIF_NOCPUID)) + /* +* Must flip the CPU state synchronously with +* TIF_NOCPUID in the current running context. +*/ + switch_cpuid_faulting(true); + preempt_enable(); +} + +static void enable_cpuid(void) +{ + preempt_disable(); + if (test_and_clear_thread_flag(TIF_NOCPUID)) + /* +* Must flip the CPU state synchronously with +* TIF_NOCPUID in the current running context. 
+*/ + switch_cpuid_faulting(false); + preempt_enable(); +} + +int get_cpuid_mode(unsigned long adr) +{ + unsigned int val; + + if (test_thread_flag(TIF_NOCPUID)) + val = ARCH_CPUID_SIGSEGV; + else + val = ARCH_CPUID_ENABLE; + + return put_user(val, (unsigned in
[PATCH v2 3/3] x86,arch_prctl Add ARCH_[GET|SET]_CPUID for controlling the CPUID instruction
Intel supports faulting on the CPUID instruction in newer processors. Bit 31 of MSR_PLATFORM_INFO advertises support for this feature. It is documented in detail in Section 2.3.2 of http://www.intel.com/content/dam/www/public/us/en/documents/application-notes/virtualization-technology-flexmigration-application-note.pdf Signed-off-by: Kyle Huey --- arch/x86/include/asm/msr-index.h | 1 + arch/x86/include/asm/thread_info.h| 4 +- arch/x86/include/uapi/asm/prctl.h | 6 + arch/x86/kernel/process.c | 81 +++ tools/testing/selftests/x86/Makefile | 2 +- tools/testing/selftests/x86/cpuid-fault.c | 223 ++ 6 files changed, 315 insertions(+), 2 deletions(-) create mode 100644 tools/testing/selftests/x86/cpuid-fault.c diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 83908d5..4aebec2 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -53,6 +53,7 @@ #define MSR_MTRRcap0x00fe #define MSR_IA32_BBL_CR_CTL0x0119 #define MSR_IA32_BBL_CR_CTL3 0x011e +#define MSR_MISC_FEATURES_ENABLES 0x0140 #define MSR_IA32_SYSENTER_CS 0x0174 #define MSR_IA32_SYSENTER_ESP 0x0175 diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 8b7c8d8..ec93976 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -93,6 +93,7 @@ struct thread_info { #define TIF_SECCOMP8 /* secure computing */ #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ #define TIF_UPROBE 12 /* breakpointed or singlestepping */ +#define TIF_NOCPUID15 /* CPUID is not accessible in userland */ #define TIF_NOTSC 16 /* TSC is not accessible in userland */ #define TIF_IA32 17 /* IA32 compatibility process */ #define TIF_FORK 18 /* ret_from_fork */ @@ -117,6 +118,7 @@ struct thread_info { #define _TIF_SECCOMP (1 << TIF_SECCOMP) #define _TIF_USER_RETURN_NOTIFY(1 << TIF_USER_RETURN_NOTIFY) #define _TIF_UPROBE(1 << TIF_UPROBE) +#define _TIF_NOCPUID (1 << TIF_NOCPUID) #define 
_TIF_NOTSC (1 << TIF_NOTSC) #define _TIF_IA32 (1 << TIF_IA32) #define _TIF_FORK (1 << TIF_FORK) @@ -146,7 +148,7 @@ struct thread_info { /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW \ - (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP) + (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP) #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) diff --git a/arch/x86/include/uapi/asm/prctl.h b/arch/x86/include/uapi/asm/prctl.h index 3ac5032..c087e55 100644 --- a/arch/x86/include/uapi/asm/prctl.h +++ b/arch/x86/include/uapi/asm/prctl.h @@ -6,4 +6,10 @@ #define ARCH_GET_FS 0x1003 #define ARCH_GET_GS 0x1004 +/* Get/set the process' ability to use the CPUID instruction */ +#define ARCH_GET_CPUID 0x1005 +#define ARCH_SET_CPUID 0x1006 +# define ARCH_CPUID_ENABLE 1 /* allow the use of the CPUID instruction */ +# define ARCH_CPUID_SIGSEGV2 /* throw a SIGSEGV instead of reading the CPUID */ + #endif /* _ASM_X86_PRCTL_H */ diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 0f857c3..5fc8e9d 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -193,6 +193,69 @@ int set_tsc_mode(unsigned int val) return 0; } +static void switch_cpuid_faulting(bool on) +{ + if (on) + msr_set_bit(MSR_MISC_FEATURES_ENABLES, 0); + else + msr_clear_bit(MSR_MISC_FEATURES_ENABLES, 0); +} + +static void disable_cpuid(void) +{ + preempt_disable(); + if (!test_and_set_thread_flag(TIF_NOCPUID)) + /* +* Must flip the CPU state synchronously with +* TIF_NOCPUID in the current running context. +*/ + switch_cpuid_faulting(true); + preempt_enable(); +} + +static void enable_cpuid(void) +{ + preempt_disable(); + if (test_and_clear_thread_flag(TIF_NOCPUID)) + /* +* Must flip the CPU state synchronously with +* TIF_NOCPUID in the current running context. 
+*/ + switch_cpuid_faulting(false); + preempt_enable(); +} + +int get_cpuid_mode(unsigned long adr) +{ + unsigned int val; + + if (test_thread_flag(TIF_NOCPUID)) + val = ARCH_CPUID_SIGSEGV; + else + val = ARCH_CPUID_ENABLE; + + return put_user(val, (unsigned int __user *)adr); +} + +int set_cpu
[PATCH v2 1/3] syscalls,x86: Expose arch_prctl on x86-32.
Signed-off-by: Kyle Huey <kh...@kylehuey.com> --- arch/x86/entry/syscalls/syscall_32.tbl | 1 + arch/x86/kernel/process.c | 80 ++ arch/x86/kernel/process_64.c | 66 3 files changed, 81 insertions(+), 66 deletions(-) diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index f848572..3b6965b 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -386,3 +386,4 @@ 377i386copy_file_range sys_copy_file_range 378i386preadv2 sys_preadv2 compat_sys_preadv2 379i386pwritev2sys_pwritev2 compat_sys_pwritev2 +380i386arch_prctl sys_arch_prctl diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 62c0b0e..0f857c3 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -32,6 +33,7 @@ #include #include #include +#include /* * per-CPU TSS segments. Threads are completely 'soft' on Linux, @@ -567,3 +569,81 @@ unsigned long get_wchan(struct task_struct *p) } while (count++ < 16 && p->state != TASK_RUNNING); return 0; } + +long do_arch_prctl(struct task_struct *task, int code, unsigned long arg2) +{ + int ret = 0; + int doit = task == current; + int is_32 = IS_ENABLED(CONFIG_IA32_EMULATION) && test_thread_flag(TIF_IA32); + int cpu; + + switch (code) { +#ifdef CONFIG_X86_64 + case ARCH_SET_GS: + if (is_32) + return -EINVAL; + if (arg2 >= TASK_SIZE_MAX) + return -EPERM; + cpu = get_cpu(); + task->thread.gsindex = 0; + task->thread.gsbase = arg2; + if (doit) { + load_gs_index(0); + ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, arg2); + } + put_cpu(); + break; + case ARCH_SET_FS: + if (is_32) + return -EINVAL; + /* Not strictly needed for fs, but do it for symmetry + with gs */ + if (arg2 >= TASK_SIZE_MAX) + return -EPERM; + cpu = get_cpu(); + task->thread.fsindex = 0; + task->thread.fsbase = arg2; + if (doit) { + /* set the selector to 0 to not confuse __switch_to */ + loadsegment(fs, 0); + ret = 
wrmsrl_safe(MSR_FS_BASE, arg2); + } + put_cpu(); + break; + case ARCH_GET_FS: { + unsigned long base; + + if (is_32) + return -EINVAL; + if (doit) + rdmsrl(MSR_FS_BASE, base); + else + base = task->thread.fsbase; + ret = put_user(base, (unsigned long __user *)arg2); + break; + } + case ARCH_GET_GS: { + unsigned long base; + + if (is_32) + return -EINVAL; + if (doit) + rdmsrl(MSR_KERNEL_GS_BASE, base); + else + base = task->thread.gsbase; + ret = put_user(base, (unsigned long __user *)arg2); + break; + } +#endif + default: + ret = -EINVAL; + break; + } + + return ret; +} + +SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, arg2) +{ + return do_arch_prctl(current, code, arg2); +} diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 63236d8..e8c6302 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -524,72 +524,6 @@ void set_personality_ia32(bool x32) } EXPORT_SYMBOL_GPL(set_personality_ia32); -long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) -{ - int ret = 0; - int doit = task == current; - int cpu; - - switch (code) { - case ARCH_SET_GS: - if (addr >= TASK_SIZE_MAX) - return -EPERM; - cpu = get_cpu(); - task->thread.gsindex = 0; - task->thread.gsbase = addr; - if (doit) { - load_gs_index(0); - ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr); - } - put_cpu(); - break; - case ARCH_SET_FS: - /* Not strictly needed for fs, but do it for symmetry - with gs */ - if (addr >= TASK_SIZE_MAX) -
[PATCH v2 2/3] x86: Test and expose CPUID faulting capabilities in /proc/cpuinfo
Xen advertises the underlying support for CPUID faulting but not does pass through writes to the relevant MSR, nor does it virtualize it, so it does not actually work. For now mask off the relevant bit on MSR_PLATFORM_INFO. Signed-off-by: Kyle Huey <kh...@kylehuey.com> --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/msr-index.h | 1 + arch/x86/kernel/cpu/scattered.c| 14 ++ arch/x86/xen/enlighten.c | 3 +++ 4 files changed, 19 insertions(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 92a8308..78b9d06 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -190,6 +190,7 @@ #define X86_FEATURE_CPB( 7*32+ 2) /* AMD Core Performance Boost */ #define X86_FEATURE_EPB( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ +#define X86_FEATURE_CPUID_FAULT ( 7*32+ 4) /* Intel CPUID faulting */ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 56f4c66..83908d5 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -41,6 +41,7 @@ #define MSR_IA32_PERFCTR1 0x00c2 #define MSR_FSB_FREQ 0x00cd #define MSR_PLATFORM_INFO 0x00ce +#define CPUID_FAULTING_SUPPORT (1UL << 31) #define MSR_NHM_SNB_PKG_CST_CFG_CTL0x00e2 #define NHM_C3_AUTO_DEMOTE (1UL << 25) diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 8cb57df..d502da1 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -24,6 +24,17 @@ enum cpuid_regs { CR_EBX }; +static int supports_cpuid_faulting(void) +{ + unsigned int lo, hi; + + if (rdmsr_safe(MSR_PLATFORM_INFO, , ) == 0 && + (lo & CPUID_FAULTING_SUPPORT)) + return 1; + else + return 0; +} + void init_scattered_cpuid_features(struct cpuinfo_x86 *c) { u32 max_level; @@ -54,4 +65,7 @@ void 
init_scattered_cpuid_features(struct cpuinfo_x86 *c) if (regs[cb->reg] & (1 << cb->bit)) set_cpu_cap(c, cb->feature); } + + if (supports_cpuid_faulting()) + set_cpu_cap(c, X86_FEATURE_CPUID_FAULT); } diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index b86ebb1..2c47f0c 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1050,6 +1050,9 @@ static u64 xen_read_msr_safe(unsigned int msr, int *err) #endif val &= ~X2APIC_ENABLE; break; + case MSR_PLATFORM_INFO: + val &= ~CPUID_FAULTING_SUPPORT; + break; } return val; } -- 2.7.4
[PATCH v2 1/3] syscalls,x86: Expose arch_prctl on x86-32.
Signed-off-by: Kyle Huey --- arch/x86/entry/syscalls/syscall_32.tbl | 1 + arch/x86/kernel/process.c | 80 ++ arch/x86/kernel/process_64.c | 66 3 files changed, 81 insertions(+), 66 deletions(-) diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index f848572..3b6965b 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -386,3 +386,4 @@ 377i386copy_file_range sys_copy_file_range 378i386preadv2 sys_preadv2 compat_sys_preadv2 379i386pwritev2sys_pwritev2 compat_sys_pwritev2 +380i386arch_prctl sys_arch_prctl diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 62c0b0e..0f857c3 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -32,6 +33,7 @@ #include #include #include +#include /* * per-CPU TSS segments. Threads are completely 'soft' on Linux, @@ -567,3 +569,81 @@ unsigned long get_wchan(struct task_struct *p) } while (count++ < 16 && p->state != TASK_RUNNING); return 0; } + +long do_arch_prctl(struct task_struct *task, int code, unsigned long arg2) +{ + int ret = 0; + int doit = task == current; + int is_32 = IS_ENABLED(CONFIG_IA32_EMULATION) && test_thread_flag(TIF_IA32); + int cpu; + + switch (code) { +#ifdef CONFIG_X86_64 + case ARCH_SET_GS: + if (is_32) + return -EINVAL; + if (arg2 >= TASK_SIZE_MAX) + return -EPERM; + cpu = get_cpu(); + task->thread.gsindex = 0; + task->thread.gsbase = arg2; + if (doit) { + load_gs_index(0); + ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, arg2); + } + put_cpu(); + break; + case ARCH_SET_FS: + if (is_32) + return -EINVAL; + /* Not strictly needed for fs, but do it for symmetry + with gs */ + if (arg2 >= TASK_SIZE_MAX) + return -EPERM; + cpu = get_cpu(); + task->thread.fsindex = 0; + task->thread.fsbase = arg2; + if (doit) { + /* set the selector to 0 to not confuse __switch_to */ + loadsegment(fs, 0); + ret = 
wrmsrl_safe(MSR_FS_BASE, arg2); + } + put_cpu(); + break; + case ARCH_GET_FS: { + unsigned long base; + + if (is_32) + return -EINVAL; + if (doit) + rdmsrl(MSR_FS_BASE, base); + else + base = task->thread.fsbase; + ret = put_user(base, (unsigned long __user *)arg2); + break; + } + case ARCH_GET_GS: { + unsigned long base; + + if (is_32) + return -EINVAL; + if (doit) + rdmsrl(MSR_KERNEL_GS_BASE, base); + else + base = task->thread.gsbase; + ret = put_user(base, (unsigned long __user *)arg2); + break; + } +#endif + default: + ret = -EINVAL; + break; + } + + return ret; +} + +SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, arg2) +{ + return do_arch_prctl(current, code, arg2); +} diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 63236d8..e8c6302 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -524,72 +524,6 @@ void set_personality_ia32(bool x32) } EXPORT_SYMBOL_GPL(set_personality_ia32); -long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) -{ - int ret = 0; - int doit = task == current; - int cpu; - - switch (code) { - case ARCH_SET_GS: - if (addr >= TASK_SIZE_MAX) - return -EPERM; - cpu = get_cpu(); - task->thread.gsindex = 0; - task->thread.gsbase = addr; - if (doit) { - load_gs_index(0); - ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr); - } - put_cpu(); - break; - case ARCH_SET_FS: - /* Not strictly needed for fs, but do it for symmetry - with gs */ - if (addr >= TASK_SIZE_MAX) - return -EPERM; -
[PATCH v2 2/3] x86: Test and expose CPUID faulting capabilities in /proc/cpuinfo
Xen advertises the underlying support for CPUID faulting but not does pass through writes to the relevant MSR, nor does it virtualize it, so it does not actually work. For now mask off the relevant bit on MSR_PLATFORM_INFO. Signed-off-by: Kyle Huey --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/msr-index.h | 1 + arch/x86/kernel/cpu/scattered.c| 14 ++ arch/x86/xen/enlighten.c | 3 +++ 4 files changed, 19 insertions(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 92a8308..78b9d06 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -190,6 +190,7 @@ #define X86_FEATURE_CPB( 7*32+ 2) /* AMD Core Performance Boost */ #define X86_FEATURE_EPB( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ +#define X86_FEATURE_CPUID_FAULT ( 7*32+ 4) /* Intel CPUID faulting */ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 56f4c66..83908d5 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -41,6 +41,7 @@ #define MSR_IA32_PERFCTR1 0x00c2 #define MSR_FSB_FREQ 0x00cd #define MSR_PLATFORM_INFO 0x00ce +#define CPUID_FAULTING_SUPPORT (1UL << 31) #define MSR_NHM_SNB_PKG_CST_CFG_CTL0x00e2 #define NHM_C3_AUTO_DEMOTE (1UL << 25) diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 8cb57df..d502da1 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -24,6 +24,17 @@ enum cpuid_regs { CR_EBX }; +static int supports_cpuid_faulting(void) +{ + unsigned int lo, hi; + + if (rdmsr_safe(MSR_PLATFORM_INFO, , ) == 0 && + (lo & CPUID_FAULTING_SUPPORT)) + return 1; + else + return 0; +} + void init_scattered_cpuid_features(struct cpuinfo_x86 *c) { u32 max_level; @@ -54,4 +65,7 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c) 
if (regs[cb->reg] & (1 << cb->bit)) set_cpu_cap(c, cb->feature); } + + if (supports_cpuid_faulting()) + set_cpu_cap(c, X86_FEATURE_CPUID_FAULT); } diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index b86ebb1..2c47f0c 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1050,6 +1050,9 @@ static u64 xen_read_msr_safe(unsigned int msr, int *err) #endif val &= ~X2APIC_ENABLE; break; + case MSR_PLATFORM_INFO: + val &= ~CPUID_FAULTING_SUPPORT; + break; } return val; } -- 2.7.4
Re: [PATCH] prctl,x86: Add PR_[GET|SET]_CPUID for controlling the CPUID instruction.
On Mon, Sep 12, 2016 at 9:56 AM, Andy Lutomirski wrote: > You should explicitly check that, if the > feature is set under Xen PV, then the MSR actually works as > advertised. This may require talking to the Xen folks to make sure > you're testing the right configuration. This is interesting. When running under Xen PV the kernel is allowed to read the real value of MSR_PLATFORM_INFO and see that CPUID faulting is supported. But as you suggested, writing to MSR_MISC_FEATURES_ENABLES doesn't actually enable CPUID faulting, at least not in any way that works. It's not obvious to me how to test this, because when this feature works, CPUID only faults in userspace, not in the kernel. Is there existing code somewhere that runs tests like this in userspace? - Kyle
Re: [PATCH] prctl,x86: Add PR_[GET|SET]_CPUID for controlling the CPUID instruction.
On Mon, Sep 12, 2016 at 9:56 AM, Andy Lutomirski wrote: > You should explicitly check that, if the > feature is set under Xen PV, then the MSR actually works as > advertised. This may require talking to the Xen folks to make sure > you're testing the right configuration. This is interesting. When running under Xen PV the kernel is allowed to read the real value of MSR_PLATFORM_INFO and see that CPUID faulting is supported. But as you suggested, writing to MSR_MISC_FEATURES_ENABLES doesn't actually enable CPUID faulting, at least not in any way that works. It's not obvious to me how to test this, because when this feature works, CPUID only faults in userspace, not in the kernel. Is there existing code somewhere that runs tests like this in userspace? - Kyle
Re: [PATCH] prctl,x86: Add PR_[GET|SET]_CPUID for controlling the CPUID instruction.
On Mon, Sep 12, 2016 at 7:15 AM, Kyle Huey <m...@kylehuey.com> wrote: > On Mon, Sep 12, 2016 at 2:07 AM, Borislav Petkov <b...@suse.de> wrote: >> On Sun, Sep 11, 2016 at 05:29:23PM -0700, Kyle Huey wrote: >>> @@ -2162,6 +2168,12 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, >>> arg2, unsigned long, arg3, >>> case PR_SET_TSC: >>> error = SET_TSC_CTL(arg2); >>> break; >>> + case PR_GET_CPUID: >>> + error = GET_CPUID_CTL(arg2); >>> + break; >>> + case PR_SET_CPUID: >>> + error = SET_CPUID_CTL(arg2); >>> + break; >>> case PR_TASK_PERF_EVENTS_DISABLE: >>> error = perf_event_task_disable(); >>> break; >> >> This whole fun should be in arch_prctl() as it is arch-specific. > > Yeah, I was debating about that, and did it this way because of > PR_SET_TSC. Will fix. arch_prctl is not yet exposed on 32 bit x86, so we'll have to add that as well to do this. - Kyle
Re: [PATCH] prctl,x86: Add PR_[GET|SET]_CPUID for controlling the CPUID instruction.
On Mon, Sep 12, 2016 at 7:15 AM, Kyle Huey wrote: > On Mon, Sep 12, 2016 at 2:07 AM, Borislav Petkov wrote: >> On Sun, Sep 11, 2016 at 05:29:23PM -0700, Kyle Huey wrote: >>> @@ -2162,6 +2168,12 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, >>> arg2, unsigned long, arg3, >>> case PR_SET_TSC: >>> error = SET_TSC_CTL(arg2); >>> break; >>> + case PR_GET_CPUID: >>> + error = GET_CPUID_CTL(arg2); >>> + break; >>> + case PR_SET_CPUID: >>> + error = SET_CPUID_CTL(arg2); >>> + break; >>> case PR_TASK_PERF_EVENTS_DISABLE: >>> error = perf_event_task_disable(); >>> break; >> >> This whole fun should be in arch_prctl() as it is arch-specific. > > Yeah, I was debating about that, and did it this way because of > PR_SET_TSC. Will fix. arch_prctl is not yet exposed on 32 bit x86, so we'll have to add that as well to do this. - Kyle
Re: [PATCH] prctl,x86: Add PR_[GET|SET]_CPUID for controlling the CPUID instruction.
Thanks for the review! On Mon, Sep 12, 2016 at 2:07 AM, Borislav Petkov <b...@suse.de> wrote: > On Sun, Sep 11, 2016 at 05:29:23PM -0700, Kyle Huey wrote: >> rr (http://rr-project.org/), a userspace record-and-replay reverse- >> execution debugger, would like to trap and emulate the CPUID instruction. >> This would allow us to a) mask away certain hardware features that rr does >> not support (e.g. RDRAND) and b) enable trace portability across machines >> by providing constant results. >> >> Intel supports faulting on the CPUID instruction in newer processors. Bit >> 31 of MSR_PLATFORM_INFO advertises support for this feature. It is >> documented in detail in Section 2.3.2 of >> http://www.intel.com/content/dam/www/public/us/en/documents/application-notes/virtualization-technology-flexmigration-application-note.pdf. >> >> I would like to thank Trevor Saunders <tbsau...@tbsaunde.org> for drafting >> an earlier version of this patch. >> >> Signed-off-by Kyle Huey <kh...@kylehuey.com> >> --- >> arch/x86/include/asm/msr-index.h | 1 + >> arch/x86/include/asm/processor.h | 7 >> arch/x86/include/asm/thread_info.h | 4 +- >> arch/x86/kernel/process.c | 79 >> ++ >> include/uapi/linux/prctl.h | 6 +++ >> kernel/sys.c | 12 ++ >> 6 files changed, 108 insertions(+), 1 deletion(-) > > ... > >> diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c >> index 62c0b0e..a189516 100644 >> --- a/arch/x86/kernel/process.c >> +++ b/arch/x86/kernel/process.c >> @@ -191,6 +191,76 @@ int set_tsc_mode(unsigned int val) >> return 0; >> } >> >> +static void hard_disable_CPUID(void) > > Why hard_disable? I don't see any soft_disable. Copied from PR_SET_TSC. Would you prefer something like disable_cpuid/disable_cpuid_and_set_flag for hard_disable_CPUID/disable_CPUID? > Also, I can't say that I like all that screaming "CPUID" :-) > > disable_cpuid() looks just fine to me too. Ok. 
>> +{ >> + msr_set_bit(MSR_MISC_FEATURES_ENABLES, 0); >> +} >> + >> +static void disable_CPUID(void) >> +{ >> + preempt_disable(); >> + if (!test_and_set_thread_flag(TIF_NOCPUID)) >> + /* >> + * Must flip the CPU state synchronously with >> + * TIF_NOCPUID in the current running context. >> + */ >> + hard_disable_CPUID(); >> + preempt_enable(); >> +} >> + >> +static void hard_enable_CPUID(void) >> +{ >> + msr_clear_bit(MSR_MISC_FEATURES_ENABLES, 0); >> +} >> + >> +static void enable_CPUID(void) >> +{ >> + preempt_disable(); >> + if (test_and_clear_thread_flag(TIF_NOCPUID)) >> + /* >> + * Must flip the CPU state synchronously with >> + * TIF_NOCPUID in the current running context. >> + */ >> + hard_enable_CPUID(); >> + preempt_enable(); >> +} >> + >> +static int supports_CPUID_faulting(void) >> +{ >> + unsigned int lo, hi; >> + >> + rdmsr(MSR_PLATFORM_INFO, lo, hi); > > rdmsr_safe() Ok. >> + if ((lo & (1 << 31))) >> + return 1; >> + else >> + return 0; >> +} >> >> +int get_cpuid_mode(unsigned long adr) >> +{ >> + unsigned int val; >> + >> + if (test_thread_flag(TIF_NOCPUID)) >> + val = PR_CPUID_SIGSEGV; >> + else >> + val = PR_CPUID_ENABLE; >> + >> + return put_user(val, (unsigned int __user *)adr); >> +} >> + >> +int set_cpuid_mode(unsigned int val) >> +{ >> + // Only disable/enable_CPUID() if it is supported on this hardware. > > Use /* ... */ for comments in the kernel. Ok. >> + if (val == PR_CPUID_SIGSEGV && supports_CPUID_faulting()) >> + disable_CPUID(); >> + else if (val == PR_CPUID_ENABLE && supports_CPUID_faulting()) >> + enable_CPUID(); >> + else >> + return -EINVAL; >> + >> + return 0; >> +} >> + >> void __switch_to_xtra(struct task_struct *prev_p, struct task_struct >> *next_p, >> struct tss_struct *tss) >> { >> @@ -210,6 +280,15 @@ void __switch_to_xtra(struct task_struct *prev_p, >> struct task_struct *next_p, >> update_debugctlmsr(debugctl); >>
Re: [PATCH] prctl,x86: Add PR_[GET|SET]_CPUID for controlling the CPUID instruction.
Thanks for the review! On Mon, Sep 12, 2016 at 2:07 AM, Borislav Petkov wrote: > On Sun, Sep 11, 2016 at 05:29:23PM -0700, Kyle Huey wrote: >> rr (http://rr-project.org/), a userspace record-and-replay reverse- >> execution debugger, would like to trap and emulate the CPUID instruction. >> This would allow us to a) mask away certain hardware features that rr does >> not support (e.g. RDRAND) and b) enable trace portability across machines >> by providing constant results. >> >> Intel supports faulting on the CPUID instruction in newer processors. Bit >> 31 of MSR_PLATFORM_INFO advertises support for this feature. It is >> documented in detail in Section 2.3.2 of >> http://www.intel.com/content/dam/www/public/us/en/documents/application-notes/virtualization-technology-flexmigration-application-note.pdf. >> >> I would like to thank Trevor Saunders for drafting >> an earlier version of this patch. >> >> Signed-off-by Kyle Huey >> --- >> arch/x86/include/asm/msr-index.h | 1 + >> arch/x86/include/asm/processor.h | 7 >> arch/x86/include/asm/thread_info.h | 4 +- >> arch/x86/kernel/process.c | 79 >> ++ >> include/uapi/linux/prctl.h | 6 +++ >> kernel/sys.c | 12 ++ >> 6 files changed, 108 insertions(+), 1 deletion(-) > > ... > >> diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c >> index 62c0b0e..a189516 100644 >> --- a/arch/x86/kernel/process.c >> +++ b/arch/x86/kernel/process.c >> @@ -191,6 +191,76 @@ int set_tsc_mode(unsigned int val) >> return 0; >> } >> >> +static void hard_disable_CPUID(void) > > Why hard_disable? I don't see any soft_disable. Copied from PR_SET_TSC. Would you prefer something like disable_cpuid/disable_cpuid_and_set_flag for hard_disable_CPUID/disable_CPUID? > Also, I can't say that I like all that screaming "CPUID" :-) > > disable_cpuid() looks just fine to me too. Ok. 
>> +{ >> + msr_set_bit(MSR_MISC_FEATURES_ENABLES, 0); >> +} >> + >> +static void disable_CPUID(void) >> +{ >> + preempt_disable(); >> + if (!test_and_set_thread_flag(TIF_NOCPUID)) >> + /* >> + * Must flip the CPU state synchronously with >> + * TIF_NOCPUID in the current running context. >> + */ >> + hard_disable_CPUID(); >> + preempt_enable(); >> +} >> + >> +static void hard_enable_CPUID(void) >> +{ >> + msr_clear_bit(MSR_MISC_FEATURES_ENABLES, 0); >> +} >> + >> +static void enable_CPUID(void) >> +{ >> + preempt_disable(); >> + if (test_and_clear_thread_flag(TIF_NOCPUID)) >> + /* >> + * Must flip the CPU state synchronously with >> + * TIF_NOCPUID in the current running context. >> + */ >> + hard_enable_CPUID(); >> + preempt_enable(); >> +} >> + >> +static int supports_CPUID_faulting(void) >> +{ >> + unsigned int lo, hi; >> + >> + rdmsr(MSR_PLATFORM_INFO, lo, hi); > > rdmsr_safe() Ok. >> + if ((lo & (1 << 31))) >> + return 1; >> + else >> + return 0; >> +} >> >> +int get_cpuid_mode(unsigned long adr) >> +{ >> + unsigned int val; >> + >> + if (test_thread_flag(TIF_NOCPUID)) >> + val = PR_CPUID_SIGSEGV; >> + else >> + val = PR_CPUID_ENABLE; >> + >> + return put_user(val, (unsigned int __user *)adr); >> +} >> + >> +int set_cpuid_mode(unsigned int val) >> +{ >> + // Only disable/enable_CPUID() if it is supported on this hardware. > > Use /* ... */ for comments in the kernel. Ok. >> + if (val == PR_CPUID_SIGSEGV && supports_CPUID_faulting()) >> + disable_CPUID(); >> + else if (val == PR_CPUID_ENABLE && supports_CPUID_faulting()) >> + enable_CPUID(); >> + else >> + return -EINVAL; >> + >> + return 0; >> +} >> + >> void __switch_to_xtra(struct task_struct *prev_p, struct task_struct >> *next_p, >> struct tss_struct *tss) >> { >> @@ -210,6 +280,15 @@ void __switch_to_xtra(struct task_struct *prev_p, >> struct task_struct *next_p, >> update_debugctlmsr(debugctl); >> } >> >> + if (test_tsk_thread_flag(prev_p, TIF_NOCPUID) ^ &g
[PATCH] prctl,x86: Add PR_[GET|SET]_CPUID for controlling the CPUID instruction.
rr (http://rr-project.org/), a userspace record-and-replay reverse- execution debugger, would like to trap and emulate the CPUID instruction. This would allow us to a) mask away certain hardware features that rr does not support (e.g. RDRAND) and b) enable trace portability across machines by providing constant results. Intel supports faulting on the CPUID instruction in newer processors. Bit 31 of MSR_PLATFORM_INFO advertises support for this feature. It is documented in detail in Section 2.3.2 of http://www.intel.com/content/dam/www/public/us/en/documents/application-notes/virtualization-technology-flexmigration-application-note.pdf. I would like to thank Trevor Saunders <tbsau...@tbsaunde.org> for drafting an earlier version of this patch. Signed-off-by Kyle Huey <kh...@kylehuey.com> --- arch/x86/include/asm/msr-index.h | 1 + arch/x86/include/asm/processor.h | 7 arch/x86/include/asm/thread_info.h | 4 +- arch/x86/kernel/process.c | 79 ++ include/uapi/linux/prctl.h | 6 +++ kernel/sys.c | 12 ++ 6 files changed, 108 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 56f4c66..28b0736 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -52,6 +52,7 @@ #define MSR_MTRRcap0x00fe #define MSR_IA32_BBL_CR_CTL0x0119 #define MSR_IA32_BBL_CR_CTL3 0x011e +#define MSR_MISC_FEATURES_ENABLES 0x0140 #define MSR_IA32_SYSENTER_CS 0x0174 #define MSR_IA32_SYSENTER_ESP 0x0175 diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 63def95..661c4c1 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -805,6 +805,13 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_ip, extern int get_tsc_mode(unsigned long adr); extern int set_tsc_mode(unsigned int val); +/* Get/set a process' ability to use the CPUID instruction */ +#define GET_CPUID_CTL(adr) get_cpuid_mode((adr)) +#define SET_CPUID_CTL(val) 
set_cpuid_mode((val)) + +extern int get_cpuid_mode(unsigned long adr); +extern int set_cpuid_mode(unsigned int val); + /* Register/unregister a process' MPX related resource */ #define MPX_ENABLE_MANAGEMENT()mpx_enable_management() #define MPX_DISABLE_MANAGEMENT() mpx_disable_management() diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 8b7c8d8..ec93976 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -93,6 +93,7 @@ struct thread_info { #define TIF_SECCOMP8 /* secure computing */ #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ #define TIF_UPROBE 12 /* breakpointed or singlestepping */ +#define TIF_NOCPUID15 /* CPUID is not accessible in userland */ #define TIF_NOTSC 16 /* TSC is not accessible in userland */ #define TIF_IA32 17 /* IA32 compatibility process */ #define TIF_FORK 18 /* ret_from_fork */ @@ -117,6 +118,7 @@ struct thread_info { #define _TIF_SECCOMP (1 << TIF_SECCOMP) #define _TIF_USER_RETURN_NOTIFY(1 << TIF_USER_RETURN_NOTIFY) #define _TIF_UPROBE(1 << TIF_UPROBE) +#define _TIF_NOCPUID (1 << TIF_NOCPUID) #define _TIF_NOTSC (1 << TIF_NOTSC) #define _TIF_IA32 (1 << TIF_IA32) #define _TIF_FORK (1 << TIF_FORK) @@ -146,7 +148,7 @@ struct thread_info { /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW \ - (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP) + (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP) #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 62c0b0e..a189516 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -191,6 +191,76 @@ int set_tsc_mode(unsigned int val) return 0; } +static void hard_disable_CPUID(void) +{ + msr_set_bit(MSR_MISC_FEATURES_ENABLES, 0); +} + +static void disable_CPUID(void) +{ + preempt_disable(); + if (!test_and_set_thread_flag(TIF_NOCPUID)) + /* 
+* Must flip the CPU state synchronously with +* TIF_NOCPUID in the current running context. +*/ + hard_disable_CPUID(); + preempt_enable(); +} + +static void hard_enable_CPUID(void) +{ + msr_clear_bit(MSR_MISC_FEATURES_ENABLES, 0); +} + +static void enable_CPUID(void) +{ + preempt_disable
[PATCH] prctl,x86: Add PR_[GET|SET]_CPUID for controlling the CPUID instruction.
rr (http://rr-project.org/), a userspace record-and-replay reverse- execution debugger, would like to trap and emulate the CPUID instruction. This would allow us to a) mask away certain hardware features that rr does not support (e.g. RDRAND) and b) enable trace portability across machines by providing constant results. Intel supports faulting on the CPUID instruction in newer processors. Bit 31 of MSR_PLATFORM_INFO advertises support for this feature. It is documented in detail in Section 2.3.2 of http://www.intel.com/content/dam/www/public/us/en/documents/application-notes/virtualization-technology-flexmigration-application-note.pdf. I would like to thank Trevor Saunders for drafting an earlier version of this patch. Signed-off-by Kyle Huey --- arch/x86/include/asm/msr-index.h | 1 + arch/x86/include/asm/processor.h | 7 arch/x86/include/asm/thread_info.h | 4 +- arch/x86/kernel/process.c | 79 ++ include/uapi/linux/prctl.h | 6 +++ kernel/sys.c | 12 ++ 6 files changed, 108 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 56f4c66..28b0736 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -52,6 +52,7 @@ #define MSR_MTRRcap0x00fe #define MSR_IA32_BBL_CR_CTL0x0119 #define MSR_IA32_BBL_CR_CTL3 0x011e +#define MSR_MISC_FEATURES_ENABLES 0x0140 #define MSR_IA32_SYSENTER_CS 0x0174 #define MSR_IA32_SYSENTER_ESP 0x0175 diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 63def95..661c4c1 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -805,6 +805,13 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_ip, extern int get_tsc_mode(unsigned long adr); extern int set_tsc_mode(unsigned int val); +/* Get/set a process' ability to use the CPUID instruction */ +#define GET_CPUID_CTL(adr) get_cpuid_mode((adr)) +#define SET_CPUID_CTL(val) set_cpuid_mode((val)) + +extern int get_cpuid_mode(unsigned 
long adr); +extern int set_cpuid_mode(unsigned int val); + /* Register/unregister a process' MPX related resource */ #define MPX_ENABLE_MANAGEMENT()mpx_enable_management() #define MPX_DISABLE_MANAGEMENT() mpx_disable_management() diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 8b7c8d8..ec93976 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -93,6 +93,7 @@ struct thread_info { #define TIF_SECCOMP8 /* secure computing */ #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ #define TIF_UPROBE 12 /* breakpointed or singlestepping */ +#define TIF_NOCPUID15 /* CPUID is not accessible in userland */ #define TIF_NOTSC 16 /* TSC is not accessible in userland */ #define TIF_IA32 17 /* IA32 compatibility process */ #define TIF_FORK 18 /* ret_from_fork */ @@ -117,6 +118,7 @@ struct thread_info { #define _TIF_SECCOMP (1 << TIF_SECCOMP) #define _TIF_USER_RETURN_NOTIFY(1 << TIF_USER_RETURN_NOTIFY) #define _TIF_UPROBE(1 << TIF_UPROBE) +#define _TIF_NOCPUID (1 << TIF_NOCPUID) #define _TIF_NOTSC (1 << TIF_NOTSC) #define _TIF_IA32 (1 << TIF_IA32) #define _TIF_FORK (1 << TIF_FORK) @@ -146,7 +148,7 @@ struct thread_info { /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW \ - (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP) + (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP) #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 62c0b0e..a189516 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -191,6 +191,76 @@ int set_tsc_mode(unsigned int val) return 0; } +static void hard_disable_CPUID(void) +{ + msr_set_bit(MSR_MISC_FEATURES_ENABLES, 0); +} + +static void disable_CPUID(void) +{ + preempt_disable(); + if (!test_and_set_thread_flag(TIF_NOCPUID)) + /* +* Must flip the CPU state synchronously with +* 
TIF_NOCPUID in the current running context. +*/ + hard_disable_CPUID(); + preempt_enable(); +} + +static void hard_enable_CPUID(void) +{ + msr_clear_bit(MSR_MISC_FEATURES_ENABLES, 0); +} + +static void enable_CPUID(void) +{ + preempt_disable(); + if (test_and_clear_thread_flag(TIF_NOCPUID)) + /* +
Re: [PATCH] seccomp: Fix tracer exit notifications during fatal signals
On Thu, Aug 11, 2016 at 11:18 AM, Kees Cook wrote: > On Thu, Aug 11, 2016 at 8:12 AM, Oleg Nesterov wrote: >> On 08/10, Kees Cook wrote: >>> >>> This fixes a ptrace vs fatal pending signals bug as manifested in seccomp >>> now that seccomp was reordered to happen after ptrace. The short version is >>> that seccomp should not attempt to call do_exit() while fatal signals are >>> pending under a tracer. This was needlessly paranoid. Instead, the syscall >>> can just be skipped and normal signal handling, tracer notification, and >>> process death can happen. >> >> ACK. >> >> I think this change is fine in any case, but... >> >>> The bug happens because when __seccomp_filter() detects >>> fatal_signal_pending(), it calls do_exit() without dequeuing the fatal >>> signal. When do_exit() sends the PTRACE_EVENT_EXIT >> >> I _never_ understood what PTRACE_EVENT_EXIT should actually do. I mean, >> when it should actually stop. This was never defined. > > Yeah, agreed. I spent some time reading through what should happen to > __TASK_TRACED during exit and my head spun. :) > >>> notification and >>> that task is descheduled, __schedule() notices that there is a fatal >>> signal pending and changes its state from TASK_TRACED to TASK_RUNNING. >> >> And this can happen anyway, with or without this change, with or without >> seccomp. Because another fatal signal can be pending. So PTRACE_EVENT_EXIT >> actually depends on /dev/random. >> >> Perhaps we should finally define what it should do. Say, it should only >> stop if SIGKILL was sent "implicitely" by exit/exec. But as for exec, >> there are more (off-topic) complications, not sure we actually want this... >> >> Nevermind, the main problem is that _any_ change in this area can break >> something. This code is sooo old. >> >> But let me repeat, I think this change is fine anyway. >> >> Acked-by: Oleg Nesterov > > Awesome, thanks! Hi folks, Can't help but notice this didn't make it into rc3. 
Not sure if it's bubbling up somewhere I can't see, but we'd really like this to get into 4.8 so we don't have to work around the regression. Thanks! - Kyle
Re: [PATCH] seccomp: Fix tracer exit notifications during fatal signals
On Thu, Aug 11, 2016 at 11:18 AM, Kees Cook wrote: > On Thu, Aug 11, 2016 at 8:12 AM, Oleg Nesterov wrote: >> On 08/10, Kees Cook wrote: >>> >>> This fixes a ptrace vs fatal pending signals bug as manifested in seccomp >>> now that seccomp was reordered to happen after ptrace. The short version is >>> that seccomp should not attempt to call do_exit() while fatal signals are >>> pending under a tracer. This was needlessly paranoid. Instead, the syscall >>> can just be skipped and normal signal handling, tracer notification, and >>> process death can happen. >> >> ACK. >> >> I think this change is fine in any case, but... >> >>> The bug happens because when __seccomp_filter() detects >>> fatal_signal_pending(), it calls do_exit() without dequeuing the fatal >>> signal. When do_exit() sends the PTRACE_EVENT_EXIT >> >> I _never_ understood what PTRACE_EVENT_EXIT should actually do. I mean, >> when it should actually stop. This was never defined. > > Yeah, agreed. I spent some time reading through what should happen to > __TASK_TRACED during exit and my head spun. :) > >>> notification and >>> that task is descheduled, __schedule() notices that there is a fatal >>> signal pending and changes its state from TASK_TRACED to TASK_RUNNING. >> >> And this can happen anyway, with or without this change, with or without >> seccomp. Because another fatal signal can be pending. So PTRACE_EVENT_EXIT >> actually depends on /dev/random. >> >> Perhaps we should finally define what it should do. Say, it should only >> stop if SIGKILL was sent "implicitely" by exit/exec. But as for exec, >> there are more (off-topic) complications, not sure we actually want this... >> >> Nevermind, the main problem is that _any_ change in this area can break >> something. This code is sooo old. >> >> But let me repeat, I think this change is fine anyway. >> >> Acked-by: Oleg Nesterov > > Awesome, thanks! Hi folks, Can't help but notice this didn't make it into rc3. 
Not sure if it's bubbling up somewhere I can't see, but we'd really like this to get into 4.8 so we don't have to work around the regression. Thanks! - Kyle
Re: [PATCH] seccomp: Fix tracer exit notifications during fatal signals
On Wed, Aug 10, 2016 at 4:37 PM, Kees Cook <keesc...@chromium.org> wrote: > This fixes a ptrace vs fatal pending signals bug as manifested in seccomp > now that ptrace was reordered to happen after ptrace. The short version is > that seccomp should not attempt to call do_exit() while fatal signals are > pending under a tracer. This was needlessly paranoid. Instead, the syscall > can just be skipped and normal signal handling, tracer notification, and > process death can happen. > > Slightly edited original bug report: > > If a tracee task is in a PTRACE_EVENT_SECCOMP trap, or has been resumed > after such a trap but not yet been scheduled, and another task in the > thread-group calls exit_group(), then the tracee task exits without the > ptracer receiving a PTRACE_EVENT_EXIT notification. Test case here: > https://gist.github.com/khuey/3c43ac247c72cef8c956ca73281c9be7 > > The bug happens because when __seccomp_filter() detects > fatal_signal_pending(), it calls do_exit() without dequeuing the fatal > signal. When do_exit() sends the PTRACE_EVENT_EXIT notification and > that task is descheduled, __schedule() notices that there is a fatal > signal pending and changes its state from TASK_TRACED to TASK_RUNNING. > That prevents the ptracer's waitpid() from returning the ptrace event. > A more detailed analysis is here: > https://github.com/mozilla/rr/issues/1762#issuecomment-237396255. 
> > Reported-by: Robert O'Callahan <rob...@ocallahan.org> > Reported-by: Kyle Huey <kh...@kylehuey.com> > Fixes: 93e35efb8de4 ("x86/ptrace: run seccomp after ptrace") > Signed-off-by: Kees Cook <keesc...@chromium.org> > --- > kernel/seccomp.c | 12 > 1 file changed, 8 insertions(+), 4 deletions(-) > > diff --git a/kernel/seccomp.c b/kernel/seccomp.c > index ef6c6c3f9d8a..0db7c8a2afe2 100644 > --- a/kernel/seccomp.c > +++ b/kernel/seccomp.c > @@ -605,12 +605,16 @@ static int __seccomp_filter(int this_syscall, const > struct seccomp_data *sd, > ptrace_event(PTRACE_EVENT_SECCOMP, data); > /* > * The delivery of a fatal signal during event > -* notification may silently skip tracer notification. > -* Terminating the task now avoids executing a system > -* call that may not be intended. > +* notification may silently skip tracer notification, > +* which could leave us with a potentially unmodified > +* syscall that the tracer would have liked to have > +* changed. Since the process is about to die, we just > +* force the syscall to be skipped and let the signal > +* kill the process and correctly handle any tracer exit > +* notifications. > */ > if (fatal_signal_pending(current)) > - do_exit(SIGSYS); > + goto skip; > /* Check if the tracer forced the syscall to be skipped. */ > this_syscall = syscall_get_nr(current, task_pt_regs(current)); > if (this_syscall < 0) > -- > 2.7.4 > > > -- > Kees Cook > Nexus Security This patch also fixes rr as well. - Kyle
Re: [PATCH] seccomp: Fix tracer exit notifications during fatal signals
On Wed, Aug 10, 2016 at 4:37 PM, Kees Cook wrote: > This fixes a ptrace vs fatal pending signals bug as manifested in seccomp > now that ptrace was reordered to happen after ptrace. The short version is > that seccomp should not attempt to call do_exit() while fatal signals are > pending under a tracer. This was needlessly paranoid. Instead, the syscall > can just be skipped and normal signal handling, tracer notification, and > process death can happen. > > Slightly edited original bug report: > > If a tracee task is in a PTRACE_EVENT_SECCOMP trap, or has been resumed > after such a trap but not yet been scheduled, and another task in the > thread-group calls exit_group(), then the tracee task exits without the > ptracer receiving a PTRACE_EVENT_EXIT notification. Test case here: > https://gist.github.com/khuey/3c43ac247c72cef8c956ca73281c9be7 > > The bug happens because when __seccomp_filter() detects > fatal_signal_pending(), it calls do_exit() without dequeuing the fatal > signal. When do_exit() sends the PTRACE_EVENT_EXIT notification and > that task is descheduled, __schedule() notices that there is a fatal > signal pending and changes its state from TASK_TRACED to TASK_RUNNING. > That prevents the ptracer's waitpid() from returning the ptrace event. > A more detailed analysis is here: > https://github.com/mozilla/rr/issues/1762#issuecomment-237396255. 
> > Reported-by: Robert O'Callahan > Reported-by: Kyle Huey > Fixes: 93e35efb8de4 ("x86/ptrace: run seccomp after ptrace") > Signed-off-by: Kees Cook > --- > kernel/seccomp.c | 12 > 1 file changed, 8 insertions(+), 4 deletions(-) > > diff --git a/kernel/seccomp.c b/kernel/seccomp.c > index ef6c6c3f9d8a..0db7c8a2afe2 100644 > --- a/kernel/seccomp.c > +++ b/kernel/seccomp.c > @@ -605,12 +605,16 @@ static int __seccomp_filter(int this_syscall, const > struct seccomp_data *sd, > ptrace_event(PTRACE_EVENT_SECCOMP, data); > /* > * The delivery of a fatal signal during event > -* notification may silently skip tracer notification. > -* Terminating the task now avoids executing a system > -* call that may not be intended. > +* notification may silently skip tracer notification, > +* which could leave us with a potentially unmodified > +* syscall that the tracer would have liked to have > +* changed. Since the process is about to die, we just > +* force the syscall to be skipped and let the signal > +* kill the process and correctly handle any tracer exit > +* notifications. > */ > if (fatal_signal_pending(current)) > - do_exit(SIGSYS); > + goto skip; > /* Check if the tracer forced the syscall to be skipped. */ > this_syscall = syscall_get_nr(current, task_pt_regs(current)); > if (this_syscall < 0) > -- > 2.7.4 > > > -- > Kees Cook > Nexus Security This patch also fixes rr as well. - Kyle
[PATCH] seccomp: suppress fatal signals that will never be delivered before seccomp forces an exit because of said signals
This fixes rr. It doesn't quite fix the provided testcase, because the testcase fails to wait on the tracee after awakening from the nanosleep. Instead the testcase immediately does a PTRACE_CONT, discarding the PTRACE_EVENT_EXIT. The slightly modified testcase at https://gist.github.com/khuey/3c43ac247c72cef8c956ca73281c9be7 does pass. I don't see any obvious way to dequeue only the fatal signal, so instead I dequeue them all. Since none of these signals will ever be delivered it shouldn't affect the executing task. Suggested-by: Robert O'Callahan <rob...@ocallahan.org> Signed-off-by: Kyle Huey <kh...@kylehuey.com> --- kernel/seccomp.c | 14 +- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/kernel/seccomp.c b/kernel/seccomp.c index ef6c6c3..728074d 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -609,8 +609,20 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, * Terminating the task now avoids executing a system * call that may not be intended. */ - if (fatal_signal_pending(current)) + if (fatal_signal_pending(current)) { + /* +* Swallow the signals we will never deliver. +* If we do not do this, the PTRACE_EVENT_EXIT will +* be suppressed by those signals. +*/ + siginfo_t info; + + spin_lock_irq(&current->sighand->siglock); + while (dequeue_signal(current, &current->blocked, &info)); + spin_unlock_irq(&current->sighand->siglock); + do_exit(SIGSYS); + } /* Check if the tracer forced the syscall to be skipped. */ this_syscall = syscall_get_nr(current, task_pt_regs(current)); if (this_syscall < 0) -- 2.7.4
[PATCH] seccomp: suppress fatal signals that will never be delivered before seccomp forces an exit because of said signals
This fixes rr. It doesn't quite fix the provided testcase, because the testcase fails to wait on the tracee after awakening from the nanosleep. Instead the testcase immediately does a PTRACE_CONT, discarding the PTRACE_EVENT_EXIT. The slightly modified testcase at https://gist.github.com/khuey/3c43ac247c72cef8c956ca73281c9be7 does pass. I don't see any obvious way to dequeue only the fatal signal, so instead I dequeue them all. Since none of these signals will ever be delivered it shouldn't affect the executing task. Suggested-by: Robert O'Callahan Signed-off-by: Kyle Huey --- kernel/seccomp.c | 14 +- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/kernel/seccomp.c b/kernel/seccomp.c index ef6c6c3..728074d 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -609,8 +609,20 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, * Terminating the task now avoids executing a system * call that may not be intended. */ - if (fatal_signal_pending(current)) + if (fatal_signal_pending(current)) { + /* +* Swallow the signals we will never deliver. +* If we do not do this, the PTRACE_EVENT_EXIT will +* be suppressed by those signals. +*/ + siginfo_t info; + + spin_lock_irq(&current->sighand->siglock); + while (dequeue_signal(current, &current->blocked, &info)); + spin_unlock_irq(&current->sighand->siglock); + do_exit(SIGSYS); + } /* Check if the tracer forced the syscall to be skipped. */ this_syscall = syscall_get_nr(current, task_pt_regs(current)); if (this_syscall < 0) -- 2.7.4
Re: [RESEND PATCH v3] ARM: tegra124: pmu support
On Sat, Jul 18, 2015 at 6:54 AM, Kyle Huey wrote: > On Fri, Jul 17, 2015 at 4:59 PM, Thierry Reding > wrote: >> On Mon, Jul 13, 2015 at 10:35:45AM -0700, Kyle Huey wrote: >>> This patch modifies the device tree for tegra124 based devices to enable >>> the Cortex A15 PMU. The interrupt numbers are taken from NVIDIA TRM >>> DP-06905-001_v03p. This patch was tested on a Jetson TK1. >>> >>> Updated for proper ordering and to add interrupt-affinity values. >>> >>> Signed-off-by: Kyle Huey >>> --- >>> arch/arm/boot/dts/tegra124.dtsi | 17 + >>> 1 file changed, 13 insertions(+), 4 deletions(-) >> >> Is there any way to test this? What are the effects of adding this? > > Yes. This enables the ARM PMU driver for the Cortex A15, which allows > one to use hardware performance counters via the perf_event_open API. > For a simple test program, see > https://github.com/khuey/perf-counter-test/. Without this patch, the > perf_event_open syscall will fail. With this patch, the program will > print out the performance counter value for each iteration of the > loop. (IIRC on the A15 the branch counter was removed, so you may want > to replace 0xD with 0x8 which counts instructions executed if you want > to see a non-zero number there). You also will see a message about > the PMU in the kernel log at startup after applying this patch. > > I have also tested this extensively (including the interrupt features > of the PMU) on a more complex program. > >> Does it enable using perf for profiling? > > I have not tested it, but I believe you can use perf without this > patch if you do not use features that require hardware performance > counter support. This patch would enable those features. 
> >>> diff --git a/arch/arm/boot/dts/tegra124.dtsi >>> b/arch/arm/boot/dts/tegra124.dtsi >>> index 13cc7ca..de07d7e 100644 >>> --- a/arch/arm/boot/dts/tegra124.dtsi >>> +++ b/arch/arm/boot/dts/tegra124.dtsi >>> @@ -918,31 +918,40 @@ >>> #address-cells = <1>; >>> #size-cells = <0>; >>> >>> - cpu@0 { >>> + A15_0: cpu@0 { >>> device_type = "cpu"; >>> compatible = "arm,cortex-a15"; >>> reg = <0>; >>> }; >>> >>> - cpu@1 { >>> + A15_1: cpu@1 { >>> device_type = "cpu"; >>> compatible = "arm,cortex-a15"; >>> reg = <1>; >>> }; >>> >>> - cpu@2 { >>> + A15_2: cpu@2 { >>> device_type = "cpu"; >>> compatible = "arm,cortex-a15"; >>> reg = <2>; >>> }; >>> >>> - cpu@3 { >>> + A15_3: cpu@3 { >>> device_type = "cpu"; >>> compatible = "arm,cortex-a15"; >>> reg = <3>; >>> }; >>> }; >>> >>> + pmu { >>> + compatible = "arm,cortex-a15-pmu"; >>> + interrupts = , >>> + , >>> + , >>> + ; >>> + interrupt-affinity = <_0>, <_1>, <_2>, <_3>; >> >> These labels look somewhat artificial to me, perhaps we could do >> something like the following instead? >> >> interrupt-affinity = <&{/cpus/cpu@0}>, ...; >> >> That's slightly more obvious and avoids the need to "invent" labels for >> the CPUs. >> >> No need to respin, I can fix that up when applying if nobody objects to >> using the alternative notation. >> >> Thierry > > I have no objections. I was not aware that the device tree syntax > supported that. FWIW I cargo-culted my way to victory from > vexpress-v2p-ca9.dts here. > > - Kyle Anything else I can do to help move this along? - Kyle -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RESEND PATCH v3] ARM: tegra124: pmu support
On Sat, Jul 18, 2015 at 6:54 AM, Kyle Huey m...@kylehuey.com wrote: On Fri, Jul 17, 2015 at 4:59 PM, Thierry Reding thierry.red...@gmail.com wrote: On Mon, Jul 13, 2015 at 10:35:45AM -0700, Kyle Huey wrote: This patch modifies the device tree for tegra124 based devices to enable the Cortex A15 PMU. The interrupt numbers are taken from NVIDIA TRM DP-06905-001_v03p. This patch was tested on a Jetson TK1. Updated for proper ordering and to add interrupt-affinity values. Signed-off-by: Kyle Huey kh...@kylehuey.com --- arch/arm/boot/dts/tegra124.dtsi | 17 + 1 file changed, 13 insertions(+), 4 deletions(-) Is there any way to test this? What are the effects of adding this? Yes. This enables the ARM PMU driver for the Cortex A15, which allows one to use hardware performance counters via the perf_event_open API. For a simple test program, see https://github.com/khuey/perf-counter-test/. Without this patch, the perf_event_open syscall will fail. With this patch, the program will print out the performance counter value for each iteration of the loop. (IIRC on the A15 the branch counter was removed, so you may want to replace 0xD with 0x8 which counts instructions executed if you want to see a non-zero number there). You also will see a message about the PMU in the kernel log at startup after applying this patch. I have also tested this extensively (including the interrupt features of the PMU) on a more complex program. Does it enable using perf for profiling? I have not tested it, but I believe you can use perf without this patch if you do not use features that require hardware performance counter support. This patch would enable those features. 
diff --git a/arch/arm/boot/dts/tegra124.dtsi b/arch/arm/boot/dts/tegra124.dtsi index 13cc7ca..de07d7e 100644 --- a/arch/arm/boot/dts/tegra124.dtsi +++ b/arch/arm/boot/dts/tegra124.dtsi @@ -918,31 +918,40 @@ #address-cells = 1; #size-cells = 0; - cpu@0 { + A15_0: cpu@0 { device_type = cpu; compatible = arm,cortex-a15; reg = 0; }; - cpu@1 { + A15_1: cpu@1 { device_type = cpu; compatible = arm,cortex-a15; reg = 1; }; - cpu@2 { + A15_2: cpu@2 { device_type = cpu; compatible = arm,cortex-a15; reg = 2; }; - cpu@3 { + A15_3: cpu@3 { device_type = cpu; compatible = arm,cortex-a15; reg = 3; }; }; + pmu { + compatible = arm,cortex-a15-pmu; + interrupts = GIC_SPI 144 IRQ_TYPE_LEVEL_HIGH, + GIC_SPI 145 IRQ_TYPE_LEVEL_HIGH, + GIC_SPI 146 IRQ_TYPE_LEVEL_HIGH, + GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH; + interrupt-affinity = A15_0, A15_1, A15_2, A15_3; These labels look somewhat artificial to me, perhaps we could do something like the following instead? interrupt-affinity = {/cpus/cpu@0}, ...; That's slightly more obvious and avoids the need to invent labels for the CPUs. No need to respin, I can fix that up when applying if nobody objects to using the alternative notation. Thierry I have no objections. I was not aware that the device tree syntax supported that. FWIW I cargo-culted my way to victory from vexpress-v2p-ca9.dts here. - Kyle Anything else I can do to help move this along? - Kyle -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] ARM: ptrace: Implement PTRACE_SYSEMU
Implement PTRACE_SYSEMU support on ARM. Currently this ptrace call is supported only on x86. This copies the x86 semantics for invoking ptrace hooks (the syscall entry hook is invoked, the exit hook is not). This patch also defines PTRACE_SYSEMU_SINGLESTEP because kernel/ptrace.c expects it to be present if PTRACE_SYSEMU is present. Attempting to use PTRACE_SYSEMU_SINGLESTEP will fail at runtime on ARM with EIO since there is no single stepping on ARM. Signed-off-by: Kyle Huey --- arch/arm/include/asm/thread_info.h | 8 ++-- arch/arm/include/uapi/asm/ptrace.h | 32 +--- arch/arm/kernel/ptrace.c | 9 +++-- 3 files changed, 30 insertions(+), 19 deletions(-) diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h index bd32ede..0e3ee19 100644 --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h @@ -137,7 +137,8 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *, /* * thread information flags: * TIF_SYSCALL_TRACE - syscall trace active - * TIF_SYSCAL_AUDIT - syscall auditing active + * TIF_SYSCALL_AUDIT - syscall auditing active + * TIF_SYSCALL_EMU- syscall emulation active * TIF_SIGPENDING - signal pending * TIF_NEED_RESCHED - rescheduling necessary * TIF_NOTIFY_RESUME - callback before returning to user @@ -153,6 +154,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *, #define TIF_SYSCALL_TRACEPOINT 10 #define TIF_SECCOMP11 /* seccomp syscall filtering active */ #define TIF_NOHZ 12 /* in adaptive nohz mode */ +#define TIF_SYSCALL_EMU13 #define TIF_USING_IWMMXT 17 #define TIF_MEMDIE 18 /* is terminating due to OOM killer */ #define TIF_RESTORE_SIGMASK20 @@ -165,11 +167,13 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *, #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SYSCALL_TRACEPOINT(1 << TIF_SYSCALL_TRACEPOINT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) +#define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) #define _TIF_USING_IWMMXT (1 << TIF_USING_IWMMXT) /* 
Checks for any syscall work in entry-common.S */ #define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ - _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP) + _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \ + _TIF_SYSCALL_EMU) /* * Change these and you break ASM code in entry-common.S diff --git a/arch/arm/include/uapi/asm/ptrace.h b/arch/arm/include/uapi/asm/ptrace.h index 5af0ed1..2c5e4d7 100644 --- a/arch/arm/include/uapi/asm/ptrace.h +++ b/arch/arm/include/uapi/asm/ptrace.h @@ -12,25 +12,27 @@ #include -#define PTRACE_GETREGS 12 -#define PTRACE_SETREGS 13 -#define PTRACE_GETFPREGS 14 -#define PTRACE_SETFPREGS 15 +#define PTRACE_GETREGS 12 +#define PTRACE_SETREGS 13 +#define PTRACE_GETFPREGS 14 +#define PTRACE_SETFPREGS 15 /* PTRACE_ATTACH is 16 */ /* PTRACE_DETACH is 17 */ -#define PTRACE_GETWMMXREGS 18 -#define PTRACE_SETWMMXREGS 19 +#define PTRACE_GETWMMXREGS 18 +#define PTRACE_SETWMMXREGS 19 /* 20 is unused */ -#define PTRACE_OLDSETOPTIONS 21 -#define PTRACE_GET_THREAD_AREA 22 -#define PTRACE_SET_SYSCALL 23 +#define PTRACE_OLDSETOPTIONS 21 +#define PTRACE_GET_THREAD_AREA 22 +#define PTRACE_SET_SYSCALL 23 /* PTRACE_SYSCALL is 24 */ -#define PTRACE_GETCRUNCHREGS 25 -#define PTRACE_SETCRUNCHREGS 26 -#define PTRACE_GETVFPREGS 27 -#define PTRACE_SETVFPREGS 28 -#define PTRACE_GETHBPREGS 29 -#define PTRACE_SETHBPREGS 30 +#define PTRACE_GETCRUNCHREGS 25 +#define PTRACE_SETCRUNCHREGS 26 +#define PTRACE_GETVFPREGS 27 +#define PTRACE_SETVFPREGS 28 +#define PTRACE_GETHBPREGS 29 +#define PTRACE_SETHBPREGS 30 +#define PTRACE_SYSEMU 31 +#define PTRACE_SYSEMU_SINGLESTEP 32 /* * PSR bits diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index ef9119f..c84058c 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -930,6 +930,8 @@ static void tracehook_report_syscall(struct pt_regs *regs, asmlinkage int syscall_trace_enter(struct pt_regs *regs, int scno) { + int ret = 0; + current_thread_info()->syscall = scno; /* Do the secure computing check 
first; failures should be fast. */ @@ -941,7 +943,10 @@ asmlinkage int syscall_trace_enter(struct pt_regs *regs, int scno) secure_computing_strict(scno); #endif - if (test_thread_flag(TIF_SYSCALL_TRACE)) + if (test_thread_flag(TIF_SYSCALL_EMU)) + ret = -1; + + if (ret || t
[PATCH] ARM: ptrace: Implement PTRACE_SYSEMU
Implement PTRACE_SYSEMU support on ARM. Currently this ptrace call is supported only on x86. This copies the x86 semantics for invoking ptrace hooks (the syscall entry hook is invoked, the exit hook is not). This patch also defines PTRACE_SYSEMU_SINGLESTEP because kernel/ptrace.c expects it to be present if PTRACE_SYSEMU is present. Attempting to use PTRACE_SYSEMU_SINGLESTEP will fail at runtime on ARM with EIO since there is no single stepping on ARM. Signed-off-by: Kyle Huey kh...@kylehuey.com --- arch/arm/include/asm/thread_info.h | 8 ++-- arch/arm/include/uapi/asm/ptrace.h | 32 +--- arch/arm/kernel/ptrace.c | 9 +++-- 3 files changed, 30 insertions(+), 19 deletions(-) diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h index bd32ede..0e3ee19 100644 --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h @@ -137,7 +137,8 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *, /* * thread information flags: * TIF_SYSCALL_TRACE - syscall trace active - * TIF_SYSCAL_AUDIT - syscall auditing active + * TIF_SYSCALL_AUDIT - syscall auditing active + * TIF_SYSCALL_EMU- syscall emulation active * TIF_SIGPENDING - signal pending * TIF_NEED_RESCHED - rescheduling necessary * TIF_NOTIFY_RESUME - callback before returning to user @@ -153,6 +154,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *, #define TIF_SYSCALL_TRACEPOINT 10 #define TIF_SECCOMP11 /* seccomp syscall filtering active */ #define TIF_NOHZ 12 /* in adaptive nohz mode */ +#define TIF_SYSCALL_EMU13 #define TIF_USING_IWMMXT 17 #define TIF_MEMDIE 18 /* is terminating due to OOM killer */ #define TIF_RESTORE_SIGMASK20 @@ -165,11 +167,13 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *, #define _TIF_SYSCALL_AUDIT (1 TIF_SYSCALL_AUDIT) #define _TIF_SYSCALL_TRACEPOINT(1 TIF_SYSCALL_TRACEPOINT) #define _TIF_SECCOMP (1 TIF_SECCOMP) +#define _TIF_SYSCALL_EMU (1 TIF_SYSCALL_EMU) #define _TIF_USING_IWMMXT (1 TIF_USING_IWMMXT) 
/* Checks for any syscall work in entry-common.S */ #define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ - _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP) + _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \ + _TIF_SYSCALL_EMU) /* * Change these and you break ASM code in entry-common.S diff --git a/arch/arm/include/uapi/asm/ptrace.h b/arch/arm/include/uapi/asm/ptrace.h index 5af0ed1..2c5e4d7 100644 --- a/arch/arm/include/uapi/asm/ptrace.h +++ b/arch/arm/include/uapi/asm/ptrace.h @@ -12,25 +12,27 @@ #include asm/hwcap.h -#define PTRACE_GETREGS 12 -#define PTRACE_SETREGS 13 -#define PTRACE_GETFPREGS 14 -#define PTRACE_SETFPREGS 15 +#define PTRACE_GETREGS 12 +#define PTRACE_SETREGS 13 +#define PTRACE_GETFPREGS 14 +#define PTRACE_SETFPREGS 15 /* PTRACE_ATTACH is 16 */ /* PTRACE_DETACH is 17 */ -#define PTRACE_GETWMMXREGS 18 -#define PTRACE_SETWMMXREGS 19 +#define PTRACE_GETWMMXREGS 18 +#define PTRACE_SETWMMXREGS 19 /* 20 is unused */ -#define PTRACE_OLDSETOPTIONS 21 -#define PTRACE_GET_THREAD_AREA 22 -#define PTRACE_SET_SYSCALL 23 +#define PTRACE_OLDSETOPTIONS 21 +#define PTRACE_GET_THREAD_AREA 22 +#define PTRACE_SET_SYSCALL 23 /* PTRACE_SYSCALL is 24 */ -#define PTRACE_GETCRUNCHREGS 25 -#define PTRACE_SETCRUNCHREGS 26 -#define PTRACE_GETVFPREGS 27 -#define PTRACE_SETVFPREGS 28 -#define PTRACE_GETHBPREGS 29 -#define PTRACE_SETHBPREGS 30 +#define PTRACE_GETCRUNCHREGS 25 +#define PTRACE_SETCRUNCHREGS 26 +#define PTRACE_GETVFPREGS 27 +#define PTRACE_SETVFPREGS 28 +#define PTRACE_GETHBPREGS 29 +#define PTRACE_SETHBPREGS 30 +#define PTRACE_SYSEMU 31 +#define PTRACE_SYSEMU_SINGLESTEP 32 /* * PSR bits diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index ef9119f..c84058c 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -930,6 +930,8 @@ static void tracehook_report_syscall(struct pt_regs *regs, asmlinkage int syscall_trace_enter(struct pt_regs *regs, int scno) { + int ret = 0; + current_thread_info()-syscall = scno; /* Do the secure 
computing check first; failures should be fast. */ @@ -941,7 +943,10 @@ asmlinkage int syscall_trace_enter(struct pt_regs *regs, int scno) secure_computing_strict(scno); #endif - if (test_thread_flag(TIF_SYSCALL_TRACE)) + if (test_thread_flag(TIF_SYSCALL_EMU)) + ret = -1; + + if (ret || test_thread_flag(TIF_SYSCALL_TRACE
Re: [RESEND PATCH v3] ARM: tegra124: pmu support
On Fri, Jul 17, 2015 at 4:59 PM, Thierry Reding wrote: > On Mon, Jul 13, 2015 at 10:35:45AM -0700, Kyle Huey wrote: >> This patch modifies the device tree for tegra124 based devices to enable >> the Cortex A15 PMU. The interrupt numbers are taken from NVIDIA TRM >> DP-06905-001_v03p. This patch was tested on a Jetson TK1. >> >> Updated for proper ordering and to add interrupt-affinity values. >> >> Signed-off-by: Kyle Huey >> --- >> arch/arm/boot/dts/tegra124.dtsi | 17 + >> 1 file changed, 13 insertions(+), 4 deletions(-) > > Is there any way to test this? What are the effects of adding this? Yes. This enables the ARM PMU driver for the Cortex A15, which allows one to use hardware performance counters via the perf_event_open API. For a simple test program, see https://github.com/khuey/perf-counter-test/. Without this patch, the perf_event_open syscall will fail. With this patch, the program will print out the performance counter value for each iteration of the loop. (IIRC on the A15 the branch counter was removed, so you may want to replace 0xD with 0x8 which counts instructions executed if you want to see a non-zero number there). You also will see a message about the PMU in the kernel log at startup after applying this patch. I have also tested this extensively (including the interrupt features of the PMU) on a more complex program. > Does it enable using perf for profiling? I have not tested it, but I believe you can use perf without this patch if you do not use features that require hardware performance counter support. This patch would enable those features. 
>> diff --git a/arch/arm/boot/dts/tegra124.dtsi >> b/arch/arm/boot/dts/tegra124.dtsi >> index 13cc7ca..de07d7e 100644 >> --- a/arch/arm/boot/dts/tegra124.dtsi >> +++ b/arch/arm/boot/dts/tegra124.dtsi >> @@ -918,31 +918,40 @@ >> #address-cells = <1>; >> #size-cells = <0>; >> >> - cpu@0 { >> + A15_0: cpu@0 { >> device_type = "cpu"; >> compatible = "arm,cortex-a15"; >> reg = <0>; >> }; >> >> - cpu@1 { >> + A15_1: cpu@1 { >> device_type = "cpu"; >> compatible = "arm,cortex-a15"; >> reg = <1>; >> }; >> >> - cpu@2 { >> + A15_2: cpu@2 { >> device_type = "cpu"; >> compatible = "arm,cortex-a15"; >> reg = <2>; >> }; >> >> - cpu@3 { >> + A15_3: cpu@3 { >> device_type = "cpu"; >> compatible = "arm,cortex-a15"; >> reg = <3>; >> }; >> }; >> >> + pmu { >> + compatible = "arm,cortex-a15-pmu"; >> + interrupts = , >> + , >> + , >> + ; >> + interrupt-affinity = <_0>, <_1>, <_2>, <_3>; > > These labels look somewhat artificial to me, perhaps we could do > something like the following instead? > > interrupt-affinity = <&{/cpus/cpu@0}>, ...; > > That's slightly more obvious and avoids the need to "invent" labels for > the CPUs. > > No need to respin, I can fix that up when applying if nobody objects to > using the alternative notation. > > Thierry I have no objections. I was not aware that the device tree syntax supported that. FWIW I cargo-culted my way to victory from vexpress-v2p-ca9.dts here. - Kyle -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RESEND PATCH v3] ARM: tegra124: pmu support
On Fri, Jul 17, 2015 at 4:59 PM, Thierry Reding thierry.red...@gmail.com wrote: On Mon, Jul 13, 2015 at 10:35:45AM -0700, Kyle Huey wrote: This patch modifies the device tree for tegra124 based devices to enable the Cortex A15 PMU. The interrupt numbers are taken from NVIDIA TRM DP-06905-001_v03p. This patch was tested on a Jetson TK1. Updated for proper ordering and to add interrupt-affinity values. Signed-off-by: Kyle Huey kh...@kylehuey.com --- arch/arm/boot/dts/tegra124.dtsi | 17 + 1 file changed, 13 insertions(+), 4 deletions(-) Is there any way to test this? What are the effects of adding this? Yes. This enables the ARM PMU driver for the Cortex A15, which allows one to use hardware performance counters via the perf_event_open API. For a simple test program, see https://github.com/khuey/perf-counter-test/. Without this patch, the perf_event_open syscall will fail. With this patch, the program will print out the performance counter value for each iteration of the loop. (IIRC on the A15 the branch counter was removed, so you may want to replace 0xD with 0x8 which counts instructions executed if you want to see a non-zero number there). You also will see a message about the PMU in the kernel log at startup after applying this patch. I have also tested this extensively (including the interrupt features of the PMU) on a more complex program. Does it enable using perf for profiling? I have not tested it, but I believe you can use perf without this patch if you do not use features that require hardware performance counter support. This patch would enable those features. 
diff --git a/arch/arm/boot/dts/tegra124.dtsi b/arch/arm/boot/dts/tegra124.dtsi index 13cc7ca..de07d7e 100644 --- a/arch/arm/boot/dts/tegra124.dtsi +++ b/arch/arm/boot/dts/tegra124.dtsi @@ -918,31 +918,40 @@ #address-cells = 1; #size-cells = 0; - cpu@0 { + A15_0: cpu@0 { device_type = cpu; compatible = arm,cortex-a15; reg = 0; }; - cpu@1 { + A15_1: cpu@1 { device_type = cpu; compatible = arm,cortex-a15; reg = 1; }; - cpu@2 { + A15_2: cpu@2 { device_type = cpu; compatible = arm,cortex-a15; reg = 2; }; - cpu@3 { + A15_3: cpu@3 { device_type = cpu; compatible = arm,cortex-a15; reg = 3; }; }; + pmu { + compatible = arm,cortex-a15-pmu; + interrupts = GIC_SPI 144 IRQ_TYPE_LEVEL_HIGH, + GIC_SPI 145 IRQ_TYPE_LEVEL_HIGH, + GIC_SPI 146 IRQ_TYPE_LEVEL_HIGH, + GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH; + interrupt-affinity = A15_0, A15_1, A15_2, A15_3; These labels look somewhat artificial to me, perhaps we could do something like the following instead? interrupt-affinity = {/cpus/cpu@0}, ...; That's slightly more obvious and avoids the need to invent labels for the CPUs. No need to respin, I can fix that up when applying if nobody objects to using the alternative notation. Thierry I have no objections. I was not aware that the device tree syntax supported that. FWIW I cargo-culted my way to victory from vexpress-v2p-ca9.dts here. - Kyle -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[RESEND PATCH v3] ARM: tegra124: pmu support
This patch modifies the device tree for tegra124 based devices to enable the Cortex A15 PMU. The interrupt numbers are taken from NVIDIA TRM DP-06905-001_v03p. This patch was tested on a Jetson TK1. Updated for proper ordering and to add interrupt-affinity values. Signed-off-by: Kyle Huey <kh...@kylehuey.com> --- arch/arm/boot/dts/tegra124.dtsi | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/tegra124.dtsi b/arch/arm/boot/dts/tegra124.dtsi index 13cc7ca..de07d7e 100644 --- a/arch/arm/boot/dts/tegra124.dtsi +++ b/arch/arm/boot/dts/tegra124.dtsi @@ -918,31 +918,40 @@ #address-cells = <1>; #size-cells = <0>; - cpu@0 { + A15_0: cpu@0 { device_type = "cpu"; compatible = "arm,cortex-a15"; reg = <0>; }; - cpu@1 { + A15_1: cpu@1 { device_type = "cpu"; compatible = "arm,cortex-a15"; reg = <1>; }; - cpu@2 { + A15_2: cpu@2 { device_type = "cpu"; compatible = "arm,cortex-a15"; reg = <2>; }; - cpu@3 { + A15_3: cpu@3 { device_type = "cpu"; compatible = "arm,cortex-a15"; reg = <3>; }; }; + pmu { + compatible = "arm,cortex-a15-pmu"; + interrupts = <GIC_SPI 144 IRQ_TYPE_LEVEL_HIGH>, +<GIC_SPI 145 IRQ_TYPE_LEVEL_HIGH>, +<GIC_SPI 146 IRQ_TYPE_LEVEL_HIGH>, +<GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH>; + interrupt-affinity = <&A15_0>, <&A15_1>, <&A15_2>, <&A15_3>; + }; + thermal-zones { cpu { polling-delay-passive = <1000>; -- 1.9.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[RESEND PATCH v3] ARM: tegra124: pmu support
This patch modifies the device tree for tegra124 based devices to enable the Cortex A15 PMU. The interrupt numbers are taken from NVIDIA TRM DP-06905-001_v03p. This patch was tested on a Jetson TK1. Updated for proper ordering and to add interrupt-affinity values. Signed-off-by: Kyle Huey kh...@kylehuey.com --- arch/arm/boot/dts/tegra124.dtsi | 17 + 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/tegra124.dtsi b/arch/arm/boot/dts/tegra124.dtsi index 13cc7ca..de07d7e 100644 --- a/arch/arm/boot/dts/tegra124.dtsi +++ b/arch/arm/boot/dts/tegra124.dtsi @@ -918,31 +918,40 @@ #address-cells = 1; #size-cells = 0; - cpu@0 { + A15_0: cpu@0 { device_type = cpu; compatible = arm,cortex-a15; reg = 0; }; - cpu@1 { + A15_1: cpu@1 { device_type = cpu; compatible = arm,cortex-a15; reg = 1; }; - cpu@2 { + A15_2: cpu@2 { device_type = cpu; compatible = arm,cortex-a15; reg = 2; }; - cpu@3 { + A15_3: cpu@3 { device_type = cpu; compatible = arm,cortex-a15; reg = 3; }; }; + pmu { + compatible = arm,cortex-a15-pmu; + interrupts = GIC_SPI 144 IRQ_TYPE_LEVEL_HIGH, +GIC_SPI 145 IRQ_TYPE_LEVEL_HIGH, +GIC_SPI 146 IRQ_TYPE_LEVEL_HIGH, +GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH; + interrupt-affinity = A15_0, A15_1, A15_2, A15_3; + }; + thermal-zones { cpu { polling-delay-passive = 1000; -- 1.9.1 -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v3] ARM: tegra124: pmu support
This patch modifies the device tree for tegra124 based devices to enable the Cortex A15 PMU. The interrupt numbers are taken from NVIDIA TRM DP-06905-001_v03p. This patch was tested on a Jetson TK1. Updated for proper ordering and to add interrupt-affinity values. Signed-off-by: Kyle Huey <kh...@kylehuey.com> --- arch/arm/boot/dts/tegra124.dtsi | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/tegra124.dtsi b/arch/arm/boot/dts/tegra124.dtsi index 13cc7ca..de07d7e 100644 --- a/arch/arm/boot/dts/tegra124.dtsi +++ b/arch/arm/boot/dts/tegra124.dtsi @@ -913,41 +913,50 @@ nvidia,xcvr-hsslew = <12>; status = "disabled"; }; cpus { #address-cells = <1>; #size-cells = <0>; - cpu@0 { + A15_0: cpu@0 { device_type = "cpu"; compatible = "arm,cortex-a15"; reg = <0>; }; - cpu@1 { + A15_1: cpu@1 { device_type = "cpu"; compatible = "arm,cortex-a15"; reg = <1>; }; - cpu@2 { + A15_2: cpu@2 { device_type = "cpu"; compatible = "arm,cortex-a15"; reg = <2>; }; - cpu@3 { + A15_3: cpu@3 { device_type = "cpu"; compatible = "arm,cortex-a15"; reg = <3>; }; }; + pmu { + compatible = "arm,cortex-a15-pmu"; + interrupts = <GIC_SPI 144 IRQ_TYPE_LEVEL_HIGH>, +<GIC_SPI 145 IRQ_TYPE_LEVEL_HIGH>, +<GIC_SPI 146 IRQ_TYPE_LEVEL_HIGH>, +<GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH>; + interrupt-affinity = <&A15_0>, <&A15_1>, <&A15_2>, <&A15_3>; + }; + thermal-zones { cpu { polling-delay-passive = <1000>; polling-delay = <1000>; thermal-sensors = <&soctherm TEGRA124_SOCTHERM_SENSOR_CPU>; }; -- 1.9.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v3] ARM: tegra124: pmu support
This patch modifies the device tree for tegra124 based devices to enable the Cortex A15 PMU. The interrupt numbers are taken from NVIDIA TRM DP-06905-001_v03p. This patch was tested on a Jetson TK1. Updated for proper ordering and to add interrupt-affinity values. Signed-off-by: Kyle Huey kh...@kylehuey.com --- arch/arm/boot/dts/tegra124.dtsi | 17 + 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/tegra124.dtsi b/arch/arm/boot/dts/tegra124.dtsi index 13cc7ca..de07d7e 100644 --- a/arch/arm/boot/dts/tegra124.dtsi +++ b/arch/arm/boot/dts/tegra124.dtsi @@ -913,41 +913,50 @@ nvidia,xcvr-hsslew = 12; status = disabled; }; cpus { #address-cells = 1; #size-cells = 0; - cpu@0 { + A15_0: cpu@0 { device_type = cpu; compatible = arm,cortex-a15; reg = 0; }; - cpu@1 { + A15_1: cpu@1 { device_type = cpu; compatible = arm,cortex-a15; reg = 1; }; - cpu@2 { + A15_2: cpu@2 { device_type = cpu; compatible = arm,cortex-a15; reg = 2; }; - cpu@3 { + A15_3: cpu@3 { device_type = cpu; compatible = arm,cortex-a15; reg = 3; }; }; + pmu { + compatible = arm,cortex-a15-pmu; + interrupts = GIC_SPI 144 IRQ_TYPE_LEVEL_HIGH, +GIC_SPI 145 IRQ_TYPE_LEVEL_HIGH, +GIC_SPI 146 IRQ_TYPE_LEVEL_HIGH, +GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH; + interrupt-affinity = A15_0, A15_1, A15_2, A15_3; + }; + thermal-zones { cpu { polling-delay-passive = 1000; polling-delay = 1000; thermal-sensors = soctherm TEGRA124_SOCTHERM_SENSOR_CPU; }; -- 1.9.1 -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v2] ARM: tegra124: pmu support
This patch modifies the device tree for tegra124 based devices to enable the Cortex A15 PMU. The interrupt numbers are taken from NVIDIA TRM DP-06905-001_v03p. This patch was tested on a Jetson TK1. Signed-off-by: Kyle Huey <kh...@kylehuey.com> --- arch/arm/boot/dts/tegra124.dtsi | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm/boot/dts/tegra124.dtsi b/arch/arm/boot/dts/tegra124.dtsi index 4be06c6..d966d4e 100644 --- a/arch/arm/boot/dts/tegra124.dtsi +++ b/arch/arm/boot/dts/tegra124.dtsi @@ -906,16 +906,24 @@ cpu@3 { device_type = "cpu"; compatible = "arm,cortex-a15"; reg = <3>; }; }; + pmu { + compatible = "arm,cortex-a15-pmu"; + interrupts = <GIC_SPI 144 IRQ_TYPE_LEVEL_HIGH>, +<GIC_SPI 145 IRQ_TYPE_LEVEL_HIGH>, +<GIC_SPI 146 IRQ_TYPE_LEVEL_HIGH>, +<GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH>; + }; + thermal-zones { cpu { polling-delay-passive = <1000>; polling-delay = <1000>; thermal-sensors = <&soctherm TEGRA124_SOCTHERM_SENSOR_CPU>; }; -- 1.9.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v2] ARM: tegra124: pmu support
This patch modifies the device tree for tegra124 based devices to enable the Cortex A15 PMU. The interrupt numbers are taken from NVIDIA TRM DP-06905-001_v03p. This patch was tested on a Jetson TK1. Signed-off-by: Kyle Huey kh...@kylehuey.com --- arch/arm/boot/dts/tegra124.dtsi | 8 1 file changed, 8 insertions(+) diff --git a/arch/arm/boot/dts/tegra124.dtsi b/arch/arm/boot/dts/tegra124.dtsi index 4be06c6..d966d4e 100644 --- a/arch/arm/boot/dts/tegra124.dtsi +++ b/arch/arm/boot/dts/tegra124.dtsi @@ -906,16 +906,24 @@ cpu@3 { device_type = cpu; compatible = arm,cortex-a15; reg = 3; }; }; + pmu { + compatible = arm,cortex-a15-pmu; + interrupts = GIC_SPI 144 IRQ_TYPE_LEVEL_HIGH, +GIC_SPI 145 IRQ_TYPE_LEVEL_HIGH, +GIC_SPI 146 IRQ_TYPE_LEVEL_HIGH, +GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH; + }; + thermal-zones { cpu { polling-delay-passive = 1000; polling-delay = 1000; thermal-sensors = soctherm TEGRA124_SOCTHERM_SENSOR_CPU; }; -- 1.9.1 -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/