[PATCH v5 3/6] x86/arch_prctl Add a new do_arch_prctl

2016-09-21 Thread Kyle Huey
Add a new do_arch_prctl to handle arch_prctls that are not specific to 64
bits. Call it from the syscall entry point, but not any of the other
callsites in the kernel, which all want one of the existing 64 bit only
arch_prctls.

Signed-off-by: Kyle Huey 
---
 arch/x86/include/asm/proto.h | 1 +
 arch/x86/kernel/process.c| 5 +
 arch/x86/kernel/process_64.c | 8 +++-
 3 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
index 95c3e51..94a57cc 100644
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -30,6 +30,7 @@ void x86_report_nx(void);
 
 extern int reboot_force;
 
+long do_arch_prctl(struct task_struct *task, int code, unsigned long arg2);
 #ifdef CONFIG_X86_64
 long do_arch_prctl_64(struct task_struct *task, int code, unsigned long arg2);
 #endif
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 62c0b0e..97aa104 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -567,3 +567,8 @@ unsigned long get_wchan(struct task_struct *p)
} while (count++ < 16 && p->state != TASK_RUNNING);
return 0;
 }
+
+long do_arch_prctl(struct task_struct *task, int code, unsigned long arg2)
+{
+   return -EINVAL;
+}
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 292ce48..5c60e2c 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -590,7 +590,13 @@ long do_arch_prctl_64(struct task_struct *task, int code, 
unsigned long arg2)
 
 SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, arg2)
 {
-   return do_arch_prctl_64(current, code, arg2);
+   long ret;
+
+   ret = do_arch_prctl_64(current, code, arg2);
+   if (ret == -EINVAL)
+   ret = do_arch_prctl(current, code, arg2);
+
+   return ret;
 }
 
 unsigned long KSTK_ESP(struct task_struct *task)
-- 
2.9.3



[PATCH v5 4/6] x86/syscalls/32 Wire up arch_prctl on x86-32

2016-09-21 Thread Kyle Huey
Hook up arch_prctl to call do_arch_prctl on x86-32, and in 32 bit compat
mode on x86-64. This allows us to have arch_prctls that are not specific to
64 bits.

On UML, simply stub out this syscall.

Signed-off-by: Kyle Huey <kh...@kylehuey.com>
---
 arch/x86/entry/syscalls/syscall_32.tbl | 1 +
 arch/x86/kernel/process_32.c   | 7 +++
 arch/x86/kernel/process_64.c   | 7 +++
 arch/x86/um/Makefile   | 2 +-
 arch/x86/um/syscalls_32.c  | 7 +++
 include/linux/compat.h | 2 ++
 6 files changed, 25 insertions(+), 1 deletion(-)
 create mode 100644 arch/x86/um/syscalls_32.c

diff --git a/arch/x86/entry/syscalls/syscall_32.tbl 
b/arch/x86/entry/syscalls/syscall_32.tbl
index f848572..300fdf8 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -386,3 +386,4 @@
 377i386copy_file_range sys_copy_file_range
 378i386preadv2 sys_preadv2 
compat_sys_preadv2
 379i386pwritev2sys_pwritev2
compat_sys_pwritev2
+380i386arch_prctl  sys_arch_prctl  
compat_sys_arch_prctl
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index d86be29..71770a4 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -35,6 +35,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -54,6 +55,7 @@
 #include 
 #include 
 #include 
+#include 
 
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
 asmlinkage void ret_from_kernel_thread(void) __asm__("ret_from_kernel_thread");
@@ -316,3 +318,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct 
*next_p)
 
return prev_p;
 }
+
+SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, arg2)
+{
+   return do_arch_prctl(current, code, arg2);
+}
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 5c60e2c..aa2b99a 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -599,6 +599,13 @@ SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, arg2)
return ret;
 }
 
+#ifdef CONFIG_IA32_EMULATION
+COMPAT_SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, arg2)
+{
+   return do_arch_prctl(current, code, arg2);
+}
+#endif
+
 unsigned long KSTK_ESP(struct task_struct *task)
 {
return task_pt_regs(task)->sp;
diff --git a/arch/x86/um/Makefile b/arch/x86/um/Makefile
index 3ee2bb6..5e039d6 100644
--- a/arch/x86/um/Makefile
+++ b/arch/x86/um/Makefile
@@ -16,7 +16,7 @@ obj-y = bug.o bugs_$(BITS).o delay.o fault.o ksyms.o ldt.o \
 
 ifeq ($(CONFIG_X86_32),y)
 
-obj-y += checksum_32.o
+obj-y += checksum_32.o syscalls_32.o
 obj-$(CONFIG_ELF_CORE) += elfcore.o
 
 subarch-y = ../lib/string_32.o ../lib/atomic64_32.o ../lib/atomic64_cx8_32.o
diff --git a/arch/x86/um/syscalls_32.c b/arch/x86/um/syscalls_32.c
new file mode 100644
index 0000000..ccf0598
--- /dev/null
+++ b/arch/x86/um/syscalls_32.c
@@ -0,0 +1,7 @@
+#include 
+#include 
+
+SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, arg2)
+{
+   return -EINVAL;
+}
diff --git a/include/linux/compat.h b/include/linux/compat.h
index f964ef7..0039d53 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -722,6 +722,8 @@ asmlinkage long 
compat_sys_sched_rr_get_interval(compat_pid_t pid,
 asmlinkage long compat_sys_fanotify_mark(int, unsigned int, __u32, __u32,
int, const char __user *);
 
+asmlinkage long compat_sys_arch_prctl(int, unsigned long);
+
 /*
  * For most but not all architectures, "am I in a compat syscall?" and
  * "am I a compat task?" are the same question.  For architectures on which
-- 
2.9.3



[PATCH v5 5/6] x86/cpufeature Detect CPUID faulting support

2016-09-21 Thread Kyle Huey
Intel supports faulting on the CPUID instruction beginning with Ivy Bridge.
When enabled, the processor will fault on attempts to execute the CPUID
instruction with CPL>0. This will allow a ptracer to emulate the CPUID
instruction.

Bit 31 of MSR_PLATFORM_INFO advertises support for this feature. It is
documented in detail in Section 2.3.2 of
http://www.intel.com/content/dam/www/public/us/en/documents/application-notes/virtualization-technology-flexmigration-application-note.pdf

Detect support for this feature and expose it as X86_FEATURE_CPUID_FAULT.

Signed-off-by: Kyle Huey <kh...@kylehuey.com>
Reviewed-by: Andy Lutomirski <l...@kernel.org>
---
 arch/x86/include/asm/cpufeatures.h |  1 +
 arch/x86/include/asm/msr-index.h   |  1 +
 arch/x86/kernel/cpu/scattered.c| 13 +
 3 files changed, 15 insertions(+)

diff --git a/arch/x86/include/asm/cpufeatures.h 
b/arch/x86/include/asm/cpufeatures.h
index 92a8308..78b9d06 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -190,6 +190,7 @@
 
 #define X86_FEATURE_CPB( 7*32+ 2) /* AMD Core Performance 
Boost */
 #define X86_FEATURE_EPB( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS 
support */
+#define X86_FEATURE_CPUID_FAULT ( 7*32+ 4) /* Intel CPUID faulting */
 
 #define X86_FEATURE_HW_PSTATE  ( 7*32+ 8) /* AMD HW-PState */
 #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 56f4c66..39aa563 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -41,6 +41,7 @@
 #define MSR_IA32_PERFCTR1  0x00c2
 #define MSR_FSB_FREQ   0x00cd
 #define MSR_PLATFORM_INFO  0x00ce
+#define PLATINFO_CPUID_FAULT   (1UL << 31)
 
 #define MSR_NHM_SNB_PKG_CST_CFG_CTL0x00e2
 #define NHM_C3_AUTO_DEMOTE (1UL << 25)
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 8cb57df..7901481 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -24,6 +24,16 @@ enum cpuid_regs {
CR_EBX
 };
 
+static bool supports_cpuid_faulting(void)
+{
+   unsigned int lo, hi;
+
+   if (rdmsr_safe(MSR_PLATFORM_INFO, &lo, &hi))
+   return false;
+
+   return lo & PLATINFO_CPUID_FAULT;
+}
+
 void init_scattered_cpuid_features(struct cpuinfo_x86 *c)
 {
u32 max_level;
@@ -54,4 +64,7 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c)
if (regs[cb->reg] & (1 << cb->bit))
set_cpu_cap(c, cb->feature);
}
+
+   if (supports_cpuid_faulting())
+   set_cpu_cap(c, X86_FEATURE_CPUID_FAULT);
 }
-- 
2.9.3



[PATCH v5 0/6] x86/arch_prctl Add ARCH_[GET|SET]_CPUID for controlling the CPUID instruction

2016-09-21 Thread Kyle Huey
rr (http://rr-project.org/), a userspace record-and-replay reverse-
execution debugger, would like to trap and emulate the CPUID instruction.
This would allow us to a) mask away certain hardware features that rr does
not support (e.g. RDRAND) and b) enable trace portability across machines
by providing constant results.

Newer Intel CPUs (Ivy Bridge and later) can fault when CPUID is executed at
CPL > 0.  Expose this capability to userspace as a new pair of arch_prctls,
ARCH_GET_CPUID and ARCH_SET_CPUID, with two values, ARCH_CPUID_ENABLE and
ARCH_CPUID_SIGSEGV.

The following changes have been made since v4:

Patch 1:
- Fix missing include on 64bit UML.

Patch 6:
- Fix comment in the test that still referred to an earlier design of the API.

The following changes have been made since v3:

Patch 1 was split into patches 1-4, patches 2 and 3 became patches 5 and 6,
respectively.

Patch 1:
- Use SYSCALL_DEFINE in UML.

Patch 2:
- More descriptive commit message.

Patch 3:
- More descriptive commit message.
- Name the common arch_prctl function do_arch_prctl instead of
  do_arch_prctl_common

Patch 4:
- Move the 32-bit syscall entry point to process_32.c, place the compat
  entry point in process_64.c

Patch 5 (previously Patch 2):
- More descriptive commit message.
- Prefix the #define for the cpuid faulting bit with PLATINFO
- supports_cpuid_faulting returns bool
- Rearrange supports_cpuid_faulting to avoid linebreaks

Patch 6 (previously Patch 3):
- ARCH_GET_CPUID now takes 0 for the second argument, and returns the
  result directly.
- arch_post_exec is now a #define, called from setup_new_exec
- The test now uses errx
- The test now checks that ARCH_GET_CPUID returns ARCH_CPUID_SIGSEGV after
  fork()



[PATCH v5 4/6] x86/syscalls/32 Wire up arch_prctl on x86-32

2016-09-21 Thread Kyle Huey
Hook up arch_prctl to call do_arch_prctl on x86-32, and in 32 bit compat
mode on x86-64. This allows us to have arch_prctls that are not specific to
64 bits.

On UML, simply stub out this syscall.

Signed-off-by: Kyle Huey 
---
 arch/x86/entry/syscalls/syscall_32.tbl | 1 +
 arch/x86/kernel/process_32.c   | 7 +++
 arch/x86/kernel/process_64.c   | 7 +++
 arch/x86/um/Makefile   | 2 +-
 arch/x86/um/syscalls_32.c  | 7 +++
 include/linux/compat.h | 2 ++
 6 files changed, 25 insertions(+), 1 deletion(-)
 create mode 100644 arch/x86/um/syscalls_32.c

diff --git a/arch/x86/entry/syscalls/syscall_32.tbl 
b/arch/x86/entry/syscalls/syscall_32.tbl
index f848572..300fdf8 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -386,3 +386,4 @@
 377i386copy_file_range sys_copy_file_range
 378i386preadv2 sys_preadv2 
compat_sys_preadv2
 379i386pwritev2sys_pwritev2
compat_sys_pwritev2
+380i386arch_prctl  sys_arch_prctl  
compat_sys_arch_prctl
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index d86be29..71770a4 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -35,6 +35,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -54,6 +55,7 @@
 #include 
 #include 
 #include 
+#include 
 
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
 asmlinkage void ret_from_kernel_thread(void) __asm__("ret_from_kernel_thread");
@@ -316,3 +318,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct 
*next_p)
 
return prev_p;
 }
+
+SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, arg2)
+{
+   return do_arch_prctl(current, code, arg2);
+}
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 5c60e2c..aa2b99a 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -599,6 +599,13 @@ SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, arg2)
return ret;
 }
 
+#ifdef CONFIG_IA32_EMULATION
+COMPAT_SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, arg2)
+{
+   return do_arch_prctl(current, code, arg2);
+}
+#endif
+
 unsigned long KSTK_ESP(struct task_struct *task)
 {
return task_pt_regs(task)->sp;
diff --git a/arch/x86/um/Makefile b/arch/x86/um/Makefile
index 3ee2bb6..5e039d6 100644
--- a/arch/x86/um/Makefile
+++ b/arch/x86/um/Makefile
@@ -16,7 +16,7 @@ obj-y = bug.o bugs_$(BITS).o delay.o fault.o ksyms.o ldt.o \
 
 ifeq ($(CONFIG_X86_32),y)
 
-obj-y += checksum_32.o
+obj-y += checksum_32.o syscalls_32.o
 obj-$(CONFIG_ELF_CORE) += elfcore.o
 
 subarch-y = ../lib/string_32.o ../lib/atomic64_32.o ../lib/atomic64_cx8_32.o
diff --git a/arch/x86/um/syscalls_32.c b/arch/x86/um/syscalls_32.c
new file mode 100644
index 0000000..ccf0598
--- /dev/null
+++ b/arch/x86/um/syscalls_32.c
@@ -0,0 +1,7 @@
+#include 
+#include 
+
+SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, arg2)
+{
+   return -EINVAL;
+}
diff --git a/include/linux/compat.h b/include/linux/compat.h
index f964ef7..0039d53 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -722,6 +722,8 @@ asmlinkage long 
compat_sys_sched_rr_get_interval(compat_pid_t pid,
 asmlinkage long compat_sys_fanotify_mark(int, unsigned int, __u32, __u32,
int, const char __user *);
 
+asmlinkage long compat_sys_arch_prctl(int, unsigned long);
+
 /*
  * For most but not all architectures, "am I in a compat syscall?" and
  * "am I a compat task?" are the same question.  For architectures on which
-- 
2.9.3



[PATCH v5 5/6] x86/cpufeature Detect CPUID faulting support

2016-09-21 Thread Kyle Huey
Intel supports faulting on the CPUID instruction beginning with Ivy Bridge.
When enabled, the processor will fault on attempts to execute the CPUID
instruction with CPL>0. This will allow a ptracer to emulate the CPUID
instruction.

Bit 31 of MSR_PLATFORM_INFO advertises support for this feature. It is
documented in detail in Section 2.3.2 of
http://www.intel.com/content/dam/www/public/us/en/documents/application-notes/virtualization-technology-flexmigration-application-note.pdf

Detect support for this feature and expose it as X86_FEATURE_CPUID_FAULT.

Signed-off-by: Kyle Huey 
Reviewed-by: Andy Lutomirski 
---
 arch/x86/include/asm/cpufeatures.h |  1 +
 arch/x86/include/asm/msr-index.h   |  1 +
 arch/x86/kernel/cpu/scattered.c| 13 +
 3 files changed, 15 insertions(+)

diff --git a/arch/x86/include/asm/cpufeatures.h 
b/arch/x86/include/asm/cpufeatures.h
index 92a8308..78b9d06 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -190,6 +190,7 @@
 
 #define X86_FEATURE_CPB( 7*32+ 2) /* AMD Core Performance 
Boost */
 #define X86_FEATURE_EPB( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS 
support */
+#define X86_FEATURE_CPUID_FAULT ( 7*32+ 4) /* Intel CPUID faulting */
 
 #define X86_FEATURE_HW_PSTATE  ( 7*32+ 8) /* AMD HW-PState */
 #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 56f4c66..39aa563 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -41,6 +41,7 @@
 #define MSR_IA32_PERFCTR1  0x00c2
 #define MSR_FSB_FREQ   0x00cd
 #define MSR_PLATFORM_INFO  0x00ce
+#define PLATINFO_CPUID_FAULT   (1UL << 31)
 
 #define MSR_NHM_SNB_PKG_CST_CFG_CTL0x00e2
 #define NHM_C3_AUTO_DEMOTE (1UL << 25)
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 8cb57df..7901481 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -24,6 +24,16 @@ enum cpuid_regs {
CR_EBX
 };
 
+static bool supports_cpuid_faulting(void)
+{
+   unsigned int lo, hi;
+
+   if (rdmsr_safe(MSR_PLATFORM_INFO, &lo, &hi))
+   return false;
+
+   return lo & PLATINFO_CPUID_FAULT;
+}
+
 void init_scattered_cpuid_features(struct cpuinfo_x86 *c)
 {
u32 max_level;
@@ -54,4 +64,7 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c)
if (regs[cb->reg] & (1 << cb->bit))
set_cpu_cap(c, cb->feature);
}
+
+   if (supports_cpuid_faulting())
+   set_cpu_cap(c, X86_FEATURE_CPUID_FAULT);
 }
-- 
2.9.3



[PATCH v5 0/6] x86/arch_prctl Add ARCH_[GET|SET]_CPUID for controlling the CPUID instruction

2016-09-21 Thread Kyle Huey
rr (http://rr-project.org/), a userspace record-and-replay reverse-
execution debugger, would like to trap and emulate the CPUID instruction.
This would allow us to a) mask away certain hardware features that rr does
not support (e.g. RDRAND) and b) enable trace portability across machines
by providing constant results.

Newer Intel CPUs (Ivy Bridge and later) can fault when CPUID is executed at
CPL > 0.  Expose this capability to userspace as a new pair of arch_prctls,
ARCH_GET_CPUID and ARCH_SET_CPUID, with two values, ARCH_CPUID_ENABLE and
ARCH_CPUID_SIGSEGV.

The following changes have been made since v4:

Patch 1:
- Fix missing include on 64bit UML.

Patch 6:
- Fix comment in the test that still referred to an earlier design of the API.

The following changes have been made since v3:

Patch 1 was split into patches 1-4, patches 2 and 3 became patches 5 and 6,
respectively.

Patch 1:
- Use SYSCALL_DEFINE in UML.

Patch 2:
- More descriptive commit message.

Patch 3:
- More descriptive commit message.
- Name the common arch_prctl function do_arch_prctl instead of
  do_arch_prctl_common

Patch 4:
- Move the 32-bit syscall entry point to process_32.c, place the compat
  entry point in process_64.c

Patch 5 (previously Patch 2):
- More descriptive commit message.
- Prefix the #define for the cpuid faulting bit with PLATINFO
- supports_cpuid_faulting returns bool
- Rearrange supports_cpuid_faulting to avoid linebreaks

Patch 6 (previously Patch 3):
- ARCH_GET_CPUID now takes 0 for the second argument, and returns the
  result directly.
- arch_post_exec is now a #define, called from setup_new_exec
- The test now uses errx
- The test now checks that ARCH_GET_CPUID returns ARCH_CPUID_SIGSEGV after
  fork()



[PATCH v4 6/6] x86/arch_prctl Add ARCH_[GET|SET]_CPUID

2016-09-18 Thread Kyle Huey
Intel supports faulting on the CPUID instruction beginning with Ivy Bridge.
When enabled, the processor will fault on attempts to execute the CPUID
instruction with CPL>0. Exposing this feature to userspace will allow a
ptracer to trap and emulate the CPUID instruction.

When supported, this feature is controlled by toggling bit 0 of
MSR_MISC_FEATURES_ENABLES. It is documented in detail in Section 2.3.2 of
http://www.intel.com/content/dam/www/public/us/en/documents/application-notes/virtualization-technology-flexmigration-application-note.pdf

Implement a new pair of arch_prctls, available on both x86-32 and x86-64.

ARCH_GET_CPUID: Returns the current CPUID faulting state, either
  ARCH_CPUID_ENABLE or ARCH_CPUID_SIGSEGV. arg2 must be 0.

ARCH_SET_CPUID: Set the CPUID faulting state to arg2, which must be either
  ARCH_CPUID_ENABLE or ARCH_CPUID_SIGSEGV. Returns EINVAL if arg2 is
  another value or CPUID faulting is not supported on this system.

The state of the CPUID faulting flag is propagated across forks, but reset
upon exec.

Signed-off-by: Kyle Huey <kh...@kylehuey.com>
---
 arch/x86/include/asm/msr-index.h  |   1 +
 arch/x86/include/asm/thread_info.h|   6 +-
 arch/x86/include/uapi/asm/prctl.h |   6 +
 arch/x86/kernel/process.c |  94 +++-
 fs/exec.c |   1 +
 include/linux/thread_info.h   |   4 +
 tools/testing/selftests/x86/Makefile  |   2 +-
 tools/testing/selftests/x86/cpuid-fault.c | 231 ++
 8 files changed, 342 insertions(+), 3 deletions(-)
 create mode 100644 tools/testing/selftests/x86/cpuid-fault.c

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 39aa563..cddefdd 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -53,6 +53,7 @@
 #define MSR_MTRRcap0x00fe
 #define MSR_IA32_BBL_CR_CTL0x0119
 #define MSR_IA32_BBL_CR_CTL3   0x011e
+#define MSR_MISC_FEATURES_ENABLES  0x0140
 
 #define MSR_IA32_SYSENTER_CS   0x0174
 #define MSR_IA32_SYSENTER_ESP  0x0175
diff --git a/arch/x86/include/asm/thread_info.h 
b/arch/x86/include/asm/thread_info.h
index 8b7c8d8..1bc79bc 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -93,6 +93,7 @@ struct thread_info {
 #define TIF_SECCOMP8   /* secure computing */
 #define TIF_USER_RETURN_NOTIFY 11  /* notify kernel of userspace return */
 #define TIF_UPROBE 12  /* breakpointed or singlestepping */
+#define TIF_NOCPUID15  /* CPUID is not accessible in userland 
*/
 #define TIF_NOTSC  16  /* TSC is not accessible in userland */
 #define TIF_IA32   17  /* IA32 compatibility process */
 #define TIF_FORK   18  /* ret_from_fork */
@@ -117,6 +118,7 @@ struct thread_info {
 #define _TIF_SECCOMP   (1 << TIF_SECCOMP)
 #define _TIF_USER_RETURN_NOTIFY(1 << TIF_USER_RETURN_NOTIFY)
 #define _TIF_UPROBE(1 << TIF_UPROBE)
+#define _TIF_NOCPUID   (1 << TIF_NOCPUID)
 #define _TIF_NOTSC (1 << TIF_NOTSC)
 #define _TIF_IA32  (1 << TIF_IA32)
 #define _TIF_FORK  (1 << TIF_FORK)
@@ -146,7 +148,7 @@ struct thread_info {
 
 /* flags to check in __switch_to() */
 #define _TIF_WORK_CTXSW
\
-   (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP)
+   (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP)
 
 #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
 #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
@@ -293,6 +295,8 @@ static inline bool in_ia32_syscall(void)
 extern void arch_task_cache_init(void);
 extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct 
*src);
 extern void arch_release_task_struct(struct task_struct *tsk);
+extern void arch_post_exec(void);
+#define arch_post_exec arch_post_exec
 #endif /* !__ASSEMBLY__ */
 
 #endif /* _ASM_X86_THREAD_INFO_H */
diff --git a/arch/x86/include/uapi/asm/prctl.h 
b/arch/x86/include/uapi/asm/prctl.h
index 3ac5032..c087e55 100644
--- a/arch/x86/include/uapi/asm/prctl.h
+++ b/arch/x86/include/uapi/asm/prctl.h
@@ -6,4 +6,10 @@
 #define ARCH_GET_FS 0x1003
 #define ARCH_GET_GS 0x1004
 
+/* Get/set the process' ability to use the CPUID instruction */
+#define ARCH_GET_CPUID 0x1005
+#define ARCH_SET_CPUID 0x1006
+# define ARCH_CPUID_ENABLE 1   /* allow the use of the CPUID 
instruction */
+# define ARCH_CPUID_SIGSEGV2   /* throw a SIGSEGV instead of 
reading the CPUID */
+
 #endif /* _ASM_X86_PRCTL_H */
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 97aa104..3ac90eb 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -32,6 +32,7 @@
 #include 

[PATCH v4 6/6] x86/arch_prctl Add ARCH_[GET|SET]_CPUID

2016-09-18 Thread Kyle Huey
Intel supports faulting on the CPUID instruction beginning with Ivy Bridge.
When enabled, the processor will fault on attempts to execute the CPUID
instruction with CPL>0. Exposing this feature to userspace will allow a
ptracer to trap and emulate the CPUID instruction.

When supported, this feature is controlled by toggling bit 0 of
MSR_MISC_FEATURES_ENABLES. It is documented in detail in Section 2.3.2 of
http://www.intel.com/content/dam/www/public/us/en/documents/application-notes/virtualization-technology-flexmigration-application-note.pdf

Implement a new pair of arch_prctls, available on both x86-32 and x86-64.

ARCH_GET_CPUID: Returns the current CPUID faulting state, either
  ARCH_CPUID_ENABLE or ARCH_CPUID_SIGSEGV. arg2 must be 0.

ARCH_SET_CPUID: Set the CPUID faulting state to arg2, which must be either
  ARCH_CPUID_ENABLE or ARCH_CPUID_SIGSEGV. Returns EINVAL if arg2 is
  another value or CPUID faulting is not supported on this system.

The state of the CPUID faulting flag is propagated across forks, but reset
upon exec.

Signed-off-by: Kyle Huey 
---
 arch/x86/include/asm/msr-index.h  |   1 +
 arch/x86/include/asm/thread_info.h|   6 +-
 arch/x86/include/uapi/asm/prctl.h |   6 +
 arch/x86/kernel/process.c |  94 +++-
 fs/exec.c |   1 +
 include/linux/thread_info.h   |   4 +
 tools/testing/selftests/x86/Makefile  |   2 +-
 tools/testing/selftests/x86/cpuid-fault.c | 231 ++
 8 files changed, 342 insertions(+), 3 deletions(-)
 create mode 100644 tools/testing/selftests/x86/cpuid-fault.c

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 39aa563..cddefdd 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -53,6 +53,7 @@
 #define MSR_MTRRcap0x00fe
 #define MSR_IA32_BBL_CR_CTL0x0119
 #define MSR_IA32_BBL_CR_CTL3   0x011e
+#define MSR_MISC_FEATURES_ENABLES  0x0140
 
 #define MSR_IA32_SYSENTER_CS   0x0174
 #define MSR_IA32_SYSENTER_ESP  0x0175
diff --git a/arch/x86/include/asm/thread_info.h 
b/arch/x86/include/asm/thread_info.h
index 8b7c8d8..1bc79bc 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -93,6 +93,7 @@ struct thread_info {
 #define TIF_SECCOMP8   /* secure computing */
 #define TIF_USER_RETURN_NOTIFY 11  /* notify kernel of userspace return */
 #define TIF_UPROBE 12  /* breakpointed or singlestepping */
+#define TIF_NOCPUID15  /* CPUID is not accessible in userland 
*/
 #define TIF_NOTSC  16  /* TSC is not accessible in userland */
 #define TIF_IA32   17  /* IA32 compatibility process */
 #define TIF_FORK   18  /* ret_from_fork */
@@ -117,6 +118,7 @@ struct thread_info {
 #define _TIF_SECCOMP   (1 << TIF_SECCOMP)
 #define _TIF_USER_RETURN_NOTIFY(1 << TIF_USER_RETURN_NOTIFY)
 #define _TIF_UPROBE(1 << TIF_UPROBE)
+#define _TIF_NOCPUID   (1 << TIF_NOCPUID)
 #define _TIF_NOTSC (1 << TIF_NOTSC)
 #define _TIF_IA32  (1 << TIF_IA32)
 #define _TIF_FORK  (1 << TIF_FORK)
@@ -146,7 +148,7 @@ struct thread_info {
 
 /* flags to check in __switch_to() */
 #define _TIF_WORK_CTXSW
\
-   (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP)
+   (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP)
 
 #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
 #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
@@ -293,6 +295,8 @@ static inline bool in_ia32_syscall(void)
 extern void arch_task_cache_init(void);
 extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct 
*src);
 extern void arch_release_task_struct(struct task_struct *tsk);
+extern void arch_post_exec(void);
+#define arch_post_exec arch_post_exec
 #endif /* !__ASSEMBLY__ */
 
 #endif /* _ASM_X86_THREAD_INFO_H */
diff --git a/arch/x86/include/uapi/asm/prctl.h 
b/arch/x86/include/uapi/asm/prctl.h
index 3ac5032..c087e55 100644
--- a/arch/x86/include/uapi/asm/prctl.h
+++ b/arch/x86/include/uapi/asm/prctl.h
@@ -6,4 +6,10 @@
 #define ARCH_GET_FS 0x1003
 #define ARCH_GET_GS 0x1004
 
+/* Get/set the process' ability to use the CPUID instruction */
+#define ARCH_GET_CPUID 0x1005
+#define ARCH_SET_CPUID 0x1006
+# define ARCH_CPUID_ENABLE 1   /* allow the use of the CPUID 
instruction */
+# define ARCH_CPUID_SIGSEGV2   /* throw a SIGSEGV instead of 
reading the CPUID */
+
 #endif /* _ASM_X86_PRCTL_H */
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 97aa104..3ac90eb 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -32,6 +32,7 @@
 #include 
 #include 
 #inclu

[PATCH v4 4/6] x86/syscalls/32 Wire up arch_prctl on x86-32

2016-09-18 Thread Kyle Huey
Hook up arch_prctl to call do_arch_prctl on x86-32, and in 32 bit compat
mode on x86-64. This allows us to have arch_prctls that are not specific to
64 bits.

On UML, simply stub out this syscall.

Signed-off-by: Kyle Huey <kh...@kylehuey.com>
---
 arch/x86/entry/syscalls/syscall_32.tbl | 1 +
 arch/x86/kernel/process_32.c   | 7 +++
 arch/x86/kernel/process_64.c   | 7 +++
 arch/x86/um/Makefile   | 2 +-
 arch/x86/um/syscalls_32.c  | 7 +++
 include/linux/compat.h | 2 ++
 6 files changed, 25 insertions(+), 1 deletion(-)
 create mode 100644 arch/x86/um/syscalls_32.c

diff --git a/arch/x86/entry/syscalls/syscall_32.tbl 
b/arch/x86/entry/syscalls/syscall_32.tbl
index f848572..300fdf8 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -386,3 +386,4 @@
 377i386copy_file_range sys_copy_file_range
 378i386preadv2 sys_preadv2 
compat_sys_preadv2
 379i386pwritev2sys_pwritev2
compat_sys_pwritev2
+380i386arch_prctl  sys_arch_prctl  
compat_sys_arch_prctl
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index d86be29..71770a4 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -35,6 +35,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -54,6 +55,7 @@
 #include 
 #include 
 #include 
+#include 
 
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
 asmlinkage void ret_from_kernel_thread(void) __asm__("ret_from_kernel_thread");
@@ -316,3 +318,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct 
*next_p)
 
return prev_p;
 }
+
+SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, arg2)
+{
+   return do_arch_prctl(current, code, arg2);
+}
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 5c60e2c..aa2b99a 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -599,6 +599,13 @@ SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, arg2)
return ret;
 }
 
+#ifdef CONFIG_IA32_EMULATION
+COMPAT_SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, arg2)
+{
+   return do_arch_prctl(current, code, arg2);
+}
+#endif
+
 unsigned long KSTK_ESP(struct task_struct *task)
 {
return task_pt_regs(task)->sp;
diff --git a/arch/x86/um/Makefile b/arch/x86/um/Makefile
index 3ee2bb6..5e039d6 100644
--- a/arch/x86/um/Makefile
+++ b/arch/x86/um/Makefile
@@ -16,7 +16,7 @@ obj-y = bug.o bugs_$(BITS).o delay.o fault.o ksyms.o ldt.o \
 
 ifeq ($(CONFIG_X86_32),y)
 
-obj-y += checksum_32.o
+obj-y += checksum_32.o syscalls_32.o
 obj-$(CONFIG_ELF_CORE) += elfcore.o
 
 subarch-y = ../lib/string_32.o ../lib/atomic64_32.o ../lib/atomic64_cx8_32.o
diff --git a/arch/x86/um/syscalls_32.c b/arch/x86/um/syscalls_32.c
new file mode 100644
index 0000000..ccf0598
--- /dev/null
+++ b/arch/x86/um/syscalls_32.c
@@ -0,0 +1,7 @@
+#include 
+#include 
+
+SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, arg2)
+{
+   return -EINVAL;
+}
diff --git a/include/linux/compat.h b/include/linux/compat.h
index f964ef7..0039d53 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -722,6 +722,8 @@ asmlinkage long 
compat_sys_sched_rr_get_interval(compat_pid_t pid,
 asmlinkage long compat_sys_fanotify_mark(int, unsigned int, __u32, __u32,
int, const char __user *);
 
+asmlinkage long compat_sys_arch_prctl(int, unsigned long);
+
 /*
  * For most but not all architectures, "am I in a compat syscall?" and
  * "am I a compat task?" are the same question.  For architectures on which
-- 
2.9.3



[PATCH v4 3/6] x86/arch_prctl Add a new do_arch_prctl

2016-09-18 Thread Kyle Huey
Add a new do_arch_prctl to handle arch_prctls that are not specific to 64
bits. Call it from the syscall entry point, but not any of the other
callsites in the kernel, which all want one of the existing 64 bit only
arch_prctls.

Signed-off-by: Kyle Huey <kh...@kylehuey.com>
---
 arch/x86/include/asm/proto.h | 1 +
 arch/x86/kernel/process.c| 5 +
 arch/x86/kernel/process_64.c | 8 +++-
 3 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
index 95c3e51..94a57cc 100644
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -30,6 +30,7 @@ void x86_report_nx(void);
 
 extern int reboot_force;
 
+long do_arch_prctl(struct task_struct *task, int code, unsigned long arg2);
 #ifdef CONFIG_X86_64
 long do_arch_prctl_64(struct task_struct *task, int code, unsigned long arg2);
 #endif
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 62c0b0e..97aa104 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -567,3 +567,8 @@ unsigned long get_wchan(struct task_struct *p)
} while (count++ < 16 && p->state != TASK_RUNNING);
return 0;
 }
+
+long do_arch_prctl(struct task_struct *task, int code, unsigned long arg2)
+{
+   return -EINVAL;
+}
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 292ce48..5c60e2c 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -590,7 +590,13 @@ long do_arch_prctl_64(struct task_struct *task, int code, 
unsigned long arg2)
 
 SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, arg2)
 {
-   return do_arch_prctl_64(current, code, arg2);
+   long ret;
+
+   ret = do_arch_prctl_64(current, code, arg2);
+   if (ret == -EINVAL)
+   ret = do_arch_prctl(current, code, arg2);
+
+   return ret;
 }
 
 unsigned long KSTK_ESP(struct task_struct *task)
-- 
2.9.3



[PATCH v4 4/6] x86/syscalls/32 Wire up arch_prctl on x86-32

2016-09-18 Thread Kyle Huey
Hook up arch_prctl to call do_arch_prctl on x86-32, and in 32 bit compat
mode on x86-64. This allows us to have arch_prctls that are not specific to
64 bits.

On UML, simply stub out this syscall.

Signed-off-by: Kyle Huey 
---
 arch/x86/entry/syscalls/syscall_32.tbl | 1 +
 arch/x86/kernel/process_32.c   | 7 +++
 arch/x86/kernel/process_64.c   | 7 +++
 arch/x86/um/Makefile   | 2 +-
 arch/x86/um/syscalls_32.c  | 7 +++
 include/linux/compat.h | 2 ++
 6 files changed, 25 insertions(+), 1 deletion(-)
 create mode 100644 arch/x86/um/syscalls_32.c

diff --git a/arch/x86/entry/syscalls/syscall_32.tbl 
b/arch/x86/entry/syscalls/syscall_32.tbl
index f848572..300fdf8 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -386,3 +386,4 @@
 377i386copy_file_range sys_copy_file_range
 378i386preadv2 sys_preadv2 
compat_sys_preadv2
 379i386pwritev2sys_pwritev2
compat_sys_pwritev2
+380i386arch_prctl  sys_arch_prctl  
compat_sys_arch_prctl
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index d86be29..71770a4 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -35,6 +35,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -54,6 +55,7 @@
 #include 
 #include 
 #include 
+#include 
 
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
 asmlinkage void ret_from_kernel_thread(void) __asm__("ret_from_kernel_thread");
@@ -316,3 +318,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct 
*next_p)
 
return prev_p;
 }
+
+SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, arg2)
+{
+   return do_arch_prctl(current, code, arg2);
+}
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 5c60e2c..aa2b99a 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -599,6 +599,13 @@ SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, arg2)
return ret;
 }
 
+#ifdef CONFIG_IA32_EMULATION
+COMPAT_SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, arg2)
+{
+   return do_arch_prctl(current, code, arg2);
+}
+#endif
+
 unsigned long KSTK_ESP(struct task_struct *task)
 {
return task_pt_regs(task)->sp;
diff --git a/arch/x86/um/Makefile b/arch/x86/um/Makefile
index 3ee2bb6..5e039d6 100644
--- a/arch/x86/um/Makefile
+++ b/arch/x86/um/Makefile
@@ -16,7 +16,7 @@ obj-y = bug.o bugs_$(BITS).o delay.o fault.o ksyms.o ldt.o \
 
 ifeq ($(CONFIG_X86_32),y)
 
-obj-y += checksum_32.o
+obj-y += checksum_32.o syscalls_32.o
 obj-$(CONFIG_ELF_CORE) += elfcore.o
 
 subarch-y = ../lib/string_32.o ../lib/atomic64_32.o ../lib/atomic64_cx8_32.o
diff --git a/arch/x86/um/syscalls_32.c b/arch/x86/um/syscalls_32.c
new file mode 100644
index 000..ccf0598
--- /dev/null
+++ b/arch/x86/um/syscalls_32.c
@@ -0,0 +1,7 @@
+#include 
+#include 
+
+SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, arg2)
+{
+   return -EINVAL;
+}
diff --git a/include/linux/compat.h b/include/linux/compat.h
index f964ef7..0039d53 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -722,6 +722,8 @@ asmlinkage long 
compat_sys_sched_rr_get_interval(compat_pid_t pid,
 asmlinkage long compat_sys_fanotify_mark(int, unsigned int, __u32, __u32,
int, const char __user *);
 
+asmlinkage long compat_sys_arch_prctl(int, unsigned long);
+
 /*
  * For most but not all architectures, "am I in a compat syscall?" and
  * "am I a compat task?" are the same question.  For architectures on which
-- 
2.9.3



[PATCH v4 3/6] x86/arch_prctl Add a new do_arch_prctl

2016-09-18 Thread Kyle Huey
Add a new do_arch_prctl to handle arch_prctls that are not specific to 64
bits. Call it from the syscall entry point, but not any of the other
callsites in the kernel, which all want one of the existing 64 bit only
arch_prctls.

Signed-off-by: Kyle Huey 
---
 arch/x86/include/asm/proto.h | 1 +
 arch/x86/kernel/process.c| 5 +
 arch/x86/kernel/process_64.c | 8 +++-
 3 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
index 95c3e51..94a57cc 100644
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -30,6 +30,7 @@ void x86_report_nx(void);
 
 extern int reboot_force;
 
+long do_arch_prctl(struct task_struct *task, int code, unsigned long arg2);
 #ifdef CONFIG_X86_64
 long do_arch_prctl_64(struct task_struct *task, int code, unsigned long arg2);
 #endif
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 62c0b0e..97aa104 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -567,3 +567,8 @@ unsigned long get_wchan(struct task_struct *p)
} while (count++ < 16 && p->state != TASK_RUNNING);
return 0;
 }
+
+long do_arch_prctl(struct task_struct *task, int code, unsigned long arg2)
+{
+   return -EINVAL;
+}
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 292ce48..5c60e2c 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -590,7 +590,13 @@ long do_arch_prctl_64(struct task_struct *task, int code, 
unsigned long arg2)
 
 SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, arg2)
 {
-   return do_arch_prctl_64(current, code, arg2);
+   long ret;
+
+   ret = do_arch_prctl_64(current, code, arg2);
+   if (ret == -EINVAL)
+   ret = do_arch_prctl(current, code, arg2);
+
+   return ret;
 }
 
 unsigned long KSTK_ESP(struct task_struct *task)
-- 
2.9.3



[PATCH v4 2/6] x86/arch_prctl/64 Rename do_arch_prctl to do_arch_prctl_64

2016-09-18 Thread Kyle Huey
In order to introduce new arch_prctls that are not 64 bit only, rename the
existing 64 bit implementation to do_arch_prctl_64. Also rename the second
argument to arch_prctl, which will no longer always be an address.

Signed-off-by: Kyle Huey <kh...@kylehuey.com>
---
 arch/x86/include/asm/proto.h |  4 +++-
 arch/x86/kernel/process_64.c | 26 ++
 arch/x86/kernel/ptrace.c |  8 
 arch/x86/um/syscalls_64.c|  4 ++--
 4 files changed, 23 insertions(+), 19 deletions(-)

diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
index 9b9b30b..95c3e51 100644
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -30,6 +30,8 @@ void x86_report_nx(void);
 
 extern int reboot_force;
 
-long do_arch_prctl(struct task_struct *task, int code, unsigned long addr);
+#ifdef CONFIG_X86_64
+long do_arch_prctl_64(struct task_struct *task, int code, unsigned long arg2);
+#endif
 
 #endif /* _ASM_X86_PROTO_H */
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 4d6363c..292ce48 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -197,7 +197,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned 
long sp,
(struct user_desc __user *)tls, 0);
else
 #endif
-   err = do_arch_prctl(p, ARCH_SET_FS, tls);
+   err = do_arch_prctl_64(p, ARCH_SET_FS, tls);
if (err)
goto out;
}
@@ -525,7 +525,7 @@ void set_personality_ia32(bool x32)
 }
 EXPORT_SYMBOL_GPL(set_personality_ia32);
 
-long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
+long do_arch_prctl_64(struct task_struct *task, int code, unsigned long arg2)
 {
int ret = 0;
int doit = task == current;
@@ -533,48 +533,50 @@ long do_arch_prctl(struct task_struct *task, int code, 
unsigned long addr)
 
switch (code) {
case ARCH_SET_GS:
-   if (addr >= TASK_SIZE_MAX)
+   if (arg2 >= TASK_SIZE_MAX)
return -EPERM;
cpu = get_cpu();
task->thread.gsindex = 0;
-   task->thread.gsbase = addr;
+   task->thread.gsbase = arg2;
if (doit) {
load_gs_index(0);
-   ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr);
+   ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, arg2);
}
put_cpu();
break;
case ARCH_SET_FS:
/* Not strictly needed for fs, but do it for symmetry
   with gs */
-   if (addr >= TASK_SIZE_MAX)
+   if (arg2 >= TASK_SIZE_MAX)
return -EPERM;
cpu = get_cpu();
task->thread.fsindex = 0;
-   task->thread.fsbase = addr;
+   task->thread.fsbase = arg2;
if (doit) {
/* set the selector to 0 to not confuse __switch_to */
loadsegment(fs, 0);
-   ret = wrmsrl_safe(MSR_FS_BASE, addr);
+   ret = wrmsrl_safe(MSR_FS_BASE, arg2);
}
put_cpu();
break;
case ARCH_GET_FS: {
unsigned long base;
+
if (doit)
rdmsrl(MSR_FS_BASE, base);
else
base = task->thread.fsbase;
-   ret = put_user(base, (unsigned long __user *)addr);
+   ret = put_user(base, (unsigned long __user *)arg2);
break;
}
case ARCH_GET_GS: {
unsigned long base;
+
if (doit)
rdmsrl(MSR_KERNEL_GS_BASE, base);
else
base = task->thread.gsbase;
-   ret = put_user(base, (unsigned long __user *)addr);
+   ret = put_user(base, (unsigned long __user *)arg2);
break;
}
 
@@ -586,9 +588,9 @@ long do_arch_prctl(struct task_struct *task, int code, 
unsigned long addr)
return ret;
 }
 
-SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, addr)
+SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, arg2)
 {
-   return do_arch_prctl(current, code, addr);
+   return do_arch_prctl_64(current, code, arg2);
 }
 
 unsigned long KSTK_ESP(struct task_struct *task)
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index f79576a..030cbc5 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -395,12 +395,12 @@ static int putreg(struct task_struct *child,
if (value >= TASK_SIZE_MAX)
return -EIO;
/*
-* When changing the segment base, use do_arch_prctl
+* When changing the segment base, u

[PATCH v4 5/6] x86/cpufeature Detect CPUID faulting support

2016-09-18 Thread Kyle Huey
Intel supports faulting on the CPUID instruction beginning with Ivy Bridge.
When enabled, the processor will fault on attempts to execute the CPUID
instruction with CPL>0. This will allow a ptracer to emulate the CPUID
instruction.

Bit 31 of MSR_PLATFORM_INFO advertises support for this feature. It is
documented in detail in Section 2.3.2 of
http://www.intel.com/content/dam/www/public/us/en/documents/application-notes/virtualization-technology-flexmigration-application-note.pdf

Detect support for this feature and expose it as X86_FEATURE_CPUID_FAULT.

Signed-off-by: Kyle Huey <kh...@kylehuey.com>
Reviewed-by: Andy Lutomirski <l...@kernel.org>
---
 arch/x86/include/asm/cpufeatures.h |  1 +
 arch/x86/include/asm/msr-index.h   |  1 +
 arch/x86/kernel/cpu/scattered.c| 13 +
 3 files changed, 15 insertions(+)

diff --git a/arch/x86/include/asm/cpufeatures.h 
b/arch/x86/include/asm/cpufeatures.h
index 92a8308..78b9d06 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -190,6 +190,7 @@
 
 #define X86_FEATURE_CPB( 7*32+ 2) /* AMD Core Performance 
Boost */
 #define X86_FEATURE_EPB( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS 
support */
+#define X86_FEATURE_CPUID_FAULT ( 7*32+ 4) /* Intel CPUID faulting */
 
 #define X86_FEATURE_HW_PSTATE  ( 7*32+ 8) /* AMD HW-PState */
 #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 56f4c66..39aa563 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -41,6 +41,7 @@
 #define MSR_IA32_PERFCTR1  0x00c2
 #define MSR_FSB_FREQ   0x00cd
 #define MSR_PLATFORM_INFO  0x00ce
+#define PLATINFO_CPUID_FAULT   (1UL << 31)
 
 #define MSR_NHM_SNB_PKG_CST_CFG_CTL0x00e2
 #define NHM_C3_AUTO_DEMOTE (1UL << 25)
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 8cb57df..7901481 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -24,6 +24,16 @@ enum cpuid_regs {
CR_EBX
 };
 
+static bool supports_cpuid_faulting(void)
+{
+   unsigned int lo, hi;
+
+   if (rdmsr_safe(MSR_PLATFORM_INFO, &lo, &hi))
+   return false;
+
+   return lo & PLATINFO_CPUID_FAULT;
+}
+
 void init_scattered_cpuid_features(struct cpuinfo_x86 *c)
 {
u32 max_level;
@@ -54,4 +64,7 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c)
if (regs[cb->reg] & (1 << cb->bit))
set_cpu_cap(c, cb->feature);
}
+
+   if (supports_cpuid_faulting())
+   set_cpu_cap(c, X86_FEATURE_CPUID_FAULT);
 }
-- 
2.9.3



[PATCH v4 5/6] x86/cpufeature Detect CPUID faulting support

2016-09-18 Thread Kyle Huey
Intel supports faulting on the CPUID instruction beginning with Ivy Bridge.
When enabled, the processor will fault on attempts to execute the CPUID
instruction with CPL>0. This will allow a ptracer to emulate the CPUID
instruction.

Bit 31 of MSR_PLATFORM_INFO advertises support for this feature. It is
documented in detail in Section 2.3.2 of
http://www.intel.com/content/dam/www/public/us/en/documents/application-notes/virtualization-technology-flexmigration-application-note.pdf

Detect support for this feature and expose it as X86_FEATURE_CPUID_FAULT.

Signed-off-by: Kyle Huey 
Reviewed-by: Andy Lutomirski 
---
 arch/x86/include/asm/cpufeatures.h |  1 +
 arch/x86/include/asm/msr-index.h   |  1 +
 arch/x86/kernel/cpu/scattered.c| 13 +
 3 files changed, 15 insertions(+)

diff --git a/arch/x86/include/asm/cpufeatures.h 
b/arch/x86/include/asm/cpufeatures.h
index 92a8308..78b9d06 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -190,6 +190,7 @@
 
 #define X86_FEATURE_CPB( 7*32+ 2) /* AMD Core Performance 
Boost */
 #define X86_FEATURE_EPB( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS 
support */
+#define X86_FEATURE_CPUID_FAULT ( 7*32+ 4) /* Intel CPUID faulting */
 
 #define X86_FEATURE_HW_PSTATE  ( 7*32+ 8) /* AMD HW-PState */
 #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 56f4c66..39aa563 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -41,6 +41,7 @@
 #define MSR_IA32_PERFCTR1  0x00c2
 #define MSR_FSB_FREQ   0x00cd
 #define MSR_PLATFORM_INFO  0x00ce
+#define PLATINFO_CPUID_FAULT   (1UL << 31)
 
 #define MSR_NHM_SNB_PKG_CST_CFG_CTL0x00e2
 #define NHM_C3_AUTO_DEMOTE (1UL << 25)
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 8cb57df..7901481 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -24,6 +24,16 @@ enum cpuid_regs {
CR_EBX
 };
 
+static bool supports_cpuid_faulting(void)
+{
+   unsigned int lo, hi;
+
+   if (rdmsr_safe(MSR_PLATFORM_INFO, &lo, &hi))
+   return false;
+
+   return lo & PLATINFO_CPUID_FAULT;
+}
+
 void init_scattered_cpuid_features(struct cpuinfo_x86 *c)
 {
u32 max_level;
@@ -54,4 +64,7 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c)
if (regs[cb->reg] & (1 << cb->bit))
set_cpu_cap(c, cb->feature);
}
+
+   if (supports_cpuid_faulting())
+   set_cpu_cap(c, X86_FEATURE_CPUID_FAULT);
 }
-- 
2.9.3



[PATCH v4 2/6] x86/arch_prctl/64 Rename do_arch_prctl to do_arch_prctl_64

2016-09-18 Thread Kyle Huey
In order to introduce new arch_prctls that are not 64 bit only, rename the
existing 64 bit implementation to do_arch_prctl_64. Also rename the second
argument to arch_prctl, which will no longer always be an address.

Signed-off-by: Kyle Huey 
---
 arch/x86/include/asm/proto.h |  4 +++-
 arch/x86/kernel/process_64.c | 26 ++
 arch/x86/kernel/ptrace.c |  8 
 arch/x86/um/syscalls_64.c|  4 ++--
 4 files changed, 23 insertions(+), 19 deletions(-)

diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
index 9b9b30b..95c3e51 100644
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -30,6 +30,8 @@ void x86_report_nx(void);
 
 extern int reboot_force;
 
-long do_arch_prctl(struct task_struct *task, int code, unsigned long addr);
+#ifdef CONFIG_X86_64
+long do_arch_prctl_64(struct task_struct *task, int code, unsigned long arg2);
+#endif
 
 #endif /* _ASM_X86_PROTO_H */
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 4d6363c..292ce48 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -197,7 +197,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned 
long sp,
(struct user_desc __user *)tls, 0);
else
 #endif
-   err = do_arch_prctl(p, ARCH_SET_FS, tls);
+   err = do_arch_prctl_64(p, ARCH_SET_FS, tls);
if (err)
goto out;
}
@@ -525,7 +525,7 @@ void set_personality_ia32(bool x32)
 }
 EXPORT_SYMBOL_GPL(set_personality_ia32);
 
-long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
+long do_arch_prctl_64(struct task_struct *task, int code, unsigned long arg2)
 {
int ret = 0;
int doit = task == current;
@@ -533,48 +533,50 @@ long do_arch_prctl(struct task_struct *task, int code, 
unsigned long addr)
 
switch (code) {
case ARCH_SET_GS:
-   if (addr >= TASK_SIZE_MAX)
+   if (arg2 >= TASK_SIZE_MAX)
return -EPERM;
cpu = get_cpu();
task->thread.gsindex = 0;
-   task->thread.gsbase = addr;
+   task->thread.gsbase = arg2;
if (doit) {
load_gs_index(0);
-   ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr);
+   ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, arg2);
}
put_cpu();
break;
case ARCH_SET_FS:
/* Not strictly needed for fs, but do it for symmetry
   with gs */
-   if (addr >= TASK_SIZE_MAX)
+   if (arg2 >= TASK_SIZE_MAX)
return -EPERM;
cpu = get_cpu();
task->thread.fsindex = 0;
-   task->thread.fsbase = addr;
+   task->thread.fsbase = arg2;
if (doit) {
/* set the selector to 0 to not confuse __switch_to */
loadsegment(fs, 0);
-   ret = wrmsrl_safe(MSR_FS_BASE, addr);
+   ret = wrmsrl_safe(MSR_FS_BASE, arg2);
}
put_cpu();
break;
case ARCH_GET_FS: {
unsigned long base;
+
if (doit)
rdmsrl(MSR_FS_BASE, base);
else
base = task->thread.fsbase;
-   ret = put_user(base, (unsigned long __user *)addr);
+   ret = put_user(base, (unsigned long __user *)arg2);
break;
}
case ARCH_GET_GS: {
unsigned long base;
+
if (doit)
rdmsrl(MSR_KERNEL_GS_BASE, base);
else
base = task->thread.gsbase;
-   ret = put_user(base, (unsigned long __user *)addr);
+   ret = put_user(base, (unsigned long __user *)arg2);
break;
}
 
@@ -586,9 +588,9 @@ long do_arch_prctl(struct task_struct *task, int code, 
unsigned long addr)
return ret;
 }
 
-SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, addr)
+SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, arg2)
 {
-   return do_arch_prctl(current, code, addr);
+   return do_arch_prctl_64(current, code, arg2);
 }
 
 unsigned long KSTK_ESP(struct task_struct *task)
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index f79576a..030cbc5 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -395,12 +395,12 @@ static int putreg(struct task_struct *child,
if (value >= TASK_SIZE_MAX)
return -EIO;
/*
-* When changing the segment base, use do_arch_prctl
+* When changing the segment base, use do_arch_prctl_64
   

[PATCH v4 1/6] x86/arch_prctl/64 Use SYSCALL_DEFINE2 to define sys_arch_prctl

2016-09-18 Thread Kyle Huey
Signed-off-by: Kyle Huey <kh...@kylehuey.com>
---
 arch/x86/kernel/process_64.c | 3 ++-
 arch/x86/um/syscalls_64.c| 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 63236d8..4d6363c 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -35,6 +35,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -585,7 +586,7 @@ long do_arch_prctl(struct task_struct *task, int code, 
unsigned long addr)
return ret;
 }
 
-long sys_arch_prctl(int code, unsigned long addr)
+SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, addr)
 {
return do_arch_prctl(current, code, addr);
 }
diff --git a/arch/x86/um/syscalls_64.c b/arch/x86/um/syscalls_64.c
index e655227..3282066 100644
--- a/arch/x86/um/syscalls_64.c
+++ b/arch/x86/um/syscalls_64.c
@@ -72,7 +72,7 @@ long arch_prctl(struct task_struct *task, int code, unsigned 
long __user *addr)
return ret;
 }
 
-long sys_arch_prctl(int code, unsigned long addr)
+SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, addr)
 {
return arch_prctl(current, code, (unsigned long __user *) addr);
 }
-- 
2.9.3

base-commit: 024c7e3756d8a42fc41fe8a9488488b9b09d1dcc


[PATCH v4 1/6] x86/arch_prctl/64 Use SYSCALL_DEFINE2 to define sys_arch_prctl

2016-09-18 Thread Kyle Huey
Signed-off-by: Kyle Huey 
---
 arch/x86/kernel/process_64.c | 3 ++-
 arch/x86/um/syscalls_64.c| 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 63236d8..4d6363c 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -35,6 +35,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -585,7 +586,7 @@ long do_arch_prctl(struct task_struct *task, int code, 
unsigned long addr)
return ret;
 }
 
-long sys_arch_prctl(int code, unsigned long addr)
+SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, addr)
 {
return do_arch_prctl(current, code, addr);
 }
diff --git a/arch/x86/um/syscalls_64.c b/arch/x86/um/syscalls_64.c
index e655227..3282066 100644
--- a/arch/x86/um/syscalls_64.c
+++ b/arch/x86/um/syscalls_64.c
@@ -72,7 +72,7 @@ long arch_prctl(struct task_struct *task, int code, unsigned 
long __user *addr)
return ret;
 }
 
-long sys_arch_prctl(int code, unsigned long addr)
+SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, addr)
 {
return arch_prctl(current, code, (unsigned long __user *) addr);
 }
-- 
2.9.3

base-commit: 024c7e3756d8a42fc41fe8a9488488b9b09d1dcc


[PATCH v4 0/6] x86/arch_prctl Add ARCH_[GET|SET]_CPUID for controlling the CPUID instruction

2016-09-18 Thread Kyle Huey
rr (http://rr-project.org/), a userspace record-and-replay reverse-
execution debugger, would like to trap and emulate the CPUID instruction.
This would allow us to a) mask away certain hardware features that rr does
not support (e.g. RDRAND) and b) enable trace portability across machines
by providing constant results.

Newer Intel CPUs (Ivy Bridge and later) can fault when CPUID is executed at
CPL > 0.  Expose this capability to userspace as a new pair of arch_prctls,
ARCH_GET_CPUID and ARCH_SET_CPUID, with two values, ARCH_CPUID_ENABLE and
ARCH_CPUID_SIGSEGV.

The following changes have been made since v3:

Patch 1 was split into patches 1-4, patches 2 and 3 became patches 5 and 6, 
respectively.

Patch 1:
- Use SYSCALL_DEFINE in UML.

Patch 2:
- More descriptive commit message.

Patch 3:
- More descriptive commit message.
- Name the common arch_prctl function do_arch_prctl instead of
  do_arch_prctl_common

Patch 4:
- Move the 32-bit syscall entry point to process_32.c, place the compat
  entry point in process_64.c

Patch 5 (previously Patch 2):
- More descriptive commit message.
- Prefix the #define for the cpuid faulting bit with PLATINFO
- supports_cpuid_faulting returns bool
- Rearrange supports_cpuid_faulting to avoid linebreaks

Patch 6 (previously Patch 3):
- ARCH_GET_CPUID now takes 0 for the second argument, and returns the
  result directly.
- arch_post_exec is now a #define, called from setup_new_exec
- The test now uses errx
- The test now checks that ARCH_GET_CPUID returns ARCH_CPUID_SIGSEGV after
  fork()



[PATCH v4 0/6] x86/arch_prctl Add ARCH_[GET|SET]_CPUID for controlling the CPUID instruction

2016-09-18 Thread Kyle Huey
rr (http://rr-project.org/), a userspace record-and-replay reverse-
execution debugger, would like to trap and emulate the CPUID instruction.
This would allow us to a) mask away certain hardware features that rr does
not support (e.g. RDRAND) and b) enable trace portability across machines
by providing constant results.

Newer Intel CPUs (Ivy Bridge and later) can fault when CPUID is executed at
CPL > 0.  Expose this capability to userspace as a new pair of arch_prctls,
ARCH_GET_CPUID and ARCH_SET_CPUID, with two values, ARCH_CPUID_ENABLE and
ARCH_CPUID_SIGSEGV.

The following changes have been made since v3:

Patch 1 was split into patches 1-4, patches 2 and 3 became patches 5 and 6, 
respectively.

Patch 1:
- Use SYSCALL_DEFINE in UML.

Patch 2:
- More descriptive commit message.

Patch 3:
- More descriptive commit message.
- Name the common arch_prctl function do_arch_prctl instead of
  do_arch_prctl_common

Patch 4:
- Move the 32-bit syscall entry point to process_32.c, place the compat
  entry point in process_64.c

Patch 5 (previously Patch 2):
- More descriptive commit message.
- Prefix the #define for the cpuid faulting bit with PLATINFO
- supports_cpuid_faulting returns bool
- Rearrange supports_cpuid_faulting to avoid linebreaks

Patch 6 (previously Patch 3):
- ARCH_GET_CPUID now takes 0 for the second argument, and returns the
  result directly.
- arch_post_exec is now a #define, called from setup_new_exec
- The test now uses errx
- The test now checks that ARCH_GET_CPUID returns ARCH_CPUID_SIGSEGV after
  fork()



Re: [PATCH v3 1/3] syscalls,x86 Expose arch_prctl on x86-32.

2016-09-16 Thread Kyle Huey
On Fri, Sep 16, 2016 at 12:50 AM, Thomas Gleixner <t...@linutronix.de> wrote:
> On Thu, 15 Sep 2016, Kyle Huey wrote:
>
> First of all, please add a cover letter [PATCH 0/N] to your patch series
> and send it with something which provides proper mail threading.
> See: git-send-email, quilt

I did ... seems like using git-send-email with
--cc-cmd=scripts/get_maintainer.pl is not a good idea since people get
CCd to some parts of the thread and not others.

https://lkml.org/lkml/2016/9/15/811

>> arch_prctl is currently 64-bit only. Wire it up for 32-bits, as a no-op for
>> now. Rename the second arg to a more generic name.
>
> This changelog is useless.
>
> - it does not provide any rationale for this change, i.e. why this is
>   required. Just because it's 64bit only is not a reason.
>
> - "Rename the second arg to a more generic name" does not give
>   any useful information.
>
> Misleading information is worse than no information.
>
> Further your patch does 5 things at once. It wants to be split into parts:
>
> 1) Rename do_arch_prctl() and change the argument name,
>
>> -long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
>> +long do_arch_prctl_64(struct task_struct *task, int code, unsigned long 
>> arg2)
>
> 2) Provide do_arch_prctl_common() and hook it up to the arch_prctl syscall
>
>> -long sys_arch_prctl(int code, unsigned long addr)
>> +SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, arg2)
>>  {
>> - return do_arch_prctl(current, code, addr);
>> + long ret;
>> +
>> + ret = do_arch_prctl_64(current, code, arg2);
>> + if (ret == -EINVAL)
>> + ret = do_arch_prctl_common(current, code, arg2);
>> +
>> + return ret;
>>  }
>
> 3) Implement the compat version

Ok.

- Kyle


Re: [PATCH v3 1/3] syscalls,x86 Expose arch_prctl on x86-32.

2016-09-16 Thread Kyle Huey
On Fri, Sep 16, 2016 at 12:50 AM, Thomas Gleixner  wrote:
> On Thu, 15 Sep 2016, Kyle Huey wrote:
>
> First of all, please add a cover letter [PATCH 0/N] to your patch series
> and send it with something which provides proper mail threading.
> See: git-send-email, quilt

I did ... seems like using git-send-email with
--cc-cmd=scripts/get_maintainer.pl is not a good idea since people get
CCd to some parts of the thread and not others.

https://lkml.org/lkml/2016/9/15/811

>> arch_prctl is currently 64-bit only. Wire it up for 32-bits, as a no-op for
>> now. Rename the second arg to a more generic name.
>
> This changelog is useless.
>
> - it does not provide any rationale for this change, i.e. why this is
>   required. Just because it's 64bit only is not a reason.
>
> - "Rename the second arg to a more generic name" does not give
>   any useful information.
>
> Misleading information is worse than no information.
>
> Further your patch does 5 things at once. It wants to be split into parts:
>
> 1) Rename do_arch_prctl() and change the argument name,
>
>> -long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
>> +long do_arch_prctl_64(struct task_struct *task, int code, unsigned long 
>> arg2)
>
> 2) Provide do_arch_prctl_common() and hook it up to the arch_prctl syscall
>
>> -long sys_arch_prctl(int code, unsigned long addr)
>> +SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, arg2)
>>  {
>> - return do_arch_prctl(current, code, addr);
>> + long ret;
>> +
>> + ret = do_arch_prctl_64(current, code, arg2);
>> + if (ret == -EINVAL)
>> + ret = do_arch_prctl_common(current, code, arg2);
>> +
>> + return ret;
>>  }
>
> 3) Implement the compat version

Ok.

- Kyle


Re: [PATCH v3 3/3] x86,arch_prctl Add ARCH_[GET|SET]_CPUID for controlling the CPUID instruction

2016-09-15 Thread Kyle Huey
On Thu, Sep 15, 2016 at 5:07 PM, Andy Lutomirski <l...@amacapital.net> wrote:
> On Thu, Sep 15, 2016 at 4:33 PM, Kyle Huey <m...@kylehuey.com> wrote:
>> +int get_cpuid_mode(unsigned long adr)
>> +{
>> +   unsigned int val;
>> +
>> +   if (test_thread_flag(TIF_NOCPUID))
>> +   val = ARCH_CPUID_SIGSEGV;
>> +   else
>> +   val = ARCH_CPUID_ENABLE;
>> +
>> +   return put_user(val, (unsigned int __user *)adr);
>> +}
>
> Can we just do:
>
> if (arg2 != 0)
>   return -EINVAL;
> else
>  return test_thread_flag(TIF_NOCPUID) ? ARCH_CPUID_SIGSEGV : 
> ARCH_CPUID_ENABLE;

We could.  I copied the pattern of PR_GET_TSC here, but I don't feel
strongly about it.

>> diff --git a/tools/testing/selftests/x86/cpuid-fault.c 
>> b/tools/testing/selftests/x86/cpuid-fault.c
>> new file mode 100644
>> index 000..a9f3f68
>> --- /dev/null
>> +++ b/tools/testing/selftests/x86/cpuid-fault.c
>> @@ -0,0 +1,234 @@
>> +
>> +/*
>> + * Tests for arch_prctl(ARCH_GET_CPUID, ...) / prctl(ARCH_SET_CPUID, ...)
>> + *
>> + * Basic test to test behaviour of ARCH_GET_CPUID and ARCH_SET_CPUID
>> + */
>> +
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +
>> +#include 
>> +#include 
>> +
>> +const char *cpuid_names[] = {
>> +   [0] = "[not set]",
>
> Is 0 even possible?

Only if the call fails.

- Kyle


Re: [PATCH v3 3/3] x86,arch_prctl Add ARCH_[GET|SET]_CPUID for controlling the CPUID instruction

2016-09-15 Thread Kyle Huey
On Thu, Sep 15, 2016 at 5:07 PM, Andy Lutomirski  wrote:
> On Thu, Sep 15, 2016 at 4:33 PM, Kyle Huey  wrote:
>> +int get_cpuid_mode(unsigned long adr)
>> +{
>> +   unsigned int val;
>> +
>> +   if (test_thread_flag(TIF_NOCPUID))
>> +   val = ARCH_CPUID_SIGSEGV;
>> +   else
>> +   val = ARCH_CPUID_ENABLE;
>> +
>> +   return put_user(val, (unsigned int __user *)adr);
>> +}
>
> Can we just do:
>
> if (arg2 != 0)
>   return -EINVAL;
> else
>  return test_thread_flag(TIF_NOCPUID) ? ARCH_CPUID_SIGSEGV : 
> ARCH_CPUID_ENABLE;

We could.  I copied the pattern of PR_GET_TSC here, but I don't feel
strongly about it.

>> diff --git a/tools/testing/selftests/x86/cpuid-fault.c 
>> b/tools/testing/selftests/x86/cpuid-fault.c
>> new file mode 100644
>> index 000..a9f3f68
>> --- /dev/null
>> +++ b/tools/testing/selftests/x86/cpuid-fault.c
>> @@ -0,0 +1,234 @@
>> +
>> +/*
>> + * Tests for arch_prctl(ARCH_GET_CPUID, ...) / prctl(ARCH_SET_CPUID, ...)
>> + *
>> + * Basic test to test behaviour of ARCH_GET_CPUID and ARCH_SET_CPUID
>> + */
>> +
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +
>> +#include 
>> +#include 
>> +
>> +const char *cpuid_names[] = {
>> +   [0] = "[not set]",
>
> Is 0 even possible?

Only if the call fails.

- Kyle


Re: [Xen-devel] [PATCH v2 2/3] x86 Test and expose CPUID faulting capabilities in /proc/cpuinfo

2016-09-15 Thread Kyle Huey
On Thu, Sep 15, 2016 at 12:37 PM, Andy Lutomirski <l...@amacapital.net> wrote:
> On Thu, Sep 15, 2016 at 12:11 PM, Kyle Huey <m...@kylehuey.com> wrote:
>> On Thu, Sep 15, 2016 at 3:25 AM, Jan Beulich <jbeul...@suse.com> wrote:
>>>>>> On 15.09.16 at 12:05, <david.vra...@citrix.com> wrote:
>>>> On 14/09/16 22:01, Kyle Huey wrote:
>>>>> Xen advertises the underlying support for CPUID faulting but does not pass
>>>>> through writes to the relevant MSR, nor does it virtualize it, so it does
>>>>> not actually work. For now mask off the relevant bit on MSR_PLATFORM_INFO.
>>>>
>>>> Could you clarify in the commit message that it is PV guests that are
>>>> affected.
>>>
>>> What makes you think HVM ones aren't?
>>
>> Testing on EC2, HVM guests are affected as well.  Not sure what to do
>> about that.
>>
>
> It's kind of nasty, but it shouldn't be *too* hard to probe for this
> thing during early boot.  Allocate a page somewhere that has the user
> bit set, put something like this in it:
>
> cpuid
> inc %eax  /* return 1 */
> movw %ax, %ss /* force #GP to get out of here */
>
> Call it like this from asm (real asm, not inline):
>
> FRAME_BEGIN
> pushq %rbx
>
> xorl %eax, %eax
>
> /* Push return frame */
> pushq %ss
> pushq %rsp
> addq $8, (%rsp)
> pushfq
> pushq %cs
> pushq $end_of_cpuid_faulting_test
>
> /* Call it! */
> pushq $__USER_DS
> pushq $0
> pushq $X86_EFLAGS_FIXED  /* leave IF off when running the CPL3 stub */
> pushq $__USER_CS
> pushq [address of userspace stub]
> INTERRUPT_RETURN
>
> end_of_cpuid_faulting_test:
> pop %rbx
>
> FRAME_END
>
> Run this after the main GDT is loaded but while the #GP vector is
> temporarily pointing to:
>
> movq SS-RIP(%rsp), %rsp  /* pop the real return frame */
> INTERRUPT_RETURN
>
> and with interrupts off.  The function should return 0 if CPUID
> faulting works and 1 if it doesn't.
>
> Yeah, this is gross, but it should work.  I'm not sure how okay I am
> with putting this crap in the kernel...

This is rather heroic :)

I think it's more trouble than it's worth though.  The latest series I
submitted doesn't try to handle this.  Instead I'll patch Xen to fix
the bug.

- Kyle


Re: [Xen-devel] [PATCH v2 2/3] x86 Test and expose CPUID faulting capabilities in /proc/cpuinfo

2016-09-15 Thread Kyle Huey
On Thu, Sep 15, 2016 at 12:37 PM, Andy Lutomirski  wrote:
> On Thu, Sep 15, 2016 at 12:11 PM, Kyle Huey  wrote:
>> On Thu, Sep 15, 2016 at 3:25 AM, Jan Beulich  wrote:
>>>>>> On 15.09.16 at 12:05,  wrote:
>>>> On 14/09/16 22:01, Kyle Huey wrote:
>>>>> Xen advertises the underlying support for CPUID faulting but does not pass
>>>>> through writes to the relevant MSR, nor does it virtualize it, so it does
>>>>> not actually work. For now mask off the relevant bit on MSR_PLATFORM_INFO.
>>>>
>>>> Could you clarify in the commit message that it is PV guests that are
>>>> affected.
>>>
>>> What makes you think HVM ones aren't?
>>
>> Testing on EC2, HVM guests are affected as well.  Not sure what to do
>> about that.
>>
>
> It's kind of nasty, but it shouldn't be *too* hard to probe for this
> thing during early boot.  Allocate a page somewhere that has the user
> bit set, put something like this in it:
>
> cpuid
> inc %eax  /* return 1 */
> movw %ax, %ss /* force #GP to get out of here */
>
> Call it like this from asm (real asm, not inline):
>
> FRAME_BEGIN
> pushq %rbx
>
> xorl %eax, %eax
>
> /* Push return frame */
> pushq %ss
> pushq %rsp
> addq $8, (%rsp)
> pushfq
> pushq %cs
> pushq $end_of_cpuid_faulting_test
>
> /* Call it! */
> pushq $__USER_DS
> pushq $0
> pushq $X86_EFLAGS_FIXED  /* leave IF off when running the CPL3 stub */
> pushq $__USER_CS
> pushq [address of userspace stub]
> INTERRUPT_RETURN
>
> end_of_cpuid_faulting_test:
> pop %rbx
>
> FRAME_END
>
> Run this after the main GDT is loaded but while the #GP vector is
> temporarily pointing to:
>
> movq SS-RIP(%rsp), %rsp  /* pop the real return frame */
> INTERRUPT_RETURN
>
> and with interrupts off.  The function should return 0 if CPUID
> faulting works and 1 if it doesn't.
>
> Yeah, this is gross, but it should work.  I'm not sure how okay I am
> with putting this crap in the kernel...

This is rather heroic :)

I think it's more trouble than it's worth though.  The latest series I
submitted doesn't try to handle this.  Instead I'll patch Xen to fix
the bug.

- Kyle


[PATCH v3 3/3] x86,arch_prctl Add ARCH_[GET|SET]_CPUID for controlling the CPUID instruction

2016-09-15 Thread Kyle Huey
Intel supports faulting on the CPUID instruction in newer processors. Bit
31 of MSR_PLATFORM_INFO advertises support for this feature. It is
documented in detail in Section 2.3.2 of
http://www.intel.com/content/dam/www/public/us/en/documents/application-notes/virtualization-technology-flexmigration-application-note.pdf

Support for this is implemented as a new pair of arch_prctls, available on both 
x86-32 and x86-64.  The structure mirrors PR_[GET|SET]_TSC.  Like the TSC flag, 
CPUID faulting is propagated across forks.  Unlike the TSC flag, it is reset 
(to CPUID enabled) on exec.

Signed-off-by: Kyle Huey <kh...@kylehuey.com>
---
 arch/x86/include/asm/msr-index.h  |   1 +
 arch/x86/include/asm/thread_info.h|   5 +-
 arch/x86/include/uapi/asm/prctl.h |   6 +
 arch/x86/kernel/process.c |  98 -
 fs/exec.c |   6 +
 tools/testing/selftests/x86/Makefile  |   2 +-
 tools/testing/selftests/x86/cpuid-fault.c | 234 ++
 7 files changed, 349 insertions(+), 3 deletions(-)
 create mode 100644 tools/testing/selftests/x86/cpuid-fault.c

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 83908d5..4aebec2 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -53,6 +53,7 @@
 #define MSR_MTRRcap0x00fe
 #define MSR_IA32_BBL_CR_CTL0x0119
 #define MSR_IA32_BBL_CR_CTL3   0x011e
+#define MSR_MISC_FEATURES_ENABLES  0x0140
 
 #define MSR_IA32_SYSENTER_CS   0x0174
 #define MSR_IA32_SYSENTER_ESP  0x0175
diff --git a/arch/x86/include/asm/thread_info.h 
b/arch/x86/include/asm/thread_info.h
index 8b7c8d8..e3c40c6 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -93,6 +93,7 @@ struct thread_info {
 #define TIF_SECCOMP8   /* secure computing */
 #define TIF_USER_RETURN_NOTIFY 11  /* notify kernel of userspace return */
 #define TIF_UPROBE 12  /* breakpointed or singlestepping */
+#define TIF_NOCPUID15  /* CPUID is not accessible in userland 
*/
 #define TIF_NOTSC  16  /* TSC is not accessible in userland */
 #define TIF_IA32   17  /* IA32 compatibility process */
 #define TIF_FORK   18  /* ret_from_fork */
@@ -117,6 +118,7 @@ struct thread_info {
 #define _TIF_SECCOMP   (1 << TIF_SECCOMP)
 #define _TIF_USER_RETURN_NOTIFY(1 << TIF_USER_RETURN_NOTIFY)
 #define _TIF_UPROBE(1 << TIF_UPROBE)
+#define _TIF_NOCPUID   (1 << TIF_NOCPUID)
 #define _TIF_NOTSC (1 << TIF_NOTSC)
 #define _TIF_IA32  (1 << TIF_IA32)
 #define _TIF_FORK  (1 << TIF_FORK)
@@ -146,7 +148,7 @@ struct thread_info {
 
 /* flags to check in __switch_to() */
 #define _TIF_WORK_CTXSW
\
-   (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP)
+   (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP)
 
 #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
 #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
@@ -293,6 +295,7 @@ static inline bool in_ia32_syscall(void)
 extern void arch_task_cache_init(void);
 extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct 
*src);
 extern void arch_release_task_struct(struct task_struct *tsk);
+extern void arch_post_exec(void);
 #endif /* !__ASSEMBLY__ */
 
 #endif /* _ASM_X86_THREAD_INFO_H */
diff --git a/arch/x86/include/uapi/asm/prctl.h 
b/arch/x86/include/uapi/asm/prctl.h
index 3ac5032..c087e55 100644
--- a/arch/x86/include/uapi/asm/prctl.h
+++ b/arch/x86/include/uapi/asm/prctl.h
@@ -6,4 +6,10 @@
 #define ARCH_GET_FS 0x1003
 #define ARCH_GET_GS 0x1004
 
+/* Get/set the process' ability to use the CPUID instruction */
+#define ARCH_GET_CPUID 0x1005
+#define ARCH_SET_CPUID 0x1006
+# define ARCH_CPUID_ENABLE 1   /* allow the use of the CPUID 
instruction */
+# define ARCH_CPUID_SIGSEGV2   /* throw a SIGSEGV instead of 
reading the CPUID */
+
 #endif /* _ASM_X86_PRCTL_H */
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 1421451..f307d5c 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -32,6 +32,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /*
  * per-CPU TSS segments. Threads are completely 'soft' on Linux,
@@ -191,6 +192,75 @@ int set_tsc_mode(unsigned int val)
return 0;
 }
 
+static void switch_cpuid_faulting(bool on)
+{
+   if (on)
+   msr_set_bit(MSR_MISC_FEATURES_ENABLES, 0);
+   else
+   msr_clear_bit(MSR_MISC_FEATURES_ENABLES, 0);
+}
+
+static void disable_cpuid(void)
+{
+   preempt_disable();
+   if (!test_and_set_thread_flag(TIF_NOCPUID))
+   /*
+   

[PATCH v3 3/3] x86,arch_prctl Add ARCH_[GET|SET]_CPUID for controlling the CPUID instruction

2016-09-15 Thread Kyle Huey
Intel supports faulting on the CPUID instruction in newer processors. Bit
31 of MSR_PLATFORM_INFO advertises support for this feature. It is
documented in detail in Section 2.3.2 of
http://www.intel.com/content/dam/www/public/us/en/documents/application-notes/virtualization-technology-flexmigration-application-note.pdf

Support for this is implemented as a new pair of arch_prctls, available on both 
x86-32 and x86-64.  The structure mirrors PR_[GET|SET]_TSC.  Like the TSC flag, 
CPUID faulting is propagated across forks.  Unlike the TSC flag, it is reset 
(to CPUID enabled) on exec.

Signed-off-by: Kyle Huey 
---
 arch/x86/include/asm/msr-index.h  |   1 +
 arch/x86/include/asm/thread_info.h|   5 +-
 arch/x86/include/uapi/asm/prctl.h |   6 +
 arch/x86/kernel/process.c |  98 -
 fs/exec.c |   6 +
 tools/testing/selftests/x86/Makefile  |   2 +-
 tools/testing/selftests/x86/cpuid-fault.c | 234 ++
 7 files changed, 349 insertions(+), 3 deletions(-)
 create mode 100644 tools/testing/selftests/x86/cpuid-fault.c

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 83908d5..4aebec2 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -53,6 +53,7 @@
 #define MSR_MTRRcap0x00fe
 #define MSR_IA32_BBL_CR_CTL0x0119
 #define MSR_IA32_BBL_CR_CTL3   0x011e
+#define MSR_MISC_FEATURES_ENABLES  0x0140
 
 #define MSR_IA32_SYSENTER_CS   0x0174
 #define MSR_IA32_SYSENTER_ESP  0x0175
diff --git a/arch/x86/include/asm/thread_info.h 
b/arch/x86/include/asm/thread_info.h
index 8b7c8d8..e3c40c6 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -93,6 +93,7 @@ struct thread_info {
 #define TIF_SECCOMP8   /* secure computing */
 #define TIF_USER_RETURN_NOTIFY 11  /* notify kernel of userspace return */
 #define TIF_UPROBE 12  /* breakpointed or singlestepping */
+#define TIF_NOCPUID15  /* CPUID is not accessible in userland 
*/
 #define TIF_NOTSC  16  /* TSC is not accessible in userland */
 #define TIF_IA32   17  /* IA32 compatibility process */
 #define TIF_FORK   18  /* ret_from_fork */
@@ -117,6 +118,7 @@ struct thread_info {
 #define _TIF_SECCOMP   (1 << TIF_SECCOMP)
 #define _TIF_USER_RETURN_NOTIFY(1 << TIF_USER_RETURN_NOTIFY)
 #define _TIF_UPROBE(1 << TIF_UPROBE)
+#define _TIF_NOCPUID   (1 << TIF_NOCPUID)
 #define _TIF_NOTSC (1 << TIF_NOTSC)
 #define _TIF_IA32  (1 << TIF_IA32)
 #define _TIF_FORK  (1 << TIF_FORK)
@@ -146,7 +148,7 @@ struct thread_info {
 
 /* flags to check in __switch_to() */
 #define _TIF_WORK_CTXSW
\
-   (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP)
+   (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP)
 
 #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
 #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
@@ -293,6 +295,7 @@ static inline bool in_ia32_syscall(void)
 extern void arch_task_cache_init(void);
 extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct 
*src);
 extern void arch_release_task_struct(struct task_struct *tsk);
+extern void arch_post_exec(void);
 #endif /* !__ASSEMBLY__ */
 
 #endif /* _ASM_X86_THREAD_INFO_H */
diff --git a/arch/x86/include/uapi/asm/prctl.h 
b/arch/x86/include/uapi/asm/prctl.h
index 3ac5032..c087e55 100644
--- a/arch/x86/include/uapi/asm/prctl.h
+++ b/arch/x86/include/uapi/asm/prctl.h
@@ -6,4 +6,10 @@
 #define ARCH_GET_FS 0x1003
 #define ARCH_GET_GS 0x1004
 
+/* Get/set the process' ability to use the CPUID instruction */
+#define ARCH_GET_CPUID 0x1005
+#define ARCH_SET_CPUID 0x1006
+# define ARCH_CPUID_ENABLE 1   /* allow the use of the CPUID 
instruction */
+# define ARCH_CPUID_SIGSEGV2   /* throw a SIGSEGV instead of 
reading the CPUID */
+
 #endif /* _ASM_X86_PRCTL_H */
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 1421451..f307d5c 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -32,6 +32,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /*
  * per-CPU TSS segments. Threads are completely 'soft' on Linux,
@@ -191,6 +192,75 @@ int set_tsc_mode(unsigned int val)
return 0;
 }
 
+static void switch_cpuid_faulting(bool on)
+{
+   if (on)
+   msr_set_bit(MSR_MISC_FEATURES_ENABLES, 0);
+   else
+   msr_clear_bit(MSR_MISC_FEATURES_ENABLES, 0);
+}
+
+static void disable_cpuid(void)
+{
+   preempt_disable();
+   if (!test_and_set_thread_flag(TIF_NOCPUID))
+   /*
+* Must flip the CPU state synchro

[PATCH v3 2/3] x86 Test and expose CPUID faulting capabilities in /proc/cpuinfo

2016-09-15 Thread Kyle Huey
Signed-off-by: Kyle Huey <kh...@kylehuey.com>
---
 arch/x86/include/asm/cpufeatures.h |  1 +
 arch/x86/include/asm/msr-index.h   |  1 +
 arch/x86/kernel/cpu/scattered.c| 14 ++
 3 files changed, 16 insertions(+)

diff --git a/arch/x86/include/asm/cpufeatures.h 
b/arch/x86/include/asm/cpufeatures.h
index 92a8308..78b9d06 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -190,6 +190,7 @@
 
 #define X86_FEATURE_CPB( 7*32+ 2) /* AMD Core Performance 
Boost */
 #define X86_FEATURE_EPB( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS 
support */
+#define X86_FEATURE_CPUID_FAULT ( 7*32+ 4) /* Intel CPUID faulting */
 
 #define X86_FEATURE_HW_PSTATE  ( 7*32+ 8) /* AMD HW-PState */
 #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 56f4c66..83908d5 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -41,6 +41,7 @@
 #define MSR_IA32_PERFCTR1  0x00c2
 #define MSR_FSB_FREQ   0x00cd
 #define MSR_PLATFORM_INFO  0x00ce
+#define CPUID_FAULTING_SUPPORT (1UL << 31)
 
 #define MSR_NHM_SNB_PKG_CST_CFG_CTL0x00e2
 #define NHM_C3_AUTO_DEMOTE (1UL << 25)
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 8cb57df..d502da1 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -24,6 +24,17 @@ enum cpuid_regs {
CR_EBX
 };
 
+static int supports_cpuid_faulting(void)
+{
+   unsigned int lo, hi;
+
+   if (rdmsr_safe(MSR_PLATFORM_INFO, &lo, &hi) == 0 &&
+   (lo & CPUID_FAULTING_SUPPORT))
+   return 1;
+   else
+   return 0;
+}
+
 void init_scattered_cpuid_features(struct cpuinfo_x86 *c)
 {
u32 max_level;
@@ -54,4 +65,7 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c)
if (regs[cb->reg] & (1 << cb->bit))
set_cpu_cap(c, cb->feature);
}
+
+   if (supports_cpuid_faulting())
+   set_cpu_cap(c, X86_FEATURE_CPUID_FAULT);
 }
-- 
2.9.3



[PATCH v3 1/3] syscalls,x86 Expose arch_prctl on x86-32.

2016-09-15 Thread Kyle Huey
arch_prctl is currently 64-bit only. Wire it up for 32-bits, as a no-op for
now. Rename the second arg to a more generic name.

Signed-off-by: Kyle Huey <kh...@kylehuey.com>
---
 arch/x86/entry/syscalls/syscall_32.tbl |  1 +
 arch/x86/include/asm/proto.h   |  5 -
 arch/x86/kernel/process.c  | 10 ++
 arch/x86/kernel/process_64.c   | 33 +
 arch/x86/kernel/ptrace.c   |  8 
 arch/x86/um/Makefile   |  2 +-
 arch/x86/um/syscalls_32.c  |  7 +++
 arch/x86/um/syscalls_64.c  |  4 ++--
 include/linux/compat.h |  2 ++
 9 files changed, 52 insertions(+), 20 deletions(-)
 create mode 100644 arch/x86/um/syscalls_32.c

diff --git a/arch/x86/entry/syscalls/syscall_32.tbl 
b/arch/x86/entry/syscalls/syscall_32.tbl
index f848572..666fa61 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -386,3 +386,4 @@
 377i386copy_file_range sys_copy_file_range
 378i386preadv2 sys_preadv2 
compat_sys_preadv2
 379i386pwritev2sys_pwritev2
compat_sys_pwritev2
+380i386arch_prctl  compat_sys_arch_prctl   
compat_sys_arch_prctl
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
index 9b9b30b..f0e86aa 100644
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -30,6 +30,9 @@ void x86_report_nx(void);
 
 extern int reboot_force;
 
-long do_arch_prctl(struct task_struct *task, int code, unsigned long addr);
+long do_arch_prctl_common(struct task_struct *task, int code, unsigned long 
addr);
+#ifdef CONFIG_X86_64
+long do_arch_prctl_64(struct task_struct *task, int code, unsigned long addr);
+#endif
 
 #endif /* _ASM_X86_PROTO_H */
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 62c0b0e..1421451 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -567,3 +567,13 @@ unsigned long get_wchan(struct task_struct *p)
} while (count++ < 16 && p->state != TASK_RUNNING);
return 0;
 }
+
+long do_arch_prctl_common(struct task_struct *task, int code, unsigned long 
arg2)
+{
+   return -EINVAL;
+}
+
+asmlinkage long compat_sys_arch_prctl(int code, unsigned long arg2)
+{
+   return do_arch_prctl_common(current, code, arg2);
+}
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 63236d8..0e44608 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -35,6 +35,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -196,7 +197,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned 
long sp,
(struct user_desc __user *)tls, 0);
else
 #endif
-   err = do_arch_prctl(p, ARCH_SET_FS, tls);
+   err = do_arch_prctl_64(p, ARCH_SET_FS, tls);
if (err)
goto out;
}
@@ -524,7 +525,7 @@ void set_personality_ia32(bool x32)
 }
 EXPORT_SYMBOL_GPL(set_personality_ia32);
 
-long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
+long do_arch_prctl_64(struct task_struct *task, int code, unsigned long arg2)
 {
int ret = 0;
int doit = task == current;
@@ -532,48 +533,50 @@ long do_arch_prctl(struct task_struct *task, int code, 
unsigned long addr)
 
switch (code) {
case ARCH_SET_GS:
-   if (addr >= TASK_SIZE_MAX)
+   if (arg2 >= TASK_SIZE_MAX)
return -EPERM;
cpu = get_cpu();
task->thread.gsindex = 0;
-   task->thread.gsbase = addr;
+   task->thread.gsbase = arg2;
if (doit) {
load_gs_index(0);
-   ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr);
+   ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, arg2);
}
put_cpu();
break;
case ARCH_SET_FS:
/* Not strictly needed for fs, but do it for symmetry
   with gs */
-   if (addr >= TASK_SIZE_MAX)
+   if (arg2 >= TASK_SIZE_MAX)
return -EPERM;
cpu = get_cpu();
task->thread.fsindex = 0;
-   task->thread.fsbase = addr;
+   task->thread.fsbase = arg2;
if (doit) {
/* set the selector to 0 to not confuse __switch_to */
loadsegment(fs, 0);
-   ret = wrmsrl_safe(MSR_FS_BASE, addr);
+   ret = wrmsrl_safe(MSR_FS_BASE, arg2);
}
put_cpu();
break;
case ARCH_GET_FS: {

[PATCH v3 2/3] x86 Test and expose CPUID faulting capabilities in /proc/cpuinfo

2016-09-15 Thread Kyle Huey
Signed-off-by: Kyle Huey 
---
 arch/x86/include/asm/cpufeatures.h |  1 +
 arch/x86/include/asm/msr-index.h   |  1 +
 arch/x86/kernel/cpu/scattered.c| 14 ++
 3 files changed, 16 insertions(+)

diff --git a/arch/x86/include/asm/cpufeatures.h 
b/arch/x86/include/asm/cpufeatures.h
index 92a8308..78b9d06 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -190,6 +190,7 @@
 
 #define X86_FEATURE_CPB( 7*32+ 2) /* AMD Core Performance 
Boost */
 #define X86_FEATURE_EPB( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS 
support */
+#define X86_FEATURE_CPUID_FAULT ( 7*32+ 4) /* Intel CPUID faulting */
 
 #define X86_FEATURE_HW_PSTATE  ( 7*32+ 8) /* AMD HW-PState */
 #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 56f4c66..83908d5 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -41,6 +41,7 @@
 #define MSR_IA32_PERFCTR1  0x00c2
 #define MSR_FSB_FREQ   0x00cd
 #define MSR_PLATFORM_INFO  0x00ce
+#define CPUID_FAULTING_SUPPORT (1UL << 31)
 
 #define MSR_NHM_SNB_PKG_CST_CFG_CTL0x00e2
 #define NHM_C3_AUTO_DEMOTE (1UL << 25)
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 8cb57df..d502da1 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -24,6 +24,17 @@ enum cpuid_regs {
CR_EBX
 };
 
+static int supports_cpuid_faulting(void)
+{
+   unsigned int lo, hi;
+
+   if (rdmsr_safe(MSR_PLATFORM_INFO, &lo, &hi) == 0 &&
+   (lo & CPUID_FAULTING_SUPPORT))
+   return 1;
+   else
+   return 0;
+}
+
 void init_scattered_cpuid_features(struct cpuinfo_x86 *c)
 {
u32 max_level;
@@ -54,4 +65,7 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c)
if (regs[cb->reg] & (1 << cb->bit))
set_cpu_cap(c, cb->feature);
}
+
+   if (supports_cpuid_faulting())
+   set_cpu_cap(c, X86_FEATURE_CPUID_FAULT);
 }
-- 
2.9.3



[PATCH v3 1/3] syscalls,x86 Expose arch_prctl on x86-32.

2016-09-15 Thread Kyle Huey
arch_prctl is currently 64-bit only. Wire it up for 32-bits, as a no-op for
now. Rename the second arg to a more generic name.

Signed-off-by: Kyle Huey 
---
 arch/x86/entry/syscalls/syscall_32.tbl |  1 +
 arch/x86/include/asm/proto.h   |  5 -
 arch/x86/kernel/process.c  | 10 ++
 arch/x86/kernel/process_64.c   | 33 +
 arch/x86/kernel/ptrace.c   |  8 
 arch/x86/um/Makefile   |  2 +-
 arch/x86/um/syscalls_32.c  |  7 +++
 arch/x86/um/syscalls_64.c  |  4 ++--
 include/linux/compat.h |  2 ++
 9 files changed, 52 insertions(+), 20 deletions(-)
 create mode 100644 arch/x86/um/syscalls_32.c

diff --git a/arch/x86/entry/syscalls/syscall_32.tbl 
b/arch/x86/entry/syscalls/syscall_32.tbl
index f848572..666fa61 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -386,3 +386,4 @@
 377i386copy_file_range sys_copy_file_range
 378i386preadv2 sys_preadv2 
compat_sys_preadv2
 379i386pwritev2sys_pwritev2
compat_sys_pwritev2
+380i386arch_prctl  compat_sys_arch_prctl   
compat_sys_arch_prctl
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
index 9b9b30b..f0e86aa 100644
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -30,6 +30,9 @@ void x86_report_nx(void);
 
 extern int reboot_force;
 
-long do_arch_prctl(struct task_struct *task, int code, unsigned long addr);
+long do_arch_prctl_common(struct task_struct *task, int code, unsigned long 
addr);
+#ifdef CONFIG_X86_64
+long do_arch_prctl_64(struct task_struct *task, int code, unsigned long addr);
+#endif
 
 #endif /* _ASM_X86_PROTO_H */
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 62c0b0e..1421451 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -567,3 +567,13 @@ unsigned long get_wchan(struct task_struct *p)
} while (count++ < 16 && p->state != TASK_RUNNING);
return 0;
 }
+
+long do_arch_prctl_common(struct task_struct *task, int code, unsigned long 
arg2)
+{
+   return -EINVAL;
+}
+
+asmlinkage long compat_sys_arch_prctl(int code, unsigned long arg2)
+{
+   return do_arch_prctl_common(current, code, arg2);
+}
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 63236d8..0e44608 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -35,6 +35,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -196,7 +197,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned 
long sp,
(struct user_desc __user *)tls, 0);
else
 #endif
-   err = do_arch_prctl(p, ARCH_SET_FS, tls);
+   err = do_arch_prctl_64(p, ARCH_SET_FS, tls);
if (err)
goto out;
}
@@ -524,7 +525,7 @@ void set_personality_ia32(bool x32)
 }
 EXPORT_SYMBOL_GPL(set_personality_ia32);
 
-long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
+long do_arch_prctl_64(struct task_struct *task, int code, unsigned long arg2)
 {
int ret = 0;
int doit = task == current;
@@ -532,48 +533,50 @@ long do_arch_prctl(struct task_struct *task, int code, 
unsigned long addr)
 
switch (code) {
case ARCH_SET_GS:
-   if (addr >= TASK_SIZE_MAX)
+   if (arg2 >= TASK_SIZE_MAX)
return -EPERM;
cpu = get_cpu();
task->thread.gsindex = 0;
-   task->thread.gsbase = addr;
+   task->thread.gsbase = arg2;
if (doit) {
load_gs_index(0);
-   ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr);
+   ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, arg2);
}
put_cpu();
break;
case ARCH_SET_FS:
/* Not strictly needed for fs, but do it for symmetry
   with gs */
-   if (addr >= TASK_SIZE_MAX)
+   if (arg2 >= TASK_SIZE_MAX)
return -EPERM;
cpu = get_cpu();
task->thread.fsindex = 0;
-   task->thread.fsbase = addr;
+   task->thread.fsbase = arg2;
if (doit) {
/* set the selector to 0 to not confuse __switch_to */
loadsegment(fs, 0);
-   ret = wrmsrl_safe(MSR_FS_BASE, addr);
+   ret = wrmsrl_safe(MSR_FS_BASE, arg2);
}
put_cpu();
break;
case ARCH_GET_FS: {
unsigned long base;
+

[PATCH v3] arch_prctl,x86 Add ARCH_[GET|SET]_CPUID for controlling the CPUID instruction

2016-09-15 Thread Kyle Huey
rr (http://rr-project.org/), a userspace record-and-replay reverse-
execution debugger, would like to trap and emulate the CPUID instruction.
This would allow us to a) mask away certain hardware features that rr does
not support (e.g. RDRAND) and b) enable trace portability across machines
by providing constant results.

The following changes have been made since v2.

Patch 1:
- Use of compat_sys_arch_prctl and separate do_arch_prctl_[common|64]
  functions to separate generic and 64-bit only arch_prctls.

Patch 2:
- The hack to suppress the mistakenly advertised CPUID faulting support in
  Xen guests is removed. Doing this for both PV and HVM guests is quite
  tricky, and likely more trouble than it's worth. Instead I'll submit a
  patch to Xen.

Patch 3:
- TIF_NOCPUID is now dropped on exec. I added the arch_post_exec hook
  as I didn't see any existing place to run arch-specific code during
  exec. The test is updated for the new exec behavior.




[PATCH v3] arch_prctl,x86 Add ARCH_[GET|SET]_CPUID for controlling the CPUID instruction

2016-09-15 Thread Kyle Huey
rr (http://rr-project.org/), a userspace record-and-replay reverse-
execution debugger, would like to trap and emulate the CPUID instruction.
This would allow us to a) mask away certain hardware features that rr does
not support (e.g. RDRAND) and b) enable trace portability across machines
by providing constant results.

The following changes have been made since v2.

Patch 1:
- Use of compat_sys_arch_prctl and separate do_arch_prctl_[common|64]
  functions to separate generic and 64-bit only arch_prctls.

Patch 2:
- The hack to suppress the mistakenly advertised CPUID faulting support in
  Xen guests is removed. Doing this for both PV and HVM guests is quite
  tricky, and likely more trouble than it's worth. Instead I'll submit a
  patch to Xen.

Patch 3:
- TIF_NOCPUID is now dropped on exec. I added the arch_post_exec hook
  as I didn't see any existing place to run arch-specific code during
  exec. The test is updated for the new exec behavior.




Re: [Xen-devel] [PATCH v2 2/3] x86 Test and expose CPUID faulting capabilities in /proc/cpuinfo

2016-09-15 Thread Kyle Huey
On Thu, Sep 15, 2016 at 3:25 AM, Jan Beulich <jbeul...@suse.com> wrote:
>>>> On 15.09.16 at 12:05, <david.vra...@citrix.com> wrote:
>> On 14/09/16 22:01, Kyle Huey wrote:
>>> Xen advertises the underlying support for CPUID faulting but does not pass
>>> through writes to the relevant MSR, nor does it virtualize it, so it does
>>> not actually work. For now mask off the relevant bit on MSR_PLATFORM_INFO.
>>
>> Could you clarify in the commit message that it is PV guests that are
>> affected.
>
> What makes you think HVM ones aren't?

Testing on EC2, HVM guests are affected as well.  Not sure what to do
about that.

- Kyle


Re: [Xen-devel] [PATCH v2 2/3] x86 Test and expose CPUID faulting capabilities in /proc/cpuinfo

2016-09-15 Thread Kyle Huey
On Thu, Sep 15, 2016 at 3:25 AM, Jan Beulich  wrote:
>>>> On 15.09.16 at 12:05,  wrote:
>> On 14/09/16 22:01, Kyle Huey wrote:
>>> Xen advertises the underlying support for CPUID faulting but does not pass
>>> through writes to the relevant MSR, nor does it virtualize it, so it does
>>> not actually work. For now mask off the relevant bit on MSR_PLATFORM_INFO.
>>
>> Could you clarify in the commit message that it is PV guests that are
>> affected.
>
> What makes you think HVM ones aren't?

Testing on EC2, HVM guests are affected as well.  Not sure what to do
about that.

- Kyle


Re: [PATCH v2 2/3] x86 Test and expose CPUID faulting capabilities in /proc/cpuinfo

2016-09-14 Thread Kyle Huey
On Wed, Sep 14, 2016 at 6:17 PM, Andy Lutomirski <l...@amacapital.net> wrote:
> On Wed, Sep 14, 2016 at 3:03 PM, Kyle Huey <m...@kylehuey.com> wrote:
>> On Wed, Sep 14, 2016 at 2:35 PM, Dave Hansen
>> <dave.han...@linux.intel.com> wrote:
>>> On 09/14/2016 02:01 PM, Kyle Huey wrote:
>
>>> Is any of this useful to optimize away at compile-time?  We have config
>>> options for when we're running as a guest, and this seems like a feature
>>> that isn't available when running on bare metal.
>>
>> On the contrary, this is only available when we're on bare metal.
>> Neither Xen nor KVM virtualize CPUID faulting (although KVM correctly
>> suppresses MSR_PLATFORM_INFO's report of support for it).
>
> KVM could easily support this.  If rr starts using it, I think KVM
> *should* add support, possibly even for older CPUs that don't support
> the feature in hardware.
>
> It's too bad that x86 doesn't give us the instruction bytes on a
> fault.  Otherwise we could lazily switch this feature.

We are *very* interested in having KVM and Xen support virtualization
of this feature.  I am planning to work on KVM after I get this series
of patches in :)

- Kyle


Re: [PATCH v2 2/3] x86 Test and expose CPUID faulting capabilities in /proc/cpuinfo

2016-09-14 Thread Kyle Huey
On Wed, Sep 14, 2016 at 6:17 PM, Andy Lutomirski  wrote:
> On Wed, Sep 14, 2016 at 3:03 PM, Kyle Huey  wrote:
>> On Wed, Sep 14, 2016 at 2:35 PM, Dave Hansen
>>  wrote:
>>> On 09/14/2016 02:01 PM, Kyle Huey wrote:
>
>>> Is any of this useful to optimize away at compile-time?  We have config
>>> options for when we're running as a guest, and this seems like a feature
>>> that isn't available when running on bare metal.
>>
>> On the contrary, this is only available when we're on bare metal.
>> Neither Xen nor KVM virtualize CPUID faulting (although KVM correctly
>> suppresses MSR_PLATFORM_INFO's report of support for it).
>
> KVM could easily support this.  If rr starts using it, I think KVM
> *should* add support, possibly even for older CPUs that don't support
> the feature in hardware.
>
> It's too bad that x86 doesn't give us the instruction bytes on a
> fault.  Otherwise we could lazily switch this feature.

We are *very* interested in having KVM and Xen support virtualization
of this feature.  I am planning to work on KVM after I get this series
of patches in :)

- Kyle


Re: [PATCH v2 3/3] x86,arch_prctl Add ARCH_[GET|SET]_CPUID for controlling the CPUID instruction

2016-09-14 Thread Kyle Huey
On Wed, Sep 14, 2016 at 6:54 PM, Andy Lutomirski <l...@amacapital.net> wrote:
> On Wed, Sep 14, 2016 at 6:47 PM, Kyle Huey <m...@kylehuey.com> wrote:
>> On Wed, Sep 14, 2016 at 6:29 PM, Andy Lutomirski <l...@amacapital.net> wrote:
>>> On Wed, Sep 14, 2016 at 2:01 PM, Kyle Huey <m...@kylehuey.com> wrote:
>
>>>> +
>>>> +int set_cpuid_mode(struct task_struct *task, unsigned long val)
>>>> +{
>>>> +   /* Only disable/enable_cpuid() if it is supported on this 
>>>> hardware. */
>>>> +   bool cpuid_fault_supported = 
>>>> static_cpu_has(X86_FEATURE_CPUID_FAULT);
>>>> +
>>>> +   if (val == ARCH_CPUID_ENABLE && cpuid_fault_supported) {
>>>> +   if (task_no_new_privs(task) && 
>>>> test_thread_flag(TIF_NOCPUID))
>>>> +   return -EACCES;
>>>
>>> This check seems confused.  If this flag were preserved on execve,
>>> it's the SIGSEGV mode that would need the check.
>>
>> Not sure I follow this one.  no_new_privs should block transitions
>> from SIGSEGV to ENABLE, right?  That's what this check does.
>
> It's the other way around entirely: if you make a change to your
> process context such that a subsequently execve()'d setuid program
> might malfunction, you've just done something dangerous.  This is only
> okay, at least in newly-supported instances, if you are either
> privileged or if you have no_new_privs set.  Having privilege makes it
> okay: unprivileged programs can't use it to subvert setuid programs.
> no_new_privs makes it safe as well: if no_new_privs is set, you can't
> gain privilege via execve(), so there's no attack surface.  So, if you
> have execve() keep ARCH_CPUID_SIGSEGV set, then setting it that way in
> the first place should require privilege or no_new_privs.
>
> I personally favor resetting to ARCH_CPUID_ENABLE on execve() and not
> worrying about no_new_privs.
>
> Does that make sense?

Yes, ok.  Robert and I agree that resetting does make the most sense.
Using this usefully requires a ptrace supervisor (to catch the traps),
which can easily inject a call to arch_prctl to reenable
ARCH_CPUID_SIGSEGV when desired.

- Kyle


Re: [PATCH v2 3/3] x86,arch_prctl Add ARCH_[GET|SET]_CPUID for controlling the CPUID instruction

2016-09-14 Thread Kyle Huey
On Wed, Sep 14, 2016 at 6:54 PM, Andy Lutomirski  wrote:
> On Wed, Sep 14, 2016 at 6:47 PM, Kyle Huey  wrote:
>> On Wed, Sep 14, 2016 at 6:29 PM, Andy Lutomirski  wrote:
>>> On Wed, Sep 14, 2016 at 2:01 PM, Kyle Huey  wrote:
>
>>>> +
>>>> +int set_cpuid_mode(struct task_struct *task, unsigned long val)
>>>> +{
>>>> +   /* Only disable/enable_cpuid() if it is supported on this 
>>>> hardware. */
>>>> +   bool cpuid_fault_supported = 
>>>> static_cpu_has(X86_FEATURE_CPUID_FAULT);
>>>> +
>>>> +   if (val == ARCH_CPUID_ENABLE && cpuid_fault_supported) {
>>>> +   if (task_no_new_privs(task) && 
>>>> test_thread_flag(TIF_NOCPUID))
>>>> +   return -EACCES;
>>>
>>> This check seems confused.  If this flag were preserved on execve,
>>> it's the SIGSEGV mode that would need the check.
>>
>> Not sure I follow this one.  no_new_privs should block transitions
>> from SIGSEGV to ENABLE, right?  That's what this check does.
>
> It's the other way around entirely: if you make a change to your
> process context such that a subsequently execve()'d setuid program
> might malfunction, you've just done something dangerous.  This is only
> okay, at least in newly-supported instances, if you are either
> privileged or if you have no_new_privs set.  Having privilege makes it
> okay: unprivileged programs can't use it to subvert setuid programs.
> no_new_privs makes it safe as well: if no_new_privs is set, you can't
> gain privilege via execve(), so there's no attack surface.  So, if you
> have execve() keep ARCH_CPUID_SIGSEGV set, then setting it that way in
> the first place should require privilege or no_new_privs.
>
> I personally favor resetting to ARCH_CPUID_ENABLE on execve() and not
> worrying about no_new_privs.
>
> Does that make sense?

Yes, ok.  Robert and I agree that resetting does make the most sense.
Using this usefully requires a ptrace supervisor (to catch the traps),
which can easily inject a call to arch_prctl to reenable
ARCH_CPUID_SIGSEGV when desired.

- Kyle


Re: [PATCH v2 3/3] x86,arch_prctl Add ARCH_[GET|SET]_CPUID for controlling the CPUID instruction

2016-09-14 Thread Kyle Huey
On Wed, Sep 14, 2016 at 6:29 PM, Andy Lutomirski <l...@amacapital.net> wrote:
> On Wed, Sep 14, 2016 at 2:01 PM, Kyle Huey <m...@kylehuey.com> wrote:
>> Intel supports faulting on the CPUID instruction in newer processors. Bit
>> 31 of MSR_PLATFORM_INFO advertises support for this feature. It is
>> documented in detail in Section 2.3.2 of
>> http://www.intel.com/content/dam/www/public/us/en/documents/application-notes/virtualization-technology-flexmigration-application-note.pdf
>>
>> Signed-off-by: Kyle Huey <kh...@kylehuey.com>
>> ---
>>  arch/x86/include/asm/msr-index.h  |   1 +
>>  arch/x86/include/asm/thread_info.h|   4 +-
>>  arch/x86/include/uapi/asm/prctl.h |   6 +
>>  arch/x86/kernel/process.c |  81 +++
>>  tools/testing/selftests/x86/Makefile  |   2 +-
>>  tools/testing/selftests/x86/cpuid-fault.c | 223 
>> ++
>>  6 files changed, 315 insertions(+), 2 deletions(-)
>>  create mode 100644 tools/testing/selftests/x86/cpuid-fault.c
>>
>> diff --git a/arch/x86/include/asm/msr-index.h 
>> b/arch/x86/include/asm/msr-index.h
>> index 83908d5..4aebec2 100644
>> --- a/arch/x86/include/asm/msr-index.h
>> +++ b/arch/x86/include/asm/msr-index.h
>> @@ -53,6 +53,7 @@
>>  #define MSR_MTRRcap0x00fe
>>  #define MSR_IA32_BBL_CR_CTL0x0119
>>  #define MSR_IA32_BBL_CR_CTL3   0x011e
>> +#define MSR_MISC_FEATURES_ENABLES  0x0140
>>
>>  #define MSR_IA32_SYSENTER_CS   0x0174
>>  #define MSR_IA32_SYSENTER_ESP  0x0175
>> diff --git a/arch/x86/include/asm/thread_info.h 
>> b/arch/x86/include/asm/thread_info.h
>> index 8b7c8d8..ec93976 100644
>> --- a/arch/x86/include/asm/thread_info.h
>> +++ b/arch/x86/include/asm/thread_info.h
>> @@ -93,6 +93,7 @@ struct thread_info {
>>  #define TIF_SECCOMP8   /* secure computing */
>>  #define TIF_USER_RETURN_NOTIFY 11  /* notify kernel of userspace return 
>> */
>>  #define TIF_UPROBE 12  /* breakpointed or singlestepping */
>> +#define TIF_NOCPUID15  /* CPUID is not accessible in 
>> userland */
>>  #define TIF_NOTSC  16  /* TSC is not accessible in userland 
>> */
>>  #define TIF_IA32   17  /* IA32 compatibility process */
>>  #define TIF_FORK   18  /* ret_from_fork */
>> @@ -117,6 +118,7 @@ struct thread_info {
>>  #define _TIF_SECCOMP   (1 << TIF_SECCOMP)
>>  #define _TIF_USER_RETURN_NOTIFY(1 << TIF_USER_RETURN_NOTIFY)
>>  #define _TIF_UPROBE(1 << TIF_UPROBE)
>> +#define _TIF_NOCPUID   (1 << TIF_NOCPUID)
>>  #define _TIF_NOTSC (1 << TIF_NOTSC)
>>  #define _TIF_IA32  (1 << TIF_IA32)
>>  #define _TIF_FORK  (1 << TIF_FORK)
>> @@ -146,7 +148,7 @@ struct thread_info {
>>
>>  /* flags to check in __switch_to() */
>>  #define _TIF_WORK_CTXSW 
>>\
>> -   (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP)
>> +   (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP)
>>
>>  #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
>>  #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
>> diff --git a/arch/x86/include/uapi/asm/prctl.h 
>> b/arch/x86/include/uapi/asm/prctl.h
>> index 3ac5032..c087e55 100644
>> --- a/arch/x86/include/uapi/asm/prctl.h
>> +++ b/arch/x86/include/uapi/asm/prctl.h
>> @@ -6,4 +6,10 @@
>>  #define ARCH_GET_FS 0x1003
>>  #define ARCH_GET_GS 0x1004
>>
>> +/* Get/set the process' ability to use the CPUID instruction */
>> +#define ARCH_GET_CPUID 0x1005
>> +#define ARCH_SET_CPUID 0x1006
>> +# define ARCH_CPUID_ENABLE 1   /* allow the use of the 
>> CPUID instruction */
>> +# define ARCH_CPUID_SIGSEGV2   /* throw a SIGSEGV instead 
>> of reading the CPUID */
>> +
>>  #endif /* _ASM_X86_PRCTL_H */
>> diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
>> index 0f857c3..5fc8e9d 100644
>> --- a/arch/x86/kernel/process.c
>> +++ b/arch/x86/kernel/process.c
>> @@ -193,6 +193,69 @@ int set_tsc_mode(unsigned int val)
>> return 0;
>>  }
>>
>> +static void switch_cpuid_faulting(bool on)
>> +{
>> +   if (on)
>> +   msr_set_bit(MSR_MISC_FEATURES_ENABLES, 0);
>> +   else
>> +   ms

Re: [PATCH v2 3/3] x86,arch_prctl Add ARCH_[GET|SET]_CPUID for controlling the CPUID instruction

2016-09-14 Thread Kyle Huey
On Wed, Sep 14, 2016 at 6:29 PM, Andy Lutomirski  wrote:
> On Wed, Sep 14, 2016 at 2:01 PM, Kyle Huey  wrote:
>> Intel supports faulting on the CPUID instruction in newer processors. Bit
>> 31 of MSR_PLATFORM_INFO advertises support for this feature. It is
>> documented in detail in Section 2.3.2 of
>> http://www.intel.com/content/dam/www/public/us/en/documents/application-notes/virtualization-technology-flexmigration-application-note.pdf
>>
>> Signed-off-by: Kyle Huey 
>> ---
>>  arch/x86/include/asm/msr-index.h  |   1 +
>>  arch/x86/include/asm/thread_info.h|   4 +-
>>  arch/x86/include/uapi/asm/prctl.h |   6 +
>>  arch/x86/kernel/process.c |  81 +++
>>  tools/testing/selftests/x86/Makefile  |   2 +-
>>  tools/testing/selftests/x86/cpuid-fault.c | 223 
>> ++
>>  6 files changed, 315 insertions(+), 2 deletions(-)
>>  create mode 100644 tools/testing/selftests/x86/cpuid-fault.c
>>
>> diff --git a/arch/x86/include/asm/msr-index.h 
>> b/arch/x86/include/asm/msr-index.h
>> index 83908d5..4aebec2 100644
>> --- a/arch/x86/include/asm/msr-index.h
>> +++ b/arch/x86/include/asm/msr-index.h
>> @@ -53,6 +53,7 @@
>>  #define MSR_MTRRcap0x00fe
>>  #define MSR_IA32_BBL_CR_CTL0x0119
>>  #define MSR_IA32_BBL_CR_CTL3   0x011e
>> +#define MSR_MISC_FEATURES_ENABLES  0x0140
>>
>>  #define MSR_IA32_SYSENTER_CS   0x0174
>>  #define MSR_IA32_SYSENTER_ESP  0x0175
>> diff --git a/arch/x86/include/asm/thread_info.h 
>> b/arch/x86/include/asm/thread_info.h
>> index 8b7c8d8..ec93976 100644
>> --- a/arch/x86/include/asm/thread_info.h
>> +++ b/arch/x86/include/asm/thread_info.h
>> @@ -93,6 +93,7 @@ struct thread_info {
>>  #define TIF_SECCOMP8   /* secure computing */
>>  #define TIF_USER_RETURN_NOTIFY 11  /* notify kernel of userspace return 
>> */
>>  #define TIF_UPROBE 12  /* breakpointed or singlestepping */
>> +#define TIF_NOCPUID15  /* CPUID is not accessible in 
>> userland */
>>  #define TIF_NOTSC  16  /* TSC is not accessible in userland 
>> */
>>  #define TIF_IA32   17  /* IA32 compatibility process */
>>  #define TIF_FORK   18  /* ret_from_fork */
>> @@ -117,6 +118,7 @@ struct thread_info {
>>  #define _TIF_SECCOMP   (1 << TIF_SECCOMP)
>>  #define _TIF_USER_RETURN_NOTIFY(1 << TIF_USER_RETURN_NOTIFY)
>>  #define _TIF_UPROBE(1 << TIF_UPROBE)
>> +#define _TIF_NOCPUID   (1 << TIF_NOCPUID)
>>  #define _TIF_NOTSC (1 << TIF_NOTSC)
>>  #define _TIF_IA32  (1 << TIF_IA32)
>>  #define _TIF_FORK  (1 << TIF_FORK)
>> @@ -146,7 +148,7 @@ struct thread_info {
>>
>>  /* flags to check in __switch_to() */
>>  #define _TIF_WORK_CTXSW 
>>\
>> -   (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP)
>> +   (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP)
>>
>>  #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
>>  #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
>> diff --git a/arch/x86/include/uapi/asm/prctl.h 
>> b/arch/x86/include/uapi/asm/prctl.h
>> index 3ac5032..c087e55 100644
>> --- a/arch/x86/include/uapi/asm/prctl.h
>> +++ b/arch/x86/include/uapi/asm/prctl.h
>> @@ -6,4 +6,10 @@
>>  #define ARCH_GET_FS 0x1003
>>  #define ARCH_GET_GS 0x1004
>>
>> +/* Get/set the process' ability to use the CPUID instruction */
>> +#define ARCH_GET_CPUID 0x1005
>> +#define ARCH_SET_CPUID 0x1006
>> +# define ARCH_CPUID_ENABLE 1   /* allow the use of the 
>> CPUID instruction */
>> +# define ARCH_CPUID_SIGSEGV2   /* throw a SIGSEGV instead 
>> of reading the CPUID */
>> +
>>  #endif /* _ASM_X86_PRCTL_H */
>> diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
>> index 0f857c3..5fc8e9d 100644
>> --- a/arch/x86/kernel/process.c
>> +++ b/arch/x86/kernel/process.c
>> @@ -193,6 +193,69 @@ int set_tsc_mode(unsigned int val)
>> return 0;
>>  }
>>
>> +static void switch_cpuid_faulting(bool on)
>> +{
>> +   if (on)
>> +   msr_set_bit(MSR_MISC_FEATURES_ENABLES, 0);
>> +   else
>> +   msr_clear_bit(MSR_MISC_FEATURES_ENABLES, 0);
>> +}
&

Re: [RESEND][PATCH v2 1/3] syscalls,x86 Expose arch_prctl on x86-32.

2016-09-14 Thread Kyle Huey
On Wed, Sep 14, 2016 at 3:29 PM, Dmitry Safonov <0x7f454...@gmail.com> wrote:
> 2016-09-15 1:08 GMT+03:00 Kyle Huey <m...@kylehuey.com>:
>> On Wed, Sep 14, 2016 at 2:59 PM, Dmitry Safonov <0x7f454...@gmail.com> wrote:
>>> 2016-09-15 0:08 GMT+03:00 Kyle Huey <m...@kylehuey.com>:
>>>> Signed-off-by: Kyle Huey <kh...@kylehuey.com>
>>>> ---
>>>>  arch/x86/entry/syscalls/syscall_32.tbl |  1 +
>>>>  arch/x86/kernel/process.c  | 80 
>>>> ++
>>>>  arch/x86/kernel/process_64.c   | 66 
>>>>  3 files changed, 81 insertions(+), 66 deletions(-)
>>>>
>>>> diff --git a/arch/x86/entry/syscalls/syscall_32.tbl 
>>>> b/arch/x86/entry/syscalls/syscall_32.tbl
>>>> index f848572..3b6965b 100644
>>>> --- a/arch/x86/entry/syscalls/syscall_32.tbl
>>>> +++ b/arch/x86/entry/syscalls/syscall_32.tbl
>>>> @@ -386,3 +386,4 @@
>>>>  377i386copy_file_range sys_copy_file_range
>>>>  378i386preadv2 sys_preadv2 
>>>> compat_sys_preadv2
>>>>  379i386pwritev2sys_pwritev2
>>>> compat_sys_pwritev2
>>>> +380i386arch_prctl  sys_arch_prctl
>>>
>>> Why not define it as other 32-bit syscalls with compat_sys_ prefix
>>> with the help of COMPAT_SYSCALL_DEFINE() macro?
>>> Then you could omit code moving, drop is_32 helper.
>>> I miss something obvious?
>>
>> The code will have to move regardless, because right now do_arch_prctl
>> is in process_64.c which is only compiled on a 64 bit kernel.
>
> Why? This code will not work anyway for 32-bit in your patches
> by obscuring it with is_32.
>
>> As I told Dave Hansen in the non-RESEND thread (not sure why
>> git-send-email didn't put him in this one ...) I considered doing a
>> compat_sys_arch_prctl that would reject the relevant arch_prctls that
>> don't apply on 32 bit but I didn't see any prior art for it (in my
>> admittedly non-exhaustive search).
>
> Well, you could just add to 64-bit do_arch_prctl() new cases for your
> prctls - that would be just a two-lines for each new prctl.
> Also add compat_sys_ and define *only* what's needed there for you,
> do not add there ARCH_{SET,GET}_{FS,GS}.
> Does this make sense?

Yeah, I should have spoken more clearly.  We'll need some
implementation of the syscall outside of process_64.c.  But we could
leave the 64 bit specific stuff behind in it.   Dave Hansen suggested
something similar (though without the compat_sys_bit)

>FWIW, I don't think it would be horrible to leave the existing
> do_arch_prctl() code in process_64.h and call it
> do_64_bit_only_something_arch_prctl(), and only call in to it from the
> generic do_arch_prctl().  You really have one reason for all the "if
> (is_32)"'s and it would be nice to document why in one single place.

- Kyle


Re: [RESEND][PATCH v2 1/3] syscalls,x86 Expose arch_prctl on x86-32.

2016-09-14 Thread Kyle Huey
On Wed, Sep 14, 2016 at 3:29 PM, Dmitry Safonov <0x7f454...@gmail.com> wrote:
> 2016-09-15 1:08 GMT+03:00 Kyle Huey :
>> On Wed, Sep 14, 2016 at 2:59 PM, Dmitry Safonov <0x7f454...@gmail.com> wrote:
>>> 2016-09-15 0:08 GMT+03:00 Kyle Huey :
>>>> Signed-off-by: Kyle Huey 
>>>> ---
>>>>  arch/x86/entry/syscalls/syscall_32.tbl |  1 +
>>>>  arch/x86/kernel/process.c  | 80 
>>>> ++
>>>>  arch/x86/kernel/process_64.c   | 66 
>>>>  3 files changed, 81 insertions(+), 66 deletions(-)
>>>>
>>>> diff --git a/arch/x86/entry/syscalls/syscall_32.tbl 
>>>> b/arch/x86/entry/syscalls/syscall_32.tbl
>>>> index f848572..3b6965b 100644
>>>> --- a/arch/x86/entry/syscalls/syscall_32.tbl
>>>> +++ b/arch/x86/entry/syscalls/syscall_32.tbl
>>>> @@ -386,3 +386,4 @@
>>>>  377i386copy_file_range sys_copy_file_range
>>>>  378i386preadv2 sys_preadv2 
>>>> compat_sys_preadv2
>>>>  379i386pwritev2sys_pwritev2
>>>> compat_sys_pwritev2
>>>> +380i386arch_prctl  sys_arch_prctl
>>>
>>> Why not define it as other 32-bit syscalls with compat_sys_ prefix
>>> with the help of COMPAT_SYSCALL_DEFINE() macro?
>>> Then you could omit code moving, drop is_32 helper.
>>> I miss something obvious?
>>
>> The code will have to move regardless, because right now do_arch_prctl
>> is in process_64.c which is only compiled on a 64 bit kernel.
>
> Why? This code will not work anyway for 32-bit in your patches
> by obscuring it with is_32.
>
>> As I told Dave Hansen in the non-RESEND thread (not sure why
>> git-send-email didn't put him in this one ...) I considered doing a
>> compat_sys_arch_prctl that would reject the relevant arch_prctls that
>> don't apply on 32 bit but I didn't see any prior art for it (in my
>> admittedly non-exhaustive search).
>
> Well, you could just add to 64-bit do_arch_prctl() new cases for your
> prctls - that would be just a two-lines for each new prctl.
> Also add compat_sys_ and define *only* what's needed there for you,
> do not add there ARCH_{SET,GET}_{FS,GS}.
> Does this make sense?

Yeah, I should have spoken more clearly.  We'll need some
implementation of the syscall outside of process_64.c.  But we could
leave the 64 bit specific stuff behind in it.   Dave Hansen suggested
something similar (though without the compat_sys_bit)

>FWIW, I don't think it would be horrible to leave the existing
> do_arch_prctl() code in process_64.h and call it
> do_64_bit_only_something_arch_prctl(), and only call in to it from the
> generic do_arch_prctl().  You really have one reason for all the "if
> (is_32)"'s and it would be nice to document why in one single place.

- Kyle


Re: [RESEND][PATCH v2 1/3] syscalls,x86 Expose arch_prctl on x86-32.

2016-09-14 Thread Kyle Huey
On Wed, Sep 14, 2016 at 2:59 PM, Dmitry Safonov <0x7f454...@gmail.com> wrote:
> 2016-09-15 0:08 GMT+03:00 Kyle Huey <m...@kylehuey.com>:
>> Signed-off-by: Kyle Huey <kh...@kylehuey.com>
>> ---
>>  arch/x86/entry/syscalls/syscall_32.tbl |  1 +
>>  arch/x86/kernel/process.c  | 80 
>> ++
>>  arch/x86/kernel/process_64.c   | 66 
>>  3 files changed, 81 insertions(+), 66 deletions(-)
>>
>> diff --git a/arch/x86/entry/syscalls/syscall_32.tbl 
>> b/arch/x86/entry/syscalls/syscall_32.tbl
>> index f848572..3b6965b 100644
>> --- a/arch/x86/entry/syscalls/syscall_32.tbl
>> +++ b/arch/x86/entry/syscalls/syscall_32.tbl
>> @@ -386,3 +386,4 @@
>>  377i386copy_file_range sys_copy_file_range
>>  378i386preadv2 sys_preadv2 
>> compat_sys_preadv2
>>  379i386pwritev2sys_pwritev2
>> compat_sys_pwritev2
>> +380i386arch_prctl  sys_arch_prctl
>
> Why not define it as other 32-bit syscalls with compat_sys_ prefix
> with the help of COMPAT_SYSCALL_DEFINE() macro?
> Then you could omit code moving, drop is_32 helper.
> I miss something obvious?

The code will have to move regardless, because right now do_arch_prctl
is in process_64.c which is only compiled on a 64 bit kernel.

As I told Dave Hansen in the non-RESEND thread (not sure why
git-send-email didn't put him in this one ...) I considered doing a
compat_sys_arch_prctl that would reject the relevant arch_prctls that
don't apply on 32 bit but I didn't see any prior art for it (in my
admittedly non-exhaustive search).

- Kyle


Re: [RESEND][PATCH v2 1/3] syscalls,x86 Expose arch_prctl on x86-32.

2016-09-14 Thread Kyle Huey
On Wed, Sep 14, 2016 at 2:59 PM, Dmitry Safonov <0x7f454...@gmail.com> wrote:
> 2016-09-15 0:08 GMT+03:00 Kyle Huey :
>> Signed-off-by: Kyle Huey 
>> ---
>>  arch/x86/entry/syscalls/syscall_32.tbl |  1 +
>>  arch/x86/kernel/process.c  | 80 
>> ++
>>  arch/x86/kernel/process_64.c   | 66 
>>  3 files changed, 81 insertions(+), 66 deletions(-)
>>
>> diff --git a/arch/x86/entry/syscalls/syscall_32.tbl 
>> b/arch/x86/entry/syscalls/syscall_32.tbl
>> index f848572..3b6965b 100644
>> --- a/arch/x86/entry/syscalls/syscall_32.tbl
>> +++ b/arch/x86/entry/syscalls/syscall_32.tbl
>> @@ -386,3 +386,4 @@
>>  377i386copy_file_range sys_copy_file_range
>>  378i386preadv2 sys_preadv2 
>> compat_sys_preadv2
>>  379i386pwritev2sys_pwritev2
>> compat_sys_pwritev2
>> +380i386arch_prctl  sys_arch_prctl
>
> Why not define it as other 32-bit syscalls with compat_sys_ prefix
> with the help of COMPAT_SYSCALL_DEFINE() macro?
> Then you could omit code moving, drop is_32 helper.
> I miss something obvious?

The code will have to move regardless, because right now do_arch_prctl
is in process_64.c which is only compiled on a 64 bit kernel.

As I told Dave Hansen in the non-RESEND thread (not sure why
git-send-email didn't put him in this one ...) I considered doing a
compat_sys_arch_prctl that would reject the relevant arch_prctls that
don't apply on 32 bit but I didn't see any prior art for it (in my
admittedly non-exhaustive search).

- Kyle


Re: [PATCH v2 2/3] x86 Test and expose CPUID faulting capabilities in /proc/cpuinfo

2016-09-14 Thread Kyle Huey
On Wed, Sep 14, 2016 at 2:35 PM, Dave Hansen
<dave.han...@linux.intel.com> wrote:
> On 09/14/2016 02:01 PM, Kyle Huey wrote:
>> Xen advertises the underlying support for CPUID faulting but does not pass
>> through writes to the relevant MSR, nor does it virtualize it, so it does
>> not actually work. For now mask off the relevant bit on MSR_PLATFORM_INFO.
>
> That needs to make it into a comment, please.
>
> That *is* a Xen bug, right?

Yes.  Xen needs to either not advertise the feature or actually
support it.  This came up in the prior thread ("[PATCH] prctl,x86 Add
PR_[GET|SET]_CPUID for controlling the CPUID instruction.").

>> Signed-off-by: Kyle Huey <kh...@kylehuey.com>
>> ---
>>  arch/x86/include/asm/cpufeatures.h |  1 +
>>  arch/x86/include/asm/msr-index.h   |  1 +
>>  arch/x86/kernel/cpu/scattered.c| 14 ++
>>  arch/x86/xen/enlighten.c   |  3 +++
>>  4 files changed, 19 insertions(+)
>>
>> diff --git a/arch/x86/include/asm/cpufeatures.h 
>> b/arch/x86/include/asm/cpufeatures.h
>> index 92a8308..78b9d06 100644
>> --- a/arch/x86/include/asm/cpufeatures.h
>> +++ b/arch/x86/include/asm/cpufeatures.h
>> @@ -190,6 +190,7 @@
>>
>>  #define X86_FEATURE_CPB  ( 7*32+ 2) /* AMD Core Performance 
>> Boost */
>>  #define X86_FEATURE_EPB  ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS 
>> support */
>> +#define X86_FEATURE_CPUID_FAULT ( 7*32+ 4) /* Intel CPUID faulting */
>>
>>  #define X86_FEATURE_HW_PSTATE( 7*32+ 8) /* AMD HW-PState */
>>  #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
>> diff --git a/arch/x86/include/asm/msr-index.h 
>> b/arch/x86/include/asm/msr-index.h
>> index 56f4c66..83908d5 100644
>> --- a/arch/x86/include/asm/msr-index.h
>> +++ b/arch/x86/include/asm/msr-index.h
>> @@ -41,6 +41,7 @@
>>  #define MSR_IA32_PERFCTR10x00c2
>>  #define MSR_FSB_FREQ 0x00cd
>>  #define MSR_PLATFORM_INFO0x00ce
>> +#define CPUID_FAULTING_SUPPORT   (1UL << 31)
>>
>>  #define MSR_NHM_SNB_PKG_CST_CFG_CTL  0x00e2
>>  #define NHM_C3_AUTO_DEMOTE   (1UL << 25)
>> diff --git a/arch/x86/kernel/cpu/scattered.c 
>> b/arch/x86/kernel/cpu/scattered.c
>> index 8cb57df..d502da1 100644
>> --- a/arch/x86/kernel/cpu/scattered.c
>> +++ b/arch/x86/kernel/cpu/scattered.c
>> @@ -24,6 +24,17 @@ enum cpuid_regs {
>>   CR_EBX
>>  };
>>
>> +static int supports_cpuid_faulting(void)
>> +{
>> + unsigned int lo, hi;
>> +
>> + if (rdmsr_safe(MSR_PLATFORM_INFO, &lo, &hi) == 0 &&
>> + (lo & CPUID_FAULTING_SUPPORT))
>> + return 1;
>> + else
>> + return 0;
>> +}
>
> Is any of this useful to optimize away at compile-time?  We have config
> options for when we're running as a guest, and this seems like a feature
> that isn't available when running on bare metal.

On the contrary, this is only available when we're on bare metal.
Neither Xen nor KVM virtualize CPUID faulting (although KVM correctly
suppresses MSR_PLATFORM_INFO's report of support for it).

>> diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
>> index b86ebb1..2c47f0c 100644
>> --- a/arch/x86/xen/enlighten.c
>> +++ b/arch/x86/xen/enlighten.c
>> @@ -1050,6 +1050,9 @@ static u64 xen_read_msr_safe(unsigned int msr, int 
>> *err)
>>  #endif
>>   val &= ~X2APIC_ENABLE;
>>   break;
>> + case MSR_PLATFORM_INFO:
>> + val &= ~CPUID_FAULTING_SUPPORT;
>> + break;
>>   }
>>   return val;
>>  }
>
> Does this mean that Xen guests effectively can't take advantage of this
> feature?

Yes.

- Kyle


Re: [PATCH v2 2/3] x86 Test and expose CPUID faulting capabilities in /proc/cpuinfo

2016-09-14 Thread Kyle Huey
On Wed, Sep 14, 2016 at 2:35 PM, Dave Hansen
 wrote:
> On 09/14/2016 02:01 PM, Kyle Huey wrote:
>> Xen advertises the underlying support for CPUID faulting but does not pass
>> through writes to the relevant MSR, nor does it virtualize it, so it does
>> not actually work. For now mask off the relevant bit on MSR_PLATFORM_INFO.
>
> That needs to make it into a comment, please.
>
> That *is* a Xen bug, right?

Yes.  Xen needs to either not advertise the feature or actually
support it.  This came up in the prior thread ("[PATCH] prctl,x86 Add
PR_[GET|SET]_CPUID for controlling the CPUID instruction.").

>> Signed-off-by: Kyle Huey 
>> ---
>>  arch/x86/include/asm/cpufeatures.h |  1 +
>>  arch/x86/include/asm/msr-index.h   |  1 +
>>  arch/x86/kernel/cpu/scattered.c| 14 ++
>>  arch/x86/xen/enlighten.c   |  3 +++
>>  4 files changed, 19 insertions(+)
>>
>> diff --git a/arch/x86/include/asm/cpufeatures.h 
>> b/arch/x86/include/asm/cpufeatures.h
>> index 92a8308..78b9d06 100644
>> --- a/arch/x86/include/asm/cpufeatures.h
>> +++ b/arch/x86/include/asm/cpufeatures.h
>> @@ -190,6 +190,7 @@
>>
>>  #define X86_FEATURE_CPB  ( 7*32+ 2) /* AMD Core Performance 
>> Boost */
>>  #define X86_FEATURE_EPB  ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS 
>> support */
>> +#define X86_FEATURE_CPUID_FAULT ( 7*32+ 4) /* Intel CPUID faulting */
>>
>>  #define X86_FEATURE_HW_PSTATE( 7*32+ 8) /* AMD HW-PState */
>>  #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
>> diff --git a/arch/x86/include/asm/msr-index.h 
>> b/arch/x86/include/asm/msr-index.h
>> index 56f4c66..83908d5 100644
>> --- a/arch/x86/include/asm/msr-index.h
>> +++ b/arch/x86/include/asm/msr-index.h
>> @@ -41,6 +41,7 @@
>>  #define MSR_IA32_PERFCTR10x00c2
>>  #define MSR_FSB_FREQ 0x00cd
>>  #define MSR_PLATFORM_INFO0x00ce
>> +#define CPUID_FAULTING_SUPPORT   (1UL << 31)
>>
>>  #define MSR_NHM_SNB_PKG_CST_CFG_CTL  0x00e2
>>  #define NHM_C3_AUTO_DEMOTE   (1UL << 25)
>> diff --git a/arch/x86/kernel/cpu/scattered.c 
>> b/arch/x86/kernel/cpu/scattered.c
>> index 8cb57df..d502da1 100644
>> --- a/arch/x86/kernel/cpu/scattered.c
>> +++ b/arch/x86/kernel/cpu/scattered.c
>> @@ -24,6 +24,17 @@ enum cpuid_regs {
>>   CR_EBX
>>  };
>>
>> +static int supports_cpuid_faulting(void)
>> +{
>> + unsigned int lo, hi;
>> +
>> + if (rdmsr_safe(MSR_PLATFORM_INFO, , ) == 0 &&
>> + (lo & CPUID_FAULTING_SUPPORT))
>> + return 1;
>> + else
>> + return 0;
>> +}
>
> Is any of this useful to optimize away at compile-time?  We have config
> options for when we're running as a guest, and this seems like a feature
> that isn't available when running on bare metal.

On the contrary, this is only available when we're on bare metal.
Neither Xen nor KVM virtualize CPUID faulting (although KVM correctly
suppresses MSR_PLATFORM_INFO's report of support for it).

>> diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
>> index b86ebb1..2c47f0c 100644
>> --- a/arch/x86/xen/enlighten.c
>> +++ b/arch/x86/xen/enlighten.c
>> @@ -1050,6 +1050,9 @@ static u64 xen_read_msr_safe(unsigned int msr, int 
>> *err)
>>  #endif
>>   val &= ~X2APIC_ENABLE;
>>   break;
>> + case MSR_PLATFORM_INFO:
>> + val &= ~CPUID_FAULTING_SUPPORT;
>> + break;
>>   }
>>   return val;
>>  }
>
> Does this mean that Xen guests effectively can't take advantage of this
> feature?

Yes.

- Kyle


Re: [PATCH v2 1/3] syscalls,x86 Expose arch_prctl on x86-32.

2016-09-14 Thread Kyle Huey
On Wed, Sep 14, 2016 at 2:46 PM, Dave Hansen
<dave.han...@linux.intel.com> wrote:
> On 09/14/2016 02:35 PM, Kyle Huey wrote:
>> It's not quite a plain move.  To leave the existing arch_prctls only
>> accessible to 64 bit callers, I added the is_32 bit and the four early
>> returns for each existing ARCH_BLAH.  These cases are now
>> conditionally compiled out in a 32 bit kernel, so we only have to
>> handle the 32 bit process on a 64 bit kernel case at runtime.
>
> I think it would make a lot of sense to do the move and the modification
> in two patches.

Ok.

> Oh, and arch_prctl() really *is* 64-bit only.  I didn't realize that.
> That would have been nice to call out in the changelog, too.  It's
> totally non-obvious.

Ok.

> You're going to owe some manpage updates after this too, I guess.  It
> says: "arch_prctl() is supported only on Linux/x86-64 for 64-bit
> programs currently."

Indeed. There's a patch at the end of the series (sent to LKML, but
you're not directly CCd on it) with a suggested manpage patch.

> FWIW, I don't think it would be horrible to leave the existing
> do_arch_prctl() code in process_64.h and call it
> do_64_bit_only_something_arch_prctl(), and only call in to it from the
> generic do_arch_prctl().  You really have one reason for all the "if
> (is_32)"'s and it would be nice to document why in one single place.

Yeah, that seems like a good idea.

- Kyle


Re: [PATCH v2 1/3] syscalls,x86 Expose arch_prctl on x86-32.

2016-09-14 Thread Kyle Huey
On Wed, Sep 14, 2016 at 2:46 PM, Dave Hansen
 wrote:
> On 09/14/2016 02:35 PM, Kyle Huey wrote:
>> It's not quite a plain move.  To leave the existing arch_prctls only
>> accessible to 64 bit callers, I added the is_32 bit and the four early
>> returns for each existing ARCH_BLAH.  These cases are now
>> conditionally compiled out in a 32 bit kernel, so we only have to
>> handle the 32 bit process on a 64 bit kernel case at runtime.
>
> I think it would make a lot of sense to do the move and the modification
> in two patches.

Ok.

> Oh, and arch_prctl() really *is* 64-bit only.  I didn't realize that.
> That would have been nice to call out in the changelog, too.  It's
> totally non-obvious.

Ok.

> You're going to owe some manpage updates after this too, I guess.  It
> says: "arch_prctl() is supported only on Linux/x86-64 for 64-bit
> programs currently."

Indeed. There's a patch at the end of the series (sent to LKML, but
you're not directly CCd on it) with a suggested manpage patch.

> FWIW, I don't think it would be horrible to leave the existing
> do_arch_prctl() code in process_64.h and call it
> do_64_bit_only_something_arch_prctl(), and only call in to it from the
> generic do_arch_prctl().  You really have one reason for all the "if
> (is_32)"'s and it would be nice to document why in one single place.

Yeah, that seems like a good idea.

- Kyle


Re: [PATCH v2 1/3] syscalls,x86 Expose arch_prctl on x86-32.

2016-09-14 Thread Kyle Huey
On Wed, Sep 14, 2016 at 2:29 PM, Dave Hansen
<dave.han...@linux.intel.com> wrote:
> On 09/14/2016 02:01 PM, Kyle Huey wrote:
>> Signed-off-by: Kyle Huey <kh...@kylehuey.com>
>> ---
>>  arch/x86/entry/syscalls/syscall_32.tbl |  1 +
>>  arch/x86/kernel/process.c  | 80 
>> ++
>>  arch/x86/kernel/process_64.c   | 66 
>>  3 files changed, 81 insertions(+), 66 deletions(-)
>
> Could you explain a bit about what is going on here?  Is it just a plain
> old code move, _why_ you had to do it this way, etc...?

Sure.  In the subsequent patches in this series I add an arch_prctl
that is available for both 64 and 32 bit programs/kernels.  Since
process_64.c is only built for 64 bit kernels, this syscall can't stay
there anymore.

It's not quite a plain move.  To leave the existing arch_prctls only
accessible to 64 bit callers, I added the is_32 bit and the four early
returns for each existing ARCH_BLAH.  These cases are now
conditionally compiled out in a 32 bit kernel, so we only have to
handle the 32 bit process on a 64 bit kernel case at runtime.

I considered doing this instead with a compat wrapper for the syscall
on 32 bit systems that would filter these arch_prctls before getting
to do_arch_prctl. I didn't see any prior art for it, so decided not to
proceed that way.

- Kyle


Re: [PATCH v2 1/3] syscalls,x86 Expose arch_prctl on x86-32.

2016-09-14 Thread Kyle Huey
On Wed, Sep 14, 2016 at 2:29 PM, Dave Hansen
 wrote:
> On 09/14/2016 02:01 PM, Kyle Huey wrote:
>> Signed-off-by: Kyle Huey 
>> ---
>>  arch/x86/entry/syscalls/syscall_32.tbl |  1 +
>>  arch/x86/kernel/process.c  | 80 
>> ++
>>  arch/x86/kernel/process_64.c   | 66 
>>  3 files changed, 81 insertions(+), 66 deletions(-)
>
> Could you explain a bit about what is going on here?  Is it just a plain
> old code move, _why_ you had to do it this way, etc...?

Sure.  In the subsequent patches in this series I add an arch_prctl
that is available for both 64 and 32 bit programs/kernels.  Since
process_64.c is only built for 64 bit kernels, this syscall can't stay
there anymore.

It's not quite a plain move.  To leave the existing arch_prctls only
accessible to 64 bit callers, I added the is_32 bit and the four early
returns for each existing ARCH_BLAH.  These cases are now
conditionally compiled out in a 32 bit kernel, so we only have to
handle the 32 bit process on a 64 bit kernel case at runtime.

I considered doing this instead with a compat wrapper for the syscall
on 32 bit systems that would filter these arch_prctls before getting
to do_arch_prctl. I didn't see any prior art for it, so decided not to
proceed that way.

- Kyle


[PATCH (man-pages)] arch_prctl.2: Note new support on x86-32, ARCH_[GET|SET]_CPUID.

2016-09-14 Thread Kyle Huey
Signed-off-by: Kyle Huey <kh...@kylehuey.com>
---
 man2/arch_prctl.2 | 73 +--
 1 file changed, 60 insertions(+), 13 deletions(-)

diff --git a/man2/arch_prctl.2 b/man2/arch_prctl.2
index 989d369..c388797 100644
--- a/man2/arch_prctl.2
+++ b/man2/arch_prctl.2
@@ -22,7 +22,7 @@
 .\" the source, must acknowledge the copyright and authors of this work.
 .\" %%%LICENSE_END
 .\"
-.TH ARCH_PRCTL 2 2015-02-21 "Linux" "Linux Programmer's Manual"
+.TH ARCH_PRCTL 2 2016-09-14 "Linux" "Linux Programmer's Manual"
 .SH NAME
 arch_prctl \- set architecture-specific thread state
 .SH SYNOPSIS
@@ -31,8 +31,8 @@ arch_prctl \- set architecture-specific thread state
 .br
 .B #include <sys/prctl.h>
 .sp
-.BI "int arch_prctl(int " code ", unsigned long " addr );
-.BI "int arch_prctl(int " code ", unsigned long *" addr );
+.BI "int arch_prctl(int " code ", unsigned long " arg2 );
+.BI "int arch_prctl(int " code ", unsigned long *" arg2 );
 .fi
 .SH DESCRIPTION
 The
@@ -41,22 +41,47 @@ function sets architecture-specific process or thread state.
 .I code
 selects a subfunction
 and passes argument
-.I addr
+.I arg2
 to it;
-.I addr
+.I arg2
 is interpreted as either an
 .I "unsigned long"
 for the "set" operations, or as an
 .IR "unsigned long\ *" ,
 for the "get" operations.
 .LP
+Subfunctions for both x86-64 and x86-32 are:
+.TP
+.B ARCH_GET_CPUID " (since Linux 4.X)"
+Return the state of the flag determining whether the
+.I cpuid
+instruction can be executed by the process, in the
+.I unsigned long
+pointed to by
+.IR arg2 .
+.TP
+.B ARCH_SET_CPUID " (since Linux 4.X)"
+Set the state of the flag determining whether the
+.I cpuid
+instruction can be executed by the process. Pass
+.B ARCH_CPUID_ENABLE
+in
+.I arg2
+to allow it to be executed, or
+.B ARCH_CPUID_SIGSEGV
+to generate a
+.B SIGSEGV
+when the process tries to execute the
+.I cpuid
+instruction. This flag is propagated across fork and exec.
+.LP
 Subfunctions for x86-64 are:
 .TP
 .B ARCH_SET_FS
 Set the 64-bit base for the
 .I FS
 register to
-.IR addr .
+.IR arg2 .
 .TP
 .B ARCH_GET_FS
 Return the 64-bit base value for the
@@ -64,13 +89,13 @@ Return the 64-bit base value for the
 register of the current thread in the
 .I unsigned long
 pointed to by
-.IR addr .
+.IR arg2 .
 .TP
 .B ARCH_SET_GS
 Set the 64-bit base for the
 .I GS
 register to
-.IR addr .
+.IR arg2 .
 .TP
 .B ARCH_GET_GS
 Return the 64-bit base value for the
@@ -78,7 +103,7 @@ Return the 64-bit base value for the
 register of the current thread in the
 .I unsigned long
 pointed to by
-.IR addr .
+.IR arg2 .
 .SH RETURN VALUE
 On success,
 .BR arch_prctl ()
@@ -87,26 +112,48 @@ returns 0; on error, \-1 is returned, and
 is set to indicate the error.
 .SH ERRORS
 .TP
+.B EACCES
+.I code
+is
+.B ARCH_SET_CPUID
+and
+.I arg2
+is
+.B ARCH_CPUID_ENABLE
+and cpuid was previously disabled with
+.B ARCH_CPUID_SIGSEGV
+and the
+.I no_new_privs
+bit is set on this thread.
+.TP
 .B EFAULT
-.I addr
+.I arg2
 points to an unmapped address or is outside the process address space.
 .TP
 .B EINVAL
 .I code
 is not a valid subcommand.
 .TP
+.B EINVAL
+.I code
+is
+.B ARCH_SET_CPUID
+and
+.I cpuid
+faulting is not supported on this machine.
+.TP
 .B EPERM
-.I addr
+.I arg2
 is outside the process address space.
 .\" .SH AUTHOR
 .\" Man page written by Andi Kleen.
 .SH CONFORMING TO
 .BR arch_prctl ()
-is a Linux/x86-64 extension and should not be used in programs intended
+is a Linux/x86 extension and should not be used in programs intended
 to be portable.
 .SH NOTES
 .BR arch_prctl ()
-is supported only on Linux/x86-64 for 64-bit programs currently.
+is supported only on Linux/x86 currently.
 
 The 64-bit base changes when a new 32-bit segment selector is loaded.
 
-- 
2.7.4



[RESEND][PATCH v2 3/3] x86,arch_prctl Add ARCH_[GET|SET]_CPUID for controlling the CPUID instruction

2016-09-14 Thread Kyle Huey
Intel supports faulting on the CPUID instruction in newer processors. Bit
31 of MSR_PLATFORM_INFO advertises support for this feature. It is
documented in detail in Section 2.3.2 of
http://www.intel.com/content/dam/www/public/us/en/documents/application-notes/virtualization-technology-flexmigration-application-note.pdf

Signed-off-by: Kyle Huey <kh...@kylehuey.com>
---
 arch/x86/include/asm/msr-index.h  |   1 +
 arch/x86/include/asm/thread_info.h|   4 +-
 arch/x86/include/uapi/asm/prctl.h |   6 +
 arch/x86/kernel/process.c |  81 +++
 tools/testing/selftests/x86/Makefile  |   2 +-
 tools/testing/selftests/x86/cpuid-fault.c | 223 ++
 6 files changed, 315 insertions(+), 2 deletions(-)
 create mode 100644 tools/testing/selftests/x86/cpuid-fault.c

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 83908d5..4aebec2 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -53,6 +53,7 @@
 #define MSR_MTRRcap                    0x00fe
 #define MSR_IA32_BBL_CR_CTL            0x0119
 #define MSR_IA32_BBL_CR_CTL3           0x011e
+#define MSR_MISC_FEATURES_ENABLES  0x0140
 
 #define MSR_IA32_SYSENTER_CS   0x0174
 #define MSR_IA32_SYSENTER_ESP  0x0175
diff --git a/arch/x86/include/asm/thread_info.h 
b/arch/x86/include/asm/thread_info.h
index 8b7c8d8..ec93976 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -93,6 +93,7 @@ struct thread_info {
 #define TIF_SECCOMP            8   /* secure computing */
 #define TIF_USER_RETURN_NOTIFY 11  /* notify kernel of userspace return */
 #define TIF_UPROBE             12  /* breakpointed or singlestepping */
+#define TIF_NOCPUID            15  /* CPUID is not accessible in userland */
 #define TIF_NOTSC  16  /* TSC is not accessible in userland */
 #define TIF_IA32   17  /* IA32 compatibility process */
 #define TIF_FORK   18  /* ret_from_fork */
@@ -117,6 +118,7 @@ struct thread_info {
 #define _TIF_SECCOMP   (1 << TIF_SECCOMP)
 #define _TIF_USER_RETURN_NOTIFY        (1 << TIF_USER_RETURN_NOTIFY)
 #define _TIF_UPROBE            (1 << TIF_UPROBE)
+#define _TIF_NOCPUID   (1 << TIF_NOCPUID)
 #define _TIF_NOTSC (1 << TIF_NOTSC)
 #define _TIF_IA32  (1 << TIF_IA32)
 #define _TIF_FORK  (1 << TIF_FORK)
@@ -146,7 +148,7 @@ struct thread_info {
 
 /* flags to check in __switch_to() */
 #define _TIF_WORK_CTXSW                                                        \
-   (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP)
+   (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP)
 
 #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
 #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
diff --git a/arch/x86/include/uapi/asm/prctl.h 
b/arch/x86/include/uapi/asm/prctl.h
index 3ac5032..c087e55 100644
--- a/arch/x86/include/uapi/asm/prctl.h
+++ b/arch/x86/include/uapi/asm/prctl.h
@@ -6,4 +6,10 @@
 #define ARCH_GET_FS 0x1003
 #define ARCH_GET_GS 0x1004
 
+/* Get/set the process' ability to use the CPUID instruction */
+#define ARCH_GET_CPUID 0x1005
+#define ARCH_SET_CPUID 0x1006
+# define ARCH_CPUID_ENABLE     1   /* allow the use of the CPUID instruction */
+# define ARCH_CPUID_SIGSEGV    2   /* throw a SIGSEGV instead of reading the CPUID */
+
 #endif /* _ASM_X86_PRCTL_H */
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 0f857c3..5fc8e9d 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -193,6 +193,69 @@ int set_tsc_mode(unsigned int val)
return 0;
 }
 
+static void switch_cpuid_faulting(bool on)
+{
+   if (on)
+   msr_set_bit(MSR_MISC_FEATURES_ENABLES, 0);
+   else
+   msr_clear_bit(MSR_MISC_FEATURES_ENABLES, 0);
+}
+
+static void disable_cpuid(void)
+{
+   preempt_disable();
+   if (!test_and_set_thread_flag(TIF_NOCPUID))
+   /*
+* Must flip the CPU state synchronously with
+* TIF_NOCPUID in the current running context.
+*/
+   switch_cpuid_faulting(true);
+   preempt_enable();
+}
+
+static void enable_cpuid(void)
+{
+   preempt_disable();
+   if (test_and_clear_thread_flag(TIF_NOCPUID))
+   /*
+* Must flip the CPU state synchronously with
+* TIF_NOCPUID in the current running context.
+*/
+   switch_cpuid_faulting(false);
+   preempt_enable();
+}
+
+int get_cpuid_mode(unsigned long adr)
+{
+   unsigned int val;
+
+   if (test_thread_flag(TIF_NOCPUID))
+   val = ARCH_CPUID_SIGSEGV;
+   else
+   val = ARCH_CPUID_ENABLE;
+
+   return put_user(val, (unsigned in

[PATCH (man-pages)] arch_prctl.2: Note new support on x86-32, ARCH_[GET|SET]_CPUID.

2016-09-14 Thread Kyle Huey
Signed-off-by: Kyle Huey 
---
 man2/arch_prctl.2 | 73 +--
 1 file changed, 60 insertions(+), 13 deletions(-)

diff --git a/man2/arch_prctl.2 b/man2/arch_prctl.2
index 989d369..c388797 100644
--- a/man2/arch_prctl.2
+++ b/man2/arch_prctl.2
@@ -22,7 +22,7 @@
 .\" the source, must acknowledge the copyright and authors of this work.
 .\" %%%LICENSE_END
 .\"
-.TH ARCH_PRCTL 2 2015-02-21 "Linux" "Linux Programmer's Manual"
+.TH ARCH_PRCTL 2 2016-09-14 "Linux" "Linux Programmer's Manual"
 .SH NAME
 arch_prctl \- set architecture-specific thread state
 .SH SYNOPSIS
@@ -31,8 +31,8 @@ arch_prctl \- set architecture-specific thread state
 .br
 .B #include <sys/prctl.h>
 .sp
-.BI "int arch_prctl(int " code ", unsigned long " addr );
-.BI "int arch_prctl(int " code ", unsigned long *" addr );
+.BI "int arch_prctl(int " code ", unsigned long " arg2 );
+.BI "int arch_prctl(int " code ", unsigned long *" arg2 );
 .fi
 .SH DESCRIPTION
 The
@@ -41,22 +41,47 @@ function sets architecture-specific process or thread state.
 .I code
 selects a subfunction
 and passes argument
-.I addr
+.I arg2
 to it;
-.I addr
+.I arg2
 is interpreted as either an
 .I "unsigned long"
 for the "set" operations, or as an
 .IR "unsigned long\ *" ,
 for the "get" operations.
 .LP
+Subfunctions for both x86-64 and x86-32 are:
+.TP
+.B ARCH_GET_CPUID " (since Linux 4.X)"
+Return the state of the flag determining whether the
+.I cpuid
+instruction can be executed by the process, in the
+.I unsigned long
+pointed to by
+.IR arg2 .
+.TP
+.B ARCH_SET_CPUID " (since Linux 4.X)"
+Set the state of the flag determining whether the
+.I cpuid
+instruction can be executed by the process. Pass
+.B ARCH_CPUID_ENABLE
+in
+.I arg2
+to allow it to be executed, or
+.B ARCH_CPUID_SIGSEGV
+to generate a
+.B SIGSEGV
+when the process tries to execute the
+.I cpuid
+instruction. This flag is propagated across fork and exec.
+.LP
 Subfunctions for x86-64 are:
 .TP
 .B ARCH_SET_FS
 Set the 64-bit base for the
 .I FS
 register to
-.IR addr .
+.IR arg2 .
 .TP
 .B ARCH_GET_FS
 Return the 64-bit base value for the
@@ -64,13 +89,13 @@ Return the 64-bit base value for the
 register of the current thread in the
 .I unsigned long
 pointed to by
-.IR addr .
+.IR arg2 .
 .TP
 .B ARCH_SET_GS
 Set the 64-bit base for the
 .I GS
 register to
-.IR addr .
+.IR arg2 .
 .TP
 .B ARCH_GET_GS
 Return the 64-bit base value for the
@@ -78,7 +103,7 @@ Return the 64-bit base value for the
 register of the current thread in the
 .I unsigned long
 pointed to by
-.IR addr .
+.IR arg2 .
 .SH RETURN VALUE
 On success,
 .BR arch_prctl ()
@@ -87,26 +112,48 @@ returns 0; on error, \-1 is returned, and
 is set to indicate the error.
 .SH ERRORS
 .TP
+.B EACCES
+.I code
+is
+.B ARCH_SET_CPUID
+and
+.I arg2
+is
+.B ARCH_CPUID_ENABLE
+and cpuid was previously disabled with
+.B ARCH_CPUID_SIGSEGV
+and the
+.I no_new_privs
+bit is set on this thread.
+.TP
 .B EFAULT
-.I addr
+.I arg2
 points to an unmapped address or is outside the process address space.
 .TP
 .B EINVAL
 .I code
 is not a valid subcommand.
 .TP
+.B EINVAL
+.I code
+is
+.B ARCH_SET_CPUID
+and
+.I cpuid
+faulting is not supported on this machine.
+.TP
 .B EPERM
-.I addr
+.I arg2
 is outside the process address space.
 .\" .SH AUTHOR
 .\" Man page written by Andi Kleen.
 .SH CONFORMING TO
 .BR arch_prctl ()
-is a Linux/x86-64 extension and should not be used in programs intended
+is a Linux/x86 extension and should not be used in programs intended
 to be portable.
 .SH NOTES
 .BR arch_prctl ()
-is supported only on Linux/x86-64 for 64-bit programs currently.
+is supported only on Linux/x86 currently.
 
 The 64-bit base changes when a new 32-bit segment selector is loaded.
 
-- 
2.7.4



[RESEND][PATCH v2 3/3] x86,arch_prctl Add ARCH_[GET|SET]_CPUID for controlling the CPUID instruction

2016-09-14 Thread Kyle Huey
Intel supports faulting on the CPUID instruction in newer processors. Bit
31 of MSR_PLATFORM_INFO advertises support for this feature. It is
documented in detail in Section 2.3.2 of
http://www.intel.com/content/dam/www/public/us/en/documents/application-notes/virtualization-technology-flexmigration-application-note.pdf

Signed-off-by: Kyle Huey 
---
 arch/x86/include/asm/msr-index.h  |   1 +
 arch/x86/include/asm/thread_info.h|   4 +-
 arch/x86/include/uapi/asm/prctl.h |   6 +
 arch/x86/kernel/process.c |  81 +++
 tools/testing/selftests/x86/Makefile  |   2 +-
 tools/testing/selftests/x86/cpuid-fault.c | 223 ++
 6 files changed, 315 insertions(+), 2 deletions(-)
 create mode 100644 tools/testing/selftests/x86/cpuid-fault.c

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 83908d5..4aebec2 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -53,6 +53,7 @@
 #define MSR_MTRRcap                    0x00fe
 #define MSR_IA32_BBL_CR_CTL            0x0119
 #define MSR_IA32_BBL_CR_CTL3           0x011e
+#define MSR_MISC_FEATURES_ENABLES  0x0140
 
 #define MSR_IA32_SYSENTER_CS   0x0174
 #define MSR_IA32_SYSENTER_ESP  0x0175
diff --git a/arch/x86/include/asm/thread_info.h 
b/arch/x86/include/asm/thread_info.h
index 8b7c8d8..ec93976 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -93,6 +93,7 @@ struct thread_info {
 #define TIF_SECCOMP            8   /* secure computing */
 #define TIF_USER_RETURN_NOTIFY 11  /* notify kernel of userspace return */
 #define TIF_UPROBE             12  /* breakpointed or singlestepping */
+#define TIF_NOCPUID            15  /* CPUID is not accessible in userland */
 #define TIF_NOTSC  16  /* TSC is not accessible in userland */
 #define TIF_IA32   17  /* IA32 compatibility process */
 #define TIF_FORK   18  /* ret_from_fork */
@@ -117,6 +118,7 @@ struct thread_info {
 #define _TIF_SECCOMP   (1 << TIF_SECCOMP)
 #define _TIF_USER_RETURN_NOTIFY        (1 << TIF_USER_RETURN_NOTIFY)
 #define _TIF_UPROBE            (1 << TIF_UPROBE)
+#define _TIF_NOCPUID   (1 << TIF_NOCPUID)
 #define _TIF_NOTSC (1 << TIF_NOTSC)
 #define _TIF_IA32  (1 << TIF_IA32)
 #define _TIF_FORK  (1 << TIF_FORK)
@@ -146,7 +148,7 @@ struct thread_info {
 
 /* flags to check in __switch_to() */
 #define _TIF_WORK_CTXSW                                                        \
-   (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP)
+   (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP)
 
 #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
 #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
diff --git a/arch/x86/include/uapi/asm/prctl.h 
b/arch/x86/include/uapi/asm/prctl.h
index 3ac5032..c087e55 100644
--- a/arch/x86/include/uapi/asm/prctl.h
+++ b/arch/x86/include/uapi/asm/prctl.h
@@ -6,4 +6,10 @@
 #define ARCH_GET_FS 0x1003
 #define ARCH_GET_GS 0x1004
 
+/* Get/set the process' ability to use the CPUID instruction */
+#define ARCH_GET_CPUID 0x1005
+#define ARCH_SET_CPUID 0x1006
+# define ARCH_CPUID_ENABLE     1   /* allow the use of the CPUID instruction */
+# define ARCH_CPUID_SIGSEGV    2   /* throw a SIGSEGV instead of reading the CPUID */
+
 #endif /* _ASM_X86_PRCTL_H */
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 0f857c3..5fc8e9d 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -193,6 +193,69 @@ int set_tsc_mode(unsigned int val)
return 0;
 }
 
+static void switch_cpuid_faulting(bool on)
+{
+   if (on)
+   msr_set_bit(MSR_MISC_FEATURES_ENABLES, 0);
+   else
+   msr_clear_bit(MSR_MISC_FEATURES_ENABLES, 0);
+}
+
+static void disable_cpuid(void)
+{
+   preempt_disable();
+   if (!test_and_set_thread_flag(TIF_NOCPUID))
+   /*
+* Must flip the CPU state synchronously with
+* TIF_NOCPUID in the current running context.
+*/
+   switch_cpuid_faulting(true);
+   preempt_enable();
+}
+
+static void enable_cpuid(void)
+{
+   preempt_disable();
+   if (test_and_clear_thread_flag(TIF_NOCPUID))
+   /*
+* Must flip the CPU state synchronously with
+* TIF_NOCPUID in the current running context.
+*/
+   switch_cpuid_faulting(false);
+   preempt_enable();
+}
+
+int get_cpuid_mode(unsigned long adr)
+{
+   unsigned int val;
+
+   if (test_thread_flag(TIF_NOCPUID))
+   val = ARCH_CPUID_SIGSEGV;
+   else
+   val = ARCH_CPUID_ENABLE;
+
+   return put_user(val, (unsigned int __user *)adr);
+}
+
+int set_cpu

[RESEND][PATCH v2 2/3] x86 Test and expose CPUID faulting capabilities in /proc/cpuinfo

2016-09-14 Thread Kyle Huey
Xen advertises the underlying support for CPUID faulting but does not pass
through writes to the relevant MSR, nor does it virtualize it, so it does
not actually work. For now mask off the relevant bit on MSR_PLATFORM_INFO.

Signed-off-by: Kyle Huey <kh...@kylehuey.com>
---
 arch/x86/include/asm/cpufeatures.h |  1 +
 arch/x86/include/asm/msr-index.h   |  1 +
 arch/x86/kernel/cpu/scattered.c| 14 ++
 arch/x86/xen/enlighten.c   |  3 +++
 4 files changed, 19 insertions(+)

diff --git a/arch/x86/include/asm/cpufeatures.h 
b/arch/x86/include/asm/cpufeatures.h
index 92a8308..78b9d06 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -190,6 +190,7 @@
 
 #define X86_FEATURE_CPB         ( 7*32+ 2) /* AMD Core Performance Boost */
 #define X86_FEATURE_EPB         ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
+#define X86_FEATURE_CPUID_FAULT ( 7*32+ 4) /* Intel CPUID faulting */
 
 #define X86_FEATURE_HW_PSTATE   ( 7*32+ 8) /* AMD HW-PState */
 #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 56f4c66..83908d5 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -41,6 +41,7 @@
 #define MSR_IA32_PERFCTR1  0x00c2
 #define MSR_FSB_FREQ   0x00cd
 #define MSR_PLATFORM_INFO  0x00ce
+#define CPUID_FAULTING_SUPPORT (1UL << 31)
 
 #define MSR_NHM_SNB_PKG_CST_CFG_CTL    0x00e2
 #define NHM_C3_AUTO_DEMOTE (1UL << 25)
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 8cb57df..d502da1 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -24,6 +24,17 @@ enum cpuid_regs {
CR_EBX
 };
 
+static int supports_cpuid_faulting(void)
+{
+   unsigned int lo, hi;
+
+   if (rdmsr_safe(MSR_PLATFORM_INFO, &lo, &hi) == 0 &&
+   (lo & CPUID_FAULTING_SUPPORT))
+   return 1;
+   else
+   return 0;
+}
+
 void init_scattered_cpuid_features(struct cpuinfo_x86 *c)
 {
u32 max_level;
@@ -54,4 +65,7 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c)
if (regs[cb->reg] & (1 << cb->bit))
set_cpu_cap(c, cb->feature);
}
+
+   if (supports_cpuid_faulting())
+   set_cpu_cap(c, X86_FEATURE_CPUID_FAULT);
 }
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index b86ebb1..2c47f0c 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1050,6 +1050,9 @@ static u64 xen_read_msr_safe(unsigned int msr, int *err)
 #endif
val &= ~X2APIC_ENABLE;
break;
+   case MSR_PLATFORM_INFO:
+   val &= ~CPUID_FAULTING_SUPPORT;
+   break;
}
return val;
 }
-- 
2.7.4



[RESEND][PATCH v2 2/3] x86 Test and expose CPUID faulting capabilities in /proc/cpuinfo

2016-09-14 Thread Kyle Huey
Xen advertises the underlying support for CPUID faulting but does not pass
through writes to the relevant MSR, nor does it virtualize it, so it does
not actually work. For now mask off the relevant bit on MSR_PLATFORM_INFO.

Signed-off-by: Kyle Huey 
---
 arch/x86/include/asm/cpufeatures.h |  1 +
 arch/x86/include/asm/msr-index.h   |  1 +
 arch/x86/kernel/cpu/scattered.c| 14 ++
 arch/x86/xen/enlighten.c   |  3 +++
 4 files changed, 19 insertions(+)

diff --git a/arch/x86/include/asm/cpufeatures.h 
b/arch/x86/include/asm/cpufeatures.h
index 92a8308..78b9d06 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -190,6 +190,7 @@
 
 #define X86_FEATURE_CPB         ( 7*32+ 2) /* AMD Core Performance Boost */
 #define X86_FEATURE_EPB         ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
+#define X86_FEATURE_CPUID_FAULT ( 7*32+ 4) /* Intel CPUID faulting */
 
 #define X86_FEATURE_HW_PSTATE   ( 7*32+ 8) /* AMD HW-PState */
 #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 56f4c66..83908d5 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -41,6 +41,7 @@
 #define MSR_IA32_PERFCTR1  0x00c2
 #define MSR_FSB_FREQ   0x00cd
 #define MSR_PLATFORM_INFO  0x00ce
+#define CPUID_FAULTING_SUPPORT (1UL << 31)
 
 #define MSR_NHM_SNB_PKG_CST_CFG_CTL    0x00e2
 #define NHM_C3_AUTO_DEMOTE (1UL << 25)
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 8cb57df..d502da1 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -24,6 +24,17 @@ enum cpuid_regs {
CR_EBX
 };
 
+static int supports_cpuid_faulting(void)
+{
+   unsigned int lo, hi;
+
+   if (rdmsr_safe(MSR_PLATFORM_INFO, &lo, &hi) == 0 &&
+   (lo & CPUID_FAULTING_SUPPORT))
+   return 1;
+   else
+   return 0;
+}
+
 void init_scattered_cpuid_features(struct cpuinfo_x86 *c)
 {
u32 max_level;
@@ -54,4 +65,7 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c)
if (regs[cb->reg] & (1 << cb->bit))
set_cpu_cap(c, cb->feature);
}
+
+   if (supports_cpuid_faulting())
+   set_cpu_cap(c, X86_FEATURE_CPUID_FAULT);
 }
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index b86ebb1..2c47f0c 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1050,6 +1050,9 @@ static u64 xen_read_msr_safe(unsigned int msr, int *err)
 #endif
val &= ~X2APIC_ENABLE;
break;
+   case MSR_PLATFORM_INFO:
+   val &= ~CPUID_FAULTING_SUPPORT;
+   break;
}
return val;
 }
-- 
2.7.4



[RESEND][PATCH v2 1/3] syscalls,x86 Expose arch_prctl on x86-32.

2016-09-14 Thread Kyle Huey
Signed-off-by: Kyle Huey <kh...@kylehuey.com>
---
 arch/x86/entry/syscalls/syscall_32.tbl |  1 +
 arch/x86/kernel/process.c  | 80 ++
 arch/x86/kernel/process_64.c   | 66 
 3 files changed, 81 insertions(+), 66 deletions(-)

diff --git a/arch/x86/entry/syscalls/syscall_32.tbl 
b/arch/x86/entry/syscalls/syscall_32.tbl
index f848572..3b6965b 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -386,3 +386,4 @@
 377    i386    copy_file_range     sys_copy_file_range
 378    i386    preadv2             sys_preadv2             compat_sys_preadv2
 379    i386    pwritev2            sys_pwritev2            compat_sys_pwritev2
+380    i386    arch_prctl          sys_arch_prctl
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 62c0b0e..0f857c3 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -20,6 +20,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -32,6 +33,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /*
  * per-CPU TSS segments. Threads are completely 'soft' on Linux,
@@ -567,3 +569,81 @@ unsigned long get_wchan(struct task_struct *p)
} while (count++ < 16 && p->state != TASK_RUNNING);
return 0;
 }
+
+long do_arch_prctl(struct task_struct *task, int code, unsigned long arg2)
+{
+   int ret = 0;
+   int doit = task == current;
+   int is_32 = IS_ENABLED(CONFIG_IA32_EMULATION) && test_thread_flag(TIF_IA32);
+   int cpu;
+
+   switch (code) {
+#ifdef CONFIG_X86_64
+   case ARCH_SET_GS:
+   if (is_32)
+   return -EINVAL;
+   if (arg2 >= TASK_SIZE_MAX)
+   return -EPERM;
+   cpu = get_cpu();
+   task->thread.gsindex = 0;
+   task->thread.gsbase = arg2;
+   if (doit) {
+   load_gs_index(0);
+   ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, arg2);
+   }
+   put_cpu();
+   break;
+   case ARCH_SET_FS:
+   if (is_32)
+   return -EINVAL;
+   /* Not strictly needed for fs, but do it for symmetry
+  with gs */
+   if (arg2 >= TASK_SIZE_MAX)
+   return -EPERM;
+   cpu = get_cpu();
+   task->thread.fsindex = 0;
+   task->thread.fsbase = arg2;
+   if (doit) {
+   /* set the selector to 0 to not confuse __switch_to */
+   loadsegment(fs, 0);
+   ret = wrmsrl_safe(MSR_FS_BASE, arg2);
+   }
+   put_cpu();
+   break;
+   case ARCH_GET_FS: {
+   unsigned long base;
+
+   if (is_32)
+   return -EINVAL;
+   if (doit)
+   rdmsrl(MSR_FS_BASE, base);
+   else
+   base = task->thread.fsbase;
+   ret = put_user(base, (unsigned long __user *)arg2);
+   break;
+   }
+   case ARCH_GET_GS: {
+   unsigned long base;
+
+   if (is_32)
+   return -EINVAL;
+   if (doit)
+   rdmsrl(MSR_KERNEL_GS_BASE, base);
+   else
+   base = task->thread.gsbase;
+   ret = put_user(base, (unsigned long __user *)arg2);
+   break;
+   }
+#endif
+   default:
+   ret = -EINVAL;
+   break;
+   }
+
+   return ret;
+}
+
+SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, arg2)
+{
+   return do_arch_prctl(current, code, arg2);
+}
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 63236d8..e8c6302 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -524,72 +524,6 @@ void set_personality_ia32(bool x32)
 }
 EXPORT_SYMBOL_GPL(set_personality_ia32);
 
-long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
-{
-   int ret = 0;
-   int doit = task == current;
-   int cpu;
-
-   switch (code) {
-   case ARCH_SET_GS:
-   if (addr >= TASK_SIZE_MAX)
-   return -EPERM;
-   cpu = get_cpu();
-   task->thread.gsindex = 0;
-   task->thread.gsbase = addr;
-   if (doit) {
-   load_gs_index(0);
-   ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr);
-   }
-   put_cpu();
-   break;
-   case ARCH_SET_FS:
-   /* Not strictly needed for fs, but do it for symmetry
-  with gs */
-   if (addr >= TASK_SIZE_MAX)
-   

[RESEND][PATCH v2 1/3] syscalls,x86 Expose arch_prctl on x86-32.

2016-09-14 Thread Kyle Huey
Signed-off-by: Kyle Huey 
---
 arch/x86/entry/syscalls/syscall_32.tbl |  1 +
 arch/x86/kernel/process.c  | 80 ++
 arch/x86/kernel/process_64.c   | 66 
 3 files changed, 81 insertions(+), 66 deletions(-)

diff --git a/arch/x86/entry/syscalls/syscall_32.tbl 
b/arch/x86/entry/syscalls/syscall_32.tbl
index f848572..3b6965b 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -386,3 +386,4 @@
 377i386copy_file_range sys_copy_file_range
 378i386preadv2 sys_preadv2 
compat_sys_preadv2
 379i386pwritev2sys_pwritev2
compat_sys_pwritev2
+380i386arch_prctl  sys_arch_prctl
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 62c0b0e..0f857c3 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -20,6 +20,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -32,6 +33,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /*
  * per-CPU TSS segments. Threads are completely 'soft' on Linux,
@@ -567,3 +569,81 @@ unsigned long get_wchan(struct task_struct *p)
} while (count++ < 16 && p->state != TASK_RUNNING);
return 0;
 }
+
+long do_arch_prctl(struct task_struct *task, int code, unsigned long arg2)
+{
+   int ret = 0;
+   int doit = task == current;
+   int is_32 = IS_ENABLED(CONFIG_IA32_EMULATION) && 
test_thread_flag(TIF_IA32);
+   int cpu;
+
+   switch (code) {
+#ifdef CONFIG_X86_64
+   case ARCH_SET_GS:
+   if (is_32)
+   return -EINVAL;
+   if (arg2 >= TASK_SIZE_MAX)
+   return -EPERM;
+   cpu = get_cpu();
+   task->thread.gsindex = 0;
+   task->thread.gsbase = arg2;
+   if (doit) {
+   load_gs_index(0);
+   ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, arg2);
+   }
+   put_cpu();
+   break;
+   case ARCH_SET_FS:
+   if (is_32)
+   return -EINVAL;
+   /* Not strictly needed for fs, but do it for symmetry
+  with gs */
+   if (arg2 >= TASK_SIZE_MAX)
+   return -EPERM;
+   cpu = get_cpu();
+   task->thread.fsindex = 0;
+   task->thread.fsbase = arg2;
+   if (doit) {
+   /* set the selector to 0 to not confuse __switch_to */
+   loadsegment(fs, 0);
+   ret = wrmsrl_safe(MSR_FS_BASE, arg2);
+   }
+   put_cpu();
+   break;
+   case ARCH_GET_FS: {
+   unsigned long base;
+
+   if (is_32)
+   return -EINVAL;
+   if (doit)
+   rdmsrl(MSR_FS_BASE, base);
+   else
+   base = task->thread.fsbase;
+   ret = put_user(base, (unsigned long __user *)arg2);
+   break;
+   }
+   case ARCH_GET_GS: {
+   unsigned long base;
+
+   if (is_32)
+   return -EINVAL;
+   if (doit)
+   rdmsrl(MSR_KERNEL_GS_BASE, base);
+   else
+   base = task->thread.gsbase;
+   ret = put_user(base, (unsigned long __user *)arg2);
+   break;
+   }
+#endif
+   default:
+   ret = -EINVAL;
+   break;
+   }
+
+   return ret;
+}
+
+SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, arg2)
+{
+   return do_arch_prctl(current, code, arg2);
+}
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 63236d8..e8c6302 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -524,72 +524,6 @@ void set_personality_ia32(bool x32)
 }
 EXPORT_SYMBOL_GPL(set_personality_ia32);
 
-long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
-{
-   int ret = 0;
-   int doit = task == current;
-   int cpu;
-
-   switch (code) {
-   case ARCH_SET_GS:
-   if (addr >= TASK_SIZE_MAX)
-   return -EPERM;
-   cpu = get_cpu();
-   task->thread.gsindex = 0;
-   task->thread.gsbase = addr;
-   if (doit) {
-   load_gs_index(0);
-   ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr);
-   }
-   put_cpu();
-   break;
-   case ARCH_SET_FS:
-   /* Not strictly needed for fs, but do it for symmetry
-  with gs */
-   if (addr >= TASK_SIZE_MAX)
-   return -EPERM;
-  

[RESEND][PATCH v2] arch_prctl,x86 Add ARCH_[GET|SET]_CPUID for controlling the CPUID instruction

2016-09-14 Thread Kyle Huey
(Resending because I screwed up the cover email, sorry about that.)

rr (http://rr-project.org/), a userspace record-and-replay reverse-
execution debugger, would like to trap and emulate the CPUID instruction.
This would allow us to a) mask away certain hardware features that rr does
not support (e.g. RDRAND) and b) enable trace portability across machines
by providing constant results.

4 patches follow, the first 3 to the kernel, and the final patch to man-pages.

The following changes have been made since v1:

Suggested by Borislav Petkov:
  - Uses arch_prctl instead of prctl.
  - Uses rdmsr_safe.
  - Added sample man-pages patch.
  - Various functions are renamed, style fixes.

Suggested by Andy Lutomirski:
  - Added a cpufeature bit to show up in /proc/cpuinfo.
  - Added sane behavior in Xen, by masking away the MSR_PLATFORM_INFO bit
    showing support for this feature for now.
  - Added a selftest, clarifying the bit is preserved on fork/exec.

The following issues were raised and are not addressed:

Use of cpuid within interrupt handlers: as Linus pointed out, CPUID only
faults at cpl>0, so this is not a concern.

Use a static_key instead of a TIF: I don't believe this solves anything.
There are currently 8 free TIF bits (after this patch), and it's always
possible to move this (or others) later if they are needed. Even if we were
to use a static_key we would still need to maintain state about which tasks
are subject to CPUID faulting and which are not somewhere else.



[RESEND][PATCH v2] arch_prctl,x86 Add ARCH_[GET|SET]_CPUID for controlling the CPUID instruction

2016-09-14 Thread Kyle Huey
(Resending because I screwed up the cover email, sorry about that.)

rr (http://rr-project.org/), a userspace record-and-replay reverse-
execution debugger, would like to trap and emulate the CPUID instruction.
This would allow us to a) mask away certain hardware features that rr does
not support (e.g. RDRAND) and b) enable trace portability across machines
by providing constant results.

4 patches follow, the first 3 to the kernel, and the final patch to man-pages.

The following changes have been made since v1:

Suggested by Borislav Petkov:
  - Uses arch_prctl instead of prctl.
  - Uses rdmsr_safe.
  - Added sample man-pages patch.
  - Various functions are renamed, style fixes.

Suggested by Andy Lutomirski:
  - Added a cpufeature bit to show up in /proc/cpuinfo.
  - Added sane behavior in Xen, by masking away the MSR_PLATFORM_INFO bit
    showing support for this feature for now.
  - Added a selftest, clarifying the bit is preserved on fork/exec.

The following issues were raised and are not addressed:

Use of cpuid within interrupt handlers: as Linus pointed out, CPUID only
faults at cpl>0, so this is not a concern.

Use a static_key instead of a TIF: I don't believe this solves anything.
There are currently 8 free TIF bits (after this patch), and it's always
possible to move this (or others) later if they are needed. Even if we were
to use a static_key we would still need to maintain state about which tasks
are subject to CPUID faulting and which are not somewhere else.



[PATCH v2 3/3] x86,arch_prctl Add ARCH_[GET|SET]_CPUID for controlling the CPUID instruction

2016-09-14 Thread Kyle Huey
Intel supports faulting on the CPUID instruction in newer processors. Bit
31 of MSR_PLATFORM_INFO advertises support for this feature. It is
documented in detail in Section 2.3.2 of
http://www.intel.com/content/dam/www/public/us/en/documents/application-notes/virtualization-technology-flexmigration-application-note.pdf

Signed-off-by: Kyle Huey <kh...@kylehuey.com>
---
 arch/x86/include/asm/msr-index.h  |   1 +
 arch/x86/include/asm/thread_info.h|   4 +-
 arch/x86/include/uapi/asm/prctl.h |   6 +
 arch/x86/kernel/process.c |  81 +++
 tools/testing/selftests/x86/Makefile  |   2 +-
 tools/testing/selftests/x86/cpuid-fault.c | 223 ++
 6 files changed, 315 insertions(+), 2 deletions(-)
 create mode 100644 tools/testing/selftests/x86/cpuid-fault.c

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 83908d5..4aebec2 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -53,6 +53,7 @@
 #define MSR_MTRRcap0x00fe
 #define MSR_IA32_BBL_CR_CTL0x0119
 #define MSR_IA32_BBL_CR_CTL3   0x011e
+#define MSR_MISC_FEATURES_ENABLES  0x0140
 
 #define MSR_IA32_SYSENTER_CS   0x0174
 #define MSR_IA32_SYSENTER_ESP  0x0175
diff --git a/arch/x86/include/asm/thread_info.h 
b/arch/x86/include/asm/thread_info.h
index 8b7c8d8..ec93976 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -93,6 +93,7 @@ struct thread_info {
 #define TIF_SECCOMP8   /* secure computing */
 #define TIF_USER_RETURN_NOTIFY 11  /* notify kernel of userspace return */
 #define TIF_UPROBE 12  /* breakpointed or singlestepping */
+#define TIF_NOCPUID15  /* CPUID is not accessible in userland 
*/
 #define TIF_NOTSC  16  /* TSC is not accessible in userland */
 #define TIF_IA32   17  /* IA32 compatibility process */
 #define TIF_FORK   18  /* ret_from_fork */
@@ -117,6 +118,7 @@ struct thread_info {
 #define _TIF_SECCOMP   (1 << TIF_SECCOMP)
 #define _TIF_USER_RETURN_NOTIFY(1 << TIF_USER_RETURN_NOTIFY)
 #define _TIF_UPROBE(1 << TIF_UPROBE)
+#define _TIF_NOCPUID   (1 << TIF_NOCPUID)
 #define _TIF_NOTSC (1 << TIF_NOTSC)
 #define _TIF_IA32  (1 << TIF_IA32)
 #define _TIF_FORK  (1 << TIF_FORK)
@@ -146,7 +148,7 @@ struct thread_info {
 
 /* flags to check in __switch_to() */
 #define _TIF_WORK_CTXSW
\
-   (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP)
+   (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP)
 
 #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
 #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
diff --git a/arch/x86/include/uapi/asm/prctl.h 
b/arch/x86/include/uapi/asm/prctl.h
index 3ac5032..c087e55 100644
--- a/arch/x86/include/uapi/asm/prctl.h
+++ b/arch/x86/include/uapi/asm/prctl.h
@@ -6,4 +6,10 @@
 #define ARCH_GET_FS 0x1003
 #define ARCH_GET_GS 0x1004
 
+/* Get/set the process' ability to use the CPUID instruction */
+#define ARCH_GET_CPUID 0x1005
+#define ARCH_SET_CPUID 0x1006
+# define ARCH_CPUID_ENABLE 1   /* allow the use of the CPUID 
instruction */
+# define ARCH_CPUID_SIGSEGV2   /* throw a SIGSEGV instead of 
reading the CPUID */
+
 #endif /* _ASM_X86_PRCTL_H */
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 0f857c3..5fc8e9d 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -193,6 +193,69 @@ int set_tsc_mode(unsigned int val)
return 0;
 }
 
+static void switch_cpuid_faulting(bool on)
+{
+   if (on)
+   msr_set_bit(MSR_MISC_FEATURES_ENABLES, 0);
+   else
+   msr_clear_bit(MSR_MISC_FEATURES_ENABLES, 0);
+}
+
+static void disable_cpuid(void)
+{
+   preempt_disable();
+   if (!test_and_set_thread_flag(TIF_NOCPUID))
+   /*
+* Must flip the CPU state synchronously with
+* TIF_NOCPUID in the current running context.
+*/
+   switch_cpuid_faulting(true);
+   preempt_enable();
+}
+
+static void enable_cpuid(void)
+{
+   preempt_disable();
+   if (test_and_clear_thread_flag(TIF_NOCPUID))
+   /*
+* Must flip the CPU state synchronously with
+* TIF_NOCPUID in the current running context.
+*/
+   switch_cpuid_faulting(false);
+   preempt_enable();
+}
+
+int get_cpuid_mode(unsigned long adr)
+{
+   unsigned int val;
+
+   if (test_thread_flag(TIF_NOCPUID))
+   val = ARCH_CPUID_SIGSEGV;
+   else
+   val = ARCH_CPUID_ENABLE;
+
+   return put_user(val, (unsigned in

[PATCH v2 3/3] x86,arch_prctl Add ARCH_[GET|SET]_CPUID for controlling the CPUID instruction

2016-09-14 Thread Kyle Huey
Intel supports faulting on the CPUID instruction in newer processors. Bit
31 of MSR_PLATFORM_INFO advertises support for this feature. It is
documented in detail in Section 2.3.2 of
http://www.intel.com/content/dam/www/public/us/en/documents/application-notes/virtualization-technology-flexmigration-application-note.pdf

Signed-off-by: Kyle Huey 
---
 arch/x86/include/asm/msr-index.h  |   1 +
 arch/x86/include/asm/thread_info.h|   4 +-
 arch/x86/include/uapi/asm/prctl.h |   6 +
 arch/x86/kernel/process.c |  81 +++
 tools/testing/selftests/x86/Makefile  |   2 +-
 tools/testing/selftests/x86/cpuid-fault.c | 223 ++
 6 files changed, 315 insertions(+), 2 deletions(-)
 create mode 100644 tools/testing/selftests/x86/cpuid-fault.c

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 83908d5..4aebec2 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -53,6 +53,7 @@
 #define MSR_MTRRcap0x00fe
 #define MSR_IA32_BBL_CR_CTL0x0119
 #define MSR_IA32_BBL_CR_CTL3   0x011e
+#define MSR_MISC_FEATURES_ENABLES  0x0140
 
 #define MSR_IA32_SYSENTER_CS   0x0174
 #define MSR_IA32_SYSENTER_ESP  0x0175
diff --git a/arch/x86/include/asm/thread_info.h 
b/arch/x86/include/asm/thread_info.h
index 8b7c8d8..ec93976 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -93,6 +93,7 @@ struct thread_info {
 #define TIF_SECCOMP8   /* secure computing */
 #define TIF_USER_RETURN_NOTIFY 11  /* notify kernel of userspace return */
 #define TIF_UPROBE 12  /* breakpointed or singlestepping */
+#define TIF_NOCPUID15  /* CPUID is not accessible in userland 
*/
 #define TIF_NOTSC  16  /* TSC is not accessible in userland */
 #define TIF_IA32   17  /* IA32 compatibility process */
 #define TIF_FORK   18  /* ret_from_fork */
@@ -117,6 +118,7 @@ struct thread_info {
 #define _TIF_SECCOMP   (1 << TIF_SECCOMP)
 #define _TIF_USER_RETURN_NOTIFY(1 << TIF_USER_RETURN_NOTIFY)
 #define _TIF_UPROBE(1 << TIF_UPROBE)
+#define _TIF_NOCPUID   (1 << TIF_NOCPUID)
 #define _TIF_NOTSC (1 << TIF_NOTSC)
 #define _TIF_IA32  (1 << TIF_IA32)
 #define _TIF_FORK  (1 << TIF_FORK)
@@ -146,7 +148,7 @@ struct thread_info {
 
 /* flags to check in __switch_to() */
 #define _TIF_WORK_CTXSW
\
-   (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP)
+   (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP)
 
 #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
 #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
diff --git a/arch/x86/include/uapi/asm/prctl.h 
b/arch/x86/include/uapi/asm/prctl.h
index 3ac5032..c087e55 100644
--- a/arch/x86/include/uapi/asm/prctl.h
+++ b/arch/x86/include/uapi/asm/prctl.h
@@ -6,4 +6,10 @@
 #define ARCH_GET_FS 0x1003
 #define ARCH_GET_GS 0x1004
 
+/* Get/set the process' ability to use the CPUID instruction */
+#define ARCH_GET_CPUID 0x1005
+#define ARCH_SET_CPUID 0x1006
+# define ARCH_CPUID_ENABLE 1   /* allow the use of the CPUID 
instruction */
+# define ARCH_CPUID_SIGSEGV2   /* throw a SIGSEGV instead of 
reading the CPUID */
+
 #endif /* _ASM_X86_PRCTL_H */
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 0f857c3..5fc8e9d 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -193,6 +193,69 @@ int set_tsc_mode(unsigned int val)
return 0;
 }
 
+static void switch_cpuid_faulting(bool on)
+{
+   if (on)
+   msr_set_bit(MSR_MISC_FEATURES_ENABLES, 0);
+   else
+   msr_clear_bit(MSR_MISC_FEATURES_ENABLES, 0);
+}
+
+static void disable_cpuid(void)
+{
+   preempt_disable();
+   if (!test_and_set_thread_flag(TIF_NOCPUID))
+   /*
+* Must flip the CPU state synchronously with
+* TIF_NOCPUID in the current running context.
+*/
+   switch_cpuid_faulting(true);
+   preempt_enable();
+}
+
+static void enable_cpuid(void)
+{
+   preempt_disable();
+   if (test_and_clear_thread_flag(TIF_NOCPUID))
+   /*
+* Must flip the CPU state synchronously with
+* TIF_NOCPUID in the current running context.
+*/
+   switch_cpuid_faulting(false);
+   preempt_enable();
+}
+
+int get_cpuid_mode(unsigned long adr)
+{
+   unsigned int val;
+
+   if (test_thread_flag(TIF_NOCPUID))
+   val = ARCH_CPUID_SIGSEGV;
+   else
+   val = ARCH_CPUID_ENABLE;
+
+   return put_user(val, (unsigned int __user *)adr);
+}
+
+int set_cpu

[PATCH v2 1/3] syscalls,x86 Expose arch_prctl on x86-32.

2016-09-14 Thread Kyle Huey
Signed-off-by: Kyle Huey <kh...@kylehuey.com>
---
 arch/x86/entry/syscalls/syscall_32.tbl |  1 +
 arch/x86/kernel/process.c  | 80 ++
 arch/x86/kernel/process_64.c   | 66 
 3 files changed, 81 insertions(+), 66 deletions(-)

diff --git a/arch/x86/entry/syscalls/syscall_32.tbl 
b/arch/x86/entry/syscalls/syscall_32.tbl
index f848572..3b6965b 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -386,3 +386,4 @@
 377i386copy_file_range sys_copy_file_range
 378i386preadv2 sys_preadv2 
compat_sys_preadv2
 379i386pwritev2sys_pwritev2
compat_sys_pwritev2
+380i386arch_prctl  sys_arch_prctl
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 62c0b0e..0f857c3 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -20,6 +20,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -32,6 +33,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /*
  * per-CPU TSS segments. Threads are completely 'soft' on Linux,
@@ -567,3 +569,81 @@ unsigned long get_wchan(struct task_struct *p)
} while (count++ < 16 && p->state != TASK_RUNNING);
return 0;
 }
+
+long do_arch_prctl(struct task_struct *task, int code, unsigned long arg2)
+{
+   int ret = 0;
+   int doit = task == current;
+   int is_32 = IS_ENABLED(CONFIG_IA32_EMULATION) && 
test_thread_flag(TIF_IA32);
+   int cpu;
+
+   switch (code) {
+#ifdef CONFIG_X86_64
+   case ARCH_SET_GS:
+   if (is_32)
+   return -EINVAL;
+   if (arg2 >= TASK_SIZE_MAX)
+   return -EPERM;
+   cpu = get_cpu();
+   task->thread.gsindex = 0;
+   task->thread.gsbase = arg2;
+   if (doit) {
+   load_gs_index(0);
+   ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, arg2);
+   }
+   put_cpu();
+   break;
+   case ARCH_SET_FS:
+   if (is_32)
+   return -EINVAL;
+   /* Not strictly needed for fs, but do it for symmetry
+  with gs */
+   if (arg2 >= TASK_SIZE_MAX)
+   return -EPERM;
+   cpu = get_cpu();
+   task->thread.fsindex = 0;
+   task->thread.fsbase = arg2;
+   if (doit) {
+   /* set the selector to 0 to not confuse __switch_to */
+   loadsegment(fs, 0);
+   ret = wrmsrl_safe(MSR_FS_BASE, arg2);
+   }
+   put_cpu();
+   break;
+   case ARCH_GET_FS: {
+   unsigned long base;
+
+   if (is_32)
+   return -EINVAL;
+   if (doit)
+   rdmsrl(MSR_FS_BASE, base);
+   else
+   base = task->thread.fsbase;
+   ret = put_user(base, (unsigned long __user *)arg2);
+   break;
+   }
+   case ARCH_GET_GS: {
+   unsigned long base;
+
+   if (is_32)
+   return -EINVAL;
+   if (doit)
+   rdmsrl(MSR_KERNEL_GS_BASE, base);
+   else
+   base = task->thread.gsbase;
+   ret = put_user(base, (unsigned long __user *)arg2);
+   break;
+   }
+#endif
+   default:
+   ret = -EINVAL;
+   break;
+   }
+
+   return ret;
+}
+
+SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, arg2)
+{
+   return do_arch_prctl(current, code, arg2);
+}
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 63236d8..e8c6302 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -524,72 +524,6 @@ void set_personality_ia32(bool x32)
 }
 EXPORT_SYMBOL_GPL(set_personality_ia32);
 
-long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
-{
-   int ret = 0;
-   int doit = task == current;
-   int cpu;
-
-   switch (code) {
-   case ARCH_SET_GS:
-   if (addr >= TASK_SIZE_MAX)
-   return -EPERM;
-   cpu = get_cpu();
-   task->thread.gsindex = 0;
-   task->thread.gsbase = addr;
-   if (doit) {
-   load_gs_index(0);
-   ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr);
-   }
-   put_cpu();
-   break;
-   case ARCH_SET_FS:
-   /* Not strictly needed for fs, but do it for symmetry
-  with gs */
-   if (addr >= TASK_SIZE_MAX)
-   

[PATCH v2 2/3] x86 Test and expose CPUID faulting capabilities in /proc/cpuinfo

2016-09-14 Thread Kyle Huey
Xen advertises the underlying support for CPUID faulting but does not pass
through writes to the relevant MSR, nor does it virtualize it, so it does
not actually work. For now mask off the relevant bit on MSR_PLATFORM_INFO.

Signed-off-by: Kyle Huey <kh...@kylehuey.com>
---
 arch/x86/include/asm/cpufeatures.h |  1 +
 arch/x86/include/asm/msr-index.h   |  1 +
 arch/x86/kernel/cpu/scattered.c| 14 ++
 arch/x86/xen/enlighten.c   |  3 +++
 4 files changed, 19 insertions(+)

diff --git a/arch/x86/include/asm/cpufeatures.h 
b/arch/x86/include/asm/cpufeatures.h
index 92a8308..78b9d06 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -190,6 +190,7 @@
 
 #define X86_FEATURE_CPB( 7*32+ 2) /* AMD Core Performance 
Boost */
 #define X86_FEATURE_EPB( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS 
support */
+#define X86_FEATURE_CPUID_FAULT ( 7*32+ 4) /* Intel CPUID faulting */
 
 #define X86_FEATURE_HW_PSTATE  ( 7*32+ 8) /* AMD HW-PState */
 #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 56f4c66..83908d5 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -41,6 +41,7 @@
 #define MSR_IA32_PERFCTR1  0x00c2
 #define MSR_FSB_FREQ   0x00cd
 #define MSR_PLATFORM_INFO  0x00ce
+#define CPUID_FAULTING_SUPPORT (1UL << 31)
 
 #define MSR_NHM_SNB_PKG_CST_CFG_CTL0x00e2
 #define NHM_C3_AUTO_DEMOTE (1UL << 25)
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 8cb57df..d502da1 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -24,6 +24,17 @@ enum cpuid_regs {
CR_EBX
 };
 
+static int supports_cpuid_faulting(void)
+{
+   unsigned int lo, hi;
+
+   if (rdmsr_safe(MSR_PLATFORM_INFO, &lo, &hi) == 0 &&
+   (lo & CPUID_FAULTING_SUPPORT))
+   return 1;
+   else
+   return 0;
+}
+
 void init_scattered_cpuid_features(struct cpuinfo_x86 *c)
 {
u32 max_level;
@@ -54,4 +65,7 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c)
if (regs[cb->reg] & (1 << cb->bit))
set_cpu_cap(c, cb->feature);
}
+
+   if (supports_cpuid_faulting())
+   set_cpu_cap(c, X86_FEATURE_CPUID_FAULT);
 }
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index b86ebb1..2c47f0c 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1050,6 +1050,9 @@ static u64 xen_read_msr_safe(unsigned int msr, int *err)
 #endif
val &= ~X2APIC_ENABLE;
break;
+   case MSR_PLATFORM_INFO:
+   val &= ~CPUID_FAULTING_SUPPORT;
+   break;
}
return val;
 }
-- 
2.7.4



[PATCH v2 1/3] syscalls,x86 Expose arch_prctl on x86-32.

2016-09-14 Thread Kyle Huey
Signed-off-by: Kyle Huey 
---
 arch/x86/entry/syscalls/syscall_32.tbl |  1 +
 arch/x86/kernel/process.c  | 80 ++
 arch/x86/kernel/process_64.c   | 66 
 3 files changed, 81 insertions(+), 66 deletions(-)

diff --git a/arch/x86/entry/syscalls/syscall_32.tbl 
b/arch/x86/entry/syscalls/syscall_32.tbl
index f848572..3b6965b 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -386,3 +386,4 @@
 377i386copy_file_range sys_copy_file_range
 378i386preadv2 sys_preadv2 
compat_sys_preadv2
 379i386pwritev2sys_pwritev2
compat_sys_pwritev2
+380i386arch_prctl  sys_arch_prctl
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 62c0b0e..0f857c3 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -20,6 +20,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -32,6 +33,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /*
  * per-CPU TSS segments. Threads are completely 'soft' on Linux,
@@ -567,3 +569,81 @@ unsigned long get_wchan(struct task_struct *p)
} while (count++ < 16 && p->state != TASK_RUNNING);
return 0;
 }
+
+long do_arch_prctl(struct task_struct *task, int code, unsigned long arg2)
+{
+   int ret = 0;
+   int doit = task == current;
+   int is_32 = IS_ENABLED(CONFIG_IA32_EMULATION) && 
test_thread_flag(TIF_IA32);
+   int cpu;
+
+   switch (code) {
+#ifdef CONFIG_X86_64
+   case ARCH_SET_GS:
+   if (is_32)
+   return -EINVAL;
+   if (arg2 >= TASK_SIZE_MAX)
+   return -EPERM;
+   cpu = get_cpu();
+   task->thread.gsindex = 0;
+   task->thread.gsbase = arg2;
+   if (doit) {
+   load_gs_index(0);
+   ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, arg2);
+   }
+   put_cpu();
+   break;
+   case ARCH_SET_FS:
+   if (is_32)
+   return -EINVAL;
+   /* Not strictly needed for fs, but do it for symmetry
+  with gs */
+   if (arg2 >= TASK_SIZE_MAX)
+   return -EPERM;
+   cpu = get_cpu();
+   task->thread.fsindex = 0;
+   task->thread.fsbase = arg2;
+   if (doit) {
+   /* set the selector to 0 to not confuse __switch_to */
+   loadsegment(fs, 0);
+   ret = wrmsrl_safe(MSR_FS_BASE, arg2);
+   }
+   put_cpu();
+   break;
+   case ARCH_GET_FS: {
+   unsigned long base;
+
+   if (is_32)
+   return -EINVAL;
+   if (doit)
+   rdmsrl(MSR_FS_BASE, base);
+   else
+   base = task->thread.fsbase;
+   ret = put_user(base, (unsigned long __user *)arg2);
+   break;
+   }
+   case ARCH_GET_GS: {
+   unsigned long base;
+
+   if (is_32)
+   return -EINVAL;
+   if (doit)
+   rdmsrl(MSR_KERNEL_GS_BASE, base);
+   else
+   base = task->thread.gsbase;
+   ret = put_user(base, (unsigned long __user *)arg2);
+   break;
+   }
+#endif
+   default:
+   ret = -EINVAL;
+   break;
+   }
+
+   return ret;
+}
+
+SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, arg2)
+{
+   return do_arch_prctl(current, code, arg2);
+}
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 63236d8..e8c6302 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -524,72 +524,6 @@ void set_personality_ia32(bool x32)
 }
 EXPORT_SYMBOL_GPL(set_personality_ia32);
 
-long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
-{
-   int ret = 0;
-   int doit = task == current;
-   int cpu;
-
-   switch (code) {
-   case ARCH_SET_GS:
-   if (addr >= TASK_SIZE_MAX)
-   return -EPERM;
-   cpu = get_cpu();
-   task->thread.gsindex = 0;
-   task->thread.gsbase = addr;
-   if (doit) {
-   load_gs_index(0);
-   ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr);
-   }
-   put_cpu();
-   break;
-   case ARCH_SET_FS:
-   /* Not strictly needed for fs, but do it for symmetry
-  with gs */
-   if (addr >= TASK_SIZE_MAX)
-   return -EPERM;
-  

[PATCH v2 2/3] x86 Test and expose CPUID faulting capabilities in /proc/cpuinfo

2016-09-14 Thread Kyle Huey
Xen advertises the underlying support for CPUID faulting but does not pass
through writes to the relevant MSR, nor does it virtualize it, so it does
not actually work. For now mask off the relevant bit on MSR_PLATFORM_INFO.

Signed-off-by: Kyle Huey 
---
 arch/x86/include/asm/cpufeatures.h |  1 +
 arch/x86/include/asm/msr-index.h   |  1 +
 arch/x86/kernel/cpu/scattered.c| 14 ++
 arch/x86/xen/enlighten.c   |  3 +++
 4 files changed, 19 insertions(+)

diff --git a/arch/x86/include/asm/cpufeatures.h 
b/arch/x86/include/asm/cpufeatures.h
index 92a8308..78b9d06 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -190,6 +190,7 @@
 
 #define X86_FEATURE_CPB( 7*32+ 2) /* AMD Core Performance 
Boost */
 #define X86_FEATURE_EPB( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS 
support */
+#define X86_FEATURE_CPUID_FAULT ( 7*32+ 4) /* Intel CPUID faulting */
 
 #define X86_FEATURE_HW_PSTATE  ( 7*32+ 8) /* AMD HW-PState */
 #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 56f4c66..83908d5 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -41,6 +41,7 @@
 #define MSR_IA32_PERFCTR1  0x00c2
 #define MSR_FSB_FREQ   0x00cd
 #define MSR_PLATFORM_INFO  0x00ce
+#define CPUID_FAULTING_SUPPORT (1UL << 31)
 
 #define MSR_NHM_SNB_PKG_CST_CFG_CTL0x00e2
 #define NHM_C3_AUTO_DEMOTE (1UL << 25)
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 8cb57df..d502da1 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -24,6 +24,17 @@ enum cpuid_regs {
CR_EBX
 };
 
+static int supports_cpuid_faulting(void)
+{
+   unsigned int lo, hi;
+
+   if (rdmsr_safe(MSR_PLATFORM_INFO, &lo, &hi) == 0 &&
+   (lo & CPUID_FAULTING_SUPPORT))
+   return 1;
+   else
+   return 0;
+}
+
 void init_scattered_cpuid_features(struct cpuinfo_x86 *c)
 {
u32 max_level;
@@ -54,4 +65,7 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c)
if (regs[cb->reg] & (1 << cb->bit))
set_cpu_cap(c, cb->feature);
}
+
+   if (supports_cpuid_faulting())
+   set_cpu_cap(c, X86_FEATURE_CPUID_FAULT);
 }
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index b86ebb1..2c47f0c 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1050,6 +1050,9 @@ static u64 xen_read_msr_safe(unsigned int msr, int *err)
 #endif
val &= ~X2APIC_ENABLE;
break;
+   case MSR_PLATFORM_INFO:
+   val &= ~CPUID_FAULTING_SUPPORT;
+   break;
}
return val;
 }
-- 
2.7.4



Re: [PATCH] prctl,x86 Add PR_[GET|SET]_CPUID for controlling the CPUID instruction.

2016-09-14 Thread Kyle Huey
On Mon, Sep 12, 2016 at 9:56 AM, Andy Lutomirski  wrote:
> You should explicitly check that, if the
> feature is set under Xen PV, then the MSR actually works as
> advertised.  This may require talking to the Xen folks to make sure
> you're testing the right configuration.

This is interesting.  When running under Xen PV the kernel is allowed
to read the real value of MSR_PLATFORM_INFO and see that CPUID
faulting is supported.  But as you suggested, writing to
MSR_MISC_FEATURES_ENABLES doesn't actually enable CPUID faulting, at
least not in any way that works.

It's not obvious to me how to test this, because when this feature
works, CPUID only faults in userspace, not in the kernel.  Is there
existing code somewhere that runs tests like this in userspace?

- Kyle


Re: [PATCH] prctl,x86 Add PR_[GET|SET]_CPUID for controlling the CPUID instruction.

2016-09-14 Thread Kyle Huey
On Mon, Sep 12, 2016 at 9:56 AM, Andy Lutomirski  wrote:
> You should explicitly check that, if the
> feature is set under Xen PV, then the MSR actually works as
> advertised.  This may require talking to the Xen folks to make sure
> you're testing the right configuration.

This is interesting.  When running under Xen PV the kernel is allowed
to read the real value of MSR_PLATFORM_INFO and see that CPUID
faulting is supported.  But as you suggested, writing to
MSR_MISC_FEATURES_ENABLES doesn't actually enable CPUID faulting, at
least not in any way that works.

It's not obvious to me how to test this, because when this feature
works, CPUID only faults in userspace, not in the kernel.  Is there
existing code somewhere that runs tests like this in userspace?

- Kyle


Re: [PATCH] prctl,x86 Add PR_[GET|SET]_CPUID for controlling the CPUID instruction.

2016-09-13 Thread Kyle Huey
On Mon, Sep 12, 2016 at 7:15 AM, Kyle Huey <m...@kylehuey.com> wrote:
> On Mon, Sep 12, 2016 at 2:07 AM, Borislav Petkov <b...@suse.de> wrote:
>> On Sun, Sep 11, 2016 at 05:29:23PM -0700, Kyle Huey wrote:
>>> @@ -2162,6 +2168,12 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, 
>>> arg2, unsigned long, arg3,
>>>   case PR_SET_TSC:
>>>   error = SET_TSC_CTL(arg2);
>>>   break;
>>> + case PR_GET_CPUID:
>>> + error = GET_CPUID_CTL(arg2);
>>> + break;
>>> + case PR_SET_CPUID:
>>> + error = SET_CPUID_CTL(arg2);
>>> + break;
>>>   case PR_TASK_PERF_EVENTS_DISABLE:
>>>   error = perf_event_task_disable();
>>>   break;
>>
>> This whole fun should be in arch_prctl() as it is arch-specific.
>
> Yeah, I was debating about that, and did it this way because of
> PR_SET_TSC.  Will fix.

arch_prctl is not yet exposed on 32 bit x86, so we'll have to add that
as well to do this.

- Kyle


Re: [PATCH] prctl,x86 Add PR_[GET|SET]_CPUID for controlling the CPUID instruction.

2016-09-13 Thread Kyle Huey
On Mon, Sep 12, 2016 at 7:15 AM, Kyle Huey  wrote:
> On Mon, Sep 12, 2016 at 2:07 AM, Borislav Petkov  wrote:
>> On Sun, Sep 11, 2016 at 05:29:23PM -0700, Kyle Huey wrote:
>>> @@ -2162,6 +2168,12 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, 
>>> arg2, unsigned long, arg3,
>>>   case PR_SET_TSC:
>>>   error = SET_TSC_CTL(arg2);
>>>   break;
>>> + case PR_GET_CPUID:
>>> + error = GET_CPUID_CTL(arg2);
>>> + break;
>>> + case PR_SET_CPUID:
>>> + error = SET_CPUID_CTL(arg2);
>>> + break;
>>>   case PR_TASK_PERF_EVENTS_DISABLE:
>>>   error = perf_event_task_disable();
>>>   break;
>>
>> This whole fun should be in arch_prctl() as it is arch-specific.
>
> Yeah, I was debating about that, and did it this way because of
> PR_SET_TSC.  Will fix.

arch_prctl is not yet exposed on 32 bit x86, so we'll have to add that
as well to do this.

- Kyle


Re: [PATCH] prctl,x86 Add PR_[GET|SET]_CPUID for controlling the CPUID instruction.

2016-09-12 Thread Kyle Huey
Thanks for the review!

On Mon, Sep 12, 2016 at 2:07 AM, Borislav Petkov <b...@suse.de> wrote:
> On Sun, Sep 11, 2016 at 05:29:23PM -0700, Kyle Huey wrote:
>> rr (http://rr-project.org/), a userspace record-and-replay reverse-
>> execution debugger, would like to trap and emulate the CPUID instruction.
>> This would allow us to a) mask away certain hardware features that rr does
>> not support (e.g. RDRAND) and b) enable trace portability across machines
>> by providing constant results.
>>
>> Intel supports faulting on the CPUID instruction in newer processors. Bit
>> 31 of MSR_PLATFORM_INFO advertises support for this feature. It is
>> documented in detail in Section 2.3.2 of
>> http://www.intel.com/content/dam/www/public/us/en/documents/application-notes/virtualization-technology-flexmigration-application-note.pdf.
>>
>> I would like to thank Trevor Saunders <tbsau...@tbsaunde.org> for drafting
>> an earlier version of this patch.
>>
>> Signed-off-by: Kyle Huey <kh...@kylehuey.com>
>> ---
>>  arch/x86/include/asm/msr-index.h   |  1 +
>>  arch/x86/include/asm/processor.h   |  7 
>>  arch/x86/include/asm/thread_info.h |  4 +-
>>  arch/x86/kernel/process.c  | 79 
>> ++
>>  include/uapi/linux/prctl.h |  6 +++
>>  kernel/sys.c   | 12 ++
>>  6 files changed, 108 insertions(+), 1 deletion(-)
>
> ...
>
>> diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
>> index 62c0b0e..a189516 100644
>> --- a/arch/x86/kernel/process.c
>> +++ b/arch/x86/kernel/process.c
>> @@ -191,6 +191,76 @@ int set_tsc_mode(unsigned int val)
>>   return 0;
>>  }
>>
>> +static void hard_disable_CPUID(void)
>
> Why hard_disable? I don't see any soft_disable.

Copied from PR_SET_TSC. Would you prefer something like
disable_cpuid/disable_cpuid_and_set_flag for
hard_disable_CPUID/disable_CPUID?

> Also, I can't say that I like all that screaming "CPUID" :-)
>
> disable_cpuid() looks just fine to me too.

Ok.

>> +{
>> + msr_set_bit(MSR_MISC_FEATURES_ENABLES, 0);
>> +}
>> +
>> +static void disable_CPUID(void)
>> +{
>> + preempt_disable();
>> + if (!test_and_set_thread_flag(TIF_NOCPUID))
>> + /*
>> +  * Must flip the CPU state synchronously with
>> +  * TIF_NOCPUID in the current running context.
>> +  */
>> + hard_disable_CPUID();
>> + preempt_enable();
>> +}
>> +
>> +static void hard_enable_CPUID(void)
>> +{
>> + msr_clear_bit(MSR_MISC_FEATURES_ENABLES, 0);
>> +}
>> +
>> +static void enable_CPUID(void)
>> +{
>> + preempt_disable();
>> + if (test_and_clear_thread_flag(TIF_NOCPUID))
>> + /*
>> +  * Must flip the CPU state synchronously with
>> +  * TIF_NOCPUID in the current running context.
>> +  */
>> + hard_enable_CPUID();
>> + preempt_enable();
>> +}
>> +
>> +static int supports_CPUID_faulting(void)
>> +{
>> + unsigned int lo, hi;
>> +
>> + rdmsr(MSR_PLATFORM_INFO, lo, hi);
>
> rdmsr_safe()

Ok.

>> + if ((lo & (1 << 31)))
>> + return 1;
>> + else
>> + return 0;
>> +}
>>
>> +int get_cpuid_mode(unsigned long adr)
>> +{
>> + unsigned int val;
>> +
>> + if (test_thread_flag(TIF_NOCPUID))
>> + val = PR_CPUID_SIGSEGV;
>> + else
>> + val = PR_CPUID_ENABLE;
>> +
>> + return put_user(val, (unsigned int __user *)adr);
>> +}
>> +
>> +int set_cpuid_mode(unsigned int val)
>> +{
>> + // Only disable/enable_CPUID() if it is supported on this hardware.
>
> Use /* ... */ for comments in the kernel.

Ok.

>> + if (val == PR_CPUID_SIGSEGV && supports_CPUID_faulting())
>> + disable_CPUID();
>> + else if (val == PR_CPUID_ENABLE && supports_CPUID_faulting())
>> + enable_CPUID();
>> + else
>> + return -EINVAL;
>> +
>> + return 0;
>> +}
>> +
>>  void __switch_to_xtra(struct task_struct *prev_p, struct task_struct 
>> *next_p,
>> struct tss_struct *tss)
>>  {
>> @@ -210,6 +280,15 @@ void __switch_to_xtra(struct task_struct *prev_p, 
>> struct task_struct *next_p,
>>   update_debugctlmsr(debugctl);
>>

Re: [PATCH] prctl,x86 Add PR_[GET|SET]_CPUID for controlling the CPUID instruction.

2016-09-12 Thread Kyle Huey
Thanks for the review!

On Mon, Sep 12, 2016 at 2:07 AM, Borislav Petkov  wrote:
> On Sun, Sep 11, 2016 at 05:29:23PM -0700, Kyle Huey wrote:
>> rr (http://rr-project.org/), a userspace record-and-replay reverse-
>> execution debugger, would like to trap and emulate the CPUID instruction.
>> This would allow us to a) mask away certain hardware features that rr does
>> not support (e.g. RDRAND) and b) enable trace portability across machines
>> by providing constant results.
>>
>> Intel supports faulting on the CPUID instruction in newer processors. Bit
>> 31 of MSR_PLATFORM_INFO advertises support for this feature. It is
>> documented in detail in Section 2.3.2 of
>> http://www.intel.com/content/dam/www/public/us/en/documents/application-notes/virtualization-technology-flexmigration-application-note.pdf.
>>
>> I would like to thank Trevor Saunders  for drafting
>> an earlier version of this patch.
>>
>> Signed-off-by: Kyle Huey 
>> ---
>>  arch/x86/include/asm/msr-index.h   |  1 +
>>  arch/x86/include/asm/processor.h   |  7 
>>  arch/x86/include/asm/thread_info.h |  4 +-
>>  arch/x86/kernel/process.c  | 79 
>> ++
>>  include/uapi/linux/prctl.h |  6 +++
>>  kernel/sys.c   | 12 ++
>>  6 files changed, 108 insertions(+), 1 deletion(-)
>
> ...
>
>> diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
>> index 62c0b0e..a189516 100644
>> --- a/arch/x86/kernel/process.c
>> +++ b/arch/x86/kernel/process.c
>> @@ -191,6 +191,76 @@ int set_tsc_mode(unsigned int val)
>>   return 0;
>>  }
>>
>> +static void hard_disable_CPUID(void)
>
> Why hard_disable? I don't see any soft_disable.

Copied from PR_SET_TSC. Would you prefer something like
disable_cpuid/disable_cpuid_and_set_flag for
hard_disable_CPUID/disable_CPUID?

> Also, I can't say that I like all that screaming "CPUID" :-)
>
> disable_cpuid() looks just fine to me too.

Ok.

>> +{
>> + msr_set_bit(MSR_MISC_FEATURES_ENABLES, 0);
>> +}
>> +
>> +static void disable_CPUID(void)
>> +{
>> + preempt_disable();
>> + if (!test_and_set_thread_flag(TIF_NOCPUID))
>> + /*
>> +  * Must flip the CPU state synchronously with
>> +  * TIF_NOCPUID in the current running context.
>> +  */
>> + hard_disable_CPUID();
>> + preempt_enable();
>> +}
>> +
>> +static void hard_enable_CPUID(void)
>> +{
>> + msr_clear_bit(MSR_MISC_FEATURES_ENABLES, 0);
>> +}
>> +
>> +static void enable_CPUID(void)
>> +{
>> + preempt_disable();
>> + if (test_and_clear_thread_flag(TIF_NOCPUID))
>> + /*
>> +  * Must flip the CPU state synchronously with
>> +  * TIF_NOCPUID in the current running context.
>> +  */
>> + hard_enable_CPUID();
>> + preempt_enable();
>> +}
>> +
>> +static int supports_CPUID_faulting(void)
>> +{
>> + unsigned int lo, hi;
>> +
>> + rdmsr(MSR_PLATFORM_INFO, lo, hi);
>
> rdmsr_safe()

Ok.

>> + if ((lo & (1 << 31)))
>> + return 1;
>> + else
>> + return 0;
>> +}
>>
>> +int get_cpuid_mode(unsigned long adr)
>> +{
>> + unsigned int val;
>> +
>> + if (test_thread_flag(TIF_NOCPUID))
>> + val = PR_CPUID_SIGSEGV;
>> + else
>> + val = PR_CPUID_ENABLE;
>> +
>> + return put_user(val, (unsigned int __user *)adr);
>> +}
>> +
>> +int set_cpuid_mode(unsigned int val)
>> +{
>> + // Only disable/enable_CPUID() if it is supported on this hardware.
>
> Use /* ... */ for comments in the kernel.

Ok.

>> + if (val == PR_CPUID_SIGSEGV && supports_CPUID_faulting())
>> + disable_CPUID();
>> + else if (val == PR_CPUID_ENABLE && supports_CPUID_faulting())
>> + enable_CPUID();
>> + else
>> + return -EINVAL;
>> +
>> + return 0;
>> +}
>> +
>>  void __switch_to_xtra(struct task_struct *prev_p, struct task_struct 
>> *next_p,
>> struct tss_struct *tss)
>>  {
>> @@ -210,6 +280,15 @@ void __switch_to_xtra(struct task_struct *prev_p, 
>> struct task_struct *next_p,
>>   update_debugctlmsr(debugctl);
>>   }
>>
>> + if (test_tsk_thread_flag(prev_p, TIF_NOCPUID) ^
&g

[PATCH] prctl,x86 Add PR_[GET|SET]_CPUID for controlling the CPUID instruction.

2016-09-11 Thread Kyle Huey
rr (http://rr-project.org/), a userspace record-and-replay reverse-
execution debugger, would like to trap and emulate the CPUID instruction.
This would allow us to a) mask away certain hardware features that rr does
not support (e.g. RDRAND) and b) enable trace portability across machines
by providing constant results.

Intel supports faulting on the CPUID instruction in newer processors. Bit
31 of MSR_PLATFORM_INFO advertises support for this feature. It is
documented in detail in Section 2.3.2 of
http://www.intel.com/content/dam/www/public/us/en/documents/application-notes/virtualization-technology-flexmigration-application-note.pdf.

I would like to thank Trevor Saunders <tbsau...@tbsaunde.org> for drafting
an earlier version of this patch.

Signed-off-by: Kyle Huey <kh...@kylehuey.com>
---
 arch/x86/include/asm/msr-index.h   |  1 +
 arch/x86/include/asm/processor.h   |  7 
 arch/x86/include/asm/thread_info.h |  4 +-
 arch/x86/kernel/process.c  | 79 ++
 include/uapi/linux/prctl.h |  6 +++
 kernel/sys.c   | 12 ++
 6 files changed, 108 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 56f4c66..28b0736 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -52,6 +52,7 @@
 #define MSR_MTRRcap0x00fe
 #define MSR_IA32_BBL_CR_CTL0x0119
 #define MSR_IA32_BBL_CR_CTL3   0x011e
+#define MSR_MISC_FEATURES_ENABLES  0x0140
 
 #define MSR_IA32_SYSENTER_CS   0x0174
 #define MSR_IA32_SYSENTER_ESP  0x0175
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 63def95..661c4c1 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -805,6 +805,13 @@ extern void start_thread(struct pt_regs *regs, unsigned 
long new_ip,
 extern int get_tsc_mode(unsigned long adr);
 extern int set_tsc_mode(unsigned int val);
 
+/* Get/set a process' ability to use the CPUID instruction */
+#define GET_CPUID_CTL(adr) get_cpuid_mode((adr))
+#define SET_CPUID_CTL(val) set_cpuid_mode((val))
+
+extern int get_cpuid_mode(unsigned long adr);
+extern int set_cpuid_mode(unsigned int val);
+
 /* Register/unregister a process' MPX related resource */
 #define MPX_ENABLE_MANAGEMENT()mpx_enable_management()
 #define MPX_DISABLE_MANAGEMENT()   mpx_disable_management()
diff --git a/arch/x86/include/asm/thread_info.h 
b/arch/x86/include/asm/thread_info.h
index 8b7c8d8..ec93976 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -93,6 +93,7 @@ struct thread_info {
 #define TIF_SECCOMP8   /* secure computing */
 #define TIF_USER_RETURN_NOTIFY 11  /* notify kernel of userspace return */
 #define TIF_UPROBE 12  /* breakpointed or singlestepping */
+#define TIF_NOCPUID15  /* CPUID is not accessible in userland 
*/
 #define TIF_NOTSC  16  /* TSC is not accessible in userland */
 #define TIF_IA32   17  /* IA32 compatibility process */
 #define TIF_FORK   18  /* ret_from_fork */
@@ -117,6 +118,7 @@ struct thread_info {
 #define _TIF_SECCOMP   (1 << TIF_SECCOMP)
 #define _TIF_USER_RETURN_NOTIFY(1 << TIF_USER_RETURN_NOTIFY)
 #define _TIF_UPROBE(1 << TIF_UPROBE)
+#define _TIF_NOCPUID   (1 << TIF_NOCPUID)
 #define _TIF_NOTSC (1 << TIF_NOTSC)
 #define _TIF_IA32  (1 << TIF_IA32)
 #define _TIF_FORK  (1 << TIF_FORK)
@@ -146,7 +148,7 @@ struct thread_info {
 
 /* flags to check in __switch_to() */
 #define _TIF_WORK_CTXSW
\
-   (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP)
+   (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP)
 
 #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
 #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 62c0b0e..a189516 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -191,6 +191,76 @@ int set_tsc_mode(unsigned int val)
return 0;
 }
 
+static void hard_disable_CPUID(void)
+{
+   msr_set_bit(MSR_MISC_FEATURES_ENABLES, 0);
+}
+
+static void disable_CPUID(void)
+{
+   preempt_disable();
+   if (!test_and_set_thread_flag(TIF_NOCPUID))
+   /*
+* Must flip the CPU state synchronously with
+* TIF_NOCPUID in the current running context.
+*/
+   hard_disable_CPUID();
+   preempt_enable();
+}
+
+static void hard_enable_CPUID(void)
+{
+   msr_clear_bit(MSR_MISC_FEATURES_ENABLES, 0);
+}
+
+static void enable_CPUID(void)
+{
+   preempt_disable

[PATCH] prctl,x86 Add PR_[GET|SET]_CPUID for controlling the CPUID instruction.

2016-09-11 Thread Kyle Huey
rr (http://rr-project.org/), a userspace record-and-replay reverse-
execution debugger, would like to trap and emulate the CPUID instruction.
This would allow us to a) mask away certain hardware features that rr does
not support (e.g. RDRAND) and b) enable trace portability across machines
by providing constant results.

Intel supports faulting on the CPUID instruction in newer processors. Bit
31 of MSR_PLATFORM_INFO advertises support for this feature. It is
documented in detail in Section 2.3.2 of
http://www.intel.com/content/dam/www/public/us/en/documents/application-notes/virtualization-technology-flexmigration-application-note.pdf.

I would like to thank Trevor Saunders  for drafting
an earlier version of this patch.

Signed-off-by: Kyle Huey 
---
 arch/x86/include/asm/msr-index.h   |  1 +
 arch/x86/include/asm/processor.h   |  7 
 arch/x86/include/asm/thread_info.h |  4 +-
 arch/x86/kernel/process.c  | 79 ++
 include/uapi/linux/prctl.h |  6 +++
 kernel/sys.c   | 12 ++
 6 files changed, 108 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 56f4c66..28b0736 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -52,6 +52,7 @@
 #define MSR_MTRRcap0x00fe
 #define MSR_IA32_BBL_CR_CTL0x0119
 #define MSR_IA32_BBL_CR_CTL3   0x011e
+#define MSR_MISC_FEATURES_ENABLES  0x0140
 
 #define MSR_IA32_SYSENTER_CS   0x0174
 #define MSR_IA32_SYSENTER_ESP  0x0175
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 63def95..661c4c1 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -805,6 +805,13 @@ extern void start_thread(struct pt_regs *regs, unsigned 
long new_ip,
 extern int get_tsc_mode(unsigned long adr);
 extern int set_tsc_mode(unsigned int val);
 
+/* Get/set a process' ability to use the CPUID instruction */
+#define GET_CPUID_CTL(adr) get_cpuid_mode((adr))
+#define SET_CPUID_CTL(val) set_cpuid_mode((val))
+
+extern int get_cpuid_mode(unsigned long adr);
+extern int set_cpuid_mode(unsigned int val);
+
 /* Register/unregister a process' MPX related resource */
 #define MPX_ENABLE_MANAGEMENT()mpx_enable_management()
 #define MPX_DISABLE_MANAGEMENT()   mpx_disable_management()
diff --git a/arch/x86/include/asm/thread_info.h 
b/arch/x86/include/asm/thread_info.h
index 8b7c8d8..ec93976 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -93,6 +93,7 @@ struct thread_info {
 #define TIF_SECCOMP8   /* secure computing */
 #define TIF_USER_RETURN_NOTIFY 11  /* notify kernel of userspace return */
 #define TIF_UPROBE 12  /* breakpointed or singlestepping */
+#define TIF_NOCPUID15  /* CPUID is not accessible in userland 
*/
 #define TIF_NOTSC  16  /* TSC is not accessible in userland */
 #define TIF_IA32   17  /* IA32 compatibility process */
 #define TIF_FORK   18  /* ret_from_fork */
@@ -117,6 +118,7 @@ struct thread_info {
 #define _TIF_SECCOMP   (1 << TIF_SECCOMP)
 #define _TIF_USER_RETURN_NOTIFY(1 << TIF_USER_RETURN_NOTIFY)
 #define _TIF_UPROBE(1 << TIF_UPROBE)
+#define _TIF_NOCPUID   (1 << TIF_NOCPUID)
 #define _TIF_NOTSC (1 << TIF_NOTSC)
 #define _TIF_IA32  (1 << TIF_IA32)
 #define _TIF_FORK  (1 << TIF_FORK)
@@ -146,7 +148,7 @@ struct thread_info {
 
 /* flags to check in __switch_to() */
 #define _TIF_WORK_CTXSW
\
-   (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP)
+   (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP)
 
 #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
 #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 62c0b0e..a189516 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -191,6 +191,76 @@ int set_tsc_mode(unsigned int val)
return 0;
 }
 
+static void hard_disable_CPUID(void)
+{
+   msr_set_bit(MSR_MISC_FEATURES_ENABLES, 0);
+}
+
+static void disable_CPUID(void)
+{
+   preempt_disable();
+   if (!test_and_set_thread_flag(TIF_NOCPUID))
+   /*
+* Must flip the CPU state synchronously with
+* TIF_NOCPUID in the current running context.
+*/
+   hard_disable_CPUID();
+   preempt_enable();
+}
+
+static void hard_enable_CPUID(void)
+{
+   msr_clear_bit(MSR_MISC_FEATURES_ENABLES, 0);
+}
+
+static void enable_CPUID(void)
+{
+   preempt_disable();
+   if (test_and_clear_thread_flag(TIF_NOCPUID))
+   /*
+ 

Re: [PATCH] seccomp: Fix tracer exit notifications during fatal signals

2016-08-22 Thread Kyle Huey
On Thu, Aug 11, 2016 at 11:18 AM, Kees Cook  wrote:
> On Thu, Aug 11, 2016 at 8:12 AM, Oleg Nesterov  wrote:
>> On 08/10, Kees Cook wrote:
>>>
>>> This fixes a ptrace vs fatal pending signals bug as manifested in seccomp
>>> now that seccomp was reordered to happen after ptrace. The short version is
>>> that seccomp should not attempt to call do_exit() while fatal signals are
>>> pending under a tracer. This was needlessly paranoid. Instead, the syscall
>>> can just be skipped and normal signal handling, tracer notification, and
>>> process death can happen.
>>
>> ACK.
>>
>> I think this change is fine in any case, but...
>>
>>> The bug happens because when __seccomp_filter() detects
>>> fatal_signal_pending(), it calls do_exit() without dequeuing the fatal
>>> signal. When do_exit() sends the PTRACE_EVENT_EXIT
>>
>> I _never_ understood what PTRACE_EVENT_EXIT should actually do. I mean,
>> when it should actually stop. This was never defined.
>
> Yeah, agreed. I spent some time reading through what should happen to
> __TASK_TRACED during exit and my head spun. :)
>
>>> notification and
>>> that task is descheduled, __schedule() notices that there is a fatal
>>> signal pending and changes its state from TASK_TRACED to TASK_RUNNING.
>>
>> And this can happen anyway, with or without this change, with or without
>> seccomp. Because another fatal signal can be pending. So PTRACE_EVENT_EXIT
>> actually depends on /dev/random.
>>
>> Perhaps we should finally define what it should do. Say, it should only
>> stop if SIGKILL was sent "implicitly" by exit/exec. But as for exec,
>> there are more (off-topic) complications, not sure we actually want this...
>>
>> Nevermind, the main problem is that _any_ change in this area can break
>> something. This code is sooo old.
>>
>> But let me repeat, I think this change is fine anyway.
>>
>> Acked-by: Oleg Nesterov 
>
> Awesome, thanks!

Hi folks,

Can't help but notice this didn't make it into rc3.  Not sure if it's
bubbling up somewhere I can't see, but we'd really like this to get
into 4.8 so we don't have to work around the regression.

Thanks!

- Kyle


Re: [PATCH] seccomp: Fix tracer exit notifications during fatal signals

2016-08-22 Thread Kyle Huey
On Thu, Aug 11, 2016 at 11:18 AM, Kees Cook  wrote:
> On Thu, Aug 11, 2016 at 8:12 AM, Oleg Nesterov  wrote:
>> On 08/10, Kees Cook wrote:
>>>
>>> This fixes a ptrace vs fatal pending signals bug as manifested in seccomp
>>> now that seccomp was reordered to happen after ptrace. The short version is
>>> that seccomp should not attempt to call do_exit() while fatal signals are
>>> pending under a tracer. This was needlessly paranoid. Instead, the syscall
>>> can just be skipped and normal signal handling, tracer notification, and
>>> process death can happen.
>>
>> ACK.
>>
>> I think this change is fine in any case, but...
>>
>>> The bug happens because when __seccomp_filter() detects
>>> fatal_signal_pending(), it calls do_exit() without dequeuing the fatal
>>> signal. When do_exit() sends the PTRACE_EVENT_EXIT
>>
>> I _never_ understood what PTRACE_EVENT_EXIT should actually do. I mean,
>> when it should actually stop. This was never defined.
>
> Yeah, agreed. I spent some time reading through what should happen to
> __TASK_TRACED during exit and my head spun. :)
>
>>> notification and
>>> that task is descheduled, __schedule() notices that there is a fatal
>>> signal pending and changes its state from TASK_TRACED to TASK_RUNNING.
>>
>> And this can happen anyway, with or without this change, with or without
>> seccomp. Because another fatal signal can be pending. So PTRACE_EVENT_EXIT
>> actually depends on /dev/random.
>>
>> Perhaps we should finally define what it should do. Say, it should only
>> stop if SIGKILL was sent "implicitly" by exit/exec. But as for exec,
>> there are more (off-topic) complications, not sure we actually want this...
>>
>> Nevermind, the main problem is that _any_ change in this area can break
>> something. This code is sooo old.
>>
>> But let me repeat, I think this change is fine anyway.
>>
>> Acked-by: Oleg Nesterov 
>
> Awesome, thanks!

Hi folks,

Can't help but notice this didn't make it into rc3.  Not sure if it's
bubbling up somewhere I can't see, but we'd really like this to get
into 4.8 so we don't have to work around the regression.

Thanks!

- Kyle


Re: [PATCH] seccomp: Fix tracer exit notifications during fatal signals

2016-08-10 Thread Kyle Huey
On Wed, Aug 10, 2016 at 4:37 PM, Kees Cook <keesc...@chromium.org> wrote:
> This fixes a ptrace vs fatal pending signals bug as manifested in seccomp
> now that seccomp was reordered to happen after ptrace. The short version is
> that seccomp should not attempt to call do_exit() while fatal signals are
> pending under a tracer. This was needlessly paranoid. Instead, the syscall
> can just be skipped and normal signal handling, tracer notification, and
> process death can happen.
>
> Slightly edited original bug report:
>
> If a tracee task is in a PTRACE_EVENT_SECCOMP trap, or has been resumed
> after such a trap but not yet been scheduled, and another task in the
> thread-group calls exit_group(), then the tracee task exits without the
> ptracer receiving a PTRACE_EVENT_EXIT notification. Test case here:
> https://gist.github.com/khuey/3c43ac247c72cef8c956ca73281c9be7
>
> The bug happens because when __seccomp_filter() detects
> fatal_signal_pending(), it calls do_exit() without dequeuing the fatal
> signal. When do_exit() sends the PTRACE_EVENT_EXIT notification and
> that task is descheduled, __schedule() notices that there is a fatal
> signal pending and changes its state from TASK_TRACED to TASK_RUNNING.
> That prevents the ptracer's waitpid() from returning the ptrace event.
> A more detailed analysis is here:
> https://github.com/mozilla/rr/issues/1762#issuecomment-237396255.
>
> Reported-by: Robert O'Callahan <rob...@ocallahan.org>
> Reported-by: Kyle Huey <kh...@kylehuey.com>
> Fixes: 93e35efb8de4 ("x86/ptrace: run seccomp after ptrace")
> Signed-off-by: Kees Cook <keesc...@chromium.org>
> ---
>  kernel/seccomp.c | 12 
>  1 file changed, 8 insertions(+), 4 deletions(-)
>
> diff --git a/kernel/seccomp.c b/kernel/seccomp.c
> index ef6c6c3f9d8a..0db7c8a2afe2 100644
> --- a/kernel/seccomp.c
> +++ b/kernel/seccomp.c
> @@ -605,12 +605,16 @@ static int __seccomp_filter(int this_syscall, const 
> struct seccomp_data *sd,
> ptrace_event(PTRACE_EVENT_SECCOMP, data);
> /*
>  * The delivery of a fatal signal during event
> -* notification may silently skip tracer notification.
> -* Terminating the task now avoids executing a system
> -* call that may not be intended.
> +* notification may silently skip tracer notification,
> +* which could leave us with a potentially unmodified
> +* syscall that the tracer would have liked to have
> +* changed. Since the process is about to die, we just
> +* force the syscall to be skipped and let the signal
> +* kill the process and correctly handle any tracer exit
> +* notifications.
>  */
> if (fatal_signal_pending(current))
> -   do_exit(SIGSYS);
> +   goto skip;
> /* Check if the tracer forced the syscall to be skipped. */
> this_syscall = syscall_get_nr(current, task_pt_regs(current));
> if (this_syscall < 0)
> --
> 2.7.4
>
>
> --
> Kees Cook
> Nexus Security

This patch fixes rr as well.

- Kyle


Re: [PATCH] seccomp: Fix tracer exit notifications during fatal signals

2016-08-10 Thread Kyle Huey
On Wed, Aug 10, 2016 at 4:37 PM, Kees Cook  wrote:
> This fixes a ptrace vs fatal pending signals bug as manifested in seccomp
> now that seccomp was reordered to happen after ptrace. The short version is
> that seccomp should not attempt to call do_exit() while fatal signals are
> pending under a tracer. This was needlessly paranoid. Instead, the syscall
> can just be skipped and normal signal handling, tracer notification, and
> process death can happen.
>
> Slightly edited original bug report:
>
> If a tracee task is in a PTRACE_EVENT_SECCOMP trap, or has been resumed
> after such a trap but not yet been scheduled, and another task in the
> thread-group calls exit_group(), then the tracee task exits without the
> ptracer receiving a PTRACE_EVENT_EXIT notification. Test case here:
> https://gist.github.com/khuey/3c43ac247c72cef8c956ca73281c9be7
>
> The bug happens because when __seccomp_filter() detects
> fatal_signal_pending(), it calls do_exit() without dequeuing the fatal
> signal. When do_exit() sends the PTRACE_EVENT_EXIT notification and
> that task is descheduled, __schedule() notices that there is a fatal
> signal pending and changes its state from TASK_TRACED to TASK_RUNNING.
> That prevents the ptracer's waitpid() from returning the ptrace event.
> A more detailed analysis is here:
> https://github.com/mozilla/rr/issues/1762#issuecomment-237396255.
>
> Reported-by: Robert O'Callahan 
> Reported-by: Kyle Huey 
> Fixes: 93e35efb8de4 ("x86/ptrace: run seccomp after ptrace")
> Signed-off-by: Kees Cook 
> ---
>  kernel/seccomp.c | 12 
>  1 file changed, 8 insertions(+), 4 deletions(-)
>
> diff --git a/kernel/seccomp.c b/kernel/seccomp.c
> index ef6c6c3f9d8a..0db7c8a2afe2 100644
> --- a/kernel/seccomp.c
> +++ b/kernel/seccomp.c
> @@ -605,12 +605,16 @@ static int __seccomp_filter(int this_syscall, const 
> struct seccomp_data *sd,
> ptrace_event(PTRACE_EVENT_SECCOMP, data);
> /*
>  * The delivery of a fatal signal during event
> -* notification may silently skip tracer notification.
> -* Terminating the task now avoids executing a system
> -* call that may not be intended.
> +* notification may silently skip tracer notification,
> +* which could leave us with a potentially unmodified
> +* syscall that the tracer would have liked to have
> +* changed. Since the process is about to die, we just
> +* force the syscall to be skipped and let the signal
> +* kill the process and correctly handle any tracer exit
> +* notifications.
>  */
> if (fatal_signal_pending(current))
> -   do_exit(SIGSYS);
> +   goto skip;
> /* Check if the tracer forced the syscall to be skipped. */
> this_syscall = syscall_get_nr(current, task_pt_regs(current));
> if (this_syscall < 0)
> --
> 2.7.4
>
>
> --
> Kees Cook
> Nexus Security

This patch fixes rr as well.

- Kyle


[PATCH] seccomp: suppress fatal signals that will never be delivered before seccomp forces an exit because of said signals

2016-08-10 Thread Kyle Huey
This fixes rr. It doesn't quite fix the provided testcase, because the testcase 
fails to wait on the tracee after awakening from the nanosleep. Instead the 
testcase immediately does a PTRACE_CONT, discarding the PTRACE_EVENT_EXIT. 
The slightly modified testcase at 
https://gist.github.com/khuey/3c43ac247c72cef8c956c does pass.

I don't see any obvious way to dequeue only the fatal signal, so instead I 
dequeue them all. Since none of these signals will ever be delivered it 
shouldn't affect the executing task.

Suggested-by: Robert O'Callahan <rob...@ocallahan.org>
Signed-off-by: Kyle Huey <kh...@kylehuey.com>
---
 kernel/seccomp.c | 14 +-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index ef6c6c3..728074d 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -609,8 +609,20 @@ static int __seccomp_filter(int this_syscall, const struct 
seccomp_data *sd,
 * Terminating the task now avoids executing a system
 * call that may not be intended.
 */
-   if (fatal_signal_pending(current))
+   if (fatal_signal_pending(current)) {
+   /*
+* Swallow the signals we will never deliver.
+* If we do not do this, the PTRACE_EVENT_EXIT will
+* be suppressed by those signals.
+*/
+   siginfo_t info;
+
+   spin_lock_irq(&current->sighand->siglock);
+   while (dequeue_signal(current, &current->blocked, &info));
+   spin_unlock_irq(&current->sighand->siglock);
+
do_exit(SIGSYS);
+   }
/* Check if the tracer forced the syscall to be skipped. */
this_syscall = syscall_get_nr(current, task_pt_regs(current));
if (this_syscall < 0)
-- 
2.7.4



[PATCH] seccomp: suppress fatal signals that will never be delivered before seccomp forces an exit because of said signals

2016-08-10 Thread Kyle Huey
This fixes rr. It doesn't quite fix the provided testcase, because the testcase 
fails to wait on the tracee after awakening from the nanosleep. Instead the 
testcase immediately does a PTRACE_CONT, discarding the PTRACE_EVENT_EXIT. 
The slightly modified testcase at 
https://gist.github.com/khuey/3c43ac247c72cef8c956c does pass.

I don't see any obvious way to dequeue only the fatal signal, so instead I 
dequeue them all. Since none of these signals will ever be delivered it 
shouldn't affect the executing task.

Suggested-by: Robert O'Callahan 
Signed-off-by: Kyle Huey 
---
 kernel/seccomp.c | 14 +-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index ef6c6c3..728074d 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -609,8 +609,20 @@ static int __seccomp_filter(int this_syscall, const struct 
seccomp_data *sd,
 * Terminating the task now avoids executing a system
 * call that may not be intended.
 */
-   if (fatal_signal_pending(current))
+   if (fatal_signal_pending(current)) {
+   /*
+* Swallow the signals we will never deliver.
+* If we do not do this, the PTRACE_EVENT_EXIT will
+* be suppressed by those signals.
+*/
+   siginfo_t info;
+
+   spin_lock_irq(&current->sighand->siglock);
+   while (dequeue_signal(current, &current->blocked, &info));
+   spin_unlock_irq(&current->sighand->siglock);
+
do_exit(SIGSYS);
+   }
/* Check if the tracer forced the syscall to be skipped. */
this_syscall = syscall_get_nr(current, task_pt_regs(current));
if (this_syscall < 0)
-- 
2.7.4



Re: [RESEND PATCH v3] ARM: tegra124: pmu support

2015-07-27 Thread Kyle Huey
On Sat, Jul 18, 2015 at 6:54 AM, Kyle Huey  wrote:
> On Fri, Jul 17, 2015 at 4:59 PM, Thierry Reding
>  wrote:
>> On Mon, Jul 13, 2015 at 10:35:45AM -0700, Kyle Huey wrote:
>>> This patch modifies the device tree for tegra124 based devices to enable
>>> the Cortex A15 PMU.  The interrupt numbers are taken from NVIDIA TRM
>>> DP-06905-001_v03p.  This patch was tested on a Jetson TK1.
>>>
>>> Updated for proper ordering and to add interrupt-affinity values.
>>>
>>> Signed-off-by: Kyle Huey 
>>> ---
>>>  arch/arm/boot/dts/tegra124.dtsi | 17 +
>>>  1 file changed, 13 insertions(+), 4 deletions(-)
>>
>> Is there any way to test this? What are the effects of adding this?
>
> Yes.  This enables the ARM PMU driver for the Cortex A15, which allows
> one to use hardware performance counters via the perf_event_open API.
> For a simple test program, see
> https://github.com/khuey/perf-counter-test/.  Without this patch, the
> perf_event_open syscall will fail.  With this patch, the program will
> print out the performance counter value for each iteration of the
> loop. (IIRC on the A15 the branch counter was removed, so you may want
> to replace 0xD with 0x8 which counts instructions executed if you want
> to see a non-zero number there).  You also will see a message about
> the PMU in the kernel log at startup after applying this patch.
>
> I have also tested this extensively (including the interrupt features
> of the PMU) on a more complex program.
>
>> Does it enable using perf for profiling?
>
> I have not tested it, but I believe you can use perf without this
> patch if you do not use features that require hardware performance
> counter support.  This patch would enable those features.
>
>>> diff --git a/arch/arm/boot/dts/tegra124.dtsi 
>>> b/arch/arm/boot/dts/tegra124.dtsi
>>> index 13cc7ca..de07d7e 100644
>>> --- a/arch/arm/boot/dts/tegra124.dtsi
>>> +++ b/arch/arm/boot/dts/tegra124.dtsi
>>> @@ -918,31 +918,40 @@
>>>   #address-cells = <1>;
>>>   #size-cells = <0>;
>>>
>>> - cpu@0 {
>>> + A15_0: cpu@0 {
>>>   device_type = "cpu";
>>>   compatible = "arm,cortex-a15";
>>>   reg = <0>;
>>>   };
>>>
>>> - cpu@1 {
>>> + A15_1: cpu@1 {
>>>   device_type = "cpu";
>>>   compatible = "arm,cortex-a15";
>>>   reg = <1>;
>>>   };
>>>
>>> - cpu@2 {
>>> + A15_2: cpu@2 {
>>>   device_type = "cpu";
>>>   compatible = "arm,cortex-a15";
>>>   reg = <2>;
>>>   };
>>>
>>> - cpu@3 {
>>> + A15_3: cpu@3 {
>>>   device_type = "cpu";
>>>   compatible = "arm,cortex-a15";
>>>   reg = <3>;
>>>   };
>>>   };
>>>
>>> + pmu {
>>> + compatible = "arm,cortex-a15-pmu";
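>>> + interrupts = <GIC_SPI 144 IRQ_TYPE_LEVEL_HIGH>,
>>> +  <GIC_SPI 145 IRQ_TYPE_LEVEL_HIGH>,
>>> +  <GIC_SPI 146 IRQ_TYPE_LEVEL_HIGH>,
>>> +  <GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH>;
>>> + interrupt-affinity = <&A15_0>, <&A15_1>, <&A15_2>, <&A15_3>;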
>>
>> These labels look somewhat artificial to me, perhaps we could do
>> something like the following instead?
>>
>> interrupt-affinity = <&{/cpus/cpu@0}>, ...;
>>
>> That's slightly more obvious and avoids the need to "invent" labels for
>> the CPUs.
>>
>> No need to respin, I can fix that up when applying if nobody objects to
>> using the alternative notation.
>>
>> Thierry
>
> I have no objections.  I was not aware that the device tree syntax
> supported that.  FWIW I cargo-culted my way to victory from
> vexpress-v2p-ca9.dts here.
>
> - Kyle

Anything else I can do to help move this along?

- Kyle
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RESEND PATCH v3] ARM: tegra124: pmu support

2015-07-27 Thread Kyle Huey
On Sat, Jul 18, 2015 at 6:54 AM, Kyle Huey m...@kylehuey.com wrote:
 On Fri, Jul 17, 2015 at 4:59 PM, Thierry Reding
 thierry.red...@gmail.com wrote:
 On Mon, Jul 13, 2015 at 10:35:45AM -0700, Kyle Huey wrote:
 This patch modifies the device tree for tegra124 based devices to enable
 the Cortex A15 PMU.  The interrupt numbers are taken from NVIDIA TRM
 DP-06905-001_v03p.  This patch was tested on a Jetson TK1.

 Updated for proper ordering and to add interrupt-affinity values.

 Signed-off-by: Kyle Huey kh...@kylehuey.com
 ---
  arch/arm/boot/dts/tegra124.dtsi | 17 +
  1 file changed, 13 insertions(+), 4 deletions(-)

 Is there any way to test this? What are the effects of adding this?

 Yes.  This enables the ARM PMU driver for the Cortex A15, which allows
 one to use hardware performance counters via the perf_event_open API.
 For a simple test program, see
 https://github.com/khuey/perf-counter-test/.  Without this patch, the
 perf_event_open syscall will fail.  With this patch, the program will
 print out the performance counter value for each iteration of the
 loop. (IIRC on the A15 the branch counter was removed, so you may want
 to replace 0xD with 0x8 which counts instructions executed if you want
 to see a non-zero number there).  You also will see a message about
 the PMU in the kernel log at startup after applying this patch.

 I have also tested this extensively (including the interrupt features
 of the PMU) on a more complex program.

 Does it enable using perf for profiling?

 I have not tested it, but I believe you can use perf without this
 patch if you do not use features that require hardware performance
 counter support.  This patch would enable those features.

 diff --git a/arch/arm/boot/dts/tegra124.dtsi 
 b/arch/arm/boot/dts/tegra124.dtsi
 index 13cc7ca..de07d7e 100644
 --- a/arch/arm/boot/dts/tegra124.dtsi
 +++ b/arch/arm/boot/dts/tegra124.dtsi
 @@ -918,31 +918,40 @@
   #address-cells = <1>;
   #size-cells = <0>;

 - cpu@0 {
 + A15_0: cpu@0 {
   device_type = "cpu";
   compatible = "arm,cortex-a15";
   reg = <0>;
   };

 - cpu@1 {
 + A15_1: cpu@1 {
   device_type = "cpu";
   compatible = "arm,cortex-a15";
   reg = <1>;
   };

 - cpu@2 {
 + A15_2: cpu@2 {
   device_type = "cpu";
   compatible = "arm,cortex-a15";
   reg = <2>;
   };

 - cpu@3 {
 + A15_3: cpu@3 {
   device_type = "cpu";
   compatible = "arm,cortex-a15";
   reg = <3>;
   };
   };

 + pmu {
 + compatible = "arm,cortex-a15-pmu";
 + interrupts = <GIC_SPI 144 IRQ_TYPE_LEVEL_HIGH>,
 +  <GIC_SPI 145 IRQ_TYPE_LEVEL_HIGH>,
 +  <GIC_SPI 146 IRQ_TYPE_LEVEL_HIGH>,
 +  <GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH>;
 + interrupt-affinity = <&A15_0>, <&A15_1>, <&A15_2>, <&A15_3>;

 These labels look somewhat artificial to me, perhaps we could do
 something like the following instead?

 interrupt-affinity = <&{/cpus/cpu@0}>, ...;

 That's slightly more obvious and avoids the need to invent labels for
 the CPUs.

 No need to respin, I can fix that up when applying if nobody objects to
 using the alternative notation.

 Thierry

 I have no objections.  I was not aware that the device tree syntax
 supported that.  FWIW I cargo-culted my way to victory from
 vexpress-v2p-ca9.dts here.

 - Kyle

Anything else I can do to help move this along?

- Kyle
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] ARM: ptrace: Implement PTRACE_SYSEMU

2015-07-21 Thread Kyle Huey
Implement PTRACE_SYSEMU support on ARM. Currently this ptrace call is
supported only on x86. This copies the x86 semantics for invoking ptrace hooks
(the syscall entry hook is invoked, the exit hook is not). This patch also
defines PTRACE_SYSEMU_SINGLESTEP because kernel/ptrace.c expects it to be
present if PTRACE_SYSEMU is present. Attempting to use PTRACE_SYSEMU_SINGLESTEP
will fail at runtime on ARM with EIO since there is no single stepping on ARM.

Signed-off-by: Kyle Huey 
---
 arch/arm/include/asm/thread_info.h |  8 ++--
 arch/arm/include/uapi/asm/ptrace.h | 32 +---
 arch/arm/kernel/ptrace.c   |  9 +++--
 3 files changed, 30 insertions(+), 19 deletions(-)

diff --git a/arch/arm/include/asm/thread_info.h 
b/arch/arm/include/asm/thread_info.h
index bd32ede..0e3ee19 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -137,7 +137,8 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user 
*,
 /*
  * thread information flags:
  *  TIF_SYSCALL_TRACE  - syscall trace active
- *  TIF_SYSCAL_AUDIT   - syscall auditing active
+ *  TIF_SYSCALL_AUDIT  - syscall auditing active
+ *  TIF_SYSCALL_EMU- syscall emulation active
  *  TIF_SIGPENDING - signal pending
  *  TIF_NEED_RESCHED   - rescheduling necessary
  *  TIF_NOTIFY_RESUME  - callback before returning to user
@@ -153,6 +154,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user 
*,
 #define TIF_SYSCALL_TRACEPOINT 10
 #define TIF_SECCOMP		11	/* seccomp syscall filtering active */
 #define TIF_NOHZ   12  /* in adaptive nohz mode */
+#define TIF_SYSCALL_EMU	13
 #define TIF_USING_IWMMXT   17
 #define TIF_MEMDIE 18  /* is terminating due to OOM killer */
 #define TIF_RESTORE_SIGMASK	20
@@ -165,11 +167,13 @@ extern int vfp_restore_user_hwstate(struct user_vfp 
__user *,
 #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
 #define _TIF_SYSCALL_TRACEPOINT	(1 << TIF_SYSCALL_TRACEPOINT)
 #define _TIF_SECCOMP   (1 << TIF_SECCOMP)
+#define _TIF_SYSCALL_EMU   (1 << TIF_SYSCALL_EMU)
 #define _TIF_USING_IWMMXT  (1 << TIF_USING_IWMMXT)
 
 /* Checks for any syscall work in entry-common.S */
 #define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
-  _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP)
+  _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
+  _TIF_SYSCALL_EMU)
 
 /*
  * Change these and you break ASM code in entry-common.S
diff --git a/arch/arm/include/uapi/asm/ptrace.h 
b/arch/arm/include/uapi/asm/ptrace.h
index 5af0ed1..2c5e4d7 100644
--- a/arch/arm/include/uapi/asm/ptrace.h
+++ b/arch/arm/include/uapi/asm/ptrace.h
@@ -12,25 +12,27 @@
 
 #include <asm/hwcap.h>
 
-#define PTRACE_GETREGS 12
-#define PTRACE_SETREGS 13
-#define PTRACE_GETFPREGS   14
-#define PTRACE_SETFPREGS   15
+#define PTRACE_GETREGS 12
+#define PTRACE_SETREGS 13
+#define PTRACE_GETFPREGS   14
+#define PTRACE_SETFPREGS   15
 /* PTRACE_ATTACH is 16 */
 /* PTRACE_DETACH is 17 */
-#define PTRACE_GETWMMXREGS 18
-#define PTRACE_SETWMMXREGS 19
+#define PTRACE_GETWMMXREGS 18
+#define PTRACE_SETWMMXREGS 19
 /* 20 is unused */
-#define PTRACE_OLDSETOPTIONS   21
-#define PTRACE_GET_THREAD_AREA 22
-#define PTRACE_SET_SYSCALL 23
+#define PTRACE_OLDSETOPTIONS   21
+#define PTRACE_GET_THREAD_AREA 22
+#define PTRACE_SET_SYSCALL 23
 /* PTRACE_SYSCALL is 24 */
-#define PTRACE_GETCRUNCHREGS   25
-#define PTRACE_SETCRUNCHREGS   26
-#define PTRACE_GETVFPREGS  27
-#define PTRACE_SETVFPREGS  28
-#define PTRACE_GETHBPREGS  29
-#define PTRACE_SETHBPREGS  30
+#define PTRACE_GETCRUNCHREGS   25
+#define PTRACE_SETCRUNCHREGS   26
+#define PTRACE_GETVFPREGS  27
+#define PTRACE_SETVFPREGS  28
+#define PTRACE_GETHBPREGS  29
+#define PTRACE_SETHBPREGS  30
+#define PTRACE_SYSEMU  31
+#define PTRACE_SYSEMU_SINGLESTEP   32
 
 /*
  * PSR bits
diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index ef9119f..c84058c 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -930,6 +930,8 @@ static void tracehook_report_syscall(struct pt_regs *regs,
 
 asmlinkage int syscall_trace_enter(struct pt_regs *regs, int scno)
 {
+   int ret = 0;
+
current_thread_info()->syscall = scno;
 
/* Do the secure computing check first; failures should be fast. */
@@ -941,7 +943,10 @@ asmlinkage int syscall_trace_enter(struct pt_regs *regs, 
int scno)
secure_computing_strict(scno);
 #endif
 
-   if (test_thread_flag(TIF_SYSCALL_TRACE))
+   if (test_thread_flag(TIF_SYSCALL_EMU))
+   ret = -1;
+
+   if (ret || t

[PATCH] ARM: ptrace: Implement PTRACE_SYSEMU

2015-07-21 Thread Kyle Huey
Implement PTRACE_SYSEMU support on ARM. Currently this ptrace call is
supported only on x86. This copies the x86 semantics for invoking ptrace hooks
(the syscall entry hook is invoked, the exit hook is not). This patch also
defines PTRACE_SYSEMU_SINGLESTEP because kernel/ptrace.c expects it to be
present if PTRACE_SYSEMU is present. Attempting to use PTRACE_SYSEMU_SINGLESTEP
will fail at runtime on ARM with EIO since there is no single stepping on ARM.

Signed-off-by: Kyle Huey kh...@kylehuey.com
---
 arch/arm/include/asm/thread_info.h |  8 ++--
 arch/arm/include/uapi/asm/ptrace.h | 32 +---
 arch/arm/kernel/ptrace.c   |  9 +++--
 3 files changed, 30 insertions(+), 19 deletions(-)

diff --git a/arch/arm/include/asm/thread_info.h 
b/arch/arm/include/asm/thread_info.h
index bd32ede..0e3ee19 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -137,7 +137,8 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user 
*,
 /*
  * thread information flags:
  *  TIF_SYSCALL_TRACE  - syscall trace active
- *  TIF_SYSCAL_AUDIT   - syscall auditing active
+ *  TIF_SYSCALL_AUDIT  - syscall auditing active
+ *  TIF_SYSCALL_EMU- syscall emulation active
  *  TIF_SIGPENDING - signal pending
  *  TIF_NEED_RESCHED   - rescheduling necessary
  *  TIF_NOTIFY_RESUME  - callback before returning to user
@@ -153,6 +154,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user 
*,
 #define TIF_SYSCALL_TRACEPOINT 10
 #define TIF_SECCOMP		11	/* seccomp syscall filtering active */
 #define TIF_NOHZ   12  /* in adaptive nohz mode */
+#define TIF_SYSCALL_EMU	13
 #define TIF_USING_IWMMXT   17
 #define TIF_MEMDIE 18  /* is terminating due to OOM killer */
 #define TIF_RESTORE_SIGMASK	20
@@ -165,11 +167,13 @@ extern int vfp_restore_user_hwstate(struct user_vfp 
__user *,
 #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
 #define _TIF_SYSCALL_TRACEPOINT	(1 << TIF_SYSCALL_TRACEPOINT)
 #define _TIF_SECCOMP   (1 << TIF_SECCOMP)
+#define _TIF_SYSCALL_EMU   (1 << TIF_SYSCALL_EMU)
 #define _TIF_USING_IWMMXT  (1 << TIF_USING_IWMMXT)
 
 /* Checks for any syscall work in entry-common.S */
 #define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
-  _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP)
+  _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
+  _TIF_SYSCALL_EMU)
 
 /*
  * Change these and you break ASM code in entry-common.S
diff --git a/arch/arm/include/uapi/asm/ptrace.h 
b/arch/arm/include/uapi/asm/ptrace.h
index 5af0ed1..2c5e4d7 100644
--- a/arch/arm/include/uapi/asm/ptrace.h
+++ b/arch/arm/include/uapi/asm/ptrace.h
@@ -12,25 +12,27 @@
 
 #include <asm/hwcap.h>
 
-#define PTRACE_GETREGS 12
-#define PTRACE_SETREGS 13
-#define PTRACE_GETFPREGS   14
-#define PTRACE_SETFPREGS   15
+#define PTRACE_GETREGS 12
+#define PTRACE_SETREGS 13
+#define PTRACE_GETFPREGS   14
+#define PTRACE_SETFPREGS   15
 /* PTRACE_ATTACH is 16 */
 /* PTRACE_DETACH is 17 */
-#define PTRACE_GETWMMXREGS 18
-#define PTRACE_SETWMMXREGS 19
+#define PTRACE_GETWMMXREGS 18
+#define PTRACE_SETWMMXREGS 19
 /* 20 is unused */
-#define PTRACE_OLDSETOPTIONS   21
-#define PTRACE_GET_THREAD_AREA 22
-#define PTRACE_SET_SYSCALL 23
+#define PTRACE_OLDSETOPTIONS   21
+#define PTRACE_GET_THREAD_AREA 22
+#define PTRACE_SET_SYSCALL 23
 /* PTRACE_SYSCALL is 24 */
-#define PTRACE_GETCRUNCHREGS   25
-#define PTRACE_SETCRUNCHREGS   26
-#define PTRACE_GETVFPREGS  27
-#define PTRACE_SETVFPREGS  28
-#define PTRACE_GETHBPREGS  29
-#define PTRACE_SETHBPREGS  30
+#define PTRACE_GETCRUNCHREGS   25
+#define PTRACE_SETCRUNCHREGS   26
+#define PTRACE_GETVFPREGS  27
+#define PTRACE_SETVFPREGS  28
+#define PTRACE_GETHBPREGS  29
+#define PTRACE_SETHBPREGS  30
+#define PTRACE_SYSEMU  31
+#define PTRACE_SYSEMU_SINGLESTEP   32
 
 /*
  * PSR bits
diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index ef9119f..c84058c 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -930,6 +930,8 @@ static void tracehook_report_syscall(struct pt_regs *regs,
 
 asmlinkage int syscall_trace_enter(struct pt_regs *regs, int scno)
 {
+   int ret = 0;
+
current_thread_info()->syscall = scno;
 
/* Do the secure computing check first; failures should be fast. */
@@ -941,7 +943,10 @@ asmlinkage int syscall_trace_enter(struct pt_regs *regs, 
int scno)
secure_computing_strict(scno);
 #endif
 
-   if (test_thread_flag(TIF_SYSCALL_TRACE))
+   if (test_thread_flag(TIF_SYSCALL_EMU))
+   ret = -1;
+
+   if (ret || test_thread_flag(TIF_SYSCALL_TRACE

Re: [RESEND PATCH v3] ARM: tegra124: pmu support

2015-07-18 Thread Kyle Huey
On Fri, Jul 17, 2015 at 4:59 PM, Thierry Reding
 wrote:
> On Mon, Jul 13, 2015 at 10:35:45AM -0700, Kyle Huey wrote:
>> This patch modifies the device tree for tegra124 based devices to enable
>> the Cortex A15 PMU.  The interrupt numbers are taken from NVIDIA TRM
>> DP-06905-001_v03p.  This patch was tested on a Jetson TK1.
>>
>> Updated for proper ordering and to add interrupt-affinity values.
>>
>> Signed-off-by: Kyle Huey 
>> ---
>>  arch/arm/boot/dts/tegra124.dtsi | 17 +
>>  1 file changed, 13 insertions(+), 4 deletions(-)
>
> Is there any way to test this? What are the effects of adding this?

Yes.  This enables the ARM PMU driver for the Cortex A15, which allows
one to use hardware performance counters via the perf_event_open API.
For a simple test program, see
https://github.com/khuey/perf-counter-test/.  Without this patch, the
perf_event_open syscall will fail.  With this patch, the program will
print out the performance counter value for each iteration of the
loop. (IIRC on the A15 the branch counter was removed, so you may want
to replace 0xD with 0x8 which counts instructions executed if you want
to see a non-zero number there).  You also will see a message about
the PMU in the kernel log at startup after applying this patch.

I have also tested this extensively (including the interrupt features
of the PMU) on a more complex program.

> Does it enable using perf for profiling?

I have not tested it, but I believe you can use perf without this
patch if you do not use features that require hardware performance
counter support.  This patch would enable those features.

>> diff --git a/arch/arm/boot/dts/tegra124.dtsi 
>> b/arch/arm/boot/dts/tegra124.dtsi
>> index 13cc7ca..de07d7e 100644
>> --- a/arch/arm/boot/dts/tegra124.dtsi
>> +++ b/arch/arm/boot/dts/tegra124.dtsi
>> @@ -918,31 +918,40 @@
>>   #address-cells = <1>;
>>   #size-cells = <0>;
>>
>> - cpu@0 {
>> + A15_0: cpu@0 {
>>   device_type = "cpu";
>>   compatible = "arm,cortex-a15";
>>   reg = <0>;
>>   };
>>
>> - cpu@1 {
>> + A15_1: cpu@1 {
>>   device_type = "cpu";
>>   compatible = "arm,cortex-a15";
>>   reg = <1>;
>>   };
>>
>> - cpu@2 {
>> + A15_2: cpu@2 {
>>   device_type = "cpu";
>>   compatible = "arm,cortex-a15";
>>   reg = <2>;
>>   };
>>
>> - cpu@3 {
>> + A15_3: cpu@3 {
>>   device_type = "cpu";
>>   compatible = "arm,cortex-a15";
>>   reg = <3>;
>>   };
>>   };
>>
>> + pmu {
>> + compatible = "arm,cortex-a15-pmu";
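>> + interrupts = <GIC_SPI 144 IRQ_TYPE_LEVEL_HIGH>,
>> +  <GIC_SPI 145 IRQ_TYPE_LEVEL_HIGH>,
>> +  <GIC_SPI 146 IRQ_TYPE_LEVEL_HIGH>,
>> +  <GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH>;
>> + interrupt-affinity = <&A15_0>, <&A15_1>, <&A15_2>, <&A15_3>;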
>
> These labels look somewhat artificial to me, perhaps we could do
> something like the following instead?
>
> interrupt-affinity = <&{/cpus/cpu@0}>, ...;
>
> That's slightly more obvious and avoids the need to "invent" labels for
> the CPUs.
>
> No need to respin, I can fix that up when applying if nobody objects to
> using the alternative notation.
>
> Thierry

I have no objections.  I was not aware that the device tree syntax
supported that.  FWIW I cargo-culted my way to victory from
vexpress-v2p-ca9.dts here.

- Kyle
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RESEND PATCH v3] ARM: tegra124: pmu support

2015-07-18 Thread Kyle Huey
On Fri, Jul 17, 2015 at 4:59 PM, Thierry Reding
thierry.red...@gmail.com wrote:
 On Mon, Jul 13, 2015 at 10:35:45AM -0700, Kyle Huey wrote:
 This patch modifies the device tree for tegra124 based devices to enable
 the Cortex A15 PMU.  The interrupt numbers are taken from NVIDIA TRM
 DP-06905-001_v03p.  This patch was tested on a Jetson TK1.

 Updated for proper ordering and to add interrupt-affinity values.

 Signed-off-by: Kyle Huey kh...@kylehuey.com
 ---
  arch/arm/boot/dts/tegra124.dtsi | 17 +
  1 file changed, 13 insertions(+), 4 deletions(-)

 Is there any way to test this? What are the effects of adding this?

Yes.  This enables the ARM PMU driver for the Cortex A15, which allows
one to use hardware performance counters via the perf_event_open API.
For a simple test program, see
https://github.com/khuey/perf-counter-test/.  Without this patch, the
perf_event_open syscall will fail.  With this patch, the program will
print out the performance counter value for each iteration of the
loop. (IIRC on the A15 the branch counter was removed, so you may want
to replace 0xD with 0x8 which counts instructions executed if you want
to see a non-zero number there).  You also will see a message about
the PMU in the kernel log at startup after applying this patch.

I have also tested this extensively (including the interrupt features
of the PMU) on a more complex program.

 Does it enable using perf for profiling?

I have not tested it, but I believe you can use perf without this
patch if you do not use features that require hardware performance
counter support.  This patch would enable those features.

 diff --git a/arch/arm/boot/dts/tegra124.dtsi 
 b/arch/arm/boot/dts/tegra124.dtsi
 index 13cc7ca..de07d7e 100644
 --- a/arch/arm/boot/dts/tegra124.dtsi
 +++ b/arch/arm/boot/dts/tegra124.dtsi
 @@ -918,31 +918,40 @@
   #address-cells = <1>;
   #size-cells = <0>;

 - cpu@0 {
 + A15_0: cpu@0 {
   device_type = "cpu";
   compatible = "arm,cortex-a15";
   reg = <0>;
   };

 - cpu@1 {
 + A15_1: cpu@1 {
   device_type = "cpu";
   compatible = "arm,cortex-a15";
   reg = <1>;
   };

 - cpu@2 {
 + A15_2: cpu@2 {
   device_type = "cpu";
   compatible = "arm,cortex-a15";
   reg = <2>;
   };

 - cpu@3 {
 + A15_3: cpu@3 {
   device_type = "cpu";
   compatible = "arm,cortex-a15";
   reg = <3>;
   };
   };

 + pmu {
 + compatible = "arm,cortex-a15-pmu";
 + interrupts = <GIC_SPI 144 IRQ_TYPE_LEVEL_HIGH>,
 +  <GIC_SPI 145 IRQ_TYPE_LEVEL_HIGH>,
 +  <GIC_SPI 146 IRQ_TYPE_LEVEL_HIGH>,
 +  <GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH>;
 + interrupt-affinity = <&A15_0>, <&A15_1>, <&A15_2>, <&A15_3>;

 These labels look somewhat artificial to me, perhaps we could do
 something like the following instead?

 interrupt-affinity = <&{/cpus/cpu@0}>, ...;

 That's slightly more obvious and avoids the need to invent labels for
 the CPUs.

 No need to respin, I can fix that up when applying if nobody objects to
 using the alternative notation.

 Thierry

I have no objections.  I was not aware that the device tree syntax
supported that.  FWIW I cargo-culted my way to victory from
vexpress-v2p-ca9.dts here.

- Kyle
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[RESEND PATCH v3] ARM: tegra124: pmu support

2015-07-13 Thread Kyle Huey
This patch modifies the device tree for tegra124 based devices to enable
the Cortex A15 PMU.  The interrupt numbers are taken from NVIDIA TRM
DP-06905-001_v03p.  This patch was tested on a Jetson TK1.

Updated for proper ordering and to add interrupt-affinity values.

Signed-off-by: Kyle Huey 
---
 arch/arm/boot/dts/tegra124.dtsi | 17 +
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/arch/arm/boot/dts/tegra124.dtsi b/arch/arm/boot/dts/tegra124.dtsi
index 13cc7ca..de07d7e 100644
--- a/arch/arm/boot/dts/tegra124.dtsi
+++ b/arch/arm/boot/dts/tegra124.dtsi
@@ -918,31 +918,40 @@
#address-cells = <1>;
#size-cells = <0>;
 
-   cpu@0 {
+   A15_0: cpu@0 {
device_type = "cpu";
compatible = "arm,cortex-a15";
reg = <0>;
};
 
-   cpu@1 {
+   A15_1: cpu@1 {
device_type = "cpu";
compatible = "arm,cortex-a15";
reg = <1>;
};
 
-   cpu@2 {
+   A15_2: cpu@2 {
device_type = "cpu";
compatible = "arm,cortex-a15";
reg = <2>;
};
 
-   cpu@3 {
+   A15_3: cpu@3 {
device_type = "cpu";
compatible = "arm,cortex-a15";
reg = <3>;
};
};
 
+   pmu {
+   compatible = "arm,cortex-a15-pmu";
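+   interrupts = <GIC_SPI 144 IRQ_TYPE_LEVEL_HIGH>,
+<GIC_SPI 145 IRQ_TYPE_LEVEL_HIGH>,
+<GIC_SPI 146 IRQ_TYPE_LEVEL_HIGH>,
+<GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH>;
+   interrupt-affinity = <&A15_0>, <&A15_1>, <&A15_2>, <&A15_3>;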
+   };
+
thermal-zones {
cpu {
polling-delay-passive = <1000>;
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[RESEND PATCH v3] ARM: tegra124: pmu support

2015-07-13 Thread Kyle Huey
This patch modifies the device tree for tegra124 based devices to enable
the Cortex A15 PMU.  The interrupt numbers are taken from NVIDIA TRM
DP-06905-001_v03p.  This patch was tested on a Jetson TK1.

Updated for proper ordering and to add interrupt-affinity values.

Signed-off-by: Kyle Huey kh...@kylehuey.com
---
 arch/arm/boot/dts/tegra124.dtsi | 17 +
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/arch/arm/boot/dts/tegra124.dtsi b/arch/arm/boot/dts/tegra124.dtsi
index 13cc7ca..de07d7e 100644
--- a/arch/arm/boot/dts/tegra124.dtsi
+++ b/arch/arm/boot/dts/tegra124.dtsi
@@ -918,31 +918,40 @@
#address-cells = <1>;
#size-cells = <0>;
 
-   cpu@0 {
+   A15_0: cpu@0 {
device_type = "cpu";
compatible = "arm,cortex-a15";
reg = <0>;
};
 
-   cpu@1 {
+   A15_1: cpu@1 {
device_type = "cpu";
compatible = "arm,cortex-a15";
reg = <1>;
};
 
-   cpu@2 {
+   A15_2: cpu@2 {
device_type = "cpu";
compatible = "arm,cortex-a15";
reg = <2>;
};
 
-   cpu@3 {
+   A15_3: cpu@3 {
device_type = "cpu";
compatible = "arm,cortex-a15";
reg = <3>;
};
};
 
+   pmu {
+   compatible = "arm,cortex-a15-pmu";
+   interrupts = <GIC_SPI 144 IRQ_TYPE_LEVEL_HIGH>,
+<GIC_SPI 145 IRQ_TYPE_LEVEL_HIGH>,
+<GIC_SPI 146 IRQ_TYPE_LEVEL_HIGH>,
+<GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH>;
+   interrupt-affinity = <&A15_0>, <&A15_1>, <&A15_2>, <&A15_3>;
+   };
+
thermal-zones {
cpu {
polling-delay-passive = <1000>;
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v3] ARM: tegra124: pmu support

2015-06-16 Thread Kyle Huey
This patch modifies the device tree for tegra124 based devices to enable the 
Cortex A15 PMU.  The interrupt numbers are taken from NVIDIA TRM 
DP-06905-001_v03p.  This patch was tested on a Jetson TK1.

Updated for proper ordering and to add interrupt-affinity values.

Signed-off-by: Kyle Huey 
---
 arch/arm/boot/dts/tegra124.dtsi | 17 +
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/arch/arm/boot/dts/tegra124.dtsi b/arch/arm/boot/dts/tegra124.dtsi
index 13cc7ca..de07d7e 100644
--- a/arch/arm/boot/dts/tegra124.dtsi
+++ b/arch/arm/boot/dts/tegra124.dtsi
@@ -913,41 +913,50 @@
nvidia,xcvr-hsslew = <12>;
status = "disabled";
};
 
cpus {
#address-cells = <1>;
#size-cells = <0>;
 
-   cpu@0 {
+   A15_0: cpu@0 {
device_type = "cpu";
compatible = "arm,cortex-a15";
reg = <0>;
};
 
-   cpu@1 {
+   A15_1: cpu@1 {
device_type = "cpu";
compatible = "arm,cortex-a15";
reg = <1>;
};
 
-   cpu@2 {
+   A15_2: cpu@2 {
device_type = "cpu";
compatible = "arm,cortex-a15";
reg = <2>;
};
 
-   cpu@3 {
+   A15_3: cpu@3 {
device_type = "cpu";
compatible = "arm,cortex-a15";
reg = <3>;
};
};
 
+   pmu {
+   compatible = "arm,cortex-a15-pmu";
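+   interrupts = <GIC_SPI 144 IRQ_TYPE_LEVEL_HIGH>,
+<GIC_SPI 145 IRQ_TYPE_LEVEL_HIGH>,
+<GIC_SPI 146 IRQ_TYPE_LEVEL_HIGH>,
+<GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH>;
+   interrupt-affinity = <&A15_0>, <&A15_1>, <&A15_2>, <&A15_3>;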
+   };
+
thermal-zones {
cpu {
polling-delay-passive = <1000>;
polling-delay = <1000>;
 
thermal-sensors =
<&soctherm TEGRA124_SOCTHERM_SENSOR_CPU>;
};
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v3] ARM: tegra124: pmu support

2015-06-16 Thread Kyle Huey
This patch modifies the device tree for tegra124 based devices to enable the 
Cortex A15 PMU.  The interrupt numbers are taken from NVIDIA TRM 
DP-06905-001_v03p.  This patch was tested on a Jetson TK1.

Updated for proper ordering and to add interrupt-affinity values.

Signed-off-by: Kyle Huey kh...@kylehuey.com
---
 arch/arm/boot/dts/tegra124.dtsi | 17 +
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/arch/arm/boot/dts/tegra124.dtsi b/arch/arm/boot/dts/tegra124.dtsi
index 13cc7ca..de07d7e 100644
--- a/arch/arm/boot/dts/tegra124.dtsi
+++ b/arch/arm/boot/dts/tegra124.dtsi
@@ -913,41 +913,50 @@
nvidia,xcvr-hsslew = <12>;
status = "disabled";
};
 
cpus {
#address-cells = <1>;
#size-cells = <0>;
 
-   cpu@0 {
+   A15_0: cpu@0 {
device_type = "cpu";
compatible = "arm,cortex-a15";
reg = <0>;
};
 
-   cpu@1 {
+   A15_1: cpu@1 {
device_type = "cpu";
compatible = "arm,cortex-a15";
reg = <1>;
};
 
-   cpu@2 {
+   A15_2: cpu@2 {
device_type = "cpu";
compatible = "arm,cortex-a15";
reg = <2>;
};
 
-   cpu@3 {
+   A15_3: cpu@3 {
device_type = "cpu";
compatible = "arm,cortex-a15";
reg = <3>;
};
};
 
+   pmu {
+   compatible = "arm,cortex-a15-pmu";
+   interrupts = <GIC_SPI 144 IRQ_TYPE_LEVEL_HIGH>,
+<GIC_SPI 145 IRQ_TYPE_LEVEL_HIGH>,
+<GIC_SPI 146 IRQ_TYPE_LEVEL_HIGH>,
+<GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH>;
+   interrupt-affinity = <&A15_0>, <&A15_1>, <&A15_2>, <&A15_3>;
+   };
+
thermal-zones {
cpu {
polling-delay-passive = <1000>;
polling-delay = <1000>;
 
thermal-sensors =
<&soctherm TEGRA124_SOCTHERM_SENSOR_CPU>;
};
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v2] ARM: tegra124: pmu support

2015-06-15 Thread Kyle Huey
This patch modifies the device tree for tegra124 based devices to enable the 
Cortex A15 PMU.  The interrupt numbers are taken from NVIDIA TRM 
DP-06905-001_v03p.  This patch was tested on a Jetson TK1.

Signed-off-by: Kyle Huey 
---
 arch/arm/boot/dts/tegra124.dtsi | 8 
 1 file changed, 8 insertions(+)

diff --git a/arch/arm/boot/dts/tegra124.dtsi b/arch/arm/boot/dts/tegra124.dtsi
index 4be06c6..d966d4e 100644
--- a/arch/arm/boot/dts/tegra124.dtsi
+++ b/arch/arm/boot/dts/tegra124.dtsi
@@ -906,16 +906,24 @@
 
cpu@3 {
device_type = "cpu";
compatible = "arm,cortex-a15";
reg = <3>;
};
};
 
+   pmu {
+   compatible = "arm,cortex-a15-pmu";
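+   interrupts = <GIC_SPI 144 IRQ_TYPE_LEVEL_HIGH>,
+<GIC_SPI 145 IRQ_TYPE_LEVEL_HIGH>,
+<GIC_SPI 146 IRQ_TYPE_LEVEL_HIGH>,
+<GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH>;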
+   };
+
thermal-zones {
cpu {
polling-delay-passive = <1000>;
polling-delay = <1000>;
 
thermal-sensors =
<&soctherm TEGRA124_SOCTHERM_SENSOR_CPU>;
};
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v2] ARM: tegra124: pmu support

2015-06-15 Thread Kyle Huey
This patch modifies the device tree for tegra124 based devices to enable the 
Cortex A15 PMU.  The interrupt numbers are taken from NVIDIA TRM 
DP-06905-001_v03p.  This patch was tested on a Jetson TK1.

Signed-off-by: Kyle Huey kh...@kylehuey.com
---
 arch/arm/boot/dts/tegra124.dtsi | 8 
 1 file changed, 8 insertions(+)

diff --git a/arch/arm/boot/dts/tegra124.dtsi b/arch/arm/boot/dts/tegra124.dtsi
index 4be06c6..d966d4e 100644
--- a/arch/arm/boot/dts/tegra124.dtsi
+++ b/arch/arm/boot/dts/tegra124.dtsi
@@ -906,16 +906,24 @@
 
cpu@3 {
device_type = "cpu";
compatible = "arm,cortex-a15";
reg = <3>;
};
};
 
+   pmu {
+   compatible = "arm,cortex-a15-pmu";
+   interrupts = <GIC_SPI 144 IRQ_TYPE_LEVEL_HIGH>,
+<GIC_SPI 145 IRQ_TYPE_LEVEL_HIGH>,
+<GIC_SPI 146 IRQ_TYPE_LEVEL_HIGH>,
+<GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH>;
+   };
+
thermal-zones {
cpu {
polling-delay-passive = <1000>;
polling-delay = <1000>;
 
thermal-sensors =
<&soctherm TEGRA124_SOCTHERM_SENSOR_CPU>;
};
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


<    1   2   3   4   5