[PATCH v9 04/11] seccomp: add "seccomp" syscall

2014-06-27 Thread Kees Cook
This adds the new "seccomp" syscall with both an "operation" and "flags"
parameter for future expansion. The third argument is a pointer value,
used with the SECCOMP_SET_MODE_FILTER operation. Currently, flags must
be 0. This is functionally equivalent to prctl(PR_SET_SECCOMP, ...).

In addition to the TSYNC flag later in this patch series, there is a
non-zero chance that this syscall could be used for configuring a fixed
argument area for seccomp-tracer-aware processes to pass syscall arguments
in the future. Hence, the use of "seccomp" not simply "seccomp_add_filter"
for this syscall. Additionally, this syscall uses operation, flags,
and user pointer for arguments because strictly passing arguments via
a user pointer would mean seccomp itself would be unable to trivially
filter the seccomp syscall itself.

Signed-off-by: Kees Cook 
---
 arch/Kconfig  |1 +
 arch/x86/syscalls/syscall_32.tbl  |1 +
 arch/x86/syscalls/syscall_64.tbl  |1 +
 include/linux/syscalls.h  |2 ++
 include/uapi/asm-generic/unistd.h |4 ++-
 include/uapi/linux/seccomp.h  |4 +++
 kernel/seccomp.c  |   55 +
 kernel/sys_ni.c   |3 ++
 8 files changed, 65 insertions(+), 6 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 97ff872c7acc..0eae9df35b88 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -321,6 +321,7 @@ config HAVE_ARCH_SECCOMP_FILTER
  - secure_computing is called from a ptrace_event()-safe context
  - secure_computing return value is checked and a return value of -1
results in the system call being skipped immediately.
+ - seccomp syscall wired up
 
 config SECCOMP_FILTER
def_bool y
diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
index d6b867921612..7527eac24122 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -360,3 +360,4 @@
 351i386sched_setattr   sys_sched_setattr
 352i386sched_getattr   sys_sched_getattr
 353i386renameat2   sys_renameat2
+354i386seccomp sys_seccomp
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
index ec255a1646d2..16272a6c12b7 100644
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -323,6 +323,7 @@
 314common  sched_setattr   sys_sched_setattr
 315common  sched_getattr   sys_sched_getattr
 316common  renameat2   sys_renameat2
+317common  seccomp sys_seccomp
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index b0881a0ed322..1713977ee26f 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -866,4 +866,6 @@ asmlinkage long sys_process_vm_writev(pid_t pid,
 asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type,
 unsigned long idx1, unsigned long idx2);
 asmlinkage long sys_finit_module(int fd, const char __user *uargs, int flags);
+asmlinkage long sys_seccomp(unsigned int op, unsigned int flags,
+   const char __user *uargs);
 #endif
diff --git a/include/uapi/asm-generic/unistd.h 
b/include/uapi/asm-generic/unistd.h
index 333640608087..65acbf0e2867 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -699,9 +699,11 @@ __SYSCALL(__NR_sched_setattr, sys_sched_setattr)
 __SYSCALL(__NR_sched_getattr, sys_sched_getattr)
 #define __NR_renameat2 276
 __SYSCALL(__NR_renameat2, sys_renameat2)
+#define __NR_seccomp 277
+__SYSCALL(__NR_seccomp, sys_seccomp)
 
 #undef __NR_syscalls
-#define __NR_syscalls 277
+#define __NR_syscalls 278
 
 /*
  * All syscalls below here should go away really,
diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
index ac2dc9f72973..b258878ba754 100644
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -10,6 +10,10 @@
 #define SECCOMP_MODE_STRICT1 /* uses hard-coded filter. */
 #define SECCOMP_MODE_FILTER2 /* uses user-supplied filter. */
 
+/* Valid operations for seccomp syscall. */
+#define SECCOMP_SET_MODE_STRICT0
+#define SECCOMP_SET_MODE_FILTER1
+
 /*
  * All BPF programs must return a 32-bit value.
  * The bottom 16-bits are for optional return data.
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 812cea2e7ffb..2f83496d6016 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -18,6 +18,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /* #define SECCOMP_DEBUG 1 */
 
@@ -314,7 +315,7 @@ free_prog:
  *
  * Returns 0 on success and non-zero otherwise.
  */
-static long seccomp_attach_user_filter(char __user *user_filter)
+static long seccomp_attach_user_filter(const char __user *user_filter)
 {
struct sock_fprog fprog;
long ret = -EFAULT;
@@ -517,6 

[PATCH v9 04/11] seccomp: add seccomp syscall

2014-06-27 Thread Kees Cook
This adds the new seccomp syscall with both an operation and flags
parameter for future expansion. The third argument is a pointer value,
used with the SECCOMP_SET_MODE_FILTER operation. Currently, flags must
be 0. This is functionally equivalent to prctl(PR_SET_SECCOMP, ...).

In addition to the TSYNC flag later in this patch series, there is a
non-zero chance that this syscall could be used for configuring a fixed
argument area for seccomp-tracer-aware processes to pass syscall arguments
in the future. Hence, the use of seccomp not simply seccomp_add_filter
for this syscall. Additionally, this syscall uses operation, flags,
and user pointer for arguments because strictly passing arguments via
a user pointer would mean seccomp itself would be unable to trivially
filter the seccomp syscall itself.

Signed-off-by: Kees Cook keesc...@chromium.org
---
 arch/Kconfig  |1 +
 arch/x86/syscalls/syscall_32.tbl  |1 +
 arch/x86/syscalls/syscall_64.tbl  |1 +
 include/linux/syscalls.h  |2 ++
 include/uapi/asm-generic/unistd.h |4 ++-
 include/uapi/linux/seccomp.h  |4 +++
 kernel/seccomp.c  |   55 +
 kernel/sys_ni.c   |3 ++
 8 files changed, 65 insertions(+), 6 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 97ff872c7acc..0eae9df35b88 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -321,6 +321,7 @@ config HAVE_ARCH_SECCOMP_FILTER
  - secure_computing is called from a ptrace_event()-safe context
  - secure_computing return value is checked and a return value of -1
results in the system call being skipped immediately.
+ - seccomp syscall wired up
 
 config SECCOMP_FILTER
def_bool y
diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
index d6b867921612..7527eac24122 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -360,3 +360,4 @@
 351i386sched_setattr   sys_sched_setattr
 352i386sched_getattr   sys_sched_getattr
 353i386renameat2   sys_renameat2
+354i386seccomp sys_seccomp
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
index ec255a1646d2..16272a6c12b7 100644
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -323,6 +323,7 @@
 314common  sched_setattr   sys_sched_setattr
 315common  sched_getattr   sys_sched_getattr
 316common  renameat2   sys_renameat2
+317common  seccomp sys_seccomp
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index b0881a0ed322..1713977ee26f 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -866,4 +866,6 @@ asmlinkage long sys_process_vm_writev(pid_t pid,
 asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type,
 unsigned long idx1, unsigned long idx2);
 asmlinkage long sys_finit_module(int fd, const char __user *uargs, int flags);
+asmlinkage long sys_seccomp(unsigned int op, unsigned int flags,
+   const char __user *uargs);
 #endif
diff --git a/include/uapi/asm-generic/unistd.h 
b/include/uapi/asm-generic/unistd.h
index 333640608087..65acbf0e2867 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -699,9 +699,11 @@ __SYSCALL(__NR_sched_setattr, sys_sched_setattr)
 __SYSCALL(__NR_sched_getattr, sys_sched_getattr)
 #define __NR_renameat2 276
 __SYSCALL(__NR_renameat2, sys_renameat2)
+#define __NR_seccomp 277
+__SYSCALL(__NR_seccomp, sys_seccomp)
 
 #undef __NR_syscalls
-#define __NR_syscalls 277
+#define __NR_syscalls 278
 
 /*
  * All syscalls below here should go away really,
diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
index ac2dc9f72973..b258878ba754 100644
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -10,6 +10,10 @@
 #define SECCOMP_MODE_STRICT1 /* uses hard-coded filter. */
 #define SECCOMP_MODE_FILTER2 /* uses user-supplied filter. */
 
+/* Valid operations for seccomp syscall. */
+#define SECCOMP_SET_MODE_STRICT0
+#define SECCOMP_SET_MODE_FILTER1
+
 /*
  * All BPF programs must return a 32-bit value.
  * The bottom 16-bits are for optional return data.
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 812cea2e7ffb..2f83496d6016 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -18,6 +18,7 @@
 #include linux/compat.h
 #include linux/sched.h
 #include linux/seccomp.h
+#include linux/syscalls.h
 
 /* #define SECCOMP_DEBUG 1 */
 
@@ -314,7 +315,7 @@ free_prog:
  *
  * Returns 0 on success and non-zero otherwise.
  */
-static long seccomp_attach_user_filter(char __user *user_filter)
+static long seccomp_attach_user_filter(const char __user *user_filter)
 {