populate_seccomp_data is expensive: it works by inspecting
task_pt_regs and various other bits to piece together all the
information, and it's does so in multiple partially redundant steps.

Arch-specific code in the syscall entry path can do much better.

Admittedly this adds a bit of additional room for error, but the
speedup should be worth it.

Signed-off-by: Andy Lutomirski <l...@amacapital.net>
---
 include/linux/seccomp.h |  2 +-
 kernel/seccomp.c        | 32 +++++++++++++++++++-------------
 2 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
index 3885108..a19ddac 100644
--- a/include/linux/seccomp.h
+++ b/include/linux/seccomp.h
@@ -39,7 +39,7 @@ static inline int secure_computing(void)
 #define SECCOMP_PHASE1_OK      0
 #define SECCOMP_PHASE1_SKIP    1
 
-extern u32 seccomp_phase1(void);
+extern u32 seccomp_phase1(struct seccomp_data *sd);
 int seccomp_phase2(u32 phase1_result);
 #else
 extern void secure_computing_strict(int this_syscall);
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 0088d29..80115b0 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -173,10 +173,10 @@ static int seccomp_check_filter(struct sock_filter 
*filter, unsigned int flen)
  *
  * Returns valid seccomp BPF response codes.
  */
-static u32 seccomp_run_filters(void)
+static u32 seccomp_run_filters(struct seccomp_data *sd)
 {
        struct seccomp_filter *f = ACCESS_ONCE(current->seccomp.filter);
-       struct seccomp_data sd;
+       struct seccomp_data sd_local;
        u32 ret = SECCOMP_RET_ALLOW;
 
        /* Ensure unexpected behavior doesn't result in failing open. */
@@ -186,14 +186,17 @@ static u32 seccomp_run_filters(void)
        /* Make sure cross-thread synced filter points somewhere sane. */
        smp_read_barrier_depends();
 
-       populate_seccomp_data(&sd);
+       if (!sd) {
+               populate_seccomp_data(&sd_local);
+               sd = &sd_local;
+       }
 
        /*
         * All filters in the list are evaluated and the lowest BPF return
         * value always takes priority (ignoring the DATA).
         */
        for (; f; f = f->prev) {
-               u32 cur_ret = SK_RUN_FILTER(f->prog, (void *)&sd);
+               u32 cur_ret = SK_RUN_FILTER(f->prog, (void *)sd);
 
                if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION))
                        ret = cur_ret;
@@ -599,7 +602,7 @@ void secure_computing_strict(int this_syscall)
 #else
 int __secure_computing(void)
 {
-       u32 phase1_result = seccomp_phase1();
+       u32 phase1_result = seccomp_phase1(NULL);
 
        if (likely(phase1_result == SECCOMP_PHASE1_OK))
                return 0;
@@ -610,7 +613,7 @@ int __secure_computing(void)
 }
 
 #ifdef CONFIG_SECCOMP_FILTER
-static u32 __seccomp_phase1_filter(int this_syscall, struct pt_regs *regs)
+static u32 __seccomp_phase1_filter(int this_syscall, struct seccomp_data *sd)
 {
        u32 filter_ret, action;
        int data;
@@ -621,20 +624,20 @@ static u32 __seccomp_phase1_filter(int this_syscall, 
struct pt_regs *regs)
         */
        rmb();
 
-       filter_ret = seccomp_run_filters();
+       filter_ret = seccomp_run_filters(sd);
        data = filter_ret & SECCOMP_RET_DATA;
        action = filter_ret & SECCOMP_RET_ACTION;
 
        switch (action) {
        case SECCOMP_RET_ERRNO:
                /* Set the low-order 16-bits as a errno. */
-               syscall_set_return_value(current, regs,
+               syscall_set_return_value(current, task_pt_regs(current),
                                         -data, 0);
                goto skip;
 
        case SECCOMP_RET_TRAP:
                /* Show the handler the original registers. */
-               syscall_rollback(current, regs);
+               syscall_rollback(current, task_pt_regs(current));
                /* Let the filter pass back 16 bits of data. */
                seccomp_send_sigsys(this_syscall, data);
                goto skip;
@@ -661,11 +664,14 @@ skip:
 
 /**
  * seccomp_phase1() - run fast path seccomp checks on the current syscall
+ * @arg sd: The seccomp_data or NULL
  *
  * This only reads pt_regs via the syscall_xyz helpers.  The only change
  * it will make to pt_regs is via syscall_set_return_value, and it will
  * only do that if it returns SECCOMP_PHASE1_SKIP.
  *
+ * If sd is provided, it will not read pt_regs at all.
+ *
  * It may also call do_exit or force a signal; these actions must be
  * safe.
  *
@@ -679,11 +685,11 @@ skip:
  * If it returns anything else, then the return value should be passed
  * to seccomp_phase2 from a context in which ptrace hooks are safe.
  */
-u32 seccomp_phase1(void)
+u32 seccomp_phase1(struct seccomp_data *sd)
 {
        int mode = current->seccomp.mode;
-       struct pt_regs *regs = task_pt_regs(current);
-       int this_syscall = syscall_get_nr(current, regs);
+       int this_syscall = sd ? sd->nr :
+               syscall_get_nr(current, task_pt_regs(current));
 
        switch (mode) {
        case SECCOMP_MODE_STRICT:
@@ -691,7 +697,7 @@ u32 seccomp_phase1(void)
                return SECCOMP_PHASE1_OK;
 #ifdef CONFIG_SECCOMP_FILTER
        case SECCOMP_MODE_FILTER:
-               return __seccomp_phase1_filter(this_syscall, regs);
+               return __seccomp_phase1_filter(this_syscall, sd);
 #endif
        default:
                BUG();
-- 
1.9.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to