@@ -21,10 +21,11 @@
 #include <linux/slab.h>
 #include <linux/syscalls.h>
 
-/* #define SECCOMP_DEBUG 1 */
+#ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER
+#include <asm/syscall.h>
+#endif
 
 #ifdef CONFIG_SECCOMP_FILTER
-#include <asm/syscall.h>
 #include <linux/filter.h>
 #include <linux/pid.h>
 #include <linux/ptrace.h>
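The asm/syscall.h include moves out of the CONFIG_SECCOMP_FILTER block because the phase-1/phase-2 entry points added below call the syscall_*() helpers on any architecture that selects HAVE_ARCH_SECCOMP_FILTER, even when CONFIG_SECCOMP_FILTER itself is off. For reference, the generic helpers this code relies on are declared in asm-generic/syscall.h along these lines (signatures abridged from that header's documentation):

	int syscall_get_nr(struct task_struct *task, struct pt_regs *regs);
	void syscall_rollback(struct task_struct *task, struct pt_regs *regs);
	void syscall_set_return_value(struct task_struct *task, struct pt_regs *regs,
				      int error, long val);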
@@ -172,10 +173,10 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen)
  *
  * Returns valid seccomp BPF response codes.
  */
-static u32 seccomp_run_filters(int syscall)
+static u32 seccomp_run_filters(struct seccomp_data *sd)
 {
 	struct seccomp_filter *f = ACCESS_ONCE(current->seccomp.filter);
-	struct seccomp_data sd;
+	struct seccomp_data sd_local;
 	u32 ret = SECCOMP_RET_ALLOW;
 
 	/* Ensure unexpected behavior doesn't result in failing open. */
@@ -185,14 +186,17 @@
 	/* Make sure cross-thread synced filter points somewhere sane. */
 	smp_read_barrier_depends();
 
-	populate_seccomp_data(&sd);
+	if (!sd) {
+		populate_seccomp_data(&sd_local);
+		sd = &sd_local;
+	}
 
 	/*
 	 * All filters in the list are evaluated and the lowest BPF return
 	 * value always takes priority (ignoring the DATA).
 	 */
 	for (; f; f = f->prev) {
-		u32 cur_ret = BPF_PROG_RUN(f->prog, (void *)&sd);
+		u32 cur_ret = BPF_PROG_RUN(f->prog, (void *)sd);
 
 		if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION))
 			ret = cur_ret;
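seccomp_run_filters() now takes an optional caller-supplied seccomp_data instead of always decoding the registers itself; passing NULL preserves the old behavior. A minimal caller-side sketch of the two patterns (illustrative only; both functions are static within kernel/seccomp.c):

	struct seccomp_data sd;
	u32 ret;

	populate_seccomp_data(&sd);		/* decode the registers once... */
	ret = seccomp_run_filters(&sd);		/* ...and let every filter reuse them */

	ret = seccomp_run_filters(NULL);	/* or: decode on demand, as before */

Note that the lowest action value wins across stacked filters: since SECCOMP_RET_KILL < SECCOMP_RET_TRAP < SECCOMP_RET_ERRNO < SECCOMP_RET_TRACE < SECCOMP_RET_ALLOW in the numeric encoding, the most restrictive filter in the chain decides.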
@@ -563,11 +567,55 @@ static int mode1_syscalls_32[] = {
 };
 #endif
 
-int __secure_computing(int this_syscall)
+static void __secure_computing_strict(int this_syscall)
+{
+	int *syscall_whitelist = mode1_syscalls;
+#ifdef CONFIG_COMPAT
+	if (is_compat_task())
+		syscall_whitelist = mode1_syscalls_32;
+#endif
+	do {
+		if (*syscall_whitelist == this_syscall)
+			return;
+	} while (*++syscall_whitelist);
+
+#ifdef SECCOMP_DEBUG
+	dump_stack();
+#endif
+	audit_seccomp(this_syscall, SIGKILL, SECCOMP_RET_KILL);
+	do_exit(SIGKILL);
+}
+
+#ifndef CONFIG_HAVE_ARCH_SECCOMP_FILTER
+void secure_computing_strict(int this_syscall)
+{
+	int mode = current->seccomp.mode;
+
+	if (mode == 0)
+		return;
+	else if (mode == SECCOMP_MODE_STRICT)
+		__secure_computing_strict(this_syscall);
+	else
+		BUG();
+}
+#else
+int __secure_computing(void)
 {
-	int exit_sig = 0;
-	int *syscall;
-	u32 ret;
+	u32 phase1_result = seccomp_phase1(NULL);
+
+	if (likely(phase1_result == SECCOMP_PHASE1_OK))
+		return 0;
+	else if (likely(phase1_result == SECCOMP_PHASE1_SKIP))
+		return -1;
+	else
+		return seccomp_phase2(phase1_result);
+}
+
+#ifdef CONFIG_SECCOMP_FILTER
+static u32 __seccomp_phase1_filter(int this_syscall, struct seccomp_data *sd)
+{
+	u32 filter_ret, action;
+	int data;
 
 	/*
 	 * Make sure that any changes to mode from another thread have
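__secure_computing() is now a thin wrapper: it returns 0 when the syscall should run and -1 when it should be skipped, deferring any SECCOMP_RET_TRACE work to seccomp_phase2(). A hedged sketch of an arch entry-path call site (illustrative only; the real call sites live in architecture code, not in this patch, and skip_syscall is a placeholder label):

	if (test_thread_flag(TIF_SECCOMP)) {
		if (__secure_computing())
			goto skip_syscall;	/* return value already set by seccomp */
	}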
@@ -575,85 +623,127 @@ int __secure_computing(int this_syscall)
 	 */
 	rmb();
 
-	switch (current->seccomp.mode) {
-	case SECCOMP_MODE_STRICT:
-		syscall = mode1_syscalls;
-#ifdef CONFIG_COMPAT
-		if (is_compat_task())
-			syscall = mode1_syscalls_32;
+	filter_ret = seccomp_run_filters(sd);
+	data = filter_ret & SECCOMP_RET_DATA;
+	action = filter_ret & SECCOMP_RET_ACTION;
+
+	switch (action) {
+	case SECCOMP_RET_ERRNO:
+		/* Set the low-order 16 bits as an errno. */
+		syscall_set_return_value(current, task_pt_regs(current),
+					 -data, 0);
+		goto skip;
+
+	case SECCOMP_RET_TRAP:
+		/* Show the handler the original registers. */
+		syscall_rollback(current, task_pt_regs(current));
+		/* Let the filter pass back 16 bits of data. */
+		seccomp_send_sigsys(this_syscall, data);
+		goto skip;
+
+	case SECCOMP_RET_TRACE:
+		return filter_ret; /* Save the rest for phase 2. */
+
+	case SECCOMP_RET_ALLOW:
+		return SECCOMP_PHASE1_OK;
+
+	case SECCOMP_RET_KILL:
+	default:
+		audit_seccomp(this_syscall, SIGSYS, action);
+		do_exit(SIGSYS);
+	}
+
+	unreachable();
+
+skip:
+	audit_seccomp(this_syscall, 0, action);
+	return SECCOMP_PHASE1_SKIP;
+}
 #endif
-	do {
-		if (*syscall == this_syscall)
-			return 0;
-	} while (*++syscall);
-	exit_sig = SIGKILL;
-	ret = SECCOMP_RET_KILL;
-	break;
+
+/**
+ * seccomp_phase1() - run fast path seccomp checks on the current syscall
+ * @sd: The seccomp_data or NULL
+ *
+ * This only reads pt_regs via the syscall_xyz helpers. The only change
+ * it will make to pt_regs is via syscall_set_return_value, and it will
+ * only do that if it returns SECCOMP_PHASE1_SKIP.
+ *
+ * If sd is provided, it will not read pt_regs at all.
+ *
+ * It may also call do_exit or force a signal; these actions must be
+ * safe.
+ *
+ * If it returns SECCOMP_PHASE1_OK, the syscall passes checks and should
+ * be processed normally.
+ *
+ * If it returns SECCOMP_PHASE1_SKIP, then the syscall should not be
+ * invoked. In this case, seccomp_phase1 will have set the return value
+ * using syscall_set_return_value.
+ *
+ * If it returns anything else, then the return value should be passed
+ * to seccomp_phase2 from a context in which ptrace hooks are safe.
+ */
+u32 seccomp_phase1(struct seccomp_data *sd)
+{
+	int mode = current->seccomp.mode;
+	int this_syscall = sd ? sd->nr :
+		syscall_get_nr(current, task_pt_regs(current));
+
+	switch (mode) {
+	case SECCOMP_MODE_STRICT:
+		__secure_computing_strict(this_syscall); /* may call do_exit */
+		return SECCOMP_PHASE1_OK;
 #ifdef CONFIG_SECCOMP_FILTER
-	case SECCOMP_MODE_FILTER: {
-		int data;
-		struct pt_regs *regs = task_pt_regs(current);
-		ret = seccomp_run_filters(this_syscall);
-		data = ret & SECCOMP_RET_DATA;
-		ret &= SECCOMP_RET_ACTION;
-		switch (ret) {
-		case SECCOMP_RET_ERRNO:
-			/* Set the low-order 16-bits as a errno. */
-			syscall_set_return_value(current, regs,
-						 -data, 0);
-			goto skip;
-		case SECCOMP_RET_TRAP:
-			/* Show the handler the original registers. */
-			syscall_rollback(current, regs);
-			/* Let the filter pass back 16 bits of data. */
-			seccomp_send_sigsys(this_syscall, data);
-			goto skip;
-		case SECCOMP_RET_TRACE:
-			/* Skip these calls if there is no tracer. */
-			if (!ptrace_event_enabled(current, PTRACE_EVENT_SECCOMP)) {
-				syscall_set_return_value(current, regs,
-							 -ENOSYS, 0);
-				goto skip;
-			}
-			/* Allow the BPF to provide the event message */
-			ptrace_event(PTRACE_EVENT_SECCOMP, data);
-			/*
-			 * The delivery of a fatal signal during event
-			 * notification may silently skip tracer notification.
-			 * Terminating the task now avoids executing a system
-			 * call that may not be intended.
-			 */
-			if (fatal_signal_pending(current))
-				break;
-			if (syscall_get_nr(current, regs) < 0)
-				goto skip; /* Explicit request to skip. */
-
-			return 0;
-		case SECCOMP_RET_ALLOW:
-			return 0;
-		case SECCOMP_RET_KILL:
-		default:
-			break;
-		}
-		exit_sig = SIGSYS;
-		break;
-	}
+	case SECCOMP_MODE_FILTER:
+		return __seccomp_phase1_filter(this_syscall, sd);
 #endif
 	default:
 		BUG();
 	}
+}
 
-#ifdef SECCOMP_DEBUG
-	dump_stack();
-#endif
-	audit_seccomp(this_syscall, exit_sig, ret);
-	do_exit(exit_sig);
-#ifdef CONFIG_SECCOMP_FILTER
-skip:
-	audit_seccomp(this_syscall, exit_sig, ret);
-#endif
-	return -1;
+/**
+ * seccomp_phase2() - finish slow path seccomp work for the current syscall
+ * @phase1_result: The return value from seccomp_phase1()
+ *
+ * This must be called from a context in which ptrace hooks can be used.
+ *
+ * Returns 0 if the syscall should be processed or -1 to skip the syscall.
+ */
+int seccomp_phase2(u32 phase1_result)
+{
+	struct pt_regs *regs = task_pt_regs(current);
+	u32 action = phase1_result & SECCOMP_RET_ACTION;
+	int data = phase1_result & SECCOMP_RET_DATA;
+
+	BUG_ON(action != SECCOMP_RET_TRACE);
+
+	audit_seccomp(syscall_get_nr(current, regs), 0, action);
+
+	/* Skip these calls if there is no tracer. */
+	if (!ptrace_event_enabled(current, PTRACE_EVENT_SECCOMP)) {
+		syscall_set_return_value(current, regs,
+					 -ENOSYS, 0);
+		return -1;
+	}
+
+	/* Allow the BPF to provide the event message */
+	ptrace_event(PTRACE_EVENT_SECCOMP, data);
+	/*
+	 * The delivery of a fatal signal during event
+	 * notification may silently skip tracer notification.
+	 * Terminating the task now avoids executing a system
+	 * call that may not be intended.
+	 */
+	if (fatal_signal_pending(current))
+		do_exit(SIGSYS);
+	if (syscall_get_nr(current, regs) < 0)
+		return -1; /* Explicit request to skip. */
+
+	return 0;
 }
+#endif /* CONFIG_HAVE_ARCH_SECCOMP_FILTER */
 
 long prctl_get_seccomp(void)
 {
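Taken together, an architecture with a split fast/slow entry path can run phase 1 early and fall back to the ptrace-safe slow path only when a tracer is involved. A sketch of the intended two-phase pattern (illustrative only; the variable names are placeholders, and NULL may be passed instead of &sd when no pre-built seccomp_data is available):

	u32 phase1 = seccomp_phase1(&sd);	/* fast path: no ptrace hooks needed */

	if (phase1 == SECCOMP_PHASE1_OK) {
		/* run the syscall normally */
	} else if (phase1 == SECCOMP_PHASE1_SKIP) {
		/* skip it; the return value was already set */
	} else if (seccomp_phase2(phase1)) {
		/* ptrace-safe slow path asked us to skip */
	}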