@@ -7,7 +7,6 @@
 #include <linux/export.h>
 #include <linux/cpu.h>
 #include <linux/debugfs.h>
-#include <linux/ptrace.h>
 
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
@@ -30,6 +29,12 @@
  *	Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
  */
 
+/*
+ * Use bit 0 to mangle the TIF_SPEC_IB state into the mm pointer which is
+ * stored in cpu_tlb_state.last_user_mm_ibpb.
+ */
+#define LAST_USER_MM_IBPB	0x1UL
+
 /*
  * We get here when we do something requiring a TLB invalidation
  * but could not go invalidate all of the contexts. We do the
@@ -181,17 +186,87 @@ static void sync_current_stack_to_mm(struct mm_struct *mm)
 	}
 }
 
-static bool ibpb_needed(struct task_struct *tsk, u64 last_ctx_id)
+static inline unsigned long mm_mangle_tif_spec_ib(struct task_struct *next)
+{
+	unsigned long next_tif = task_thread_info(next)->flags;
+	unsigned long ibpb = (next_tif >> TIF_SPEC_IB) & LAST_USER_MM_IBPB;
+
+	return (unsigned long)next->mm | ibpb;
+}
+
+static void cond_ibpb(struct task_struct *next)
 {
+	if (!next || !next->mm)
+		return;
+
 	/*
-	 * Check if the current (previous) task has access to the memory
-	 * of the @tsk (next) task. If access is denied, make sure to
-	 * issue a IBPB to stop user->user Spectre-v2 attacks.
-	 *
-	 * Note: __ptrace_may_access() returns 0 or -ERRNO.
+	 * Both, the conditional and the always IBPB mode use the mm
+	 * pointer to avoid the IBPB when switching between tasks of the
+	 * same process. Using the mm pointer instead of mm->context.ctx_id
+	 * opens a hypothetical hole vs. mm_struct reuse, which is more or
+	 * less impossible to control by an attacker. Aside of that it
+	 * would only affect the first schedule so the theoretically
+	 * exposed data is not really interesting.
 	 */
-	return (tsk && tsk->mm && tsk->mm->context.ctx_id != last_ctx_id &&
-		ptrace_may_access_sched(tsk, PTRACE_MODE_SPEC_IBPB));
+	if (static_branch_likely(&switch_mm_cond_ibpb)) {
+		unsigned long prev_mm, next_mm;
+
+		/*
+		 * This is a bit more complex than the always mode because
+		 * it has to handle two cases:
+		 *
+		 * 1) Switch from a user space task (potential attacker)
+		 *    which has TIF_SPEC_IB set to a user space task
+		 *    (potential victim) which has TIF_SPEC_IB not set.
+		 *
+		 * 2) Switch from a user space task (potential attacker)
+		 *    which has TIF_SPEC_IB not set to a user space task
+		 *    (potential victim) which has TIF_SPEC_IB set.
+		 *
+		 * This could be done by unconditionally issuing IBPB when
+		 * a task which has TIF_SPEC_IB set is either scheduled in
+		 * or out. Though that results in two flushes when:
+		 *
+		 * - the same user space task is scheduled out and later
+		 *   scheduled in again and only a kernel thread ran in
+		 *   between.
+		 *
+		 * - a user space task belonging to the same process is
+		 *   scheduled in after a kernel thread ran in between
+		 *
+		 * - a user space task belonging to the same process is
+		 *   scheduled in immediately.
+		 *
+		 * Optimize this with reasonably small overhead for the
+		 * above cases. Mangle the TIF_SPEC_IB bit into the mm
+		 * pointer of the incoming task which is stored in
+		 * cpu_tlbstate.last_user_mm_ibpb for comparison.
+		 */
+		next_mm = mm_mangle_tif_spec_ib(next);
+		prev_mm = this_cpu_read(cpu_tlbstate.last_user_mm_ibpb);
+
+		/*
+		 * Issue IBPB only if the mm's are different and one or
+		 * both have the IBPB bit set.
+		 */
+		if (next_mm != prev_mm &&
+		    (next_mm | prev_mm) & LAST_USER_MM_IBPB)
+			indirect_branch_prediction_barrier();
+
+		this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, next_mm);
+	}
+
+	if (static_branch_unlikely(&switch_mm_always_ibpb)) {
+		/*
+		 * Only flush when switching to a user space task with a
+		 * different context than the user space task which ran
+		 * last on this CPU.
+		 */
+		if (this_cpu_read(cpu_tlbstate.last_user_mm) != next->mm) {
+			indirect_branch_prediction_barrier();
+			this_cpu_write(cpu_tlbstate.last_user_mm, next->mm);
+		}
+	}
 }
 
 void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
@@ -292,22 +367,12 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 		new_asid = prev_asid;
 		need_flush = true;
 	} else {
-		u64 last_ctx_id = this_cpu_read(cpu_tlbstate.last_ctx_id);
-
 		/*
 		 * Avoid user/user BTB poisoning by flushing the branch
 		 * predictor when switching between processes. This stops
 		 * one process from doing Spectre-v2 attacks on another.
-		 *
-		 * As an optimization, flush indirect branches only when
-		 * switching into a processes that can't be ptrace by the
-		 * current one (as in such case, attacker has much more
-		 * convenient way how to tamper with the next process than
-		 * branch buffer poisoning).
 		 */
-		if (static_cpu_has(X86_FEATURE_USE_IBPB) &&
-		    ibpb_needed(tsk, last_ctx_id))
-			indirect_branch_prediction_barrier();
+		cond_ibpb(tsk);
 
 		if (IS_ENABLED(CONFIG_VMAP_STACK)) {
 			/*
@@ -365,14 +430,6 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 		trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
 	}
 
-	/*
-	 * Record last user mm's context id, so we can avoid
-	 * flushing branch buffer with IBPB if we switch back
-	 * to the same user.
-	 */
-	if (next != &init_mm)
-		this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id);
-
 	/* Make sure we write CR3 before loaded_mm. */
 	barrier();
 
@@ -441,7 +498,7 @@ void initialize_tlbstate_and_flush(void)
 	write_cr3(build_cr3(mm->pgd, 0));
 
 	/* Reinitialize tlbstate. */
-	this_cpu_write(cpu_tlbstate.last_ctx_id, mm->context.ctx_id);
+	this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, LAST_USER_MM_IBPB);
 	this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
 	this_cpu_write(cpu_tlbstate.next_asid, 1);
 	this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id);
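
The conditional-mode logic added in cond_ibpb() above can be exercised in isolation. The following is a minimal user-space sketch, not kernel code: struct fake_task, mangle(), needs_ibpb(), the hard-coded TIF_SPEC_IB value and the sample addresses are illustrative stand-ins; only the bit manipulation is taken from the patch.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-ins; the real definitions live in the kernel headers. */
#define TIF_SPEC_IB		9	/* bit position chosen for this demo */
#define LAST_USER_MM_IBPB	0x1UL

/* Simplified view of the incoming task: its mm address and its TIF flags. */
struct fake_task {
	unsigned long mm;	/* pretend mm_struct address, bit 0 clear */
	unsigned long tif_flags;
};

/* Mirrors mm_mangle_tif_spec_ib(): fold TIF_SPEC_IB into bit 0 of the mm. */
static unsigned long mangle(const struct fake_task *next)
{
	unsigned long ibpb = (next->tif_flags >> TIF_SPEC_IB) & LAST_USER_MM_IBPB;

	return next->mm | ibpb;
}

/* Mirrors the conditional-mode predicate in cond_ibpb(). */
static bool needs_ibpb(unsigned long prev_mm, unsigned long next_mm)
{
	return next_mm != prev_mm && ((next_mm | prev_mm) & LAST_USER_MM_IBPB);
}

int main(void)
{
	struct fake_task a  = { .mm = 0x1000, .tif_flags = 1UL << TIF_SPEC_IB };
	struct fake_task a2 = { .mm = 0x1000, .tif_flags = 1UL << TIF_SPEC_IB };
	struct fake_task b  = { .mm = 0x2000, .tif_flags = 0 };

	unsigned long last = mangle(&a);	/* task A ran last on this CPU */

	printf("A -> A2 (same mm):           IBPB=%d\n", needs_ibpb(last, mangle(&a2)));
	printf("A -> B  (one side opted in): IBPB=%d\n", needs_ibpb(last, mangle(&b)));
	printf("B -> B  (neither opted in):  IBPB=%d\n", needs_ibpb(mangle(&b), mangle(&b)));
	return 0;
}

Switching between tasks of the same process compares equal and skips the barrier, a cross-process switch where either side carries the IBPB bit triggers it, and a cross-process switch where neither side opted in stays barrier-free, which is exactly the filtering the comparison against cpu_tlbstate.last_user_mm_ibpb provides.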
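
A related note on why bit 0 is available for LAST_USER_MM_IBPB at all: mm_struct contains pointers, so an mm pointer is at least pointer-aligned and its low bit is always zero, leaving it free to carry the TIF_SPEC_IB state. The fragment below is a stand-alone illustration of that alignment argument; struct stand_in_mm is a hypothetical placeholder, not the real mm_struct.

#include <stdalign.h>
#include <stdio.h>
#include <stdlib.h>

/* Any struct holding a pointer is aligned to at least the pointer size. */
struct stand_in_mm {
	void *pgd;
	unsigned long flags;
};

_Static_assert(alignof(struct stand_in_mm) >= 2,
	       "bit 0 of a struct pointer is free for flag storage");

int main(void)
{
	struct stand_in_mm *m = malloc(sizeof(*m));

	printf("alignof = %zu, bit 0 of the pointer = %lu\n",
	       alignof(struct stand_in_mm), (unsigned long)m & 1UL);
	free(m);
	return 0;
}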