@@ -116,8 +116,34 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 #endif
 		cpumask_set_cpu(cpu, mm_cpumask(next));
 
-		/* Re-load page tables */
+		/*
+		 * Re-load page tables.
+		 *
+		 * This logic has an ordering constraint:
+		 *
+		 *  CPU 0: Write to a PTE for 'next'
+		 *  CPU 0: load bit 1 in mm_cpumask. if nonzero, send IPI.
+		 *  CPU 1: set bit 1 in next's mm_cpumask
+		 *  CPU 1: load from the PTE that CPU 0 writes (implicit)
+		 *
+		 * We need to prevent an outcome in which CPU 1 observes
+		 * the old PTE value and CPU 0 observes bit 1 clear in
+		 * mm_cpumask. (If that occurs, then the IPI will never
+		 * be sent, and CPU 1's TLB will contain a stale entry.)
+		 *
+		 * The bad outcome can occur if either CPU's load is
+		 * reordered before that CPU's store, so both CPUs must
+		 * execute full barriers to prevent this from happening.
+		 *
+		 * Thus, switch_mm needs a full barrier between the
+		 * store to mm_cpumask and any operation that could load
+		 * from next->pgd. This barrier synchronizes with
+		 * remote TLB flushers. Fortunately, load_cr3 is
+		 * serializing and thus acts as a full barrier.
+		 *
+		 */
 		load_cr3(next->pgd);
+
 		trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
 
 		/* Stop flush ipis for the previous mm */
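The interleaving documented in the new comment is the classic store-buffer pattern: each CPU stores to one location and then loads from the other, and the bad case is both loads missing both stores. Below is a rough userspace model of that pattern, not kernel code: the names pte and cpumask_bit are invented stand-ins, and atomic_thread_fence(memory_order_seq_cst) models the full barrier that the serializing load_cr3 supplies.

    /* Hypothetical userspace model of the switch_mm()-vs-flush race.
     * Build with: cc -std=c11 -pthread sb_litmus.c
     */
    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_int pte;         /* stands in for the PTE CPU 0 writes   */
    static atomic_int cpumask_bit; /* stands in for CPU 1's mm_cpumask bit */
    static int ipi_needed, pte_seen;

    static void *flusher(void *arg)          /* CPU 0 in the comment */
    {
            atomic_store_explicit(&pte, 1, memory_order_relaxed);
            atomic_thread_fence(memory_order_seq_cst);   /* full barrier */
            ipi_needed = atomic_load_explicit(&cpumask_bit,
                                              memory_order_relaxed);
            return NULL;
    }

    static void *switcher(void *arg)         /* CPU 1: switch_mm() */
    {
            atomic_store_explicit(&cpumask_bit, 1, memory_order_relaxed);
            atomic_thread_fence(memory_order_seq_cst);   /* load_cr3's role */
            pte_seen = atomic_load_explicit(&pte, memory_order_relaxed);
            return NULL;
    }

    int main(void)
    {
            pthread_t t0, t1;

            pthread_create(&t0, NULL, flusher, NULL);
            pthread_create(&t1, NULL, switcher, NULL);
            pthread_join(t0, NULL);
            pthread_join(t1, NULL);

            /* With both fences, ipi_needed == 0 && pte_seen == 0 is
             * forbidden: either the IPI gets sent or the new PTE is seen.
             */
            printf("ipi_needed=%d pte_seen=%d\n", ipi_needed, pte_seen);
            return 0;
    }

A single run only samples one interleaving; a real litmus test would loop, or be checked with a memory-model tool such as herd7.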
@@ -156,10 +182,15 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 			 * schedule, protecting us from simultaneous changes.
 			 */
 			cpumask_set_cpu(cpu, mm_cpumask(next));
+
 			/*
 			 * We were in lazy tlb mode and leave_mm disabled
 			 * tlb flush IPI delivery. We must reload CR3
 			 * to make sure to use no freed page tables.
+			 *
+			 * As above, this is a barrier that forces
+			 * TLB repopulation to be ordered after the
+			 * store to mm_cpumask.
 			 */
 			load_cr3(next->pgd);
 			trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
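The lazy-TLB hunk leans on the same argument: leave_mm() cleared this CPU's bit, which stopped flush IPIs, so on rejoining the mm the cpumask_set_cpu() store must be ordered before any load through next->pgd, and once again the serializing load_cr3 is what provides the barrier. To see why the barrier is load-bearing, here is a hypothetical drop-in for switcher() in the model above with the fence removed:

    /* Fence removed: the compiler or the CPU's store buffer may now
     * satisfy the load before the store is globally visible, so
     * ipi_needed == 0 && pte_seen == 0 (no IPI, stale TLB) becomes a
     * permitted outcome.
     */
    static void *switcher_broken(void *arg)
    {
            atomic_store_explicit(&cpumask_bit, 1, memory_order_relaxed);
            /* nothing here orders the store before the load below */
            pte_seen = atomic_load_explicit(&pte, memory_order_relaxed);
            return NULL;
    }

This is precisely the store-load reordering that x86's store buffer permits: a plain MOV store does not prevent a later load from completing first, which is why both paths need a serializing instruction or an explicit full barrier.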