/* mmu_context.h - x86 mm context helpers: LDT/CR4 loading, switch_mm(), protection-key checks. */

  1. #ifndef _ASM_X86_MMU_CONTEXT_H
  2. #define _ASM_X86_MMU_CONTEXT_H
  3. #include <asm/desc.h>
  4. #include <linux/atomic.h>
  5. #include <linux/mm_types.h>
  6. #include <trace/events/tlb.h>
  7. #include <asm/pgalloc.h>
  8. #include <asm/tlbflush.h>
  9. #include <asm/paravirt.h>
  10. #include <asm/mpx.h>
  11. #ifndef CONFIG_PARAVIRT
  12. static inline void paravirt_activate_mm(struct mm_struct *prev,
  13. struct mm_struct *next)
  14. {
  15. }
  16. #endif /* !CONFIG_PARAVIRT */
  17. #ifdef CONFIG_PERF_EVENTS
  18. extern struct static_key rdpmc_always_available;
  19. static inline void load_mm_cr4(struct mm_struct *mm)
  20. {
  21. if (static_key_false(&rdpmc_always_available) ||
  22. atomic_read(&mm->context.perf_rdpmc_allowed))
  23. cr4_set_bits(X86_CR4_PCE);
  24. else
  25. cr4_clear_bits(X86_CR4_PCE);
  26. }
  27. #else
  28. static inline void load_mm_cr4(struct mm_struct *mm) {}
  29. #endif
  30. #ifdef CONFIG_MODIFY_LDT_SYSCALL
  31. /*
  32. * ldt_structs can be allocated, used, and freed, but they are never
  33. * modified while live.
  34. */
  35. struct ldt_struct {
  36. /*
  37. * Xen requires page-aligned LDTs with special permissions. This is
  38. * needed to prevent us from installing evil descriptors such as
  39. * call gates. On native, we could merge the ldt_struct and LDT
  40. * allocations, but it's not worth trying to optimize.
  41. */
  42. struct desc_struct *entries;
  43. int size;
  44. };
  45. /*
  46. * Used for LDT copy/destruction.
  47. */
  48. int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
  49. void destroy_context(struct mm_struct *mm);
  50. #else /* CONFIG_MODIFY_LDT_SYSCALL */
  51. static inline int init_new_context(struct task_struct *tsk,
  52. struct mm_struct *mm)
  53. {
  54. return 0;
  55. }
  56. static inline void destroy_context(struct mm_struct *mm) {}
  57. #endif
  58. static inline void load_mm_ldt(struct mm_struct *mm)
  59. {
  60. #ifdef CONFIG_MODIFY_LDT_SYSCALL
  61. struct ldt_struct *ldt;
  62. /* lockless_dereference synchronizes with smp_store_release */
  63. ldt = lockless_dereference(mm->context.ldt);
  64. /*
  65. * Any change to mm->context.ldt is followed by an IPI to all
  66. * CPUs with the mm active. The LDT will not be freed until
  67. * after the IPI is handled by all such CPUs. This means that,
  68. * if the ldt_struct changes before we return, the values we see
  69. * will be safe, and the new values will be loaded before we run
  70. * any user code.
  71. *
  72. * NB: don't try to convert this to use RCU without extreme care.
  73. * We would still need IRQs off, because we don't want to change
  74. * the local LDT after an IPI loaded a newer value than the one
  75. * that we can see.
  76. */
  77. if (unlikely(ldt))
  78. set_ldt(ldt->entries, ldt->size);
  79. else
  80. clear_LDT();
  81. #else
  82. clear_LDT();
  83. #endif
  84. DEBUG_LOCKS_WARN_ON(preemptible());
  85. }
  86. static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
  87. {
  88. #ifdef CONFIG_SMP
  89. if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
  90. this_cpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
  91. #endif
  92. }
  93. static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
  94. struct task_struct *tsk)
  95. {
  96. unsigned cpu = smp_processor_id();
  97. if (likely(prev != next)) {
  98. #ifdef CONFIG_SMP
  99. this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
  100. this_cpu_write(cpu_tlbstate.active_mm, next);
  101. #endif
  102. cpumask_set_cpu(cpu, mm_cpumask(next));
  103. /*
  104. * Re-load page tables.
  105. *
  106. * This logic has an ordering constraint:
  107. *
  108. * CPU 0: Write to a PTE for 'next'
  109. * CPU 0: load bit 1 in mm_cpumask. if nonzero, send IPI.
  110. * CPU 1: set bit 1 in next's mm_cpumask
  111. * CPU 1: load from the PTE that CPU 0 writes (implicit)
  112. *
  113. * We need to prevent an outcome in which CPU 1 observes
  114. * the new PTE value and CPU 0 observes bit 1 clear in
  115. * mm_cpumask. (If that occurs, then the IPI will never
  116. * be sent, and CPU 0's TLB will contain a stale entry.)
  117. *
  118. * The bad outcome can occur if either CPU's load is
  119. * reordered before that CPU's store, so both CPUs must
  120. * execute full barriers to prevent this from happening.
  121. *
  122. * Thus, switch_mm needs a full barrier between the
  123. * store to mm_cpumask and any operation that could load
  124. * from next->pgd. TLB fills are special and can happen
  125. * due to instruction fetches or for no reason at all,
  126. * and neither LOCK nor MFENCE orders them.
  127. * Fortunately, load_cr3() is serializing and gives the
  128. * ordering guarantee we need.
  129. *
  130. */
  131. load_cr3(next->pgd);
  132. trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
  133. /* Stop flush ipis for the previous mm */
  134. cpumask_clear_cpu(cpu, mm_cpumask(prev));
  135. /* Load per-mm CR4 state */
  136. load_mm_cr4(next);
  137. #ifdef CONFIG_MODIFY_LDT_SYSCALL
  138. /*
  139. * Load the LDT, if the LDT is different.
  140. *
  141. * It's possible that prev->context.ldt doesn't match
  142. * the LDT register. This can happen if leave_mm(prev)
  143. * was called and then modify_ldt changed
  144. * prev->context.ldt but suppressed an IPI to this CPU.
  145. * In this case, prev->context.ldt != NULL, because we
  146. * never set context.ldt to NULL while the mm still
  147. * exists. That means that next->context.ldt !=
  148. * prev->context.ldt, because mms never share an LDT.
  149. */
  150. if (unlikely(prev->context.ldt != next->context.ldt))
  151. load_mm_ldt(next);
  152. #endif
  153. }
  154. #ifdef CONFIG_SMP
  155. else {
  156. this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
  157. BUG_ON(this_cpu_read(cpu_tlbstate.active_mm) != next);
  158. if (!cpumask_test_cpu(cpu, mm_cpumask(next))) {
  159. /*
  160. * On established mms, the mm_cpumask is only changed
  161. * from irq context, from ptep_clear_flush() while in
  162. * lazy tlb mode, and here. Irqs are blocked during
  163. * schedule, protecting us from simultaneous changes.
  164. */
  165. cpumask_set_cpu(cpu, mm_cpumask(next));
  166. /*
  167. * We were in lazy tlb mode and leave_mm disabled
  168. * tlb flush IPI delivery. We must reload CR3
  169. * to make sure to use no freed page tables.
  170. *
  171. * As above, load_cr3() is serializing and orders TLB
  172. * fills with respect to the mm_cpumask write.
  173. */
  174. load_cr3(next->pgd);
  175. trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
  176. load_mm_cr4(next);
  177. load_mm_ldt(next);
  178. }
  179. }
  180. #endif
  181. }
  182. #define activate_mm(prev, next) \
  183. do { \
  184. paravirt_activate_mm((prev), (next)); \
  185. switch_mm((prev), (next), NULL); \
  186. } while (0);
  187. #ifdef CONFIG_X86_32
  188. #define deactivate_mm(tsk, mm) \
  189. do { \
  190. lazy_load_gs(0); \
  191. } while (0)
  192. #else
  193. #define deactivate_mm(tsk, mm) \
  194. do { \
  195. load_gs_index(0); \
  196. loadsegment(fs, 0); \
  197. } while (0)
  198. #endif
  199. static inline void arch_dup_mmap(struct mm_struct *oldmm,
  200. struct mm_struct *mm)
  201. {
  202. paravirt_arch_dup_mmap(oldmm, mm);
  203. }
  204. static inline void arch_exit_mmap(struct mm_struct *mm)
  205. {
  206. paravirt_arch_exit_mmap(mm);
  207. }
  208. #ifdef CONFIG_X86_64
  209. static inline bool is_64bit_mm(struct mm_struct *mm)
  210. {
  211. return !config_enabled(CONFIG_IA32_EMULATION) ||
  212. !(mm->context.ia32_compat == TIF_IA32);
  213. }
  214. #else
  215. static inline bool is_64bit_mm(struct mm_struct *mm)
  216. {
  217. return false;
  218. }
  219. #endif
  220. static inline void arch_bprm_mm_init(struct mm_struct *mm,
  221. struct vm_area_struct *vma)
  222. {
  223. mpx_mm_init(mm);
  224. }
  225. static inline void arch_unmap(struct mm_struct *mm, struct vm_area_struct *vma,
  226. unsigned long start, unsigned long end)
  227. {
  228. /*
  229. * mpx_notify_unmap() goes and reads a rarely-hot
  230. * cacheline in the mm_struct. That can be expensive
  231. * enough to be seen in profiles.
  232. *
  233. * The mpx_notify_unmap() call and its contents have been
  234. * observed to affect munmap() performance on hardware
  235. * where MPX is not present.
  236. *
  237. * The unlikely() optimizes for the fast case: no MPX
  238. * in the CPU, or no MPX use in the process. Even if
  239. * we get this wrong (in the unlikely event that MPX
  240. * is widely enabled on some system) the overhead of
  241. * MPX itself (reading bounds tables) is expected to
  242. * overwhelm the overhead of getting this unlikely()
  243. * consistently wrong.
  244. */
  245. if (unlikely(cpu_feature_enabled(X86_FEATURE_MPX)))
  246. mpx_notify_unmap(mm, vma, start, end);
  247. }
  248. static inline int vma_pkey(struct vm_area_struct *vma)
  249. {
  250. u16 pkey = 0;
  251. #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
  252. unsigned long vma_pkey_mask = VM_PKEY_BIT0 | VM_PKEY_BIT1 |
  253. VM_PKEY_BIT2 | VM_PKEY_BIT3;
  254. pkey = (vma->vm_flags & vma_pkey_mask) >> VM_PKEY_SHIFT;
  255. #endif
  256. return pkey;
  257. }
  258. static inline bool __pkru_allows_pkey(u16 pkey, bool write)
  259. {
  260. u32 pkru = read_pkru();
  261. if (!__pkru_allows_read(pkru, pkey))
  262. return false;
  263. if (write && !__pkru_allows_write(pkru, pkey))
  264. return false;
  265. return true;
  266. }
  267. /*
  268. * We only want to enforce protection keys on the current process
  269. * because we effectively have no access to PKRU for other
  270. * processes or any way to tell *which * PKRU in a threaded
  271. * process we could use.
  272. *
  273. * So do not enforce things if the VMA is not from the current
  274. * mm, or if we are in a kernel thread.
  275. */
  276. static inline bool vma_is_foreign(struct vm_area_struct *vma)
  277. {
  278. if (!current->mm)
  279. return true;
  280. /*
  281. * Should PKRU be enforced on the access to this VMA? If
  282. * the VMA is from another process, then PKRU has no
  283. * relevance and should not be enforced.
  284. */
  285. if (current->mm != vma->vm_mm)
  286. return true;
  287. return false;
  288. }
  289. static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
  290. bool write, bool execute, bool foreign)
  291. {
  292. /* pkeys never affect instruction fetches */
  293. if (execute)
  294. return true;
  295. /* allow access if the VMA is not one from this process */
  296. if (foreign || vma_is_foreign(vma))
  297. return true;
  298. return __pkru_allows_pkey(vma_pkey(vma), write);
  299. }
  300. static inline bool arch_pte_access_permitted(pte_t pte, bool write)
  301. {
  302. return __pkru_allows_pkey(pte_flags_pkey(pte_flags(pte)), write);
  303. }
  304. #endif /* _ASM_X86_MMU_CONTEXT_H */