mmu_context.h

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_MMU_CONTEXT_H
#define _ASM_X86_MMU_CONTEXT_H

#include <asm/desc.h>
#include <linux/atomic.h>
#include <linux/mm_types.h>
#include <linux/pkeys.h>

#include <trace/events/tlb.h>

#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/paravirt.h>
#include <asm/mpx.h>

extern atomic64_t last_mm_ctx_id;

#ifndef CONFIG_PARAVIRT
static inline void paravirt_activate_mm(struct mm_struct *prev,
                                        struct mm_struct *next)
{
}
#endif /* !CONFIG_PARAVIRT */
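
/*
 * Enable user-space RDPMC (CR4.PCE) on this CPU if perf either allows it
 * globally or has allowed it for the incoming mm; otherwise clear it.
 */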
#ifdef CONFIG_PERF_EVENTS
extern struct static_key rdpmc_always_available;

static inline void load_mm_cr4(struct mm_struct *mm)
{
        if (static_key_false(&rdpmc_always_available) ||
            atomic_read(&mm->context.perf_rdpmc_allowed))
                cr4_set_bits(X86_CR4_PCE);
        else
                cr4_clear_bits(X86_CR4_PCE);
}
#else
static inline void load_mm_cr4(struct mm_struct *mm) {}
#endif

#ifdef CONFIG_MODIFY_LDT_SYSCALL
/*
 * ldt_structs can be allocated, used, and freed, but they are never
 * modified while live.
 */
struct ldt_struct {
        /*
         * Xen requires page-aligned LDTs with special permissions.  This is
         * needed to prevent us from installing evil descriptors such as
         * call gates.  On native, we could merge the ldt_struct and LDT
         * allocations, but it's not worth trying to optimize.
         */
        struct desc_struct      *entries;
        unsigned int            nr_entries;

        /*
         * If PTI is in use, then the entries array is not mapped while we're
         * in user mode.  The whole array will be aliased at the address
         * given by ldt_slot_va(slot).  We use two slots so that we can
         * allocate, map, and enable a new LDT without invalidating the
         * mapping of an older, still-in-use LDT.
         *
         * slot will be -1 if this LDT doesn't have an alias mapping.
         */
        int                     slot;
};

/* This is a multiple of PAGE_SIZE. */
#define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE)
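
/*
 * Return the kernel virtual address at which the LDT in the given slot is
 * (or will be) aliased.  Only meaningful on 64-bit, where PTI needs the
 * alias; 32-bit builds must never call this.
 */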
static inline void *ldt_slot_va(int slot)
{
#ifdef CONFIG_X86_64
        return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot);
#else
        BUG();
        return (void *)fix_to_virt(FIX_HOLE);
#endif
}

/*
 * Used for LDT copy/destruction.
 */
static inline void init_new_context_ldt(struct mm_struct *mm)
{
        mm->context.ldt = NULL;
        init_rwsem(&mm->context.ldt_usr_sem);
}
int ldt_dup_context(struct mm_struct *oldmm, struct mm_struct *mm);
void destroy_context_ldt(struct mm_struct *mm);
void ldt_arch_exit_mmap(struct mm_struct *mm);
#else /* CONFIG_MODIFY_LDT_SYSCALL */
static inline void init_new_context_ldt(struct mm_struct *mm) { }
static inline int ldt_dup_context(struct mm_struct *oldmm,
                                  struct mm_struct *mm)
{
        return 0;
}
static inline void destroy_context_ldt(struct mm_struct *mm) { }
static inline void ldt_arch_exit_mmap(struct mm_struct *mm) { }
#endif
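
/*
 * Point LDTR at the mm's LDT (via the PTI alias when page table isolation
 * is on), or clear LDTR if the mm has no LDT.
 */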
static inline void load_mm_ldt(struct mm_struct *mm)
{
#ifdef CONFIG_MODIFY_LDT_SYSCALL
        struct ldt_struct *ldt;

        /* READ_ONCE synchronizes with smp_store_release */
        ldt = READ_ONCE(mm->context.ldt);

        /*
         * Any change to mm->context.ldt is followed by an IPI to all
         * CPUs with the mm active.  The LDT will not be freed until
         * after the IPI is handled by all such CPUs.  This means that,
         * if the ldt_struct changes before we return, the values we see
         * will be safe, and the new values will be loaded before we run
         * any user code.
         *
         * NB: don't try to convert this to use RCU without extreme care.
         * We would still need IRQs off, because we don't want to change
         * the local LDT after an IPI loaded a newer value than the one
         * that we can see.
         */
        if (unlikely(ldt)) {
                if (static_cpu_has(X86_FEATURE_PTI)) {
                        if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) {
                                /*
                                 * Whoops -- either the new LDT isn't mapped
                                 * (if slot == -1) or is mapped into a bogus
                                 * slot (if slot > 1).
                                 */
                                clear_LDT();
                                return;
                        }

                        /*
                         * If page table isolation is enabled, ldt->entries
                         * will not be mapped in the userspace pagetables.
                         * Tell the CPU to access the LDT through the alias
                         * at ldt_slot_va(ldt->slot).
                         */
                        set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries);
                } else {
                        set_ldt(ldt->entries, ldt->nr_entries);
                }
        } else {
                clear_LDT();
        }
#else
        clear_LDT();
#endif
}

static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
{
#ifdef CONFIG_MODIFY_LDT_SYSCALL
        /*
         * Load the LDT if either the old or new mm had an LDT.
         *
         * An mm will never go from having an LDT to not having an LDT.  Two
         * mms never share an LDT, so we don't gain anything by checking to
         * see whether the LDT changed.  There's also no guarantee that
         * prev->context.ldt actually matches LDTR, but, if LDTR is non-NULL,
         * then prev->context.ldt will also be non-NULL.
         *
         * If we really cared, we could optimize the case where prev == next
         * and we're exiting lazy mode.  Most of the time, if this happens,
         * we don't actually need to reload LDTR, but modify_ldt() is mostly
         * used by legacy code and emulators where we don't need this level
         * of performance.
         *
         * This uses | instead of || because it generates better code.
         */
        if (unlikely((unsigned long)prev->context.ldt |
                     (unsigned long)next->context.ldt))
                load_mm_ldt(next);
#endif

        DEBUG_LOCKS_WARN_ON(preemptible());
}

void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
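
/*
 * Set up the architecture-specific parts of a freshly created mm: a unique
 * context ID, a zeroed TLB generation counter, the default protection-key
 * state (when OSPKE is enabled), and empty LDT bookkeeping.
 */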
static inline int init_new_context(struct task_struct *tsk,
                                   struct mm_struct *mm)
{
        mutex_init(&mm->context.lock);

        mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id);
        atomic64_set(&mm->context.tlb_gen, 0);

#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
        if (cpu_feature_enabled(X86_FEATURE_OSPKE)) {
                /* pkey 0 is the default and always allocated */
                mm->context.pkey_allocation_map = 0x1;
                /* -1 means unallocated or invalid */
                mm->context.execute_only_pkey = -1;
        }
#endif
        init_new_context_ldt(mm);
        return 0;
}

static inline void destroy_context(struct mm_struct *mm)
{
        destroy_context_ldt(mm);
}

extern void switch_mm(struct mm_struct *prev, struct mm_struct *next,
                      struct task_struct *tsk);

extern void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
                               struct task_struct *tsk);
#define switch_mm_irqs_off switch_mm_irqs_off
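
/*
 * activate_mm() makes @next the active mm on this CPU: let the paravirt
 * hooks see the change, then switch page tables via switch_mm().
 */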
#define activate_mm(prev, next)                 \
do {                                            \
        paravirt_activate_mm((prev), (next));   \
        switch_mm((prev), (next), NULL);        \
} while (0)
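
/*
 * deactivate_mm() resets the user segment registers so that stale selectors
 * (which may reference the outgoing mm's LDT or TLS) are not carried over:
 * %gs on 32-bit, %fs and the %gs index on 64-bit.
 */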
#ifdef CONFIG_X86_32
#define deactivate_mm(tsk, mm)                  \
do {                                            \
        lazy_load_gs(0);                        \
} while (0)
#else
#define deactivate_mm(tsk, mm)                  \
do {                                            \
        load_gs_index(0);                       \
        loadsegment(fs, 0);                     \
} while (0)
#endif
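
/*
 * mmap duplication and teardown hooks: forward the events to paravirt and
 * duplicate or tear down the LDT alongside the mm.
 */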
static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
{
        paravirt_arch_dup_mmap(oldmm, mm);
        return ldt_dup_context(oldmm, mm);
}

static inline void arch_exit_mmap(struct mm_struct *mm)
{
        paravirt_arch_exit_mmap(mm);
        ldt_arch_exit_mmap(mm);
}
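
/*
 * On 64-bit kernels, an mm is 64-bit unless it was marked as an IA-32
 * compat task; on 32-bit kernels every mm is 32-bit.
 */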
#ifdef CONFIG_X86_64
static inline bool is_64bit_mm(struct mm_struct *mm)
{
        return !IS_ENABLED(CONFIG_IA32_EMULATION) ||
               !(mm->context.ia32_compat == TIF_IA32);
}
#else
static inline bool is_64bit_mm(struct mm_struct *mm)
{
        return false;
}
#endif

static inline void arch_bprm_mm_init(struct mm_struct *mm,
                                     struct vm_area_struct *vma)
{
        mpx_mm_init(mm);
}

static inline void arch_unmap(struct mm_struct *mm, struct vm_area_struct *vma,
                              unsigned long start, unsigned long end)
{
        /*
         * mpx_notify_unmap() goes and reads a rarely-hot
         * cacheline in the mm_struct.  That can be expensive
         * enough to be seen in profiles.
         *
         * The mpx_notify_unmap() call and its contents have been
         * observed to affect munmap() performance on hardware
         * where MPX is not present.
         *
         * The unlikely() optimizes for the fast case: no MPX
         * in the CPU, or no MPX use in the process.  Even if
         * we get this wrong (in the unlikely event that MPX
         * is widely enabled on some system) the overhead of
         * MPX itself (reading bounds tables) is expected to
         * overwhelm the overhead of getting this unlikely()
         * consistently wrong.
         */
        if (unlikely(cpu_feature_enabled(X86_FEATURE_MPX)))
                mpx_notify_unmap(mm, vma, start, end);
}
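
/* Extract the protection-key number encoded in the VMA's vm_flags. */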
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
static inline int vma_pkey(struct vm_area_struct *vma)
{
        unsigned long vma_pkey_mask = VM_PKEY_BIT0 | VM_PKEY_BIT1 |
                                      VM_PKEY_BIT2 | VM_PKEY_BIT3;

        return (vma->vm_flags & vma_pkey_mask) >> VM_PKEY_SHIFT;
}
#else
static inline int vma_pkey(struct vm_area_struct *vma)
{
        return 0;
}
#endif

/*
 * We only want to enforce protection keys on the current process
 * because we effectively have no access to PKRU for other
 * processes or any way to tell *which* PKRU in a threaded
 * process we could use.
 *
 * So do not enforce things if the VMA is not from the current
 * mm, or if we are in a kernel thread.
 */
static inline bool vma_is_foreign(struct vm_area_struct *vma)
{
        if (!current->mm)
                return true;
        /*
         * Should PKRU be enforced on the access to this VMA?  If
         * the VMA is from another process, then PKRU has no
         * relevance and should not be enforced.
         */
        if (current->mm != vma->vm_mm)
                return true;

        return false;
}

static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
                bool write, bool execute, bool foreign)
{
        /* pkeys never affect instruction fetches */
        if (execute)
                return true;
        /* allow access if the VMA is not one from this process */
        if (foreign || vma_is_foreign(vma))
                return true;
        return __pkru_allows_pkey(vma_pkey(vma), write);
}
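
/*
 * For reference, a minimal sketch of the PKRU check assumed above (the
 * helper itself is defined elsewhere): PKRU holds two bits per key, an
 * access-disable bit and a write-disable bit, so the check is roughly
 *
 *      u32 pkru = read_pkru();
 *      bool ad  = pkru & (1u << (pkey * 2));
 *      bool wd  = pkru & (1u << (pkey * 2 + 1));
 *      allowed  = !ad && !(write && wd);
 *
 * which is what __pkru_allows_pkey() boils down to.
 */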

/*
 * This can be used from process context to figure out what the value of
 * CR3 is without needing to do a (slow) __read_cr3().
 *
 * It's intended to be used for code like KVM that sneakily changes CR3
 * and needs to restore it.  It needs to be used very carefully.
 */
static inline unsigned long __get_current_cr3_fast(void)
{
        unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd,
                this_cpu_read(cpu_tlbstate.loaded_mm_asid));

        /* For now, be very restrictive about when this can be called. */
        VM_WARN_ON(in_nmi() || preemptible());

        VM_BUG_ON(cr3 != __read_cr3());
        return cr3;
}

#endif /* _ASM_X86_MMU_CONTEXT_H */