ldt.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
  4. * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
  5. * Copyright (C) 2002 Andi Kleen
  6. *
  7. * This handles calls from both 32bit and 64bit mode.
  8. *
  9. * Lock order:
  10. * context.ldt_usr_sem
  11. * mmap_sem
  12. * context.lock
  13. */
  14. #include <linux/errno.h>
  15. #include <linux/gfp.h>
  16. #include <linux/sched.h>
  17. #include <linux/string.h>
  18. #include <linux/mm.h>
  19. #include <linux/smp.h>
  20. #include <linux/syscalls.h>
  21. #include <linux/slab.h>
  22. #include <linux/vmalloc.h>
  23. #include <linux/uaccess.h>
  24. #include <asm/ldt.h>
  25. #include <asm/tlb.h>
  26. #include <asm/desc.h>
  27. #include <asm/mmu_context.h>
  28. #include <asm/syscalls.h>
  /*
   * Reload DS and ES on the current CPU when they refer to the LDT, so that
   * the cached descriptors match the updated LDT.  Only does work on
   * CONFIG_X86_64 builds; otherwise the body is empty.
   */
  static void refresh_ldt_segments(void)
  {
  #ifdef CONFIG_X86_64
      unsigned short selector;

      /* DS: reload only if the selector's table-indicator bits say "LDT". */
      savesegment(ds, selector);
      if ((selector & SEGMENT_TI_MASK) == SEGMENT_LDT)
          loadsegment(ds, selector);

      /* ES: same treatment. */
      savesegment(es, selector);
      if ((selector & SEGMENT_TI_MASK) == SEGMENT_LDT)
          loadsegment(es, selector);
  #endif
  }
  45. /* context.lock is held by the task which issued the smp function call */
  46. static void flush_ldt(void *__mm)
  47. {
  48. struct mm_struct *mm = __mm;
  49. if (this_cpu_read(cpu_tlbstate.loaded_mm) != mm)
  50. return;
  51. load_mm_ldt(mm);
  52. refresh_ldt_segments();
  53. }
  54. /* The caller must call finalize_ldt_struct on the result. LDT starts zeroed. */
  55. static struct ldt_struct *alloc_ldt_struct(unsigned int num_entries)
  56. {
  57. struct ldt_struct *new_ldt;
  58. unsigned int alloc_size;
  59. if (num_entries > LDT_ENTRIES)
  60. return NULL;
  61. new_ldt = kmalloc(sizeof(struct ldt_struct), GFP_KERNEL);
  62. if (!new_ldt)
  63. return NULL;
  64. BUILD_BUG_ON(LDT_ENTRY_SIZE != sizeof(struct desc_struct));
  65. alloc_size = num_entries * LDT_ENTRY_SIZE;
  66. /*
  67. * Xen is very picky: it requires a page-aligned LDT that has no
  68. * trailing nonzero bytes in any page that contains LDT descriptors.
  69. * Keep it simple: zero the whole allocation and never allocate less
  70. * than PAGE_SIZE.
  71. */
  72. if (alloc_size > PAGE_SIZE)
  73. new_ldt->entries = vzalloc(alloc_size);
  74. else
  75. new_ldt->entries = (void *)get_zeroed_page(GFP_KERNEL);
  76. if (!new_ldt->entries) {
  77. kfree(new_ldt);
  78. return NULL;
  79. }
  80. /* The new LDT isn't aliased for PTI yet. */
  81. new_ldt->slot = -1;
  82. new_ldt->nr_entries = num_entries;
  83. return new_ldt;
  84. }
  /*
   * If PTI is enabled, this maps the LDT into the kernelmode and
   * usermode tables for the given mm.
   *
   * There is no corresponding unmap function.  Even if the LDT is freed, we
   * leave the PTEs around until the slot is reused or the mm is destroyed.
   * This is harmless: the LDT is always in ordinary memory, and no one will
   * access the freed slot.
   *
   * If we wanted to unmap freed LDTs, we'd also need to do a flush to make
   * it useful, and the flush would slow down modify_ldt().
   *
   * Returns 0 on success (or when there is nothing to do), -ENOMEM if a page
   * table entry could not be obtained.  On failure, cleanup of any partially
   * populated page tables is left to the caller.
   */
  static int
  map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
  {
  #ifdef CONFIG_PAGE_TABLE_ISOLATION
      unsigned long slot_va, ldt_bytes, offset;
      bool from_vmalloc, pgd_was_populated;
      pgprot_t prot;
      pgd_t *pgd, *user_pgd;

      if (!static_cpu_has(X86_FEATURE_PTI))
          return 0;

      /*
       * Any given ldt_struct should have map_ldt_struct() called at most
       * once.
       */
      WARN_ON(ldt->slot != -1);

      /*
       * Did we already have the top level entry allocated?  We can't
       * use pgd_none() for this because it doesn't do anything on
       * 4-level page table kernels.
       */
      pgd = pgd_offset(mm, LDT_BASE_ADDR);
      pgd_was_populated = (pgd->pgd != 0);

      from_vmalloc = is_vmalloc_addr(ldt->entries);
      slot_va = (unsigned long)ldt_slot_va(slot);
      ldt_bytes = ldt->nr_entries * LDT_ENTRY_SIZE;

      /*
       * Map it RO so the easy to find address is not a primary
       * target via some kernel interface which misses a
       * permission check.
       */
      prot = __pgprot(__PAGE_KERNEL_RO & ~_PAGE_GLOBAL);
      /* Filter out unsupported __PAGE_KERNEL* bits: */
      pgprot_val(prot) &= __supported_pte_mask;

      /* Install one PTE per page that holds LDT descriptors. */
      for (offset = 0; offset < ldt_bytes; offset += PAGE_SIZE) {
          const void *src = (char *)ldt->entries + offset;
          unsigned long va = slot_va + offset;
          unsigned long pfn;
          spinlock_t *ptl;
          pte_t *ptep;

          if (from_vmalloc)
              pfn = vmalloc_to_pfn(src);
          else
              pfn = page_to_pfn(virt_to_page(src));

          /*
           * Treat the PTI LDT range as a *userspace* range.
           * get_locked_pte() will allocate all needed pagetables
           * and account for them in this mm.
           */
          ptep = get_locked_pte(mm, va, &ptl);
          if (!ptep)
              return -ENOMEM;

          set_pte_at(mm, va, ptep, pfn_pte(pfn, prot));
          pte_unmap_unlock(ptep, ptl);
      }

      /* PTI is known to be enabled from here on (see the early return). */
      user_pgd = kernel_to_user_pgdp(pgd);

      if (mm->context.ldt) {
          /*
           * We already had an LDT.  The top-level entry should already
           * have been allocated and synchronized with the usermode
           * tables.
           */
          WARN_ON(!pgd_was_populated);
          WARN_ON(!user_pgd->pgd);
      } else {
          /*
           * This is the first time we're mapping an LDT for this process.
           * Sync the pgd to the usermode tables.
           */
          WARN_ON(pgd_was_populated);
          WARN_ON(user_pgd->pgd);
          set_pgd(user_pgd, *pgd);
      }

      flush_tlb_mm_range(mm, slot_va, slot_va + LDT_SLOT_STRIDE, 0);

      ldt->slot = slot;
  #endif
      return 0;
  }
  /*
   * Free the page tables covering one top-level (PGDIR-sized) range starting
   * at LDT_BASE_ADDR in @mm.  Does nothing unless CONFIG_PAGE_TABLE_ISOLATION
   * is built in and the CPU has X86_FEATURE_PTI set.
   */
  static void free_ldt_pgtables(struct mm_struct *mm)
  {
  #ifdef CONFIG_PAGE_TABLE_ISOLATION
      unsigned long range_start = LDT_BASE_ADDR;
      unsigned long range_end = range_start + (1UL << PGDIR_SHIFT);
      struct mmu_gather tlb;

      if (!static_cpu_has(X86_FEATURE_PTI))
          return;

      tlb_gather_mmu(&tlb, mm, range_start, range_end);
      free_pgd_range(&tlb, range_start, range_end, range_start, range_end);
      tlb_finish_mmu(&tlb, range_start, range_end);
  #endif
  }
  189. /* After calling this, the LDT is immutable. */
  190. static void finalize_ldt_struct(struct ldt_struct *ldt)
  191. {
  192. paravirt_alloc_ldt(ldt->entries, ldt->nr_entries);
  193. }
  194. static void install_ldt(struct mm_struct *mm, struct ldt_struct *ldt)
  195. {
  196. mutex_lock(&mm->context.lock);
  197. /* Synchronizes with READ_ONCE in load_mm_ldt. */
  198. smp_store_release(&mm->context.ldt, ldt);
  199. /* Activate the LDT for all CPUs using currents mm. */
  200. on_each_cpu_mask(mm_cpumask(mm), flush_ldt, mm, true);
  201. mutex_unlock(&mm->context.lock);
  202. }
  203. static void free_ldt_struct(struct ldt_struct *ldt)
  204. {
  205. if (likely(!ldt))
  206. return;
  207. paravirt_free_ldt(ldt->entries, ldt->nr_entries);
  208. if (ldt->nr_entries * LDT_ENTRY_SIZE > PAGE_SIZE)
  209. vfree_atomic(ldt->entries);
  210. else
  211. free_page((unsigned long)ldt->entries);
  212. kfree(ldt);
  213. }
  214. /*
  215. * Called on fork from arch_dup_mmap(). Just copy the current LDT state,
  216. * the new task is not running, so nothing can be installed.
  217. */
  218. int ldt_dup_context(struct mm_struct *old_mm, struct mm_struct *mm)
  219. {
  220. struct ldt_struct *new_ldt;
  221. int retval = 0;
  222. if (!old_mm)
  223. return 0;
  224. mutex_lock(&old_mm->context.lock);
  225. if (!old_mm->context.ldt)
  226. goto out_unlock;
  227. new_ldt = alloc_ldt_struct(old_mm->context.ldt->nr_entries);
  228. if (!new_ldt) {
  229. retval = -ENOMEM;
  230. goto out_unlock;
  231. }
  232. memcpy(new_ldt->entries, old_mm->context.ldt->entries,
  233. new_ldt->nr_entries * LDT_ENTRY_SIZE);
  234. finalize_ldt_struct(new_ldt);
  235. retval = map_ldt_struct(mm, new_ldt, 0);
  236. if (retval) {
  237. free_ldt_pgtables(mm);
  238. free_ldt_struct(new_ldt);
  239. goto out_unlock;
  240. }
  241. mm->context.ldt = new_ldt;
  242. out_unlock:
  243. mutex_unlock(&old_mm->context.lock);
  244. return retval;
  245. }
  246. /*
  247. * No need to lock the MM as we are the last user
  248. *
  249. * 64bit: Don't touch the LDT register - we're already in the next thread.
  250. */
  251. void destroy_context_ldt(struct mm_struct *mm)
  252. {
  253. free_ldt_struct(mm->context.ldt);
  254. mm->context.ldt = NULL;
  255. }
  /* Exit-mmap hook: release the page tables backing the LDT range of @mm. */
  void ldt_arch_exit_mmap(struct mm_struct *mm)
  {
      free_ldt_pgtables(mm);
  }
  260. static int read_ldt(void __user *ptr, unsigned long bytecount)
  261. {
  262. struct mm_struct *mm = current->mm;
  263. unsigned long entries_size;
  264. int retval;
  265. down_read(&mm->context.ldt_usr_sem);
  266. if (!mm->context.ldt) {
  267. retval = 0;
  268. goto out_unlock;
  269. }
  270. if (bytecount > LDT_ENTRY_SIZE * LDT_ENTRIES)
  271. bytecount = LDT_ENTRY_SIZE * LDT_ENTRIES;
  272. entries_size = mm->context.ldt->nr_entries * LDT_ENTRY_SIZE;
  273. if (entries_size > bytecount)
  274. entries_size = bytecount;
  275. if (copy_to_user(ptr, mm->context.ldt->entries, entries_size)) {
  276. retval = -EFAULT;
  277. goto out_unlock;
  278. }
  279. if (entries_size != bytecount) {
  280. /* Zero-fill the rest and pretend we read bytecount bytes. */
  281. if (clear_user(ptr + entries_size, bytecount - entries_size)) {
  282. retval = -EFAULT;
  283. goto out_unlock;
  284. }
  285. }
  286. retval = bytecount;
  287. out_unlock:
  288. up_read(&mm->context.ldt_usr_sem);
  289. return retval;
  290. }
  291. static int read_default_ldt(void __user *ptr, unsigned long bytecount)
  292. {
  293. /* CHECKME: Can we use _one_ random number ? */
  294. #ifdef CONFIG_X86_32
  295. unsigned long size = 5 * sizeof(struct desc_struct);
  296. #else
  297. unsigned long size = 128;
  298. #endif
  299. if (bytecount > size)
  300. bytecount = size;
  301. if (clear_user(ptr, bytecount))
  302. return -EFAULT;
  303. return bytecount;
  304. }
  305. static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
  306. {
  307. struct mm_struct *mm = current->mm;
  308. struct ldt_struct *new_ldt, *old_ldt;
  309. unsigned int old_nr_entries, new_nr_entries;
  310. struct user_desc ldt_info;
  311. struct desc_struct ldt;
  312. int error;
  313. error = -EINVAL;
  314. if (bytecount != sizeof(ldt_info))
  315. goto out;
  316. error = -EFAULT;
  317. if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info)))
  318. goto out;
  319. error = -EINVAL;
  320. if (ldt_info.entry_number >= LDT_ENTRIES)
  321. goto out;
  322. if (ldt_info.contents == 3) {
  323. if (oldmode)
  324. goto out;
  325. if (ldt_info.seg_not_present == 0)
  326. goto out;
  327. }
  328. if ((oldmode && !ldt_info.base_addr && !ldt_info.limit) ||
  329. LDT_empty(&ldt_info)) {
  330. /* The user wants to clear the entry. */
  331. memset(&ldt, 0, sizeof(ldt));
  332. } else {
  333. if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) {
  334. error = -EINVAL;
  335. goto out;
  336. }
  337. fill_ldt(&ldt, &ldt_info);
  338. if (oldmode)
  339. ldt.avl = 0;
  340. }
  341. if (down_write_killable(&mm->context.ldt_usr_sem))
  342. return -EINTR;
  343. old_ldt = mm->context.ldt;
  344. old_nr_entries = old_ldt ? old_ldt->nr_entries : 0;
  345. new_nr_entries = max(ldt_info.entry_number + 1, old_nr_entries);
  346. error = -ENOMEM;
  347. new_ldt = alloc_ldt_struct(new_nr_entries);
  348. if (!new_ldt)
  349. goto out_unlock;
  350. if (old_ldt)
  351. memcpy(new_ldt->entries, old_ldt->entries, old_nr_entries * LDT_ENTRY_SIZE);
  352. new_ldt->entries[ldt_info.entry_number] = ldt;
  353. finalize_ldt_struct(new_ldt);
  354. /*
  355. * If we are using PTI, map the new LDT into the userspace pagetables.
  356. * If there is already an LDT, use the other slot so that other CPUs
  357. * will continue to use the old LDT until install_ldt() switches
  358. * them over to the new LDT.
  359. */
  360. error = map_ldt_struct(mm, new_ldt, old_ldt ? !old_ldt->slot : 0);
  361. if (error) {
  362. /*
  363. * This only can fail for the first LDT setup. If an LDT is
  364. * already installed then the PTE page is already
  365. * populated. Mop up a half populated page table.
  366. */
  367. if (!WARN_ON_ONCE(old_ldt))
  368. free_ldt_pgtables(mm);
  369. free_ldt_struct(new_ldt);
  370. goto out_unlock;
  371. }
  372. install_ldt(mm, new_ldt);
  373. free_ldt_struct(old_ldt);
  374. error = 0;
  375. out_unlock:
  376. up_write(&mm->context.ldt_usr_sem);
  377. out:
  378. return error;
  379. }
  380. SYSCALL_DEFINE3(modify_ldt, int , func , void __user * , ptr ,
  381. unsigned long , bytecount)
  382. {
  383. int ret = -ENOSYS;
  384. switch (func) {
  385. case 0:
  386. ret = read_ldt(ptr, bytecount);
  387. break;
  388. case 1:
  389. ret = write_ldt(ptr, bytecount, 1);
  390. break;
  391. case 2:
  392. ret = read_default_ldt(ptr, bytecount);
  393. break;
  394. case 0x11:
  395. ret = write_ldt(ptr, bytecount, 0);
  396. break;
  397. }
  398. /*
  399. * The SYSCALL_DEFINE() macros give us an 'unsigned long'
  400. * return type, but tht ABI for sys_modify_ldt() expects
  401. * 'int'. This cast gives us an int-sized value in %rax
  402. * for the return code. The 'unsigned' is necessary so
  403. * the compiler does not try to sign-extend the negative
  404. * return codes into the high half of the register when
  405. * taking the value from int->long.
  406. */
  407. return (unsigned int)ret;
  408. }