ldt.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
  4. * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
  5. * Copyright (C) 2002 Andi Kleen
  6. *
  7. * This handles calls from both 32bit and 64bit mode.
  8. *
  9. * Lock order:
  10. * context.ldt_usr_sem
  11. * mmap_sem
  12. * context.lock
  13. */
  14. #include <linux/errno.h>
  15. #include <linux/gfp.h>
  16. #include <linux/sched.h>
  17. #include <linux/string.h>
  18. #include <linux/mm.h>
  19. #include <linux/smp.h>
  20. #include <linux/syscalls.h>
  21. #include <linux/slab.h>
  22. #include <linux/vmalloc.h>
  23. #include <linux/uaccess.h>
  24. #include <asm/ldt.h>
  25. #include <asm/tlb.h>
  26. #include <asm/desc.h>
  27. #include <asm/mmu_context.h>
  28. #include <asm/syscalls.h>
  29. static void refresh_ldt_segments(void)
  30. {
  31. #ifdef CONFIG_X86_64
  32. unsigned short sel;
  33. /*
  34. * Make sure that the cached DS and ES descriptors match the updated
  35. * LDT.
  36. */
  37. savesegment(ds, sel);
  38. if ((sel & SEGMENT_TI_MASK) == SEGMENT_LDT)
  39. loadsegment(ds, sel);
  40. savesegment(es, sel);
  41. if ((sel & SEGMENT_TI_MASK) == SEGMENT_LDT)
  42. loadsegment(es, sel);
  43. #endif
  44. }
  45. /* context.lock is held by the task which issued the smp function call */
  46. static void flush_ldt(void *__mm)
  47. {
  48. struct mm_struct *mm = __mm;
  49. if (this_cpu_read(cpu_tlbstate.loaded_mm) != mm)
  50. return;
  51. load_mm_ldt(mm);
  52. refresh_ldt_segments();
  53. }
  54. /* The caller must call finalize_ldt_struct on the result. LDT starts zeroed. */
  55. static struct ldt_struct *alloc_ldt_struct(unsigned int num_entries)
  56. {
  57. struct ldt_struct *new_ldt;
  58. unsigned int alloc_size;
  59. if (num_entries > LDT_ENTRIES)
  60. return NULL;
  61. new_ldt = kmalloc(sizeof(struct ldt_struct), GFP_KERNEL);
  62. if (!new_ldt)
  63. return NULL;
  64. BUILD_BUG_ON(LDT_ENTRY_SIZE != sizeof(struct desc_struct));
  65. alloc_size = num_entries * LDT_ENTRY_SIZE;
  66. /*
  67. * Xen is very picky: it requires a page-aligned LDT that has no
  68. * trailing nonzero bytes in any page that contains LDT descriptors.
  69. * Keep it simple: zero the whole allocation and never allocate less
  70. * than PAGE_SIZE.
  71. */
  72. if (alloc_size > PAGE_SIZE)
  73. new_ldt->entries = vzalloc(alloc_size);
  74. else
  75. new_ldt->entries = (void *)get_zeroed_page(GFP_KERNEL);
  76. if (!new_ldt->entries) {
  77. kfree(new_ldt);
  78. return NULL;
  79. }
  80. /* The new LDT isn't aliased for PTI yet. */
  81. new_ldt->slot = -1;
  82. new_ldt->nr_entries = num_entries;
  83. return new_ldt;
  84. }
  85. /*
  86. * If PTI is enabled, this maps the LDT into the kernelmode and
  87. * usermode tables for the given mm.
  88. *
  89. * There is no corresponding unmap function. Even if the LDT is freed, we
  90. * leave the PTEs around until the slot is reused or the mm is destroyed.
  91. * This is harmless: the LDT is always in ordinary memory, and no one will
  92. * access the freed slot.
  93. *
  94. * If we wanted to unmap freed LDTs, we'd also need to do a flush to make
  95. * it useful, and the flush would slow down modify_ldt().
  96. */
  97. static int
  98. map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
  99. {
  100. #ifdef CONFIG_PAGE_TABLE_ISOLATION
  101. bool is_vmalloc, had_top_level_entry;
  102. unsigned long va;
  103. spinlock_t *ptl;
  104. pgd_t *pgd;
  105. int i;
  106. if (!static_cpu_has(X86_FEATURE_PTI))
  107. return 0;
  108. /*
  109. * Any given ldt_struct should have map_ldt_struct() called at most
  110. * once.
  111. */
  112. WARN_ON(ldt->slot != -1);
  113. /*
  114. * Did we already have the top level entry allocated? We can't
  115. * use pgd_none() for this because it doens't do anything on
  116. * 4-level page table kernels.
  117. */
  118. pgd = pgd_offset(mm, LDT_BASE_ADDR);
  119. had_top_level_entry = (pgd->pgd != 0);
  120. is_vmalloc = is_vmalloc_addr(ldt->entries);
  121. for (i = 0; i * PAGE_SIZE < ldt->nr_entries * LDT_ENTRY_SIZE; i++) {
  122. unsigned long offset = i << PAGE_SHIFT;
  123. const void *src = (char *)ldt->entries + offset;
  124. unsigned long pfn;
  125. pte_t pte, *ptep;
  126. va = (unsigned long)ldt_slot_va(slot) + offset;
  127. pfn = is_vmalloc ? vmalloc_to_pfn(src) :
  128. page_to_pfn(virt_to_page(src));
  129. /*
  130. * Treat the PTI LDT range as a *userspace* range.
  131. * get_locked_pte() will allocate all needed pagetables
  132. * and account for them in this mm.
  133. */
  134. ptep = get_locked_pte(mm, va, &ptl);
  135. if (!ptep)
  136. return -ENOMEM;
  137. /*
  138. * Map it RO so the easy to find address is not a primary
  139. * target via some kernel interface which misses a
  140. * permission check.
  141. */
  142. pte = pfn_pte(pfn, __pgprot(__PAGE_KERNEL_RO & ~_PAGE_GLOBAL));
  143. set_pte_at(mm, va, ptep, pte);
  144. pte_unmap_unlock(ptep, ptl);
  145. }
  146. if (mm->context.ldt) {
  147. /*
  148. * We already had an LDT. The top-level entry should already
  149. * have been allocated and synchronized with the usermode
  150. * tables.
  151. */
  152. WARN_ON(!had_top_level_entry);
  153. if (static_cpu_has(X86_FEATURE_PTI))
  154. WARN_ON(!kernel_to_user_pgdp(pgd)->pgd);
  155. } else {
  156. /*
  157. * This is the first time we're mapping an LDT for this process.
  158. * Sync the pgd to the usermode tables.
  159. */
  160. WARN_ON(had_top_level_entry);
  161. if (static_cpu_has(X86_FEATURE_PTI)) {
  162. WARN_ON(kernel_to_user_pgdp(pgd)->pgd);
  163. set_pgd(kernel_to_user_pgdp(pgd), *pgd);
  164. }
  165. }
  166. va = (unsigned long)ldt_slot_va(slot);
  167. flush_tlb_mm_range(mm, va, va + LDT_SLOT_STRIDE, 0);
  168. ldt->slot = slot;
  169. #endif
  170. return 0;
  171. }
  172. static void free_ldt_pgtables(struct mm_struct *mm)
  173. {
  174. #ifdef CONFIG_PAGE_TABLE_ISOLATION
  175. struct mmu_gather tlb;
  176. unsigned long start = LDT_BASE_ADDR;
  177. unsigned long end = start + (1UL << PGDIR_SHIFT);
  178. if (!static_cpu_has(X86_FEATURE_PTI))
  179. return;
  180. tlb_gather_mmu(&tlb, mm, start, end);
  181. free_pgd_range(&tlb, start, end, start, end);
  182. tlb_finish_mmu(&tlb, start, end);
  183. #endif
  184. }
  185. /* After calling this, the LDT is immutable. */
  186. static void finalize_ldt_struct(struct ldt_struct *ldt)
  187. {
  188. paravirt_alloc_ldt(ldt->entries, ldt->nr_entries);
  189. }
  190. static void install_ldt(struct mm_struct *mm, struct ldt_struct *ldt)
  191. {
  192. mutex_lock(&mm->context.lock);
  193. /* Synchronizes with READ_ONCE in load_mm_ldt. */
  194. smp_store_release(&mm->context.ldt, ldt);
  195. /* Activate the LDT for all CPUs using currents mm. */
  196. on_each_cpu_mask(mm_cpumask(mm), flush_ldt, mm, true);
  197. mutex_unlock(&mm->context.lock);
  198. }
  199. static void free_ldt_struct(struct ldt_struct *ldt)
  200. {
  201. if (likely(!ldt))
  202. return;
  203. paravirt_free_ldt(ldt->entries, ldt->nr_entries);
  204. if (ldt->nr_entries * LDT_ENTRY_SIZE > PAGE_SIZE)
  205. vfree_atomic(ldt->entries);
  206. else
  207. free_page((unsigned long)ldt->entries);
  208. kfree(ldt);
  209. }
  210. /*
  211. * Called on fork from arch_dup_mmap(). Just copy the current LDT state,
  212. * the new task is not running, so nothing can be installed.
  213. */
  214. int ldt_dup_context(struct mm_struct *old_mm, struct mm_struct *mm)
  215. {
  216. struct ldt_struct *new_ldt;
  217. int retval = 0;
  218. if (!old_mm)
  219. return 0;
  220. mutex_lock(&old_mm->context.lock);
  221. if (!old_mm->context.ldt)
  222. goto out_unlock;
  223. new_ldt = alloc_ldt_struct(old_mm->context.ldt->nr_entries);
  224. if (!new_ldt) {
  225. retval = -ENOMEM;
  226. goto out_unlock;
  227. }
  228. memcpy(new_ldt->entries, old_mm->context.ldt->entries,
  229. new_ldt->nr_entries * LDT_ENTRY_SIZE);
  230. finalize_ldt_struct(new_ldt);
  231. retval = map_ldt_struct(mm, new_ldt, 0);
  232. if (retval) {
  233. free_ldt_pgtables(mm);
  234. free_ldt_struct(new_ldt);
  235. goto out_unlock;
  236. }
  237. mm->context.ldt = new_ldt;
  238. out_unlock:
  239. mutex_unlock(&old_mm->context.lock);
  240. return retval;
  241. }
  242. /*
  243. * No need to lock the MM as we are the last user
  244. *
  245. * 64bit: Don't touch the LDT register - we're already in the next thread.
  246. */
  247. void destroy_context_ldt(struct mm_struct *mm)
  248. {
  249. free_ldt_struct(mm->context.ldt);
  250. mm->context.ldt = NULL;
  251. }
  252. void ldt_arch_exit_mmap(struct mm_struct *mm)
  253. {
  254. free_ldt_pgtables(mm);
  255. }
  256. static int read_ldt(void __user *ptr, unsigned long bytecount)
  257. {
  258. struct mm_struct *mm = current->mm;
  259. unsigned long entries_size;
  260. int retval;
  261. down_read(&mm->context.ldt_usr_sem);
  262. if (!mm->context.ldt) {
  263. retval = 0;
  264. goto out_unlock;
  265. }
  266. if (bytecount > LDT_ENTRY_SIZE * LDT_ENTRIES)
  267. bytecount = LDT_ENTRY_SIZE * LDT_ENTRIES;
  268. entries_size = mm->context.ldt->nr_entries * LDT_ENTRY_SIZE;
  269. if (entries_size > bytecount)
  270. entries_size = bytecount;
  271. if (copy_to_user(ptr, mm->context.ldt->entries, entries_size)) {
  272. retval = -EFAULT;
  273. goto out_unlock;
  274. }
  275. if (entries_size != bytecount) {
  276. /* Zero-fill the rest and pretend we read bytecount bytes. */
  277. if (clear_user(ptr + entries_size, bytecount - entries_size)) {
  278. retval = -EFAULT;
  279. goto out_unlock;
  280. }
  281. }
  282. retval = bytecount;
  283. out_unlock:
  284. up_read(&mm->context.ldt_usr_sem);
  285. return retval;
  286. }
  287. static int read_default_ldt(void __user *ptr, unsigned long bytecount)
  288. {
  289. /* CHECKME: Can we use _one_ random number ? */
  290. #ifdef CONFIG_X86_32
  291. unsigned long size = 5 * sizeof(struct desc_struct);
  292. #else
  293. unsigned long size = 128;
  294. #endif
  295. if (bytecount > size)
  296. bytecount = size;
  297. if (clear_user(ptr, bytecount))
  298. return -EFAULT;
  299. return bytecount;
  300. }
  301. static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
  302. {
  303. struct mm_struct *mm = current->mm;
  304. struct ldt_struct *new_ldt, *old_ldt;
  305. unsigned int old_nr_entries, new_nr_entries;
  306. struct user_desc ldt_info;
  307. struct desc_struct ldt;
  308. int error;
  309. error = -EINVAL;
  310. if (bytecount != sizeof(ldt_info))
  311. goto out;
  312. error = -EFAULT;
  313. if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info)))
  314. goto out;
  315. error = -EINVAL;
  316. if (ldt_info.entry_number >= LDT_ENTRIES)
  317. goto out;
  318. if (ldt_info.contents == 3) {
  319. if (oldmode)
  320. goto out;
  321. if (ldt_info.seg_not_present == 0)
  322. goto out;
  323. }
  324. if ((oldmode && !ldt_info.base_addr && !ldt_info.limit) ||
  325. LDT_empty(&ldt_info)) {
  326. /* The user wants to clear the entry. */
  327. memset(&ldt, 0, sizeof(ldt));
  328. } else {
  329. if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) {
  330. error = -EINVAL;
  331. goto out;
  332. }
  333. fill_ldt(&ldt, &ldt_info);
  334. if (oldmode)
  335. ldt.avl = 0;
  336. }
  337. if (down_write_killable(&mm->context.ldt_usr_sem))
  338. return -EINTR;
  339. old_ldt = mm->context.ldt;
  340. old_nr_entries = old_ldt ? old_ldt->nr_entries : 0;
  341. new_nr_entries = max(ldt_info.entry_number + 1, old_nr_entries);
  342. error = -ENOMEM;
  343. new_ldt = alloc_ldt_struct(new_nr_entries);
  344. if (!new_ldt)
  345. goto out_unlock;
  346. if (old_ldt)
  347. memcpy(new_ldt->entries, old_ldt->entries, old_nr_entries * LDT_ENTRY_SIZE);
  348. new_ldt->entries[ldt_info.entry_number] = ldt;
  349. finalize_ldt_struct(new_ldt);
  350. /*
  351. * If we are using PTI, map the new LDT into the userspace pagetables.
  352. * If there is already an LDT, use the other slot so that other CPUs
  353. * will continue to use the old LDT until install_ldt() switches
  354. * them over to the new LDT.
  355. */
  356. error = map_ldt_struct(mm, new_ldt, old_ldt ? !old_ldt->slot : 0);
  357. if (error) {
  358. /*
  359. * This only can fail for the first LDT setup. If an LDT is
  360. * already installed then the PTE page is already
  361. * populated. Mop up a half populated page table.
  362. */
  363. if (!WARN_ON_ONCE(old_ldt))
  364. free_ldt_pgtables(mm);
  365. free_ldt_struct(new_ldt);
  366. goto out_unlock;
  367. }
  368. install_ldt(mm, new_ldt);
  369. free_ldt_struct(old_ldt);
  370. error = 0;
  371. out_unlock:
  372. up_write(&mm->context.ldt_usr_sem);
  373. out:
  374. return error;
  375. }
  376. SYSCALL_DEFINE3(modify_ldt, int , func , void __user * , ptr ,
  377. unsigned long , bytecount)
  378. {
  379. int ret = -ENOSYS;
  380. switch (func) {
  381. case 0:
  382. ret = read_ldt(ptr, bytecount);
  383. break;
  384. case 1:
  385. ret = write_ldt(ptr, bytecount, 1);
  386. break;
  387. case 2:
  388. ret = read_default_ldt(ptr, bytecount);
  389. break;
  390. case 0x11:
  391. ret = write_ldt(ptr, bytecount, 0);
  392. break;
  393. }
  394. /*
  395. * The SYSCALL_DEFINE() macros give us an 'unsigned long'
  396. * return type, but tht ABI for sys_modify_ldt() expects
  397. * 'int'. This cast gives us an int-sized value in %rax
  398. * for the return code. The 'unsigned' is necessary so
  399. * the compiler does not try to sign-extend the negative
  400. * return codes into the high half of the register when
  401. * taking the value from int->long.
  402. */
  403. return (unsigned int)ret;
  404. }