vma.c 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306
  1. /*
  2. * Copyright 2007 Andi Kleen, SUSE Labs.
  3. * Subject to the GPL, v.2
  4. *
  5. * This contains most of the x86 vDSO kernel-side code.
  6. */
  7. #include <linux/mm.h>
  8. #include <linux/err.h>
  9. #include <linux/sched.h>
  10. #include <linux/slab.h>
  11. #include <linux/init.h>
  12. #include <linux/random.h>
  13. #include <linux/elf.h>
  14. #include <linux/cpu.h>
  15. #include <asm/pvclock.h>
  16. #include <asm/vgtod.h>
  17. #include <asm/proto.h>
  18. #include <asm/vdso.h>
  19. #include <asm/vvar.h>
  20. #include <asm/page.h>
  21. #include <asm/hpet.h>
  22. #include <asm/desc.h>
  23. #if defined(CONFIG_X86_64)
  24. unsigned int __read_mostly vdso64_enabled = 1;
  25. #endif
  26. void __init init_vdso_image(const struct vdso_image *image)
  27. {
  28. int i;
  29. int npages = (image->size) / PAGE_SIZE;
  30. BUG_ON(image->size % PAGE_SIZE != 0);
  31. for (i = 0; i < npages; i++)
  32. image->text_mapping.pages[i] =
  33. virt_to_page(image->data + i*PAGE_SIZE);
  34. apply_alternatives((struct alt_instr *)(image->data + image->alt),
  35. (struct alt_instr *)(image->data + image->alt +
  36. image->alt_len));
  37. }
  38. struct linux_binprm;
  39. /*
  40. * Put the vdso above the (randomized) stack with another randomized
  41. * offset. This way there is no hole in the middle of address space.
  42. * To save memory make sure it is still in the same PTE as the stack
  43. * top. This doesn't give that many random bits.
  44. *
  45. * Note that this algorithm is imperfect: the distribution of the vdso
  46. * start address within a PMD is biased toward the end.
  47. *
  48. * Only used for the 64-bit and x32 vdsos.
  49. */
  50. static unsigned long vdso_addr(unsigned long start, unsigned len)
  51. {
  52. #ifdef CONFIG_X86_32
  53. return 0;
  54. #else
  55. unsigned long addr, end;
  56. unsigned offset;
  57. /*
  58. * Round up the start address. It can start out unaligned as a result
  59. * of stack start randomization.
  60. */
  61. start = PAGE_ALIGN(start);
  62. /* Round the lowest possible end address up to a PMD boundary. */
  63. end = (start + len + PMD_SIZE - 1) & PMD_MASK;
  64. if (end >= TASK_SIZE_MAX)
  65. end = TASK_SIZE_MAX;
  66. end -= len;
  67. if (end > start) {
  68. offset = get_random_int() % (((end - start) >> PAGE_SHIFT) + 1);
  69. addr = start + (offset << PAGE_SHIFT);
  70. } else {
  71. addr = start;
  72. }
  73. /*
  74. * Forcibly align the final address in case we have a hardware
  75. * issue that requires alignment for performance reasons.
  76. */
  77. addr = align_vdso_addr(addr);
  78. return addr;
  79. #endif
  80. }
  81. static int map_vdso(const struct vdso_image *image, bool calculate_addr)
  82. {
  83. struct mm_struct *mm = current->mm;
  84. struct vm_area_struct *vma;
  85. unsigned long addr, text_start;
  86. int ret = 0;
  87. static struct page *no_pages[] = {NULL};
  88. static struct vm_special_mapping vvar_mapping = {
  89. .name = "[vvar]",
  90. .pages = no_pages,
  91. };
  92. struct pvclock_vsyscall_time_info *pvti;
  93. if (calculate_addr) {
  94. addr = vdso_addr(current->mm->start_stack,
  95. image->size - image->sym_vvar_start);
  96. } else {
  97. addr = 0;
  98. }
  99. down_write(&mm->mmap_sem);
  100. addr = get_unmapped_area(NULL, addr,
  101. image->size - image->sym_vvar_start, 0, 0);
  102. if (IS_ERR_VALUE(addr)) {
  103. ret = addr;
  104. goto up_fail;
  105. }
  106. text_start = addr - image->sym_vvar_start;
  107. current->mm->context.vdso = (void __user *)text_start;
  108. /*
  109. * MAYWRITE to allow gdb to COW and set breakpoints
  110. */
  111. vma = _install_special_mapping(mm,
  112. text_start,
  113. image->size,
  114. VM_READ|VM_EXEC|
  115. VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
  116. &image->text_mapping);
  117. if (IS_ERR(vma)) {
  118. ret = PTR_ERR(vma);
  119. goto up_fail;
  120. }
  121. vma = _install_special_mapping(mm,
  122. addr,
  123. -image->sym_vvar_start,
  124. VM_READ|VM_MAYREAD,
  125. &vvar_mapping);
  126. if (IS_ERR(vma)) {
  127. ret = PTR_ERR(vma);
  128. goto up_fail;
  129. }
  130. if (image->sym_vvar_page)
  131. ret = remap_pfn_range(vma,
  132. text_start + image->sym_vvar_page,
  133. __pa_symbol(&__vvar_page) >> PAGE_SHIFT,
  134. PAGE_SIZE,
  135. PAGE_READONLY);
  136. if (ret)
  137. goto up_fail;
  138. #ifdef CONFIG_HPET_TIMER
  139. if (hpet_address && image->sym_hpet_page) {
  140. ret = io_remap_pfn_range(vma,
  141. text_start + image->sym_hpet_page,
  142. hpet_address >> PAGE_SHIFT,
  143. PAGE_SIZE,
  144. pgprot_noncached(PAGE_READONLY));
  145. if (ret)
  146. goto up_fail;
  147. }
  148. #endif
  149. pvti = pvclock_pvti_cpu0_va();
  150. if (pvti && image->sym_pvclock_page) {
  151. ret = remap_pfn_range(vma,
  152. text_start + image->sym_pvclock_page,
  153. __pa(pvti) >> PAGE_SHIFT,
  154. PAGE_SIZE,
  155. PAGE_READONLY);
  156. if (ret)
  157. goto up_fail;
  158. }
  159. up_fail:
  160. if (ret)
  161. current->mm->context.vdso = NULL;
  162. up_write(&mm->mmap_sem);
  163. return ret;
  164. }
  165. #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
  166. static int load_vdso32(void)
  167. {
  168. if (vdso32_enabled != 1) /* Other values all mean "disabled" */
  169. return 0;
  170. return map_vdso(&vdso_image_32, false);
  171. }
  172. #endif
  173. #ifdef CONFIG_X86_64
  174. int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
  175. {
  176. if (!vdso64_enabled)
  177. return 0;
  178. return map_vdso(&vdso_image_64, true);
  179. }
  180. #ifdef CONFIG_COMPAT
  /*
   * Compat variant of arch_setup_additional_pages().
   *
   * With CONFIG_X86_X32_ABI, a thread flagged TIF_X32 gets the x32 vDSO at a
   * calculated address (or nothing, returning 0, when vdso64_enabled is
   * zero). All other callers get the 32-bit vDSO when CONFIG_IA32_EMULATION
   * is set, and 0 otherwise. Neither parameter is used here.
   */
  int compat_arch_setup_additional_pages(struct linux_binprm *bprm,
                                         int uses_interp)
  {
  #ifdef CONFIG_X86_X32_ABI
      if (test_thread_flag(TIF_X32))
          return vdso64_enabled ? map_vdso(&vdso_image_x32, true) : 0;
  #endif

  #ifdef CONFIG_IA32_EMULATION
      return load_vdso32();
  #else
      return 0;
  #endif
  }
  197. #endif
  198. #else
  /*
   * Non-64-bit build: the only vDSO to map is the 32-bit one, so simply
   * forward to load_vdso32() and hand back its result. Neither parameter
   * is used here.
   */
  int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
  {
      int ret = load_vdso32();

      return ret;
  }
  203. #endif
  204. #ifdef CONFIG_X86_64
  205. static __init int vdso_setup(char *s)
  206. {
  207. vdso64_enabled = simple_strtoul(s, NULL, 0);
  208. return 0;
  209. }
  210. __setup("vdso=", vdso_setup);
  211. #endif
  212. #ifdef CONFIG_X86_64
  213. static void vgetcpu_cpu_init(void *arg)
  214. {
  215. int cpu = smp_processor_id();
  216. struct desc_struct d = { };
  217. unsigned long node = 0;
  218. #ifdef CONFIG_NUMA
  219. node = cpu_to_node(cpu);
  220. #endif
  221. if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP))
  222. write_rdtscp_aux((node << 12) | cpu);
  223. /*
  224. * Store cpu number in limit so that it can be loaded
  225. * quickly in user space in vgetcpu. (12 bits for the CPU
  226. * and 8 bits for the node)
  227. */
  228. d.limit0 = cpu | ((node & 0xf) << 12);
  229. d.limit = node >> 4;
  230. d.type = 5; /* RO data, expand down, accessed */
  231. d.dpl = 3; /* Visible to user code */
  232. d.s = 1; /* Not a system segment */
  233. d.p = 1; /* Present */
  234. d.d = 1; /* 32-bit */
  235. write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
  236. }
  237. static int
  238. vgetcpu_cpu_notifier(struct notifier_block *n, unsigned long action, void *arg)
  239. {
  240. long cpu = (long)arg;
  241. if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
  242. smp_call_function_single(cpu, vgetcpu_cpu_init, NULL, 1);
  243. return NOTIFY_DONE;
  244. }
  245. static int __init init_vdso(void)
  246. {
  247. init_vdso_image(&vdso_image_64);
  248. #ifdef CONFIG_X86_X32_ABI
  249. init_vdso_image(&vdso_image_x32);
  250. #endif
  251. cpu_notifier_register_begin();
  252. on_each_cpu(vgetcpu_cpu_init, NULL, 1);
  253. /* notifier priority > KVM */
  254. __hotcpu_notifier(vgetcpu_cpu_notifier, 30);
  255. cpu_notifier_register_done();
  256. return 0;
  257. }
  258. subsys_initcall(init_vdso);
  259. #endif /* CONFIG_X86_64 */