process.c

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/prctl.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/pm.h>
#include <linux/tick.h>
#include <linux/random.h>
#include <linux/user-return-notifier.h>
#include <linux/dmi.h>
#include <linux/utsname.h>
#include <linux/stackprotector.h>
#include <linux/tick.h>
#include <linux/cpuidle.h>
#include <trace/events/power.h>
#include <linux/hw_breakpoint.h>
#include <asm/cpu.h>
#include <asm/apic.h>
#include <asm/syscalls.h>
#include <asm/idle.h>
#include <asm/uaccess.h>
#include <asm/i387.h>
#include <asm/fpu-internal.h>
#include <asm/debugreg.h>
#include <asm/nmi.h>
#include <asm/tlbflush.h>

/*
 * per-CPU TSS segments. Threads are completely 'soft' on Linux,
 * no more per-task TSS's. The TSS size is kept cacheline-aligned
 * so they are allowed to end up in the .data..cacheline_aligned
 * section. Since TSS's are completely CPU-local, we want them
 * on exact cacheline boundaries, to eliminate cacheline ping-pong.
 */
__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS;

#ifdef CONFIG_X86_64
static DEFINE_PER_CPU(unsigned char, is_idle);
static ATOMIC_NOTIFIER_HEAD(idle_notifier);

void idle_notifier_register(struct notifier_block *n)
{
	atomic_notifier_chain_register(&idle_notifier, n);
}
EXPORT_SYMBOL_GPL(idle_notifier_register);

void idle_notifier_unregister(struct notifier_block *n)
{
	atomic_notifier_chain_unregister(&idle_notifier, n);
}
EXPORT_SYMBOL_GPL(idle_notifier_unregister);
#endif

struct kmem_cache *task_xstate_cachep;
EXPORT_SYMBOL_GPL(task_xstate_cachep);

/*
 * this gets called so that we can store lazy state into memory and copy the
 * current task into the new thread.
 */
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
	*dst = *src;

	dst->thread.fpu_counter = 0;
	dst->thread.fpu.has_fpu = 0;
	dst->thread.fpu.last_cpu = ~0;
	dst->thread.fpu.state = NULL;
	if (tsk_used_math(src)) {
		int err = fpu_alloc(&dst->thread.fpu);

		if (err)
			return err;
		fpu_copy(dst, src);
	}
	return 0;
}

void free_thread_xstate(struct task_struct *tsk)
{
	fpu_free(&tsk->thread.fpu);
}

void arch_release_task_struct(struct task_struct *tsk)
{
	free_thread_xstate(tsk);
}

void arch_task_cache_init(void)
{
	task_xstate_cachep =
		kmem_cache_create("task_xstate", xstate_size,
				  __alignof__(union thread_xstate),
				  SLAB_PANIC | SLAB_NOTRACK, NULL);
	setup_xstate_comp();
}

/*
 * Free current thread data structures etc..
 */
void exit_thread(void)
{
	struct task_struct *me = current;
	struct thread_struct *t = &me->thread;
	unsigned long *bp = t->io_bitmap_ptr;

	if (bp) {
		struct tss_struct *tss = &per_cpu(init_tss, get_cpu());

		t->io_bitmap_ptr = NULL;
		clear_thread_flag(TIF_IO_BITMAP);
		/*
		 * Careful, clear this in the TSS too:
		 */
		memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
		t->io_bitmap_max = 0;
		put_cpu();
		kfree(bp);
	}

	drop_fpu(me);
}

void flush_thread(void)
{
	struct task_struct *tsk = current;

	flush_ptrace_hw_breakpoint(tsk);
	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
	drop_init_fpu(tsk);
	/*
	 * Free the FPU state for non xsave platforms. They get reallocated
	 * lazily at the first use.
	 */
	if (!use_eager_fpu())
		free_thread_xstate(tsk);
}

static void hard_disable_TSC(void)
{
	cr4_set_bits(X86_CR4_TSD);
}

void disable_TSC(void)
{
	preempt_disable();
	if (!test_and_set_thread_flag(TIF_NOTSC))
		/*
		 * Must flip the CPU state synchronously with
		 * TIF_NOTSC in the current running context.
		 */
		hard_disable_TSC();
	preempt_enable();
}

static void hard_enable_TSC(void)
{
	cr4_clear_bits(X86_CR4_TSD);
}

static void enable_TSC(void)
{
	preempt_disable();
	if (test_and_clear_thread_flag(TIF_NOTSC))
		/*
		 * Must flip the CPU state synchronously with
		 * TIF_NOTSC in the current running context.
		 */
		hard_enable_TSC();
	preempt_enable();
}

int get_tsc_mode(unsigned long adr)
{
	unsigned int val;

	if (test_thread_flag(TIF_NOTSC))
		val = PR_TSC_SIGSEGV;
	else
		val = PR_TSC_ENABLE;

	return put_user(val, (unsigned int __user *)adr);
}

int set_tsc_mode(unsigned int val)
{
	if (val == PR_TSC_SIGSEGV)
		disable_TSC();
	else if (val == PR_TSC_ENABLE)
		enable_TSC();
	else
		return -EINVAL;

	return 0;
}
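
/*
 * Usage sketch: get_tsc_mode()/set_tsc_mode() are the arch backends for
 * prctl(PR_GET_TSC)/prctl(PR_SET_TSC).  From userspace this typically
 * looks like:
 *
 *	prctl(PR_SET_TSC, PR_TSC_SIGSEGV);	// RDTSC faults from now on
 *	prctl(PR_SET_TSC, PR_TSC_ENABLE);	// RDTSC allowed again
 *
 * The fault comes from hard_disable_TSC() setting CR4.TSD, which makes
 * RDTSC a privileged instruction; the resulting #GP in user mode is
 * delivered as SIGSEGV.
 */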

void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
		      struct tss_struct *tss)
{
	struct thread_struct *prev, *next;

	prev = &prev_p->thread;
	next = &next_p->thread;

	if (test_tsk_thread_flag(prev_p, TIF_BLOCKSTEP) ^
	    test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) {
		unsigned long debugctl = get_debugctlmsr();

		debugctl &= ~DEBUGCTLMSR_BTF;
		if (test_tsk_thread_flag(next_p, TIF_BLOCKSTEP))
			debugctl |= DEBUGCTLMSR_BTF;

		update_debugctlmsr(debugctl);
	}

	if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
	    test_tsk_thread_flag(next_p, TIF_NOTSC)) {
		/* prev and next are different */
		if (test_tsk_thread_flag(next_p, TIF_NOTSC))
			hard_disable_TSC();
		else
			hard_enable_TSC();
	}

	if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
		/*
		 * Copy the relevant range of the IO bitmap.
		 * Normally this is 128 bytes or less:
		 */
		memcpy(tss->io_bitmap, next->io_bitmap_ptr,
		       max(prev->io_bitmap_max, next->io_bitmap_max));
	} else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
		/*
		 * Clear any possible leftover bits:
		 */
		memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
	}
	propagate_user_return_notify(prev_p, next_p);
}
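
/*
 * Usage sketch: the TIF_IO_BITMAP/io_bitmap_ptr state handled above is
 * normally created by the ioperm(2) syscall, e.g. a privileged task doing
 *
 *	ioperm(0x378, 3, 1);	// allow access to I/O ports 0x378-0x37a
 *
 * gets a per-task I/O permission bitmap which __switch_to_xtra() copies
 * into the per-CPU TSS whenever that task is switched in.
 */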

/*
 * Idle related variables and functions
 */
unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE;
EXPORT_SYMBOL(boot_option_idle_override);

static void (*x86_idle)(void);

#ifndef CONFIG_SMP
static inline void play_dead(void)
{
	BUG();
}
#endif

#ifdef CONFIG_X86_64
void enter_idle(void)
{
	this_cpu_write(is_idle, 1);
	atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
}

static void __exit_idle(void)
{
	if (x86_test_and_clear_bit_percpu(0, is_idle) == 0)
		return;
	atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
}

/* Called from interrupts to signify idle end */
void exit_idle(void)
{
	/* idle loop has pid 0 */
	if (current->pid)
		return;
	__exit_idle();
}
#endif

void arch_cpu_idle_enter(void)
{
	local_touch_nmi();
	enter_idle();
}

void arch_cpu_idle_exit(void)
{
	__exit_idle();
}

void arch_cpu_idle_dead(void)
{
	play_dead();
}

/*
 * Called from the generic idle code.
 */
void arch_cpu_idle(void)
{
	x86_idle();
}

/*
 * We use this if we don't have any better idle routine..
 */
void default_idle(void)
{
	trace_cpu_idle_rcuidle(1, smp_processor_id());
	safe_halt();
	trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
}
#ifdef CONFIG_APM_MODULE
EXPORT_SYMBOL(default_idle);
#endif

#ifdef CONFIG_XEN
bool xen_set_default_idle(void)
{
	bool ret = !!x86_idle;

	x86_idle = default_idle;

	return ret;
}
#endif

void stop_this_cpu(void *dummy)
{
	local_irq_disable();
	/*
	 * Remove this CPU:
	 */
	set_cpu_online(smp_processor_id(), false);
	disable_local_APIC();

	for (;;)
		halt();
}

bool amd_e400_c1e_detected;
EXPORT_SYMBOL(amd_e400_c1e_detected);

static cpumask_var_t amd_e400_c1e_mask;

void amd_e400_remove_cpu(int cpu)
{
	if (amd_e400_c1e_mask != NULL)
		cpumask_clear_cpu(cpu, amd_e400_c1e_mask);
}

/*
 * AMD Erratum 400 aware idle routine. We check for C1E active in the interrupt
 * pending message MSR. If we detect C1E, then we handle it the same
 * way as C3 power states (local apic timer and TSC stop)
 */
static void amd_e400_idle(void)
{
	if (!amd_e400_c1e_detected) {
		u32 lo, hi;

		rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi);

		if (lo & K8_INTP_C1E_ACTIVE_MASK) {
			amd_e400_c1e_detected = true;
			if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
				mark_tsc_unstable("TSC halt in AMD C1E");
			pr_info("System has AMD C1E enabled\n");
		}
	}

	if (amd_e400_c1e_detected) {
		int cpu = smp_processor_id();

		if (!cpumask_test_cpu(cpu, amd_e400_c1e_mask)) {
			cpumask_set_cpu(cpu, amd_e400_c1e_mask);
			/* Force broadcast so ACPI can not interfere. */
			tick_broadcast_force();
			pr_info("Switch to broadcast mode on CPU%d\n", cpu);
		}
		clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);

		default_idle();

		/*
		 * The switch back from broadcast mode needs to be
		 * called with interrupts disabled.
		 */
		local_irq_disable();
		clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
		local_irq_enable();
	} else
		default_idle();
}

void select_idle_routine(const struct cpuinfo_x86 *c)
{
#ifdef CONFIG_SMP
	if (boot_option_idle_override == IDLE_POLL && smp_num_siblings > 1)
		pr_warn_once("WARNING: polling idle and HT enabled, performance may degrade\n");
#endif
	if (x86_idle || boot_option_idle_override == IDLE_POLL)
		return;

	if (cpu_has_bug(c, X86_BUG_AMD_APIC_C1E)) {
		/* E400: APIC timer interrupt does not wake up CPU from C1e */
		pr_info("using AMD E400 aware idle routine\n");
		x86_idle = amd_e400_idle;
	} else
		x86_idle = default_idle;
}

void __init init_amd_e400_c1e_mask(void)
{
	/* If we're using amd_e400_idle, we need to allocate amd_e400_c1e_mask. */
	if (x86_idle == amd_e400_idle)
		zalloc_cpumask_var(&amd_e400_c1e_mask, GFP_KERNEL);
}

static int __init idle_setup(char *str)
{
	if (!str)
		return -EINVAL;

	if (!strcmp(str, "poll")) {
		pr_info("using polling idle threads\n");
		boot_option_idle_override = IDLE_POLL;
		cpu_idle_poll_ctrl(true);
	} else if (!strcmp(str, "halt")) {
		/*
		 * When the boot option of idle=halt is added, halt is
		 * forced to be used for CPU idle. In such case CPU C2/C3
		 * won't be used again.
		 * To continue to load the CPU idle driver, don't touch
		 * the boot_option_idle_override.
		 */
		x86_idle = default_idle;
		boot_option_idle_override = IDLE_HALT;
	} else if (!strcmp(str, "nomwait")) {
		/*
		 * If the boot option of "idle=nomwait" is added,
		 * it means that mwait will be disabled for CPU C2/C3
		 * states. In such case it won't touch the variable
		 * of boot_option_idle_override.
		 */
		boot_option_idle_override = IDLE_NOMWAIT;
	} else
		return -1;

	return 0;
}
early_param("idle", idle_setup);
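
/*
 * Boot command line sketch for the "idle" parameter parsed by idle_setup()
 * above:
 *
 *	idle=poll	- poll in the idle loop instead of halting (IDLE_POLL)
 *	idle=halt	- force HLT via default_idle() for idle (IDLE_HALT)
 *	idle=nomwait	- disable MWAIT for the C2/C3 idle states (IDLE_NOMWAIT)
 */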

unsigned long arch_align_stack(unsigned long sp)
{
	/* Randomize the stack top by up to 8 kB, keeping 16-byte alignment. */
	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
		sp -= get_random_int() % 8192;
	return sp & ~0xf;
}

unsigned long arch_randomize_brk(struct mm_struct *mm)
{
	/* Place the heap start anywhere within 32 MB (0x02000000) above brk. */
	unsigned long range_end = mm->brk + 0x02000000;

	return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
}