
/*
 * TLB flush routines for radix kernels.
 *
 * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/memblock.h>
#include <linux/mmu_context.h>
#include <linux/sched/mm.h>

#include <asm/ppc-opcode.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/trace.h>
#include <asm/cputhreads.h>

#define RIC_FLUSH_TLB 0
#define RIC_FLUSH_PWC 1
#define RIC_FLUSH_ALL 2
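
/*
 * RIC (Radix Invalidation Control) values for tlbie/tlbiel:
 *   0 - invalidate TLB entries only
 *   1 - invalidate the page walk cache (PWC) only
 *   2 - invalidate TLB, PWC and cached process/partition table entries
 */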

/*
 * tlbiel instruction for radix, set invalidation
 * i.e., r=1 and is=01 or is=10 or is=11
 */
static inline void tlbiel_radix_set_isa300(unsigned int set, unsigned int is,
					unsigned int pid,
					unsigned int ric, unsigned int prs)
{
	unsigned long rb;
	unsigned long rs;

	rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
	rs = ((unsigned long)pid << PPC_BITLSHIFT(31));

	asm volatile(PPC_TLBIEL(%0, %1, %2, %3, 1)
		     : : "r"(rb), "r"(rs), "i"(ric), "i"(prs)
		     : "memory");
}

static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
{
	unsigned int set;

	asm volatile("ptesync": : :"memory");

	/*
	 * Flush the first set of the TLB, and the entire Page Walk Cache
	 * and partition table entries. Then flush the remaining sets of the
	 * TLB.
	 */
	tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 0);
	for (set = 1; set < num_sets; set++)
		tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 0);

	/* Do the same for process scoped entries. */
	tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 1);
	for (set = 1; set < num_sets; set++)
		tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1);

	asm volatile("ptesync": : :"memory");
}

void radix__tlbiel_all(unsigned int action)
{
	unsigned int is;

	switch (action) {
	case TLB_INVAL_SCOPE_GLOBAL:
		is = 3;
		break;
	case TLB_INVAL_SCOPE_LPID:
		is = 2;
		break;
	default:
		BUG();
	}

	if (early_cpu_has_feature(CPU_FTR_ARCH_300))
		tlbiel_all_isa300(POWER9_TLB_SETS_RADIX, is);
	else
		WARN(1, "%s called on pre-POWER9 CPU\n", __func__);

	asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory");
}
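
/*
 * Naming of the low-level helpers below:
 *   __tlbiel_* / __tlbie_* - raw local (tlbiel) / global broadcast (tlbie)
 *                            instructions, no surrounding synchronisation
 *   *_pid  - process scoped invalidation selected by PID
 *   *_lpid - partition scoped invalidation selected by LPID
 *   *_va   - invalidation of a single virtual address
 */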

static inline void __tlbiel_pid(unsigned long pid, int set,
				unsigned long ric)
{
	unsigned long rb,rs,prs,r;

	rb = PPC_BIT(53); /* IS = 1 */
	rb |= set << PPC_BITLSHIFT(51);
	rs = ((unsigned long)pid) << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 1, rb, rs, ric, prs, r);
}

static inline void __tlbie_pid(unsigned long pid, unsigned long ric)
{
	unsigned long rb,rs,prs,r;

	rb = PPC_BIT(53); /* IS = 1 */
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 0, rb, rs, ric, prs, r);
}

static inline void __tlbiel_lpid(unsigned long lpid, int set,
				unsigned long ric)
{
	unsigned long rb,rs,prs,r;

	rb = PPC_BIT(52); /* IS = 2 */
	rb |= set << PPC_BITLSHIFT(51);
	rs = 0;  /* LPID comes from LPIDR */
	prs = 0; /* partition scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 1, rb, rs, ric, prs, r);
}

static inline void __tlbie_lpid(unsigned long lpid, unsigned long ric)
{
	unsigned long rb,rs,prs,r;

	rb = PPC_BIT(52); /* IS = 2 */
	rs = lpid;
	prs = 0; /* partition scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}

static inline void __tlbiel_lpid_guest(unsigned long lpid, int set,
				unsigned long ric)
{
	unsigned long rb,rs,prs,r;

	rb = PPC_BIT(52); /* IS = 2 */
	rb |= set << PPC_BITLSHIFT(51);
	rs = 0;  /* LPID comes from LPIDR */
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 1, rb, rs, ric, prs, r);
}

static inline void __tlbiel_va(unsigned long va, unsigned long pid,
			       unsigned long ap, unsigned long ric)
{
	unsigned long rb,rs,prs,r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 1, rb, rs, ric, prs, r);
}

static inline void __tlbie_va(unsigned long va, unsigned long pid,
			      unsigned long ap, unsigned long ric)
{
	unsigned long rb,rs,prs,r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 0, rb, rs, ric, prs, r);
}

static inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid,
			      unsigned long ap, unsigned long ric)
{
	unsigned long rb,rs,prs,r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = lpid;
	prs = 0; /* partition scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}
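
/*
 * fixup_tlbie()/fixup_tlbie_lpid(): on CPUs with CPU_FTR_P9_TLBIE_BUG, the
 * callers below issue one extra tlbie to a fixed high address after the
 * real invalidations (and before the tlbsync completion sequence).
 */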

static inline void fixup_tlbie(void)
{
	unsigned long pid = 0;
	unsigned long va = ((1UL << 52) - 1);

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
	}
}

static inline void fixup_tlbie_lpid(unsigned long lpid)
{
	unsigned long va = ((1UL << 52) - 1);

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid_va(va, lpid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
	}
}

/*
 * We use 128 sets in radix mode and 256 sets in hpt mode.
 */
static inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
{
	int set;

	asm volatile("ptesync": : :"memory");

	/*
	 * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL,
	 * also flush the entire Page Walk Cache.
	 */
	__tlbiel_pid(pid, 0, ric);

	/* For PWC, only one flush is needed */
	if (ric == RIC_FLUSH_PWC) {
		asm volatile("ptesync": : :"memory");
		return;
	}

	/* For the remaining sets, just flush the TLB */
	for (set = 1; set < POWER9_TLB_SETS_RADIX; set++)
		__tlbiel_pid(pid, set, RIC_FLUSH_TLB);

	asm volatile("ptesync": : :"memory");
	asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory");
}
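
/*
 * For the global (_tlbie_*) variants: the leading ptesync makes prior PTE
 * updates visible before the broadcast invalidation, and the trailing
 * "eieio; tlbsync; ptesync" waits for the tlbie to complete on all
 * processors before returning.
 */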

static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
{
	asm volatile("ptesync": : :"memory");

	/*
	 * Workaround the fact that the "ric" argument to __tlbie_pid
	 * must be a compile-time constraint to match the "i" constraint
	 * in the asm statement.
	 */
	switch (ric) {
	case RIC_FLUSH_TLB:
		__tlbie_pid(pid, RIC_FLUSH_TLB);
		break;
	case RIC_FLUSH_PWC:
		__tlbie_pid(pid, RIC_FLUSH_PWC);
		break;
	case RIC_FLUSH_ALL:
	default:
		__tlbie_pid(pid, RIC_FLUSH_ALL);
	}
	fixup_tlbie();
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void _tlbiel_lpid(unsigned long lpid, unsigned long ric)
{
	int set;

	VM_BUG_ON(mfspr(SPRN_LPID) != lpid);

	asm volatile("ptesync": : :"memory");

	/*
	 * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL,
	 * also flush the entire Page Walk Cache.
	 */
	__tlbiel_lpid(lpid, 0, ric);

	/* For PWC, only one flush is needed */
	if (ric == RIC_FLUSH_PWC) {
		asm volatile("ptesync": : :"memory");
		return;
	}

	/* For the remaining sets, just flush the TLB */
	for (set = 1; set < POWER9_TLB_SETS_RADIX; set++)
		__tlbiel_lpid(lpid, set, RIC_FLUSH_TLB);

	asm volatile("ptesync": : :"memory");
	asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory");
}

static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)
{
	asm volatile("ptesync": : :"memory");

	/*
	 * Workaround the fact that the "ric" argument to __tlbie_lpid
	 * must be a compile-time constraint to match the "i" constraint
	 * in the asm statement.
	 */
	switch (ric) {
	case RIC_FLUSH_TLB:
		__tlbie_lpid(lpid, RIC_FLUSH_TLB);
		break;
	case RIC_FLUSH_PWC:
		__tlbie_lpid(lpid, RIC_FLUSH_PWC);
		break;
	case RIC_FLUSH_ALL:
	default:
		__tlbie_lpid(lpid, RIC_FLUSH_ALL);
	}
	fixup_tlbie_lpid(lpid);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void _tlbiel_lpid_guest(unsigned long lpid, unsigned long ric)
{
	int set;

	VM_BUG_ON(mfspr(SPRN_LPID) != lpid);

	asm volatile("ptesync": : :"memory");

	/*
	 * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL,
	 * also flush the entire Page Walk Cache.
	 */
	__tlbiel_lpid_guest(lpid, 0, ric);

	/* For PWC, only one flush is needed */
	if (ric == RIC_FLUSH_PWC) {
		asm volatile("ptesync": : :"memory");
		return;
	}

	/* For the remaining sets, just flush the TLB */
	for (set = 1; set < POWER9_TLB_SETS_RADIX; set++)
		__tlbiel_lpid_guest(lpid, set, RIC_FLUSH_TLB);

	asm volatile("ptesync": : :"memory");
}
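
/*
 * The __*_va_range() helpers below issue one invalidation per page and do
 * no synchronisation of their own; the _*_va_range() wrappers bracket them
 * with ptesync (and, for tlbie, the tlbsync completion sequence).
 */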

static inline void __tlbiel_va_range(unsigned long start, unsigned long end,
				    unsigned long pid, unsigned long page_size,
				    unsigned long psize)
{
	unsigned long addr;
	unsigned long ap = mmu_get_ap(psize);

	for (addr = start; addr < end; addr += page_size)
		__tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
}

static inline void _tlbiel_va(unsigned long va, unsigned long pid,
			      unsigned long psize, unsigned long ric)
{
	unsigned long ap = mmu_get_ap(psize);

	asm volatile("ptesync": : :"memory");
	__tlbiel_va(va, pid, ap, ric);
	asm volatile("ptesync": : :"memory");
}

static inline void _tlbiel_va_range(unsigned long start, unsigned long end,
				    unsigned long pid, unsigned long page_size,
				    unsigned long psize, bool also_pwc)
{
	asm volatile("ptesync": : :"memory");
	if (also_pwc)
		__tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
	__tlbiel_va_range(start, end, pid, page_size, psize);
	asm volatile("ptesync": : :"memory");
}

static inline void __tlbie_va_range(unsigned long start, unsigned long end,
				    unsigned long pid, unsigned long page_size,
				    unsigned long psize)
{
	unsigned long addr;
	unsigned long ap = mmu_get_ap(psize);

	for (addr = start; addr < end; addr += page_size)
		__tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
}

static inline void _tlbie_va(unsigned long va, unsigned long pid,
			     unsigned long psize, unsigned long ric)
{
	unsigned long ap = mmu_get_ap(psize);

	asm volatile("ptesync": : :"memory");
	__tlbie_va(va, pid, ap, ric);
	fixup_tlbie();
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid,
			     unsigned long psize, unsigned long ric)
{
	unsigned long ap = mmu_get_ap(psize);

	asm volatile("ptesync": : :"memory");
	__tlbie_lpid_va(va, lpid, ap, ric);
	fixup_tlbie_lpid(lpid);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void _tlbie_va_range(unsigned long start, unsigned long end,
				    unsigned long pid, unsigned long page_size,
				    unsigned long psize, bool also_pwc)
{
	asm volatile("ptesync": : :"memory");
	if (also_pwc)
		__tlbie_pid(pid, RIC_FLUSH_PWC);
	__tlbie_va_range(start, end, pid, page_size, psize);
	fixup_tlbie();
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

/*
 * Base TLB flushing operations:
 *
 *  - flush_tlb_mm(mm) flushes the specified mm context TLB's
 *  - flush_tlb_page(vma, vmaddr) flushes one page
 *  - flush_tlb_range(vma, start, end) flushes a range of pages
 *  - flush_tlb_kernel_range(start, end) flushes kernel pages
 *
 *  - local_* variants of page and mm only apply to the current
 *    processor
 */
void radix__local_flush_tlb_mm(struct mm_struct *mm)
{
	unsigned long pid;

	preempt_disable();
	pid = mm->context.id;
	if (pid != MMU_NO_CONTEXT)
		_tlbiel_pid(pid, RIC_FLUSH_TLB);
	preempt_enable();
}
EXPORT_SYMBOL(radix__local_flush_tlb_mm);

#ifndef CONFIG_SMP
void radix__local_flush_all_mm(struct mm_struct *mm)
{
	unsigned long pid;

	preempt_disable();
	pid = mm->context.id;
	if (pid != MMU_NO_CONTEXT)
		_tlbiel_pid(pid, RIC_FLUSH_ALL);
	preempt_enable();
}
EXPORT_SYMBOL(radix__local_flush_all_mm);
#endif /* CONFIG_SMP */

void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
				       int psize)
{
	unsigned long pid;

	preempt_disable();
	pid = mm->context.id;
	if (pid != MMU_NO_CONTEXT)
		_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
	preempt_enable();
}

void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
#ifdef CONFIG_HUGETLB_PAGE
	/* need the return fix for nohash.c */
	if (is_vm_hugetlb_page(vma))
		return radix__local_flush_hugetlb_page(vma, vmaddr);
#endif
	radix__local_flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
}
EXPORT_SYMBOL(radix__local_flush_tlb_page);

static bool mm_is_singlethreaded(struct mm_struct *mm)
{
	if (atomic_read(&mm->context.copros) > 0)
		return false;
	if (atomic_read(&mm->mm_users) <= 1 && current->mm == mm)
		return true;
	return false;
}

static bool mm_needs_flush_escalation(struct mm_struct *mm)
{
	/*
	 * P9 nest MMU has issues with the page walk cache
	 * caching PTEs and not flushing them properly when
	 * RIC = 0 for a PID/LPID invalidate
	 */
	if (atomic_read(&mm->context.copros) > 0)
		return true;
	return false;
}

#ifdef CONFIG_SMP
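
/*
 * When an mm has gone back to a single user thread, remote CPUs may still
 * have it active as a lazy (kernel thread) mm. The IPI below switches any
 * such CPU over to init_mm and flushes the PID there, so that subsequent
 * flushes for this mm can be done with local tlbiel only.
 */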

static void do_exit_flush_lazy_tlb(void *arg)
{
	struct mm_struct *mm = arg;
	unsigned long pid = mm->context.id;

	if (current->mm == mm)
		return; /* Local CPU */

	if (current->active_mm == mm) {
		/*
		 * Must be a kernel thread because sender is single-threaded.
		 */
		BUG_ON(current->mm);
		mmgrab(&init_mm);
		switch_mm(mm, &init_mm, current);
		current->active_mm = &init_mm;
		mmdrop(mm);
	}
	_tlbiel_pid(pid, RIC_FLUSH_ALL);
}

static void exit_flush_lazy_tlbs(struct mm_struct *mm)
{
	/*
	 * Would be nice if this was async so it could be run in
	 * parallel with our local flush, but generic code does not
	 * give a good API for it. Could extend the generic code or
	 * make a special powerpc IPI for flushing TLBs.
	 * For now it's not too performance critical.
	 */
	smp_call_function_many(mm_cpumask(mm), do_exit_flush_lazy_tlb,
				(void *)mm, 1);
	mm_reset_thread_local(mm);
}

void radix__flush_tlb_mm(struct mm_struct *mm)
{
	unsigned long pid;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	/*
	 * Order loads of mm_cpumask vs previous stores to clear ptes before
	 * the invalidate. See barrier in switch_mm_irqs_off
	 */
	smp_mb();
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			exit_flush_lazy_tlbs(mm);
			goto local;
		}

		if (mm_needs_flush_escalation(mm))
			_tlbie_pid(pid, RIC_FLUSH_ALL);
		else
			_tlbie_pid(pid, RIC_FLUSH_TLB);
	} else {
local:
		_tlbiel_pid(pid, RIC_FLUSH_TLB);
	}
	preempt_enable();
}
EXPORT_SYMBOL(radix__flush_tlb_mm);

static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
{
	unsigned long pid;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			if (!fullmm) {
				exit_flush_lazy_tlbs(mm);
				goto local;
			}
		}
		_tlbie_pid(pid, RIC_FLUSH_ALL);
	} else {
local:
		_tlbiel_pid(pid, RIC_FLUSH_ALL);
	}
	preempt_enable();
}

void radix__flush_all_mm(struct mm_struct *mm)
{
	__flush_all_mm(mm, false);
}
EXPORT_SYMBOL(radix__flush_all_mm);
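
/*
 * radix__flush_tlb_pwc() only records that a page walk cache flush is
 * needed; the flush itself is issued later from radix__tlb_flush() via
 * the need_flush_all path.
 */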

void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr)
{
	tlb->need_flush_all = 1;
}
EXPORT_SYMBOL(radix__flush_tlb_pwc);

void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
				 int psize)
{
	unsigned long pid;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			exit_flush_lazy_tlbs(mm);
			goto local;
		}
		_tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
	} else {
local:
		_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
	}
	preempt_enable();
}

void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
#ifdef CONFIG_HUGETLB_PAGE
	if (is_vm_hugetlb_page(vma))
		return radix__flush_hugetlb_page(vma, vmaddr);
#endif
	radix__flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
}
EXPORT_SYMBOL(radix__flush_tlb_page);

#else /* CONFIG_SMP */
#define radix__flush_all_mm radix__local_flush_all_mm
#endif /* CONFIG_SMP */
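
/*
 * Kernel translations are cached under PID 0 in radix mode, so a full
 * flush of PID 0 is used to cover any kernel range rather than walking
 * the range page by page.
 */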

void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
	_tlbie_pid(0, RIC_FLUSH_ALL);
}
EXPORT_SYMBOL(radix__flush_tlb_kernel_range);

#define TLB_FLUSH_ALL -1UL

/*
 * Number of pages above which we invalidate the entire PID rather than
 * flush individual pages, for local and global flushes respectively.
 *
 * tlbie goes out to the interconnect and individual ops are more costly.
 * It also does not iterate over sets like the local tlbiel variant when
 * invalidating a full PID, so it has a far lower threshold to change from
 * individual page flushes to full-pid flushes.
 */
static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
static unsigned long tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2;

static inline void __radix__flush_tlb_range(struct mm_struct *mm,
					unsigned long start, unsigned long end,
					bool flush_all_sizes)
{
	unsigned long pid;
	unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift;
	unsigned long page_size = 1UL << page_shift;
	unsigned long nr_pages = (end - start) >> page_shift;
	bool local, full;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			if (end != TLB_FLUSH_ALL) {
				exit_flush_lazy_tlbs(mm);
				goto is_local;
			}
		}
		local = false;
		full = (end == TLB_FLUSH_ALL ||
				nr_pages > tlb_single_page_flush_ceiling);
	} else {
is_local:
		local = true;
		full = (end == TLB_FLUSH_ALL ||
				nr_pages > tlb_local_single_page_flush_ceiling);
	}

	if (full) {
		if (local) {
			_tlbiel_pid(pid, RIC_FLUSH_TLB);
		} else {
			if (mm_needs_flush_escalation(mm))
				_tlbie_pid(pid, RIC_FLUSH_ALL);
			else
				_tlbie_pid(pid, RIC_FLUSH_TLB);
		}
	} else {
		bool hflush = flush_all_sizes;
		bool gflush = flush_all_sizes;
		unsigned long hstart, hend;
		unsigned long gstart, gend;

		if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
			hflush = true;

		if (hflush) {
			hstart = (start + PMD_SIZE - 1) & PMD_MASK;
			hend = end & PMD_MASK;
			if (hstart == hend)
				hflush = false;
		}

		if (gflush) {
			gstart = (start + PUD_SIZE - 1) & PUD_MASK;
			gend = end & PUD_MASK;
			if (gstart == gend)
				gflush = false;
		}

		asm volatile("ptesync": : :"memory");
		if (local) {
			__tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize);
			if (hflush)
				__tlbiel_va_range(hstart, hend, pid,
						PMD_SIZE, MMU_PAGE_2M);
			if (gflush)
				__tlbiel_va_range(gstart, gend, pid,
						PUD_SIZE, MMU_PAGE_1G);
			asm volatile("ptesync": : :"memory");
		} else {
			__tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize);
			if (hflush)
				__tlbie_va_range(hstart, hend, pid,
						PMD_SIZE, MMU_PAGE_2M);
			if (gflush)
				__tlbie_va_range(gstart, gend, pid,
						PUD_SIZE, MMU_PAGE_1G);
			fixup_tlbie();
			asm volatile("eieio; tlbsync; ptesync": : :"memory");
		}
	}
	preempt_enable();
}

void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
			    unsigned long end)
{
#ifdef CONFIG_HUGETLB_PAGE
	if (is_vm_hugetlb_page(vma))
		return radix__flush_hugetlb_tlb_range(vma, start, end);
#endif
	__radix__flush_tlb_range(vma->vm_mm, start, end, false);
}
EXPORT_SYMBOL(radix__flush_tlb_range);

static int radix_get_mmu_psize(int page_size)
{
	int psize;

	if (page_size == (1UL << mmu_psize_defs[mmu_virtual_psize].shift))
		psize = mmu_virtual_psize;
	else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_2M].shift))
		psize = MMU_PAGE_2M;
	else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_1G].shift))
		psize = MMU_PAGE_1G;
	else
		return -1;
	return psize;
}

/*
 * Flush partition scoped LPID address translation for all CPUs.
 */
void radix__flush_tlb_lpid_page(unsigned int lpid,
				unsigned long addr,
				unsigned long page_size)
{
	int psize = radix_get_mmu_psize(page_size);

	_tlbie_lpid_va(addr, lpid, psize, RIC_FLUSH_TLB);
}
EXPORT_SYMBOL_GPL(radix__flush_tlb_lpid_page);

/*
 * Flush partition scoped PWC from LPID for all CPUs.
 */
void radix__flush_pwc_lpid(unsigned int lpid)
{
	_tlbie_lpid(lpid, RIC_FLUSH_PWC);
}
EXPORT_SYMBOL_GPL(radix__flush_pwc_lpid);

/*
 * Flush partition scoped translations from LPID (=LPIDR)
 */
void radix__local_flush_tlb_lpid(unsigned int lpid)
{
	_tlbiel_lpid(lpid, RIC_FLUSH_ALL);
}
EXPORT_SYMBOL_GPL(radix__local_flush_tlb_lpid);

/*
 * Flush process scoped translations from LPID (=LPIDR).
 * Important difference: the guest normally manages its own translations,
 * but some cases, e.g. vCPU migration, require KVM to flush.
 */
void radix__local_flush_tlb_lpid_guest(unsigned int lpid)
{
	_tlbiel_lpid_guest(lpid, RIC_FLUSH_ALL);
}
EXPORT_SYMBOL_GPL(radix__local_flush_tlb_lpid_guest);

static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
				  unsigned long end, int psize);
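
/*
 * radix__tlb_flush() is the flush hook run at the end of an mmu_gather
 * (tlb_finish_mmu()); it chooses between a full-PID flush, a range flush,
 * or a page-size specific range flush based on what the gather recorded.
 */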

void radix__tlb_flush(struct mmu_gather *tlb)
{
	int psize = 0;
	struct mm_struct *mm = tlb->mm;
	int page_size = tlb->page_size;
	unsigned long start = tlb->start;
	unsigned long end = tlb->end;

	/*
	 * if page size is not something we understand, do a full mm flush
	 *
	 * A "fullmm" flush must always do a flush_all_mm (RIC=2) flush
	 * that flushes the process table entry cache upon process teardown.
	 * See the comment for radix in arch_exit_mmap().
	 */
	if (tlb->fullmm) {
		__flush_all_mm(mm, true);
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)
	} else if (mm_tlb_flush_nested(mm)) {
		/*
		 * If there is a concurrent invalidation that is clearing ptes,
		 * then it's possible this invalidation will miss one of those
		 * cleared ptes and miss flushing the TLB. If this invalidate
		 * returns before the other one flushes TLBs, that can result
		 * in it returning while there are still valid TLBs inside the
		 * range to be invalidated.
		 *
		 * See mm/memory.c:tlb_finish_mmu() for more details.
		 *
		 * The solution to this is ensure the entire range is always
		 * flushed here. The problem for powerpc is that the flushes
		 * are page size specific, so this "forced flush" would not
		 * do the right thing if there are a mix of page sizes in
		 * the range to be invalidated. So use __flush_tlb_range
		 * which invalidates all possible page sizes in the range.
		 *
		 * A PWC flush is probably not required because the core code
		 * shouldn't free page tables in this path, but accounting
		 * for the possibility makes us a bit more robust.
		 *
		 * need_flush_all is an uncommon case because page table
		 * teardown should be done with exclusive locks held (but
		 * after locks are dropped another invalidate could come
		 * in), so it could be optimized further if necessary.
		 */
		if (!tlb->need_flush_all)
			__radix__flush_tlb_range(mm, start, end, true);
		else
			radix__flush_all_mm(mm);
#endif
	} else if ((psize = radix_get_mmu_psize(page_size)) == -1) {
		if (!tlb->need_flush_all)
			radix__flush_tlb_mm(mm);
		else
			radix__flush_all_mm(mm);
	} else {
		if (!tlb->need_flush_all)
			radix__flush_tlb_range_psize(mm, start, end, psize);
		else
			radix__flush_tlb_pwc_range_psize(mm, start, end, psize);
	}
	tlb->need_flush_all = 0;
}
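
/*
 * Like __radix__flush_tlb_range(), but for a single known page size, with
 * an optional page walk cache flush when page tables may have been freed.
 */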

static inline void __radix__flush_tlb_range_psize(struct mm_struct *mm,
				unsigned long start, unsigned long end,
				int psize, bool also_pwc)
{
	unsigned long pid;
	unsigned int page_shift = mmu_psize_defs[psize].shift;
	unsigned long page_size = 1UL << page_shift;
	unsigned long nr_pages = (end - start) >> page_shift;
	bool local, full;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			if (end != TLB_FLUSH_ALL) {
				exit_flush_lazy_tlbs(mm);
				goto is_local;
			}
		}
		local = false;
		full = (end == TLB_FLUSH_ALL ||
				nr_pages > tlb_single_page_flush_ceiling);
	} else {
is_local:
		local = true;
		full = (end == TLB_FLUSH_ALL ||
				nr_pages > tlb_local_single_page_flush_ceiling);
	}

	if (full) {
		if (local) {
			_tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
		} else {
			if (mm_needs_flush_escalation(mm))
				also_pwc = true;

			_tlbie_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
		}
	} else {
		if (local)
			_tlbiel_va_range(start, end, pid, page_size, psize, also_pwc);
		else
			_tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
	}
	preempt_enable();
}

void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
				  unsigned long end, int psize)
{
	return __radix__flush_tlb_range_psize(mm, start, end, psize, false);
}

static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
				  unsigned long end, int psize)
{
	__radix__flush_tlb_range_psize(mm, start, end, psize, true);
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
{
	unsigned long pid, end;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	/* 4k page size, just blow the world */
	if (PAGE_SIZE == 0x1000) {
		radix__flush_all_mm(mm);
		return;
	}

	end = addr + HPAGE_PMD_SIZE;

	/* Otherwise first do the PWC, then iterate the pages. */
	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			exit_flush_lazy_tlbs(mm);
			goto local;
		}
		_tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
		goto local;
	} else {
local:
		_tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
	}
	preempt_enable();
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

void radix__flush_pmd_tlb_range(struct vm_area_struct *vma,
				unsigned long start, unsigned long end)
{
	radix__flush_tlb_range_psize(vma->vm_mm, start, end, MMU_PAGE_2M);
}
EXPORT_SYMBOL(radix__flush_pmd_tlb_range);

void radix__flush_tlb_all(void)
{
	unsigned long rb,prs,r,rs;
	unsigned long ric = RIC_FLUSH_ALL;

	rb = 0x3 << PPC_BITLSHIFT(53); /* IS = 3 */
	prs = 0; /* partition scoped */
	r = 1;   /* radix format */
	rs = 1 & ((1UL << 32) - 1); /* any LPID value to flush guest mappings */

	asm volatile("ptesync": : :"memory");
	/*
	 * now flush guest entries by passing PRS = 1 and LPID != 0
	 */
	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(1), "i"(ric), "r"(rs) : "memory");
	/*
	 * now flush host entries by passing PRS = 0 and LPID == 0
	 */
	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory");
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm,
				 unsigned long address)
{
	/*
	 * We track page size in pte only for DD1, so we can
	 * call this only on DD1.
	 */
	if (!cpu_has_feature(CPU_FTR_POWER9_DD1)) {
		VM_WARN_ON(1);
		return;
	}

	if (old_pte & R_PAGE_LARGE)
		radix__flush_tlb_page_psize(mm, address, MMU_PAGE_2M);
	else
		radix__flush_tlb_page_psize(mm, address, mmu_virtual_psize);
}

#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
{
	unsigned long pid = mm->context.id;

	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	/*
	 * If this context hasn't run on that CPU before and KVM is
	 * around, there's a slim chance that the guest on another
	 * CPU just brought in obsolete translation into the TLB of
	 * this CPU due to a bad prefetch using the guest PID on
	 * the way into the hypervisor.
	 *
	 * We work around this here. If KVM is possible, we check if
	 * any sibling thread is in KVM. If it is, the window may exist
	 * and thus we flush that PID from the core.
	 *
	 * A potential future improvement would be to mark which PIDs
	 * have never been used on the system and avoid it if the PID
	 * is new and the process has no other cpumask bit set.
	 */
	if (cpu_has_feature(CPU_FTR_HVMODE) && radix_enabled()) {
		int cpu = smp_processor_id();
		int sib = cpu_first_thread_sibling(cpu);
		bool flush = false;

		for (; sib <= cpu_last_thread_sibling(cpu) && !flush; sib++) {
			if (sib == cpu)
				continue;
			if (!cpu_possible(sib))
				continue;
			if (paca_ptrs[sib]->kvm_hstate.kvm_vcpu)
				flush = true;
		}
		if (flush)
			_tlbiel_pid(pid, RIC_FLUSH_ALL);
	}
}
EXPORT_SYMBOL_GPL(radix_kvm_prefetch_workaround);
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */