pageattr.c

  1. /*
  2. * Copyright 2002 Andi Kleen, SuSE Labs.
  3. * Thanks to Ben LaHaise for precious feedback.
  4. */
  5. #include <linux/highmem.h>
  6. #include <linux/memblock.h>
  7. #include <linux/sched.h>
  8. #include <linux/mm.h>
  9. #include <linux/interrupt.h>
  10. #include <linux/seq_file.h>
  11. #include <linux/debugfs.h>
  12. #include <linux/pfn.h>
  13. #include <linux/percpu.h>
  14. #include <linux/gfp.h>
  15. #include <linux/pci.h>
  16. #include <linux/vmalloc.h>
  17. #include <asm/e820/api.h>
  18. #include <asm/processor.h>
  19. #include <asm/tlbflush.h>
  20. #include <asm/sections.h>
  21. #include <asm/setup.h>
  22. #include <linux/uaccess.h>
  23. #include <asm/pgalloc.h>
  24. #include <asm/proto.h>
  25. #include <asm/pat.h>
  26. #include <asm/set_memory.h>
  27. /*
  28. * The current flushing context - we pass it instead of 5 arguments:
  29. */
  30. struct cpa_data {
  31. unsigned long *vaddr;
  32. pgd_t *pgd;
  33. pgprot_t mask_set;
  34. pgprot_t mask_clr;
  35. unsigned long numpages;
  36. int flags;
  37. unsigned long pfn;
  38. unsigned force_split : 1,
  39. force_static_prot : 1;
  40. int curpage;
  41. struct page **pages;
  42. };
  43. enum cpa_warn {
  44. CPA_CONFLICT,
  45. CPA_PROTECT,
  46. CPA_DETECT,
  47. };
  48. static const int cpa_warn_level = CPA_PROTECT;
  49. /*
  50. * Serialize cpa() (for !DEBUG_PAGEALLOC, which uses large identity mappings)
  51. * using cpa_lock, so that no other CPU with stale large TLB entries can
  52. * change the page attributes in parallel while another CPU is splitting a
  53. * large page entry and changing the attributes.
  54. */
  55. static DEFINE_SPINLOCK(cpa_lock);
  56. #define CPA_FLUSHTLB 1
  57. #define CPA_ARRAY 2
  58. #define CPA_PAGES_ARRAY 4
  59. #define CPA_NO_CHECK_ALIAS 8 /* Do not search for aliases */
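/*
 * Editor's note (not part of the upstream file): these are bit flags. A CPA
 * invocation on an array of struct pages that must not look for aliases
 * would, for example, carry (CPA_PAGES_ARRAY | CPA_NO_CHECK_ALIAS) in the
 * in_flag argument, and the array flags end up in cpa->flags.
 */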
  60. #ifdef CONFIG_PROC_FS
  61. static unsigned long direct_pages_count[PG_LEVEL_NUM];
  62. void update_page_count(int level, unsigned long pages)
  63. {
  64. /* Protect against CPA */
  65. spin_lock(&pgd_lock);
  66. direct_pages_count[level] += pages;
  67. spin_unlock(&pgd_lock);
  68. }
  69. static void split_page_count(int level)
  70. {
  71. if (direct_pages_count[level] == 0)
  72. return;
  73. direct_pages_count[level]--;
  74. direct_pages_count[level - 1] += PTRS_PER_PTE;
  75. }
  76. void arch_report_meminfo(struct seq_file *m)
  77. {
  78. seq_printf(m, "DirectMap4k: %8lu kB\n",
  79. direct_pages_count[PG_LEVEL_4K] << 2);
  80. #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
  81. seq_printf(m, "DirectMap2M: %8lu kB\n",
  82. direct_pages_count[PG_LEVEL_2M] << 11);
  83. #else
  84. seq_printf(m, "DirectMap4M: %8lu kB\n",
  85. direct_pages_count[PG_LEVEL_2M] << 12);
  86. #endif
  87. if (direct_gbpages)
  88. seq_printf(m, "DirectMap1G: %8lu kB\n",
  89. direct_pages_count[PG_LEVEL_1G] << 20);
  90. }
  91. #else
  92. static inline void split_page_count(int level) { }
  93. #endif
  94. #ifdef CONFIG_X86_CPA_STATISTICS
  95. static unsigned long cpa_1g_checked;
  96. static unsigned long cpa_1g_sameprot;
  97. static unsigned long cpa_1g_preserved;
  98. static unsigned long cpa_2m_checked;
  99. static unsigned long cpa_2m_sameprot;
  100. static unsigned long cpa_2m_preserved;
  101. static unsigned long cpa_4k_install;
  102. static inline void cpa_inc_1g_checked(void)
  103. {
  104. cpa_1g_checked++;
  105. }
  106. static inline void cpa_inc_2m_checked(void)
  107. {
  108. cpa_2m_checked++;
  109. }
  110. static inline void cpa_inc_4k_install(void)
  111. {
  112. cpa_4k_install++;
  113. }
  114. static inline void cpa_inc_lp_sameprot(int level)
  115. {
  116. if (level == PG_LEVEL_1G)
  117. cpa_1g_sameprot++;
  118. else
  119. cpa_2m_sameprot++;
  120. }
  121. static inline void cpa_inc_lp_preserved(int level)
  122. {
  123. if (level == PG_LEVEL_1G)
  124. cpa_1g_preserved++;
  125. else
  126. cpa_2m_preserved++;
  127. }
  128. static int cpastats_show(struct seq_file *m, void *p)
  129. {
  130. seq_printf(m, "1G pages checked: %16lu\n", cpa_1g_checked);
  131. seq_printf(m, "1G pages sameprot: %16lu\n", cpa_1g_sameprot);
  132. seq_printf(m, "1G pages preserved: %16lu\n", cpa_1g_preserved);
  133. seq_printf(m, "2M pages checked: %16lu\n", cpa_2m_checked);
  134. seq_printf(m, "2M pages sameprot: %16lu\n", cpa_2m_sameprot);
  135. seq_printf(m, "2M pages preserved: %16lu\n", cpa_2m_preserved);
  136. seq_printf(m, "4K pages set-checked: %16lu\n", cpa_4k_install);
  137. return 0;
  138. }
  139. static int cpastats_open(struct inode *inode, struct file *file)
  140. {
  141. return single_open(file, cpastats_show, NULL);
  142. }
  143. static const struct file_operations cpastats_fops = {
  144. .open = cpastats_open,
  145. .read = seq_read,
  146. .llseek = seq_lseek,
  147. .release = single_release,
  148. };
  149. static int __init cpa_stats_init(void)
  150. {
  151. debugfs_create_file("cpa_stats", S_IRUSR, arch_debugfs_dir, NULL,
  152. &cpastats_fops);
  153. return 0;
  154. }
  155. late_initcall(cpa_stats_init);
  156. #else
  157. static inline void cpa_inc_1g_checked(void) { }
  158. static inline void cpa_inc_2m_checked(void) { }
  159. static inline void cpa_inc_4k_install(void) { }
  160. static inline void cpa_inc_lp_sameprot(int level) { }
  161. static inline void cpa_inc_lp_preserved(int level) { }
  162. #endif
  163. static inline int
  164. within(unsigned long addr, unsigned long start, unsigned long end)
  165. {
  166. return addr >= start && addr < end;
  167. }
  168. static inline int
  169. within_inclusive(unsigned long addr, unsigned long start, unsigned long end)
  170. {
  171. return addr >= start && addr <= end;
  172. }
  173. #ifdef CONFIG_X86_64
  174. static inline unsigned long highmap_start_pfn(void)
  175. {
  176. return __pa_symbol(_text) >> PAGE_SHIFT;
  177. }
  178. static inline unsigned long highmap_end_pfn(void)
  179. {
  180. /* Do not reference physical address outside the kernel. */
  181. return __pa_symbol(roundup(_brk_end, PMD_SIZE) - 1) >> PAGE_SHIFT;
  182. }
  183. static bool __cpa_pfn_in_highmap(unsigned long pfn)
  184. {
  185. /*
  186. * Kernel text has an alias mapping at a high address, known
  187. * here as "highmap".
  188. */
  189. return within_inclusive(pfn, highmap_start_pfn(), highmap_end_pfn());
  190. }
  191. #else
  192. static bool __cpa_pfn_in_highmap(unsigned long pfn)
  193. {
  194. /* There is no highmap on 32-bit */
  195. return false;
  196. }
  197. #endif
  198. /*
  199. * Flushing functions
  200. */
  201. /**
  202. * clflush_cache_range - flush a cache range with clflush
  203. * @vaddr: virtual start address
  204. * @size: number of bytes to flush
  205. *
  206. * clflushopt is an unordered instruction which needs fencing with mfence or
  207. * sfence to avoid ordering issues.
  208. */
  209. void clflush_cache_range(void *vaddr, unsigned int size)
  210. {
  211. const unsigned long clflush_size = boot_cpu_data.x86_clflush_size;
  212. void *p = (void *)((unsigned long)vaddr & ~(clflush_size - 1));
  213. void *vend = vaddr + size;
  214. if (p >= vend)
  215. return;
  216. mb();
  217. for (; p < vend; p += clflush_size)
  218. clflushopt(p);
  219. mb();
  220. }
  221. EXPORT_SYMBOL_GPL(clflush_cache_range);
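/*
 * Editor's illustrative sketch, not part of the upstream file: a hypothetical
 * caller that writes back a freshly dirtied buffer before a device reads it
 * straight from memory. The function name and scenario are assumptions.
 */
static __maybe_unused void example_flush_dirty_buffer(void *buf, unsigned int len)
{
	/* Write back every cacheline covering buf .. buf + len. */
	clflush_cache_range(buf, len);
}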
  222. void arch_invalidate_pmem(void *addr, size_t size)
  223. {
  224. clflush_cache_range(addr, size);
  225. }
  226. EXPORT_SYMBOL_GPL(arch_invalidate_pmem);
  227. static void __cpa_flush_all(void *arg)
  228. {
  229. unsigned long cache = (unsigned long)arg;
  230. /*
  231. * Flush everything to work around errata in early Athlons regarding
  232. * large page flushing.
  233. */
  234. __flush_tlb_all();
  235. if (cache && boot_cpu_data.x86 >= 4)
  236. wbinvd();
  237. }
  238. static void cpa_flush_all(unsigned long cache)
  239. {
  240. BUG_ON(irqs_disabled() && !early_boot_irqs_disabled);
  241. on_each_cpu(__cpa_flush_all, (void *) cache, 1);
  242. }
  243. static bool __cpa_flush_range(unsigned long start, int numpages, int cache)
  244. {
  245. BUG_ON(irqs_disabled() && !early_boot_irqs_disabled);
  246. WARN_ON(PAGE_ALIGN(start) != start);
  247. if (cache && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
  248. cpa_flush_all(cache);
  249. return true;
  250. }
  251. flush_tlb_kernel_range(start, start + PAGE_SIZE * numpages);
  252. return !cache;
  253. }
  254. static void cpa_flush_range(unsigned long start, int numpages, int cache)
  255. {
  256. unsigned int i, level;
  257. unsigned long addr;
  258. if (__cpa_flush_range(start, numpages, cache))
  259. return;
  260. /*
  261. * We only need to flush on one CPU;
  262. * clflush is a MESI-coherent instruction that
  263. * will cause all other CPUs to flush the same
  264. * cachelines:
  265. */
  266. for (i = 0, addr = start; i < numpages; i++, addr += PAGE_SIZE) {
  267. pte_t *pte = lookup_address(addr, &level);
  268. /*
  269. * Only flush present addresses:
  270. */
  271. if (pte && (pte_val(*pte) & _PAGE_PRESENT))
  272. clflush_cache_range((void *) addr, PAGE_SIZE);
  273. }
  274. }
  275. static void cpa_flush_array(unsigned long baddr, unsigned long *start,
  276. int numpages, int cache,
  277. int in_flags, struct page **pages)
  278. {
  279. unsigned int i, level;
  280. if (__cpa_flush_range(baddr, numpages, cache))
  281. return;
  282. /*
  283. * We only need to flush on one CPU;
  284. * clflush is a MESI-coherent instruction that
  285. * will cause all other CPUs to flush the same
  286. * cachelines:
  287. */
  288. for (i = 0; i < numpages; i++) {
  289. unsigned long addr;
  290. pte_t *pte;
  291. if (in_flags & CPA_PAGES_ARRAY)
  292. addr = (unsigned long)page_address(pages[i]);
  293. else
  294. addr = start[i];
  295. pte = lookup_address(addr, &level);
  296. /*
  297. * Only flush present addresses:
  298. */
  299. if (pte && (pte_val(*pte) & _PAGE_PRESENT))
  300. clflush_cache_range((void *)addr, PAGE_SIZE);
  301. }
  302. }
  303. static bool overlaps(unsigned long r1_start, unsigned long r1_end,
  304. unsigned long r2_start, unsigned long r2_end)
  305. {
  306. return (r1_start <= r2_end && r1_end >= r2_start) ||
  307. (r2_start <= r1_end && r2_end >= r1_start);
  308. }
  309. #ifdef CONFIG_PCI_BIOS
  310. /*
  311. * The BIOS area between 640k and 1Mb needs to be executable for PCI BIOS
  312. * based config access (CONFIG_PCI_GOBIOS) support.
  313. */
  314. #define BIOS_PFN PFN_DOWN(BIOS_BEGIN)
  315. #define BIOS_PFN_END PFN_DOWN(BIOS_END - 1)
  316. static pgprotval_t protect_pci_bios(unsigned long spfn, unsigned long epfn)
  317. {
  318. if (pcibios_enabled && overlaps(spfn, epfn, BIOS_PFN, BIOS_PFN_END))
  319. return _PAGE_NX;
  320. return 0;
  321. }
  322. #else
  323. static pgprotval_t protect_pci_bios(unsigned long spfn, unsigned long epfn)
  324. {
  325. return 0;
  326. }
  327. #endif
  328. /*
  329. * The .rodata section needs to be read-only. Using the pfn catches all
  330. * aliases. This also includes __ro_after_init, so do not enforce until
  331. * kernel_set_to_readonly is true.
  332. */
  333. static pgprotval_t protect_rodata(unsigned long spfn, unsigned long epfn)
  334. {
  335. unsigned long epfn_ro, spfn_ro = PFN_DOWN(__pa_symbol(__start_rodata));
  336. /*
  337. * Note: __end_rodata is page aligned and not inclusive, so
  338. * subtract 1 to get the last enforced PFN in the rodata area.
  339. */
  340. epfn_ro = PFN_DOWN(__pa_symbol(__end_rodata)) - 1;
  341. if (kernel_set_to_readonly && overlaps(spfn, epfn, spfn_ro, epfn_ro))
  342. return _PAGE_RW;
  343. return 0;
  344. }
  345. /*
  346. * Protect kernel text against becoming non executable by forbidding
  347. * _PAGE_NX. This protects only the high kernel mapping (_text -> _etext)
  348. * out of which the kernel actually executes. Do not protect the low
  349. * mapping.
  350. *
  351. * This does not cover __inittext since that is gone after boot.
  352. */
  353. static pgprotval_t protect_kernel_text(unsigned long start, unsigned long end)
  354. {
  355. unsigned long t_end = (unsigned long)_etext - 1;
  356. unsigned long t_start = (unsigned long)_text;
  357. if (overlaps(start, end, t_start, t_end))
  358. return _PAGE_NX;
  359. return 0;
  360. }
  361. #if defined(CONFIG_X86_64)
  362. /*
  363. * Once the kernel maps the text as RO (kernel_set_to_readonly is set), the
  364. * kernel text mappings for the large page aligned text and rodata sections
  365. * are always read-only. The kernel identity mappings covering the holes
  366. * caused by this alignment can be anything the user asks for.
  367. *
  368. * This will preserve the large page mappings for kernel text/data at no
  369. * extra cost.
  370. */
  371. static pgprotval_t protect_kernel_text_ro(unsigned long start,
  372. unsigned long end)
  373. {
  374. unsigned long t_end = (unsigned long)__end_rodata_hpage_align - 1;
  375. unsigned long t_start = (unsigned long)_text;
  376. unsigned int level;
  377. if (!kernel_set_to_readonly || !overlaps(start, end, t_start, t_end))
  378. return 0;
  379. /*
  380. * Don't enforce the !RW mapping for the kernel text mapping if the
  381. * current mapping already uses small pages. There is no need to work
  382. * hard to preserve large page mappings in this case.
  383. *
  384. * This also fixes the Linux Xen paravirt guest boot failure caused
  385. * by unexpected read-only mappings for kernel identity
  386. * mappings. In this paravirt guest case, the kernel text mapping
  387. * and the kernel identity mapping share the same page-table pages,
  388. * so the protections for kernel text and identity mappings have to
  389. * be the same.
  390. */
  391. if (lookup_address(start, &level) && (level != PG_LEVEL_4K))
  392. return _PAGE_RW;
  393. return 0;
  394. }
  395. #else
  396. static pgprotval_t protect_kernel_text_ro(unsigned long start,
  397. unsigned long end)
  398. {
  399. return 0;
  400. }
  401. #endif
  402. static inline bool conflicts(pgprot_t prot, pgprotval_t val)
  403. {
  404. return (pgprot_val(prot) & ~val) != pgprot_val(prot);
  405. }
  406. static inline void check_conflict(int warnlvl, pgprot_t prot, pgprotval_t val,
  407. unsigned long start, unsigned long end,
  408. unsigned long pfn, const char *txt)
  409. {
  410. static const char *lvltxt[] = {
  411. [CPA_CONFLICT] = "conflict",
  412. [CPA_PROTECT] = "protect",
  413. [CPA_DETECT] = "detect",
  414. };
  415. if (warnlvl > cpa_warn_level || !conflicts(prot, val))
  416. return;
  417. pr_warn("CPA %8s %10s: 0x%016lx - 0x%016lx PFN %lx req %016llx prevent %016llx\n",
  418. lvltxt[warnlvl], txt, start, end, pfn, (unsigned long long)pgprot_val(prot),
  419. (unsigned long long)val);
  420. }
  421. /*
  422. * Certain areas of memory on x86 require very specific protection flags,
  423. * for example the BIOS area or kernel text. Callers don't always get this
  424. * right (again, ioremap() on BIOS memory is not uncommon) so this function
  425. * checks and fixes these known static required protection bits.
  426. */
  427. static inline pgprot_t static_protections(pgprot_t prot, unsigned long start,
  428. unsigned long pfn, unsigned long npg,
  429. int warnlvl)
  430. {
  431. pgprotval_t forbidden, res;
  432. unsigned long end;
  433. /*
  434. * There is no point in checking RW/NX conflicts when the requested
  435. * mapping is setting the page !PRESENT.
  436. */
  437. if (!(pgprot_val(prot) & _PAGE_PRESENT))
  438. return prot;
  439. /* Operate on the virtual address */
  440. end = start + npg * PAGE_SIZE - 1;
  441. res = protect_kernel_text(start, end);
  442. check_conflict(warnlvl, prot, res, start, end, pfn, "Text NX");
  443. forbidden = res;
  444. res = protect_kernel_text_ro(start, end);
  445. check_conflict(warnlvl, prot, res, start, end, pfn, "Text RO");
  446. forbidden |= res;
  447. /* Check the PFN directly */
  448. res = protect_pci_bios(pfn, pfn + npg - 1);
  449. check_conflict(warnlvl, prot, res, start, end, pfn, "PCIBIOS NX");
  450. forbidden |= res;
  451. res = protect_rodata(pfn, pfn + npg - 1);
  452. check_conflict(warnlvl, prot, res, start, end, pfn, "Rodata RO");
  453. forbidden |= res;
  454. return __pgprot(pgprot_val(prot) & ~forbidden);
  455. }
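/*
 * Editor's illustrative sketch, not part of the upstream file: how a request
 * is filtered by static_protections(). A hypothetical caller asking for a
 * writable mapping gets the _PAGE_RW bit stripped again if the pfn falls
 * into a protected area such as .rodata.
 */
static __maybe_unused pgprot_t example_filter_write_request(unsigned long vaddr,
							     unsigned long pfn)
{
	pgprot_t req = __pgprot(_PAGE_PRESENT | _PAGE_RW);

	/* Returns req with any statically forbidden bits masked out. */
	return static_protections(req, vaddr, pfn, 1, CPA_DETECT);
}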
  456. /*
  457. * Lookup the page table entry for a virtual address in a specific pgd.
  458. * Return a pointer to the entry and the level of the mapping.
  459. */
  460. pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
  461. unsigned int *level)
  462. {
  463. p4d_t *p4d;
  464. pud_t *pud;
  465. pmd_t *pmd;
  466. *level = PG_LEVEL_NONE;
  467. if (pgd_none(*pgd))
  468. return NULL;
  469. p4d = p4d_offset(pgd, address);
  470. if (p4d_none(*p4d))
  471. return NULL;
  472. *level = PG_LEVEL_512G;
  473. if (p4d_large(*p4d) || !p4d_present(*p4d))
  474. return (pte_t *)p4d;
  475. pud = pud_offset(p4d, address);
  476. if (pud_none(*pud))
  477. return NULL;
  478. *level = PG_LEVEL_1G;
  479. if (pud_large(*pud) || !pud_present(*pud))
  480. return (pte_t *)pud;
  481. pmd = pmd_offset(pud, address);
  482. if (pmd_none(*pmd))
  483. return NULL;
  484. *level = PG_LEVEL_2M;
  485. if (pmd_large(*pmd) || !pmd_present(*pmd))
  486. return (pte_t *)pmd;
  487. *level = PG_LEVEL_4K;
  488. return pte_offset_kernel(pmd, address);
  489. }
  490. /*
  491. * Lookup the page table entry for a virtual address. Return a pointer
  492. * to the entry and the level of the mapping.
  493. *
  494. * Note: We return pud and pmd either when the entry is marked large
  495. * or when the present bit is not set. Otherwise we would return a
  496. * pointer to a nonexisting mapping.
  497. */
  498. pte_t *lookup_address(unsigned long address, unsigned int *level)
  499. {
  500. return lookup_address_in_pgd(pgd_offset_k(address), address, level);
  501. }
  502. EXPORT_SYMBOL_GPL(lookup_address);
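/*
 * Editor's illustrative sketch, not part of the upstream file: typical use of
 * lookup_address(). The caller gets both the entry and the level it was found
 * at, and must check the level before interpreting the entry. The helper name
 * is hypothetical.
 */
static __maybe_unused bool example_addr_uses_2m_mapping(unsigned long addr)
{
	unsigned int level;
	pte_t *pte = lookup_address(addr, &level);

	/* NULL or a cleared entry means there is no mapping to inspect. */
	if (!pte || pte_none(*pte))
		return false;

	/* At PG_LEVEL_2M the returned pointer really points at a pmd_t. */
	return level == PG_LEVEL_2M;
}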
  503. static pte_t *_lookup_address_cpa(struct cpa_data *cpa, unsigned long address,
  504. unsigned int *level)
  505. {
  506. if (cpa->pgd)
  507. return lookup_address_in_pgd(cpa->pgd + pgd_index(address),
  508. address, level);
  509. return lookup_address(address, level);
  510. }
  511. /*
  512. * Lookup the PMD entry for a virtual address. Return a pointer to the entry
  513. * or NULL if not present.
  514. */
  515. pmd_t *lookup_pmd_address(unsigned long address)
  516. {
  517. pgd_t *pgd;
  518. p4d_t *p4d;
  519. pud_t *pud;
  520. pgd = pgd_offset_k(address);
  521. if (pgd_none(*pgd))
  522. return NULL;
  523. p4d = p4d_offset(pgd, address);
  524. if (p4d_none(*p4d) || p4d_large(*p4d) || !p4d_present(*p4d))
  525. return NULL;
  526. pud = pud_offset(p4d, address);
  527. if (pud_none(*pud) || pud_large(*pud) || !pud_present(*pud))
  528. return NULL;
  529. return pmd_offset(pud, address);
  530. }
  531. /*
  532. * This is necessary because __pa() does not work on some
  533. * kinds of memory, like vmalloc() or the alloc_remap()
  534. * areas on 32-bit NUMA systems. The percpu areas can
  535. * end up in this kind of memory, for instance.
  536. *
  537. * This could be optimized, but it is only intended to be
  538. * used at initialization time, and keeping it
  539. * unoptimized should increase the testing coverage for
  540. * the more obscure platforms.
  541. */
  542. phys_addr_t slow_virt_to_phys(void *__virt_addr)
  543. {
  544. unsigned long virt_addr = (unsigned long)__virt_addr;
  545. phys_addr_t phys_addr;
  546. unsigned long offset;
  547. enum pg_level level;
  548. pte_t *pte;
  549. pte = lookup_address(virt_addr, &level);
  550. BUG_ON(!pte);
  551. /*
  552. * pXX_pfn() returns unsigned long, which must be cast to phys_addr_t
  553. * before being left-shifted PAGE_SHIFT bits -- this trick makes
  554. * 32-bit PAE kernels work correctly.
  555. */
  556. switch (level) {
  557. case PG_LEVEL_1G:
  558. phys_addr = (phys_addr_t)pud_pfn(*(pud_t *)pte) << PAGE_SHIFT;
  559. offset = virt_addr & ~PUD_PAGE_MASK;
  560. break;
  561. case PG_LEVEL_2M:
  562. phys_addr = (phys_addr_t)pmd_pfn(*(pmd_t *)pte) << PAGE_SHIFT;
  563. offset = virt_addr & ~PMD_PAGE_MASK;
  564. break;
  565. default:
  566. phys_addr = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT;
  567. offset = virt_addr & ~PAGE_MASK;
  568. }
  569. return (phys_addr_t)(phys_addr | offset);
  570. }
  571. EXPORT_SYMBOL_GPL(slow_virt_to_phys);
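/*
 * Editor's illustrative sketch, not part of the upstream file:
 * slow_virt_to_phys() is for addresses where __pa() is not valid, e.g. a
 * vmalloc()'ed buffer. The wrapper name is hypothetical.
 */
static __maybe_unused phys_addr_t example_vmalloc_buf_phys(void *vbuf)
{
	/* Walks the kernel page tables instead of doing address arithmetic. */
	return slow_virt_to_phys(vbuf);
}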
  572. /*
  573. * Set the new pmd in all the pgds we know about:
  574. */
  575. static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
  576. {
  577. /* change init_mm */
  578. set_pte_atomic(kpte, pte);
  579. #ifdef CONFIG_X86_32
  580. if (!SHARED_KERNEL_PMD) {
  581. struct page *page;
  582. list_for_each_entry(page, &pgd_list, lru) {
  583. pgd_t *pgd;
  584. p4d_t *p4d;
  585. pud_t *pud;
  586. pmd_t *pmd;
  587. pgd = (pgd_t *)page_address(page) + pgd_index(address);
  588. p4d = p4d_offset(pgd, address);
  589. pud = pud_offset(p4d, address);
  590. pmd = pmd_offset(pud, address);
  591. set_pte_atomic((pte_t *)pmd, pte);
  592. }
  593. }
  594. #endif
  595. }
  596. static pgprot_t pgprot_clear_protnone_bits(pgprot_t prot)
  597. {
  598. /*
  599. * _PAGE_GLOBAL means "global page" for present PTEs.
  600. * But, it is also used to indicate _PAGE_PROTNONE
  601. * for non-present PTEs.
  602. *
  603. * This ensures that a _PAGE_GLOBAL PTE going from
  604. * present to non-present is not confused as
  605. * _PAGE_PROTNONE.
  606. */
  607. if (!(pgprot_val(prot) & _PAGE_PRESENT))
  608. pgprot_val(prot) &= ~_PAGE_GLOBAL;
  609. return prot;
  610. }
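/*
 * Editor's illustrative sketch, not part of the upstream file: why the helper
 * above matters. Clearing _PAGE_PRESENT while leaving _PAGE_GLOBAL set would
 * make the result look like a _PAGE_PROTNONE mapping, so both are dropped
 * together. The helper name is hypothetical.
 */
static __maybe_unused pgprot_t example_make_not_present(pgprot_t prot)
{
	pgprot_val(prot) &= ~_PAGE_PRESENT;

	/* Drops _PAGE_GLOBAL for the now non-present protection value. */
	return pgprot_clear_protnone_bits(prot);
}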
  611. static int __should_split_large_page(pte_t *kpte, unsigned long address,
  612. struct cpa_data *cpa)
  613. {
  614. unsigned long numpages, pmask, psize, lpaddr, pfn, old_pfn;
  615. pgprot_t old_prot, new_prot, req_prot, chk_prot;
  616. pte_t new_pte, old_pte, *tmp;
  617. enum pg_level level;
  618. /*
  619. * Check for races, another CPU might have split this page
  620. * up already:
  621. */
  622. tmp = _lookup_address_cpa(cpa, address, &level);
  623. if (tmp != kpte)
  624. return 1;
  625. switch (level) {
  626. case PG_LEVEL_2M:
  627. old_prot = pmd_pgprot(*(pmd_t *)kpte);
  628. old_pfn = pmd_pfn(*(pmd_t *)kpte);
  629. cpa_inc_2m_checked();
  630. break;
  631. case PG_LEVEL_1G:
  632. old_prot = pud_pgprot(*(pud_t *)kpte);
  633. old_pfn = pud_pfn(*(pud_t *)kpte);
  634. cpa_inc_1g_checked();
  635. break;
  636. default:
  637. return -EINVAL;
  638. }
  639. psize = page_level_size(level);
  640. pmask = page_level_mask(level);
  641. /*
  642. * Calculate the number of pages, which fit into this large
  643. * page starting at address:
  644. */
  645. lpaddr = (address + psize) & pmask;
  646. numpages = (lpaddr - address) >> PAGE_SHIFT;
  647. if (numpages < cpa->numpages)
  648. cpa->numpages = numpages;
  649. /*
  650. * We are safe now. Check whether the new pgprot is the same:
  651. * Convert protection attributes to 4k-format, as cpa->mask* are set
  652. * up accordingly.
  653. */
  654. old_pte = *kpte;
  655. /* Clear PSE (aka _PAGE_PAT) and move PAT bit to correct position */
  656. req_prot = pgprot_large_2_4k(old_prot);
  657. pgprot_val(req_prot) &= ~pgprot_val(cpa->mask_clr);
  658. pgprot_val(req_prot) |= pgprot_val(cpa->mask_set);
  659. /*
  660. * req_prot is in format of 4k pages. It must be converted to large
  661. * page format: the caching mode includes the PAT bit located at
  662. * different bit positions in the two formats.
  663. */
  664. req_prot = pgprot_4k_2_large(req_prot);
  665. req_prot = pgprot_clear_protnone_bits(req_prot);
  666. if (pgprot_val(req_prot) & _PAGE_PRESENT)
  667. pgprot_val(req_prot) |= _PAGE_PSE;
  668. /*
  669. * old_pfn points to the large page base pfn. So we need to add the
  670. * offset of the virtual address:
  671. */
  672. pfn = old_pfn + ((address & (psize - 1)) >> PAGE_SHIFT);
  673. cpa->pfn = pfn;
  674. /*
  675. * Calculate the large page base address and the number of 4K pages
  676. * in the large page
  677. */
  678. lpaddr = address & pmask;
  679. numpages = psize >> PAGE_SHIFT;
  680. /*
  681. * Sanity check that the existing mapping is correct versus the static
  682. * protections. static_protections() guards against !PRESENT, so no
  683. * extra conditional required here.
  684. */
  685. chk_prot = static_protections(old_prot, lpaddr, old_pfn, numpages,
  686. CPA_CONFLICT);
  687. if (WARN_ON_ONCE(pgprot_val(chk_prot) != pgprot_val(old_prot))) {
  688. /*
  689. * Split the large page and tell the split code to
  690. * enforce static protections.
  691. */
  692. cpa->force_static_prot = 1;
  693. return 1;
  694. }
  695. /*
  696. * Optimization: If the requested pgprot is the same as the current
  697. * pgprot, then the large page can be preserved and no updates are
  698. * required independent of alignment and length of the requested
  699. * range. The above already established that the current pgprot is
  700. * correct, which in consequence makes the requested pgprot correct
  701. * as well if it is the same. The static protection scan below will
  702. * not come to a different conclusion.
  703. */
  704. if (pgprot_val(req_prot) == pgprot_val(old_prot)) {
  705. cpa_inc_lp_sameprot(level);
  706. return 0;
  707. }
  708. /*
  709. * If the requested range does not cover the full page, split it up
  710. */
  711. if (address != lpaddr || cpa->numpages != numpages)
  712. return 1;
  713. /*
  714. * Check whether the requested pgprot is conflicting with a static
  715. * protection requirement in the large page.
  716. */
  717. new_prot = static_protections(req_prot, lpaddr, old_pfn, numpages,
  718. CPA_DETECT);
  719. /*
  720. * If there is a conflict, split the large page.
  721. *
  722. * There used to be a 4k-wise evaluation trying really hard to
  723. * preserve the large pages, but experimentation has shown that this
  724. * does not help at all. There might be corner cases which would
  725. * preserve one large page occasionally, but it's really not worth the
  726. * extra code and cycles for the common case.
  727. */
  728. if (pgprot_val(req_prot) != pgprot_val(new_prot))
  729. return 1;
  730. /* All checks passed. Update the large page mapping. */
  731. new_pte = pfn_pte(old_pfn, new_prot);
  732. __set_pmd_pte(kpte, address, new_pte);
  733. cpa->flags |= CPA_FLUSHTLB;
  734. cpa_inc_lp_preserved(level);
  735. return 0;
  736. }
  737. static int should_split_large_page(pte_t *kpte, unsigned long address,
  738. struct cpa_data *cpa)
  739. {
  740. int do_split;
  741. if (cpa->force_split)
  742. return 1;
  743. spin_lock(&pgd_lock);
  744. do_split = __should_split_large_page(kpte, address, cpa);
  745. spin_unlock(&pgd_lock);
  746. return do_split;
  747. }
  748. static void split_set_pte(struct cpa_data *cpa, pte_t *pte, unsigned long pfn,
  749. pgprot_t ref_prot, unsigned long address,
  750. unsigned long size)
  751. {
  752. unsigned int npg = PFN_DOWN(size);
  753. pgprot_t prot;
  754. /*
  755. * If should_split_large_page() discovered an inconsistent mapping,
  756. * remove the invalid protection in the split mapping.
  757. */
  758. if (!cpa->force_static_prot)
  759. goto set;
  760. prot = static_protections(ref_prot, address, pfn, npg, CPA_PROTECT);
  761. if (pgprot_val(prot) == pgprot_val(ref_prot))
  762. goto set;
  763. /*
  764. * If this is splitting a PMD, fix it up. PUD splits cannot be
  765. * fixed trivially as that would require to rescan the newly
  766. * installed PMD mappings after returning from split_large_page()
  767. * so an eventual further split can allocate the necessary PTE
  768. * pages. Warn for now and revisit it in case this actually
  769. * happens.
  770. */
  771. if (size == PAGE_SIZE)
  772. ref_prot = prot;
  773. else
  774. pr_warn_once("CPA: Cannot fixup static protections for PUD split\n");
  775. set:
  776. set_pte(pte, pfn_pte(pfn, ref_prot));
  777. }
  778. static int
  779. __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
  780. struct page *base)
  781. {
  782. unsigned long lpaddr, lpinc, ref_pfn, pfn, pfninc = 1;
  783. pte_t *pbase = (pte_t *)page_address(base);
  784. unsigned int i, level;
  785. pgprot_t ref_prot;
  786. pte_t *tmp;
  787. spin_lock(&pgd_lock);
  788. /*
  789. * Check for races, another CPU might have split this page
  790. * up for us already:
  791. */
  792. tmp = _lookup_address_cpa(cpa, address, &level);
  793. if (tmp != kpte) {
  794. spin_unlock(&pgd_lock);
  795. return 1;
  796. }
  797. paravirt_alloc_pte(&init_mm, page_to_pfn(base));
  798. switch (level) {
  799. case PG_LEVEL_2M:
  800. ref_prot = pmd_pgprot(*(pmd_t *)kpte);
  801. /*
  802. * Clear PSE (aka _PAGE_PAT) and move
  803. * PAT bit to correct position.
  804. */
  805. ref_prot = pgprot_large_2_4k(ref_prot);
  806. ref_pfn = pmd_pfn(*(pmd_t *)kpte);
  807. lpaddr = address & PMD_MASK;
  808. lpinc = PAGE_SIZE;
  809. break;
  810. case PG_LEVEL_1G:
  811. ref_prot = pud_pgprot(*(pud_t *)kpte);
  812. ref_pfn = pud_pfn(*(pud_t *)kpte);
  813. pfninc = PMD_PAGE_SIZE >> PAGE_SHIFT;
  814. lpaddr = address & PUD_MASK;
  815. lpinc = PMD_SIZE;
  816. /*
  817. * Clear the PSE flag if the PRESENT flag is not set,
  818. * otherwise pmd_present()/pmd_huge() will return true
  819. * even on a non-present pmd.
  820. */
  821. if (!(pgprot_val(ref_prot) & _PAGE_PRESENT))
  822. pgprot_val(ref_prot) &= ~_PAGE_PSE;
  823. break;
  824. default:
  825. spin_unlock(&pgd_lock);
  826. return 1;
  827. }
  828. ref_prot = pgprot_clear_protnone_bits(ref_prot);
  829. /*
  830. * Get the target pfn from the original entry:
  831. */
  832. pfn = ref_pfn;
  833. for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc, lpaddr += lpinc)
  834. split_set_pte(cpa, pbase + i, pfn, ref_prot, lpaddr, lpinc);
  835. if (virt_addr_valid(address)) {
  836. unsigned long pfn = PFN_DOWN(__pa(address));
  837. if (pfn_range_is_mapped(pfn, pfn + 1))
  838. split_page_count(level);
  839. }
  840. /*
  841. * Install the new, split up pagetable.
  842. *
  843. * We use the standard kernel pagetable protections for the new
  844. * pagetable protections, the actual ptes set above control the
  845. * primary protection behavior:
  846. */
  847. __set_pmd_pte(kpte, address, mk_pte(base, __pgprot(_KERNPG_TABLE)));
  848. /*
  849. * Do a global flush tlb after splitting the large page
  850. * and before we do the actual change page attribute in the PTE.
  851. *
  852. * Without this, we violate the TLB application note, which says:
  853. * "The TLBs may contain both ordinary and large-page
  854. * translations for a 4-KByte range of linear addresses. This
  855. * may occur if software modifies the paging structures so that
  856. * the page size used for the address range changes. If the two
  857. * translations differ with respect to page frame or attributes
  858. * (e.g., permissions), processor behavior is undefined and may
  859. * be implementation-specific."
  860. *
  861. * We do this global TLB flush inside the cpa_lock, so that we
  862. * don't allow any other CPU with stale TLB entries to change, in
  863. * parallel, the page attributes of the large page entry that was
  864. * just split.
  865. */
  866. flush_tlb_all();
  867. spin_unlock(&pgd_lock);
  868. return 0;
  869. }
  870. static int split_large_page(struct cpa_data *cpa, pte_t *kpte,
  871. unsigned long address)
  872. {
  873. struct page *base;
  874. if (!debug_pagealloc_enabled())
  875. spin_unlock(&cpa_lock);
  876. base = alloc_pages(GFP_KERNEL, 0);
  877. if (!debug_pagealloc_enabled())
  878. spin_lock(&cpa_lock);
  879. if (!base)
  880. return -ENOMEM;
  881. if (__split_large_page(cpa, kpte, address, base))
  882. __free_page(base);
  883. return 0;
  884. }
  885. static bool try_to_free_pte_page(pte_t *pte)
  886. {
  887. int i;
  888. for (i = 0; i < PTRS_PER_PTE; i++)
  889. if (!pte_none(pte[i]))
  890. return false;
  891. free_page((unsigned long)pte);
  892. return true;
  893. }
  894. static bool try_to_free_pmd_page(pmd_t *pmd)
  895. {
  896. int i;
  897. for (i = 0; i < PTRS_PER_PMD; i++)
  898. if (!pmd_none(pmd[i]))
  899. return false;
  900. free_page((unsigned long)pmd);
  901. return true;
  902. }
  903. static bool unmap_pte_range(pmd_t *pmd, unsigned long start, unsigned long end)
  904. {
  905. pte_t *pte = pte_offset_kernel(pmd, start);
  906. while (start < end) {
  907. set_pte(pte, __pte(0));
  908. start += PAGE_SIZE;
  909. pte++;
  910. }
  911. if (try_to_free_pte_page((pte_t *)pmd_page_vaddr(*pmd))) {
  912. pmd_clear(pmd);
  913. return true;
  914. }
  915. return false;
  916. }
  917. static void __unmap_pmd_range(pud_t *pud, pmd_t *pmd,
  918. unsigned long start, unsigned long end)
  919. {
  920. if (unmap_pte_range(pmd, start, end))
  921. if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud)))
  922. pud_clear(pud);
  923. }
  924. static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end)
  925. {
  926. pmd_t *pmd = pmd_offset(pud, start);
  927. /*
  928. * Not on a 2MB page boundary?
  929. */
  930. if (start & (PMD_SIZE - 1)) {
  931. unsigned long next_page = (start + PMD_SIZE) & PMD_MASK;
  932. unsigned long pre_end = min_t(unsigned long, end, next_page);
  933. __unmap_pmd_range(pud, pmd, start, pre_end);
  934. start = pre_end;
  935. pmd++;
  936. }
  937. /*
  938. * Try to unmap in 2M chunks.
  939. */
  940. while (end - start >= PMD_SIZE) {
  941. if (pmd_large(*pmd))
  942. pmd_clear(pmd);
  943. else
  944. __unmap_pmd_range(pud, pmd, start, start + PMD_SIZE);
  945. start += PMD_SIZE;
  946. pmd++;
  947. }
  948. /*
  949. * 4K leftovers?
  950. */
  951. if (start < end)
  952. return __unmap_pmd_range(pud, pmd, start, end);
  953. /*
  954. * Try again to free the PMD page if we haven't succeeded above.
  955. */
  956. if (!pud_none(*pud))
  957. if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud)))
  958. pud_clear(pud);
  959. }
  960. static void unmap_pud_range(p4d_t *p4d, unsigned long start, unsigned long end)
  961. {
  962. pud_t *pud = pud_offset(p4d, start);
  963. /*
  964. * Not on a GB page boundary?
  965. */
  966. if (start & (PUD_SIZE - 1)) {
  967. unsigned long next_page = (start + PUD_SIZE) & PUD_MASK;
  968. unsigned long pre_end = min_t(unsigned long, end, next_page);
  969. unmap_pmd_range(pud, start, pre_end);
  970. start = pre_end;
  971. pud++;
  972. }
  973. /*
  974. * Try to unmap in 1G chunks.
  975. */
  976. while (end - start >= PUD_SIZE) {
  977. if (pud_large(*pud))
  978. pud_clear(pud);
  979. else
  980. unmap_pmd_range(pud, start, start + PUD_SIZE);
  981. start += PUD_SIZE;
  982. pud++;
  983. }
  984. /*
  985. * 2M leftovers?
  986. */
  987. if (start < end)
  988. unmap_pmd_range(pud, start, end);
  989. /*
  990. * No need to try to free the PUD page because we'll free it in
  991. * populate_pgd's error path
  992. */
  993. }
  994. static int alloc_pte_page(pmd_t *pmd)
  995. {
  996. pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL);
  997. if (!pte)
  998. return -1;
  999. set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE));
  1000. return 0;
  1001. }
  1002. static int alloc_pmd_page(pud_t *pud)
  1003. {
  1004. pmd_t *pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL);
  1005. if (!pmd)
  1006. return -1;
  1007. set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
  1008. return 0;
  1009. }
  1010. static void populate_pte(struct cpa_data *cpa,
  1011. unsigned long start, unsigned long end,
  1012. unsigned num_pages, pmd_t *pmd, pgprot_t pgprot)
  1013. {
  1014. pte_t *pte;
  1015. pte = pte_offset_kernel(pmd, start);
  1016. pgprot = pgprot_clear_protnone_bits(pgprot);
  1017. while (num_pages-- && start < end) {
  1018. set_pte(pte, pfn_pte(cpa->pfn, pgprot));
  1019. start += PAGE_SIZE;
  1020. cpa->pfn++;
  1021. pte++;
  1022. }
  1023. }
  1024. static long populate_pmd(struct cpa_data *cpa,
  1025. unsigned long start, unsigned long end,
  1026. unsigned num_pages, pud_t *pud, pgprot_t pgprot)
  1027. {
  1028. long cur_pages = 0;
  1029. pmd_t *pmd;
  1030. pgprot_t pmd_pgprot;
  1031. /*
  1032. * Not on a 2M boundary?
  1033. */
  1034. if (start & (PMD_SIZE - 1)) {
  1035. unsigned long pre_end = start + (num_pages << PAGE_SHIFT);
  1036. unsigned long next_page = (start + PMD_SIZE) & PMD_MASK;
  1037. pre_end = min_t(unsigned long, pre_end, next_page);
  1038. cur_pages = (pre_end - start) >> PAGE_SHIFT;
  1039. cur_pages = min_t(unsigned int, num_pages, cur_pages);
  1040. /*
  1041. * Need a PTE page?
  1042. */
  1043. pmd = pmd_offset(pud, start);
  1044. if (pmd_none(*pmd))
  1045. if (alloc_pte_page(pmd))
  1046. return -1;
  1047. populate_pte(cpa, start, pre_end, cur_pages, pmd, pgprot);
  1048. start = pre_end;
  1049. }
  1050. /*
  1051. * We mapped them all?
  1052. */
  1053. if (num_pages == cur_pages)
  1054. return cur_pages;
  1055. pmd_pgprot = pgprot_4k_2_large(pgprot);
  1056. while (end - start >= PMD_SIZE) {
  1057. /*
  1058. * We cannot use a 1G page so allocate a PMD page if needed.
  1059. */
  1060. if (pud_none(*pud))
  1061. if (alloc_pmd_page(pud))
  1062. return -1;
  1063. pmd = pmd_offset(pud, start);
  1064. set_pmd(pmd, pmd_mkhuge(pfn_pmd(cpa->pfn,
  1065. canon_pgprot(pmd_pgprot))));
  1066. start += PMD_SIZE;
  1067. cpa->pfn += PMD_SIZE >> PAGE_SHIFT;
  1068. cur_pages += PMD_SIZE >> PAGE_SHIFT;
  1069. }
  1070. /*
  1071. * Map trailing 4K pages.
  1072. */
  1073. if (start < end) {
  1074. pmd = pmd_offset(pud, start);
  1075. if (pmd_none(*pmd))
  1076. if (alloc_pte_page(pmd))
  1077. return -1;
  1078. populate_pte(cpa, start, end, num_pages - cur_pages,
  1079. pmd, pgprot);
  1080. }
  1081. return num_pages;
  1082. }
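/*
 * Editor's note (not part of the upstream file): populate_pmd() above splits
 * a request that is not 2M aligned on either side into three parts, roughly:
 *
 *   start                                                            end
 *     |-- 4k PTEs --|====== 2M PMDs ======|====== ... ======|-- 4k PTEs --|
 *                   ^ first 2M boundary                     ^ last 2M boundary
 *
 * populate_pud() below applies the same head/body/tail scheme at the 1G level.
 */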
  1083. static int populate_pud(struct cpa_data *cpa, unsigned long start, p4d_t *p4d,
  1084. pgprot_t pgprot)
  1085. {
  1086. pud_t *pud;
  1087. unsigned long end;
  1088. long cur_pages = 0;
  1089. pgprot_t pud_pgprot;
  1090. end = start + (cpa->numpages << PAGE_SHIFT);
  1091. /*
  1092. * Not on a Gb page boundary? => map everything up to it with
  1093. * smaller pages.
  1094. */
  1095. if (start & (PUD_SIZE - 1)) {
  1096. unsigned long pre_end;
  1097. unsigned long next_page = (start + PUD_SIZE) & PUD_MASK;
  1098. pre_end = min_t(unsigned long, end, next_page);
  1099. cur_pages = (pre_end - start) >> PAGE_SHIFT;
  1100. cur_pages = min_t(int, (int)cpa->numpages, cur_pages);
  1101. pud = pud_offset(p4d, start);
  1102. /*
  1103. * Need a PMD page?
  1104. */
  1105. if (pud_none(*pud))
  1106. if (alloc_pmd_page(pud))
  1107. return -1;
  1108. cur_pages = populate_pmd(cpa, start, pre_end, cur_pages,
  1109. pud, pgprot);
  1110. if (cur_pages < 0)
  1111. return cur_pages;
  1112. start = pre_end;
  1113. }
  1114. /* We mapped them all? */
  1115. if (cpa->numpages == cur_pages)
  1116. return cur_pages;
  1117. pud = pud_offset(p4d, start);
  1118. pud_pgprot = pgprot_4k_2_large(pgprot);
  1119. /*
  1120. * Map everything starting from the Gb boundary, possibly with 1G pages
  1121. */
  1122. while (boot_cpu_has(X86_FEATURE_GBPAGES) && end - start >= PUD_SIZE) {
  1123. set_pud(pud, pud_mkhuge(pfn_pud(cpa->pfn,
  1124. canon_pgprot(pud_pgprot))));
  1125. start += PUD_SIZE;
  1126. cpa->pfn += PUD_SIZE >> PAGE_SHIFT;
  1127. cur_pages += PUD_SIZE >> PAGE_SHIFT;
  1128. pud++;
  1129. }
  1130. /* Map trailing leftover */
  1131. if (start < end) {
  1132. long tmp;
  1133. pud = pud_offset(p4d, start);
  1134. if (pud_none(*pud))
  1135. if (alloc_pmd_page(pud))
  1136. return -1;
  1137. tmp = populate_pmd(cpa, start, end, cpa->numpages - cur_pages,
  1138. pud, pgprot);
  1139. if (tmp < 0)
  1140. return cur_pages;
  1141. cur_pages += tmp;
  1142. }
  1143. return cur_pages;
  1144. }
  1145. /*
  1146. * Restrictions for kernel page table do not necessarily apply when mapping in
  1147. * an alternate PGD.
  1148. */
  1149. static int populate_pgd(struct cpa_data *cpa, unsigned long addr)
  1150. {
  1151. pgprot_t pgprot = __pgprot(_KERNPG_TABLE);
  1152. pud_t *pud = NULL; /* shut up gcc */
  1153. p4d_t *p4d;
  1154. pgd_t *pgd_entry;
  1155. long ret;
  1156. pgd_entry = cpa->pgd + pgd_index(addr);
  1157. if (pgd_none(*pgd_entry)) {
  1158. p4d = (p4d_t *)get_zeroed_page(GFP_KERNEL);
  1159. if (!p4d)
  1160. return -1;
  1161. set_pgd(pgd_entry, __pgd(__pa(p4d) | _KERNPG_TABLE));
  1162. }
  1163. /*
  1164. * Allocate a PUD page and hand it down for mapping.
  1165. */
  1166. p4d = p4d_offset(pgd_entry, addr);
  1167. if (p4d_none(*p4d)) {
  1168. pud = (pud_t *)get_zeroed_page(GFP_KERNEL);
  1169. if (!pud)
  1170. return -1;
  1171. set_p4d(p4d, __p4d(__pa(pud) | _KERNPG_TABLE));
  1172. }
  1173. pgprot_val(pgprot) &= ~pgprot_val(cpa->mask_clr);
  1174. pgprot_val(pgprot) |= pgprot_val(cpa->mask_set);
  1175. ret = populate_pud(cpa, addr, p4d, pgprot);
  1176. if (ret < 0) {
  1177. /*
  1178. * Leave the PUD page in place in case some other CPU or thread
  1179. * already found it, but remove any useless entries we just
  1180. * added to it.
  1181. */
  1182. unmap_pud_range(p4d, addr,
  1183. addr + (cpa->numpages << PAGE_SHIFT));
  1184. return ret;
  1185. }
  1186. cpa->numpages = ret;
  1187. return 0;
  1188. }
  1189. static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr,
  1190. int primary)
  1191. {
  1192. if (cpa->pgd) {
  1193. /*
  1194. * Right now, we only execute this code path when mapping
  1195. * the EFI virtual memory map regions, no other users
  1196. * provide a ->pgd value. This may change in the future.
  1197. */
  1198. return populate_pgd(cpa, vaddr);
  1199. }
  1200. /*
  1201. * Ignore all non primary paths.
  1202. */
  1203. if (!primary) {
  1204. cpa->numpages = 1;
  1205. return 0;
  1206. }
  1207. /*
  1208. * Ignore the NULL PTE for kernel identity mapping, as it is expected
  1209. * to have holes.
  1210. * Also set numpages to '1', indicating that we processed the cpa request
  1211. * for one virtual address page and its pfn. TBD: numpages can be set based
  1212. * on the initial value and the level returned by lookup_address().
  1213. */
  1214. if (within(vaddr, PAGE_OFFSET,
  1215. PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT))) {
  1216. cpa->numpages = 1;
  1217. cpa->pfn = __pa(vaddr) >> PAGE_SHIFT;
  1218. return 0;
  1219. } else if (__cpa_pfn_in_highmap(cpa->pfn)) {
  1220. /* Faults in the highmap are OK, so do not warn: */
  1221. return -EFAULT;
  1222. } else {
  1223. WARN(1, KERN_WARNING "CPA: called for zero pte. "
  1224. "vaddr = %lx cpa->vaddr = %lx\n", vaddr,
  1225. *cpa->vaddr);
  1226. return -EFAULT;
  1227. }
  1228. }
  1229. static int __change_page_attr(struct cpa_data *cpa, int primary)
  1230. {
  1231. unsigned long address;
  1232. int do_split, err;
  1233. unsigned int level;
  1234. pte_t *kpte, old_pte;
  1235. if (cpa->flags & CPA_PAGES_ARRAY) {
  1236. struct page *page = cpa->pages[cpa->curpage];
  1237. if (unlikely(PageHighMem(page)))
  1238. return 0;
  1239. address = (unsigned long)page_address(page);
  1240. } else if (cpa->flags & CPA_ARRAY)
  1241. address = cpa->vaddr[cpa->curpage];
  1242. else
  1243. address = *cpa->vaddr;
  1244. repeat:
  1245. kpte = _lookup_address_cpa(cpa, address, &level);
  1246. if (!kpte)
  1247. return __cpa_process_fault(cpa, address, primary);
  1248. old_pte = *kpte;
  1249. if (pte_none(old_pte))
  1250. return __cpa_process_fault(cpa, address, primary);
  1251. if (level == PG_LEVEL_4K) {
  1252. pte_t new_pte;
  1253. pgprot_t new_prot = pte_pgprot(old_pte);
  1254. unsigned long pfn = pte_pfn(old_pte);
  1255. pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr);
  1256. pgprot_val(new_prot) |= pgprot_val(cpa->mask_set);
  1257. cpa_inc_4k_install();
  1258. new_prot = static_protections(new_prot, address, pfn, 1,
  1259. CPA_PROTECT);
  1260. new_prot = pgprot_clear_protnone_bits(new_prot);
  1261. /*
  1262. * We need to keep the pfn from the existing PTE;
  1263. * after all, we're only going to change its attributes,
  1264. * not the memory it points to.
  1265. */
  1266. new_pte = pfn_pte(pfn, new_prot);
  1267. cpa->pfn = pfn;
  1268. /*
  1269. * Do we really change anything?
  1270. */
  1271. if (pte_val(old_pte) != pte_val(new_pte)) {
  1272. set_pte_atomic(kpte, new_pte);
  1273. cpa->flags |= CPA_FLUSHTLB;
  1274. }
  1275. cpa->numpages = 1;
  1276. return 0;
  1277. }
  1278. /*
  1279. * Check, whether we can keep the large page intact
  1280. * and just change the pte:
  1281. */
  1282. do_split = should_split_large_page(kpte, address, cpa);
  1283. /*
  1284. * When the range fits into the existing large page,
  1285. * return. cpa->numpages and the CPA_FLUSHTLB flag have been
  1286. * updated in should_split_large_page():
  1287. */
  1288. if (do_split <= 0)
  1289. return do_split;
  1290. /*
  1291. * We have to split the large page:
  1292. */
  1293. err = split_large_page(cpa, kpte, address);
  1294. if (!err)
  1295. goto repeat;
  1296. return err;
  1297. }
  1298. static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias);
  1299. static int cpa_process_alias(struct cpa_data *cpa)
  1300. {
  1301. struct cpa_data alias_cpa;
  1302. unsigned long laddr = (unsigned long)__va(cpa->pfn << PAGE_SHIFT);
  1303. unsigned long vaddr;
  1304. int ret;
  1305. if (!pfn_range_is_mapped(cpa->pfn, cpa->pfn + 1))
  1306. return 0;
  1307. /*
  1308. * No need to redo, when the primary call touched the direct
  1309. * mapping already:
  1310. */
  1311. if (cpa->flags & CPA_PAGES_ARRAY) {
  1312. struct page *page = cpa->pages[cpa->curpage];
  1313. if (unlikely(PageHighMem(page)))
  1314. return 0;
  1315. vaddr = (unsigned long)page_address(page);
  1316. } else if (cpa->flags & CPA_ARRAY)
  1317. vaddr = cpa->vaddr[cpa->curpage];
  1318. else
  1319. vaddr = *cpa->vaddr;
  1320. if (!(within(vaddr, PAGE_OFFSET,
  1321. PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT)))) {
  1322. alias_cpa = *cpa;
  1323. alias_cpa.vaddr = &laddr;
  1324. alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
  1325. ret = __change_page_attr_set_clr(&alias_cpa, 0);
  1326. if (ret)
  1327. return ret;
  1328. }
  1329. #ifdef CONFIG_X86_64
  1330. /*
  1331. * If the primary call didn't touch the high mapping already
  1332. * and the physical address is inside the kernel map, we need
  1333. * to touch the high mapped kernel as well:
  1334. */
  1335. if (!within(vaddr, (unsigned long)_text, _brk_end) &&
  1336. __cpa_pfn_in_highmap(cpa->pfn)) {
  1337. unsigned long temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) +
  1338. __START_KERNEL_map - phys_base;
  1339. alias_cpa = *cpa;
  1340. alias_cpa.vaddr = &temp_cpa_vaddr;
  1341. alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
  1342. /*
  1343. * The high mapping range is imprecise, so ignore the
  1344. * return value.
  1345. */
  1346. __change_page_attr_set_clr(&alias_cpa, 0);
  1347. }
  1348. #endif
  1349. return 0;
  1350. }
static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
{
	unsigned long numpages = cpa->numpages;
	int ret;

	while (numpages) {
		/*
		 * Store the remaining nr of pages for the large page
		 * preservation check.
		 */
		cpa->numpages = numpages;
		/* for array changes, we can't use large page */
		if (cpa->flags & (CPA_ARRAY | CPA_PAGES_ARRAY))
			cpa->numpages = 1;

		if (!debug_pagealloc_enabled())
			spin_lock(&cpa_lock);
		ret = __change_page_attr(cpa, checkalias);
		if (!debug_pagealloc_enabled())
			spin_unlock(&cpa_lock);
		if (ret)
			return ret;

		if (checkalias) {
			ret = cpa_process_alias(cpa);
			if (ret)
				return ret;
		}

		/*
		 * Adjust the number of pages with the result of the
		 * CPA operation. Either a large page has been
		 * preserved or a single page update happened.
		 */
		BUG_ON(cpa->numpages > numpages || !cpa->numpages);
		numpages -= cpa->numpages;
		if (cpa->flags & (CPA_PAGES_ARRAY | CPA_ARRAY))
			cpa->curpage++;
		else
			*cpa->vaddr += cpa->numpages * PAGE_SIZE;
	}
	return 0;
}

/*
 * Machine check recovery code needs to change cache mode of poisoned
 * pages to UC to avoid speculative access logging another error. But
 * passing the address of the 1:1 mapping to set_memory_uc() is a fine
 * way to encourage a speculative access. So we cheat and flip the top
 * bit of the address. This works fine for the code that updates the
 * page tables. But at the end of the process we need to flush the cache
 * and the non-canonical address causes a #GP fault when used by the
 * CLFLUSH instruction.
 *
 * But in the common case we already have a canonical address. This code
 * will fix the top bit if needed and is a no-op otherwise.
 */
static inline unsigned long make_addr_canonical_again(unsigned long addr)
{
#ifdef CONFIG_X86_64
	return (long)(addr << 1) >> 1;
#else
	return addr;
#endif
}
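
/*
 * Worked example of the shift trick above (64-bit, illustrative values):
 * the machine check code may hand us 0x7fff888012345000, i.e. the
 * direct-map address 0xffff888012345000 with bit 63 flipped.  Shifting
 * left by one drops bit 63 and moves bit 62 (still 1 for any kernel
 * address) into the top position; the arithmetic shift right then
 * sign-extends from that bit and restores 0xffff888012345000.  For an
 * already canonical address bits 63 and 62 are equal, so the two shifts
 * change nothing.
 */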
static int change_page_attr_set_clr(unsigned long *addr, int numpages,
				    pgprot_t mask_set, pgprot_t mask_clr,
				    int force_split, int in_flag,
				    struct page **pages)
{
	struct cpa_data cpa;
	int ret, cache, checkalias;
	unsigned long baddr = 0;

	memset(&cpa, 0, sizeof(cpa));

	/*
	 * Check if we are requested to set a feature that is not
	 * supported. Clearing unsupported features is OK.
	 */
	mask_set = canon_pgprot(mask_set);

	if (!pgprot_val(mask_set) && !pgprot_val(mask_clr) && !force_split)
		return 0;

	/* Ensure we are PAGE_SIZE aligned */
	if (in_flag & CPA_ARRAY) {
		int i;

		for (i = 0; i < numpages; i++) {
			if (addr[i] & ~PAGE_MASK) {
				addr[i] &= PAGE_MASK;
				WARN_ON_ONCE(1);
			}
		}
	} else if (!(in_flag & CPA_PAGES_ARRAY)) {
		/*
		 * in_flag of CPA_PAGES_ARRAY implies it is aligned.
		 * No need to check in that case.
		 */
		if (*addr & ~PAGE_MASK) {
			*addr &= PAGE_MASK;
			/*
			 * People should not be passing in unaligned addresses:
			 */
			WARN_ON_ONCE(1);
		}
		/*
		 * Save address for cache flush. *addr is modified in the call
		 * to __change_page_attr_set_clr() below.
		 */
		baddr = make_addr_canonical_again(*addr);
	}

	/* Must avoid aliasing mappings in the highmem code */
	kmap_flush_unused();

	vm_unmap_aliases();

	cpa.vaddr = addr;
	cpa.pages = pages;
	cpa.numpages = numpages;
	cpa.mask_set = mask_set;
	cpa.mask_clr = mask_clr;
	cpa.flags = 0;
	cpa.curpage = 0;
	cpa.force_split = force_split;

	if (in_flag & (CPA_ARRAY | CPA_PAGES_ARRAY))
		cpa.flags |= in_flag;

	/* No alias checking for _NX bit modifications */
	checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX;
	/* Has caller explicitly disabled alias checking? */
	if (in_flag & CPA_NO_CHECK_ALIAS)
		checkalias = 0;

	ret = __change_page_attr_set_clr(&cpa, checkalias);

	/*
	 * Check whether we really changed something:
	 */
	if (!(cpa.flags & CPA_FLUSHTLB))
		goto out;

	/*
	 * No need to flush, when we did not set any of the caching
	 * attributes:
	 */
	cache = !!pgprot2cachemode(mask_set);

	/*
	 * On error, flush everything to be sure.
	 */
	if (ret) {
		cpa_flush_all(cache);
		goto out;
	}

	if (cpa.flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) {
		cpa_flush_array(baddr, addr, numpages, cache,
				cpa.flags, pages);
	} else {
		cpa_flush_range(baddr, numpages, cache);
	}

out:
	return ret;
}
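
/*
 * Summary of the flush policy above: if no PTE was actually modified
 * (CPA_FLUSHTLB stayed clear) nothing is flushed; on an error the
 * conservative cpa_flush_all() path is taken; otherwise only the affected
 * range or array is flushed, and cache lines are written back/invalidated
 * only when a caching attribute was set.
 */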
static inline int change_page_attr_set(unsigned long *addr, int numpages,
				       pgprot_t mask, int array)
{
	return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0), 0,
		(array ? CPA_ARRAY : 0), NULL);
}

static inline int change_page_attr_clear(unsigned long *addr, int numpages,
					 pgprot_t mask, int array)
{
	return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0,
		(array ? CPA_ARRAY : 0), NULL);
}

static inline int cpa_set_pages_array(struct page **pages, int numpages,
				      pgprot_t mask)
{
	return change_page_attr_set_clr(NULL, numpages, mask, __pgprot(0), 0,
		CPA_PAGES_ARRAY, pages);
}

static inline int cpa_clear_pages_array(struct page **pages, int numpages,
					pgprot_t mask)
{
	return change_page_attr_set_clr(NULL, numpages, __pgprot(0), mask, 0,
		CPA_PAGES_ARRAY, pages);
}

int _set_memory_uc(unsigned long addr, int numpages)
{
	/*
	 * for now UC MINUS. see comments in ioremap_nocache()
	 * If you really need strong UC use ioremap_uc(), but note
	 * that you cannot override IO areas with set_memory_*() as
	 * these helpers cannot work with IO memory.
	 */
	return change_page_attr_set(&addr, numpages,
				    cachemode2pgprot(_PAGE_CACHE_MODE_UC_MINUS),
				    0);
}

int set_memory_uc(unsigned long addr, int numpages)
{
	int ret;

	/*
	 * for now UC MINUS. see comments in ioremap_nocache()
	 */
	ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
			      _PAGE_CACHE_MODE_UC_MINUS, NULL);
	if (ret)
		goto out_err;

	ret = _set_memory_uc(addr, numpages);
	if (ret)
		goto out_free;

	return 0;

out_free:
	free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
out_err:
	return ret;
}
EXPORT_SYMBOL(set_memory_uc);
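
/*
 * Hypothetical usage sketch (illustrative, not taken from a real caller):
 * a driver that wants an uncached RAM buffer for device-visible
 * descriptors might do
 *
 *	unsigned long buf = __get_free_pages(GFP_KERNEL, 2);
 *
 *	set_memory_uc(buf, 4);		// 2^2 = 4 pages, switched to UC-
 *	...
 *	set_memory_wb(buf, 4);		// restore write-back before freeing
 *	free_pages(buf, 2);
 *
 * Error handling is omitted for brevity.
 */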
static int _set_memory_array(unsigned long *addr, int addrinarray,
		enum page_cache_mode new_type)
{
	enum page_cache_mode set_type;
	int i, j;
	int ret;

	for (i = 0; i < addrinarray; i++) {
		ret = reserve_memtype(__pa(addr[i]), __pa(addr[i]) + PAGE_SIZE,
					new_type, NULL);
		if (ret)
			goto out_free;
	}

	/* If WC, set to UC- first and then WC */
	set_type = (new_type == _PAGE_CACHE_MODE_WC) ?
				_PAGE_CACHE_MODE_UC_MINUS : new_type;

	ret = change_page_attr_set(addr, addrinarray,
				   cachemode2pgprot(set_type), 1);

	if (!ret && new_type == _PAGE_CACHE_MODE_WC)
		ret = change_page_attr_set_clr(addr, addrinarray,
					       cachemode2pgprot(
						_PAGE_CACHE_MODE_WC),
					       __pgprot(_PAGE_CACHE_MASK),
					       0, CPA_ARRAY, NULL);
	if (ret)
		goto out_free;

	return 0;

out_free:
	for (j = 0; j < i; j++)
		free_memtype(__pa(addr[j]), __pa(addr[j]) + PAGE_SIZE);

	return ret;
}

int set_memory_array_uc(unsigned long *addr, int addrinarray)
{
	return _set_memory_array(addr, addrinarray, _PAGE_CACHE_MODE_UC_MINUS);
}
EXPORT_SYMBOL(set_memory_array_uc);

int set_memory_array_wc(unsigned long *addr, int addrinarray)
{
	return _set_memory_array(addr, addrinarray, _PAGE_CACHE_MODE_WC);
}
EXPORT_SYMBOL(set_memory_array_wc);

int set_memory_array_wt(unsigned long *addr, int addrinarray)
{
	return _set_memory_array(addr, addrinarray, _PAGE_CACHE_MODE_WT);
}
EXPORT_SYMBOL_GPL(set_memory_array_wt);

int _set_memory_wc(unsigned long addr, int numpages)
{
	int ret;
	unsigned long addr_copy = addr;

	ret = change_page_attr_set(&addr, numpages,
				   cachemode2pgprot(_PAGE_CACHE_MODE_UC_MINUS),
				   0);
	if (!ret) {
		ret = change_page_attr_set_clr(&addr_copy, numpages,
					       cachemode2pgprot(
						_PAGE_CACHE_MODE_WC),
					       __pgprot(_PAGE_CACHE_MASK),
					       0, 0, NULL);
	}
	return ret;
}

int set_memory_wc(unsigned long addr, int numpages)
{
	int ret;

	ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
		_PAGE_CACHE_MODE_WC, NULL);
	if (ret)
		return ret;

	ret = _set_memory_wc(addr, numpages);
	if (ret)
		free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);

	return ret;
}
EXPORT_SYMBOL(set_memory_wc);
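
/*
 * Background note: write-combining mappings are intended for buffers the
 * CPU mostly streams data into (e.g. graphics upload buffers).  Stores may
 * be buffered and merged by the WC buffers, while reads from WC memory are
 * uncached and therefore slow.
 */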
int _set_memory_wt(unsigned long addr, int numpages)
{
	return change_page_attr_set(&addr, numpages,
				    cachemode2pgprot(_PAGE_CACHE_MODE_WT), 0);
}

int set_memory_wt(unsigned long addr, int numpages)
{
	int ret;

	ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
			      _PAGE_CACHE_MODE_WT, NULL);
	if (ret)
		return ret;

	ret = _set_memory_wt(addr, numpages);
	if (ret)
		free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);

	return ret;
}
EXPORT_SYMBOL_GPL(set_memory_wt);

int _set_memory_wb(unsigned long addr, int numpages)
{
	/* WB cache mode is hard wired to all cache attribute bits being 0 */
	return change_page_attr_clear(&addr, numpages,
				      __pgprot(_PAGE_CACHE_MASK), 0);
}

int set_memory_wb(unsigned long addr, int numpages)
{
	int ret;

	ret = _set_memory_wb(addr, numpages);
	if (ret)
		return ret;

	free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
	return 0;
}
EXPORT_SYMBOL(set_memory_wb);

int set_memory_array_wb(unsigned long *addr, int addrinarray)
{
	int i;
	int ret;

	/* WB cache mode is hard wired to all cache attribute bits being 0 */
	ret = change_page_attr_clear(addr, addrinarray,
				     __pgprot(_PAGE_CACHE_MASK), 1);
	if (ret)
		return ret;

	for (i = 0; i < addrinarray; i++)
		free_memtype(__pa(addr[i]), __pa(addr[i]) + PAGE_SIZE);

	return 0;
}
EXPORT_SYMBOL(set_memory_array_wb);

int set_memory_x(unsigned long addr, int numpages)
{
	if (!(__supported_pte_mask & _PAGE_NX))
		return 0;

	return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_NX), 0);
}
EXPORT_SYMBOL(set_memory_x);

int set_memory_nx(unsigned long addr, int numpages)
{
	if (!(__supported_pte_mask & _PAGE_NX))
		return 0;

	return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_NX), 0);
}
EXPORT_SYMBOL(set_memory_nx);

int set_memory_ro(unsigned long addr, int numpages)
{
	return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_RW), 0);
}

int set_memory_rw(unsigned long addr, int numpages)
{
	return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_RW), 0);
}

int set_memory_np(unsigned long addr, int numpages)
{
	return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_PRESENT), 0);
}

int set_memory_np_noalias(unsigned long addr, int numpages)
{
	int cpa_flags = CPA_NO_CHECK_ALIAS;

	return change_page_attr_set_clr(&addr, numpages, __pgprot(0),
					__pgprot(_PAGE_PRESENT), 0,
					cpa_flags, NULL);
}

int set_memory_4k(unsigned long addr, int numpages)
{
	return change_page_attr_set_clr(&addr, numpages, __pgprot(0),
					__pgprot(0), 1, 0, NULL);
}

int set_memory_nonglobal(unsigned long addr, int numpages)
{
	return change_page_attr_clear(&addr, numpages,
				      __pgprot(_PAGE_GLOBAL), 0);
}

int set_memory_global(unsigned long addr, int numpages)
{
	return change_page_attr_set(&addr, numpages,
				    __pgprot(_PAGE_GLOBAL), 0);
}

static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc)
{
	struct cpa_data cpa;
	unsigned long start;
	int ret;

	/* Nothing to do if memory encryption is not active */
	if (!mem_encrypt_active())
		return 0;

	/* Should not be working on unaligned addresses */
	if (WARN_ONCE(addr & ~PAGE_MASK, "misaligned address: %#lx\n", addr))
		addr &= PAGE_MASK;

	start = addr;

	memset(&cpa, 0, sizeof(cpa));
	cpa.vaddr = &addr;
	cpa.numpages = numpages;
	cpa.mask_set = enc ? __pgprot(_PAGE_ENC) : __pgprot(0);
	cpa.mask_clr = enc ? __pgprot(0) : __pgprot(_PAGE_ENC);
	cpa.pgd = init_mm.pgd;

	/* Must avoid aliasing mappings in the highmem code */
	kmap_flush_unused();
	vm_unmap_aliases();

	/*
	 * Before changing the encryption attribute, we need to flush caches.
	 */
	cpa_flush_range(start, numpages, 1);

	ret = __change_page_attr_set_clr(&cpa, 1);

	/*
	 * After changing the encryption attribute, we need to flush TLBs
	 * again in case any speculative TLB caching occurred (but no need
	 * to flush caches again).  We could just use cpa_flush_all(), but
	 * in case TLB flushing gets optimized in the cpa_flush_range()
	 * path use the same logic as above.
	 */
	cpa_flush_range(start, numpages, 0);

	return ret;
}

int set_memory_encrypted(unsigned long addr, int numpages)
{
	return __set_memory_enc_dec(addr, numpages, true);
}
EXPORT_SYMBOL_GPL(set_memory_encrypted);

int set_memory_decrypted(unsigned long addr, int numpages)
{
	return __set_memory_enc_dec(addr, numpages, false);
}
EXPORT_SYMBOL_GPL(set_memory_decrypted);
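
/*
 * Hypothetical usage sketch (illustrative, not a real caller): with SME/SEV
 * active, a guest driver that shares a page-aligned buffer with the
 * hypervisor or an unencrypted device clears the encryption bit on its
 * mapping first and re-encrypts it before reusing the memory:
 *
 *	set_memory_decrypted((unsigned long)buf, nr_pages);
 *	...
 *	set_memory_encrypted((unsigned long)buf, nr_pages);
 */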
int set_pages_uc(struct page *page, int numpages)
{
	unsigned long addr = (unsigned long)page_address(page);

	return set_memory_uc(addr, numpages);
}
EXPORT_SYMBOL(set_pages_uc);

static int _set_pages_array(struct page **pages, int addrinarray,
		enum page_cache_mode new_type)
{
	unsigned long start;
	unsigned long end;
	enum page_cache_mode set_type;
	int i;
	int free_idx;
	int ret;

	for (i = 0; i < addrinarray; i++) {
		if (PageHighMem(pages[i]))
			continue;
		start = page_to_pfn(pages[i]) << PAGE_SHIFT;
		end = start + PAGE_SIZE;
		if (reserve_memtype(start, end, new_type, NULL))
			goto err_out;
	}

	/* If WC, set to UC- first and then WC */
	set_type = (new_type == _PAGE_CACHE_MODE_WC) ?
				_PAGE_CACHE_MODE_UC_MINUS : new_type;

	ret = cpa_set_pages_array(pages, addrinarray,
				  cachemode2pgprot(set_type));
	if (!ret && new_type == _PAGE_CACHE_MODE_WC)
		ret = change_page_attr_set_clr(NULL, addrinarray,
					       cachemode2pgprot(
						_PAGE_CACHE_MODE_WC),
					       __pgprot(_PAGE_CACHE_MASK),
					       0, CPA_PAGES_ARRAY, pages);
	if (ret)
		goto err_out;
	return 0; /* Success */
err_out:
	free_idx = i;
	for (i = 0; i < free_idx; i++) {
		if (PageHighMem(pages[i]))
			continue;
		start = page_to_pfn(pages[i]) << PAGE_SHIFT;
		end = start + PAGE_SIZE;
		free_memtype(start, end);
	}
	return -EINVAL;
}

int set_pages_array_uc(struct page **pages, int addrinarray)
{
	return _set_pages_array(pages, addrinarray, _PAGE_CACHE_MODE_UC_MINUS);
}
EXPORT_SYMBOL(set_pages_array_uc);

int set_pages_array_wc(struct page **pages, int addrinarray)
{
	return _set_pages_array(pages, addrinarray, _PAGE_CACHE_MODE_WC);
}
EXPORT_SYMBOL(set_pages_array_wc);

int set_pages_array_wt(struct page **pages, int addrinarray)
{
	return _set_pages_array(pages, addrinarray, _PAGE_CACHE_MODE_WT);
}
EXPORT_SYMBOL_GPL(set_pages_array_wt);

int set_pages_wb(struct page *page, int numpages)
{
	unsigned long addr = (unsigned long)page_address(page);

	return set_memory_wb(addr, numpages);
}
EXPORT_SYMBOL(set_pages_wb);

int set_pages_array_wb(struct page **pages, int addrinarray)
{
	int retval;
	unsigned long start;
	unsigned long end;
	int i;

	/* WB cache mode is hard wired to all cache attribute bits being 0 */
	retval = cpa_clear_pages_array(pages, addrinarray,
			__pgprot(_PAGE_CACHE_MASK));
	if (retval)
		return retval;

	for (i = 0; i < addrinarray; i++) {
		if (PageHighMem(pages[i]))
			continue;
		start = page_to_pfn(pages[i]) << PAGE_SHIFT;
		end = start + PAGE_SIZE;
		free_memtype(start, end);
	}

	return 0;
}
EXPORT_SYMBOL(set_pages_array_wb);

int set_pages_x(struct page *page, int numpages)
{
	unsigned long addr = (unsigned long)page_address(page);

	return set_memory_x(addr, numpages);
}
EXPORT_SYMBOL(set_pages_x);

int set_pages_nx(struct page *page, int numpages)
{
	unsigned long addr = (unsigned long)page_address(page);

	return set_memory_nx(addr, numpages);
}
EXPORT_SYMBOL(set_pages_nx);

int set_pages_ro(struct page *page, int numpages)
{
	unsigned long addr = (unsigned long)page_address(page);

	return set_memory_ro(addr, numpages);
}

int set_pages_rw(struct page *page, int numpages)
{
	unsigned long addr = (unsigned long)page_address(page);

	return set_memory_rw(addr, numpages);
}

#ifdef CONFIG_DEBUG_PAGEALLOC

static int __set_pages_p(struct page *page, int numpages)
{
	unsigned long tempaddr = (unsigned long) page_address(page);
	struct cpa_data cpa = { .vaddr = &tempaddr,
				.pgd = NULL,
				.numpages = numpages,
				.mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW),
				.mask_clr = __pgprot(0),
				.flags = 0};

	/*
	 * No alias checking needed for setting the present flag. Otherwise,
	 * we may need to break large pages for 64-bit kernel text
	 * mappings (this adds to complexity if we want to do this from
	 * atomic context especially). Let's keep it simple!
	 */
	return __change_page_attr_set_clr(&cpa, 0);
}

static int __set_pages_np(struct page *page, int numpages)
{
	unsigned long tempaddr = (unsigned long) page_address(page);
	struct cpa_data cpa = { .vaddr = &tempaddr,
				.pgd = NULL,
				.numpages = numpages,
				.mask_set = __pgprot(0),
				.mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW),
				.flags = 0};

	/*
	 * No alias checking needed for setting the not-present flag.
	 * Otherwise, we may need to break large pages for 64-bit kernel text
	 * mappings (this adds to complexity if we want to do this from
	 * atomic context especially). Let's keep it simple!
	 */
	return __change_page_attr_set_clr(&cpa, 0);
}

void __kernel_map_pages(struct page *page, int numpages, int enable)
{
	if (PageHighMem(page))
		return;
	if (!enable) {
		debug_check_no_locks_freed(page_address(page),
					   numpages * PAGE_SIZE);
	}

	/*
	 * The return value is ignored as the calls cannot fail.
	 * Large pages for identity mappings are not used at boot time
	 * and hence no memory allocations during large page split.
	 */
	if (enable)
		__set_pages_p(page, numpages);
	else
		__set_pages_np(page, numpages);

	/*
	 * We should perform an IPI and flush all TLBs, but that can
	 * deadlock, so flush only the current CPU's TLB.
	 * Preemption needs to be disabled around __flush_tlb_all() due to
	 * CR3 reload in __native_flush_tlb().
	 */
	preempt_disable();
	__flush_tlb_all();
	preempt_enable();

	arch_flush_lazy_mmu_mode();
}
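
/*
 * Descriptive note: with CONFIG_DEBUG_PAGEALLOC the page allocator maps
 * pages back in on allocation and unmaps them again on free via
 * __kernel_map_pages(), so a use-after-free of a page-backed allocation
 * faults immediately instead of silently corrupting memory.
 */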
#ifdef CONFIG_HIBERNATION

bool kernel_page_present(struct page *page)
{
	unsigned int level;
	pte_t *pte;

	if (PageHighMem(page))
		return false;

	pte = lookup_address((unsigned long)page_address(page), &level);
	return (pte_val(*pte) & _PAGE_PRESENT);
}

#endif /* CONFIG_HIBERNATION */

#endif /* CONFIG_DEBUG_PAGEALLOC */

int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address,
			    unsigned numpages, unsigned long page_flags)
{
	int retval = -EINVAL;

	struct cpa_data cpa = {
		.vaddr = &address,
		.pfn = pfn,
		.pgd = pgd,
		.numpages = numpages,
		.mask_set = __pgprot(0),
		.mask_clr = __pgprot(0),
		.flags = 0,
	};

	if (!(__supported_pte_mask & _PAGE_NX))
		goto out;

	if (!(page_flags & _PAGE_NX))
		cpa.mask_clr = __pgprot(_PAGE_NX);

	if (!(page_flags & _PAGE_RW))
		cpa.mask_clr = __pgprot(_PAGE_RW);

	if (!(page_flags & _PAGE_ENC))
		cpa.mask_clr = pgprot_encrypted(cpa.mask_clr);

	cpa.mask_set = __pgprot(_PAGE_PRESENT | page_flags);

	retval = __change_page_attr_set_clr(&cpa, 0);
	__flush_tlb_all();

out:
	return retval;
}
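
/*
 * Unlike the set_memory_*() helpers above, this operates on a caller
 * supplied PGD rather than on init_mm, which is how the x86 EFI code
 * populates its separate page table with runtime service mappings.
 */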

/*
 * The testcases use internal knowledge of the implementation that shouldn't
 * be exposed to the rest of the kernel. Include these directly here.
 */
#ifdef CONFIG_CPA_DEBUG
#include "pageattr-test.c"
#endif