/*
 * Handle caching attributes in page tables (PAT)
 *
 * Authors: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
 *          Suresh B Siddha <suresh.b.siddha@intel.com>
 *
 * Loosely based on earlier PAT patchset from Eric Biederman and Andi Kleen.
 */
#include <linux/seq_file.h>
#include <linux/bootmem.h>
#include <linux/debugfs.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/rbtree.h>

#include <asm/cacheflush.h>
#include <asm/processor.h>
#include <asm/tlbflush.h>
#include <asm/x86_init.h>
#include <asm/pgtable.h>
#include <asm/fcntl.h>
#include <asm/e820.h>
#include <asm/mtrr.h>
#include <asm/page.h>
#include <asm/msr.h>
#include <asm/pat.h>
#include <asm/io.h>

#include "pat_internal.h"
#include "mm_internal.h"

#ifdef CONFIG_X86_PAT
int __read_mostly pat_enabled = 1;

static inline void pat_disable(const char *reason)
{
        pat_enabled = 0;
        printk(KERN_INFO "%s\n", reason);
}

static int __init nopat(char *str)
{
        pat_disable("PAT support disabled.");
        return 0;
}
early_param("nopat", nopat);
#else
static inline void pat_disable(const char *reason)
{
        (void)reason;
}
#endif

int pat_debug_enable;

static int __init pat_debug_setup(char *str)
{
        pat_debug_enable = 1;
        return 0;
}
__setup("debugpat", pat_debug_setup);

static u64 __read_mostly boot_pat_state;

#ifdef CONFIG_X86_PAT

/*
 * X86 PAT uses the page flags WC and Uncached together to keep track of the
 * memory type of pages that have a backing page struct. X86 PAT supports
 * three different memory types, _PAGE_CACHE_MODE_WB, _PAGE_CACHE_MODE_WC and
 * _PAGE_CACHE_MODE_UC_MINUS, plus a fourth state where the page's memory type
 * has not been changed from its default (a value of -1 is used to denote
 * this). Note we do not support _PAGE_CACHE_MODE_UC here.
 */
#define _PGMT_DEFAULT           0
#define _PGMT_WC                (1UL << PG_arch_1)
#define _PGMT_UC_MINUS          (1UL << PG_uncached)
#define _PGMT_WB                (1UL << PG_uncached | 1UL << PG_arch_1)
#define _PGMT_MASK              (1UL << PG_uncached | 1UL << PG_arch_1)
#define _PGMT_CLEAR_MASK        (~_PGMT_MASK)

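/*
 * Together the two flag bits encode the four tracked states:
 *
 *   PG_uncached  PG_arch_1   tracked memtype
 *        0           0       default (not reserved, reported as -1)
 *        0           1       _PAGE_CACHE_MODE_WC
 *        1           0       _PAGE_CACHE_MODE_UC_MINUS
 *        1           1       _PAGE_CACHE_MODE_WB
 */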
static inline enum page_cache_mode get_page_memtype(struct page *pg)
{
        unsigned long pg_flags = pg->flags & _PGMT_MASK;

        if (pg_flags == _PGMT_DEFAULT)
                return -1;
        else if (pg_flags == _PGMT_WC)
                return _PAGE_CACHE_MODE_WC;
        else if (pg_flags == _PGMT_UC_MINUS)
                return _PAGE_CACHE_MODE_UC_MINUS;
        else
                return _PAGE_CACHE_MODE_WB;
}

static inline void set_page_memtype(struct page *pg,
                                    enum page_cache_mode memtype)
{
        unsigned long memtype_flags;
        unsigned long old_flags;
        unsigned long new_flags;

        switch (memtype) {
        case _PAGE_CACHE_MODE_WC:
                memtype_flags = _PGMT_WC;
                break;
        case _PAGE_CACHE_MODE_UC_MINUS:
                memtype_flags = _PGMT_UC_MINUS;
                break;
        case _PAGE_CACHE_MODE_WB:
                memtype_flags = _PGMT_WB;
                break;
        default:
                memtype_flags = _PGMT_DEFAULT;
                break;
        }
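
        /*
         * Update the two memtype bits in page->flags atomically: retry the
         * cmpxchg() until no concurrent flags update races with ours.
         */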
        do {
                old_flags = pg->flags;
                new_flags = (old_flags & _PGMT_CLEAR_MASK) | memtype_flags;
        } while (cmpxchg(&pg->flags, old_flags, new_flags) != old_flags);
}

#else
static inline enum page_cache_mode get_page_memtype(struct page *pg)
{
        return -1;
}

static inline void set_page_memtype(struct page *pg,
                                    enum page_cache_mode memtype)
{
}
#endif

enum {
        PAT_UC = 0,             /* uncached */
        PAT_WC = 1,             /* Write combining */
        PAT_WT = 4,             /* Write Through */
        PAT_WP = 5,             /* Write Protected */
        PAT_WB = 6,             /* Write Back (default) */
        PAT_UC_MINUS = 7,       /* UC, but can be overridden by MTRR */
};

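/*
 * These are the hardware memory-type encodings programmed into each 8-bit
 * entry of the IA32_PAT MSR (0 = UC, 1 = WC, 4 = WT, 5 = WP, 6 = WB,
 * 7 = UC-); encodings 2 and 3 are reserved by the architecture.
 */
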
#define CM(c) (_PAGE_CACHE_MODE_ ## c)

static enum page_cache_mode pat_get_cache_mode(unsigned pat_val, char *msg)
{
        enum page_cache_mode cache;
        char *cache_mode;

        switch (pat_val) {
        case PAT_UC:       cache = CM(UC);       cache_mode = "UC ";  break;
        case PAT_WC:       cache = CM(WC);       cache_mode = "WC ";  break;
        case PAT_WT:       cache = CM(WT);       cache_mode = "WT ";  break;
        case PAT_WP:       cache = CM(WP);       cache_mode = "WP ";  break;
        case PAT_WB:       cache = CM(WB);       cache_mode = "WB ";  break;
        case PAT_UC_MINUS: cache = CM(UC_MINUS); cache_mode = "UC- "; break;
        default:           cache = CM(WB);       cache_mode = "WB ";  break;
        }

        memcpy(msg, cache_mode, 4);

        return cache;
}
#undef CM

/*
 * Update the cache mode to pgprot translation tables according to PAT
 * configuration.
 * Using lower indices is preferred, so we start with highest index.
 */
void pat_init_cache_modes(void)
{
        int i;
        enum page_cache_mode cache;
        char pat_msg[33];
        u64 pat;

        rdmsrl(MSR_IA32_CR_PAT, pat);
        pat_msg[32] = 0;
        for (i = 7; i >= 0; i--) {
                cache = pat_get_cache_mode((pat >> (i * 8)) & 7,
                                           pat_msg + 4 * i);
                update_cache_mode_entry(i, cache);
        }
        pr_info("PAT configuration [0-7]: %s\n", pat_msg);
}

#define PAT(x, y) ((u64)PAT_ ## y << ((x)*8))

void pat_init(void)
{
        u64 pat;
        bool boot_cpu = !boot_pat_state;

        if (!pat_enabled)
                return;

        if (!cpu_has_pat) {
                if (!boot_pat_state) {
                        pat_disable("PAT not supported by CPU.");
                        return;
                } else {
                        /*
                         * If this happens we are on a secondary CPU, but
                         * switched to PAT on the boot CPU. We have no way to
                         * undo PAT.
                         */
                        printk(KERN_ERR "PAT enabled, but not supported by secondary CPU\n");
                        BUG();
                }
        }

        /* Set PWT to Write-Combining. All other bits stay the same */
        /*
         * PTE encoding used in Linux:
         *      PAT
         *      |PCD
         *      ||PWT
         *      |||
         *      000 WB          _PAGE_CACHE_WB
         *      001 WC          _PAGE_CACHE_WC
         *      010 UC-         _PAGE_CACHE_UC_MINUS
         *      011 UC          _PAGE_CACHE_UC
         * PAT bit unused
         */
        pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) |
              PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, UC);

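        /*
         * For example, PAT(1, WC) expands to (u64)PAT_WC << 8 == 0x100,
         * placing the WC encoding (1) into PAT entry 1; the eight PAT()
         * terms above assemble the full MSR value 0x0007010600070106.
         */
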
        /* Boot CPU check */
        if (!boot_pat_state)
                rdmsrl(MSR_IA32_CR_PAT, boot_pat_state);

        wrmsrl(MSR_IA32_CR_PAT, pat);

        if (boot_cpu)
                pat_init_cache_modes();
}
#undef PAT

static DEFINE_SPINLOCK(memtype_lock);   /* protects memtype accesses */

/*
 * Intersects the PAT memory type with the MTRR memory type and returns
 * the resulting memory type as PAT understands it.
 * (The PAT and MTRR encodings do not use the same values for a given type.)
 * The intersection is based on the "Effective Memory Type" tables in the
 * IA-32 SDM vol 3a.
 */
static unsigned long pat_x_mtrr_type(u64 start, u64 end,
                                     enum page_cache_mode req_type)
{
        /*
         * Look for the MTRR hint to get the effective type in case where the
         * PAT request is for WB.
         */
        if (req_type == _PAGE_CACHE_MODE_WB) {
                u8 mtrr_type;

                mtrr_type = mtrr_type_lookup(start, end);
                if (mtrr_type != MTRR_TYPE_WRBACK)
                        return _PAGE_CACHE_MODE_UC_MINUS;

                return _PAGE_CACHE_MODE_WB;
        }

        return req_type;
}

struct pagerange_state {
        unsigned long   cur_pfn;
        int             ram;
        int             not_ram;
};

static int
pagerange_is_ram_callback(unsigned long initial_pfn, unsigned long total_nr_pages, void *arg)
{
        struct pagerange_state *state = arg;

        state->not_ram  |= initial_pfn > state->cur_pfn;
        state->ram      |= total_nr_pages > 0;
        state->cur_pfn   = initial_pfn + total_nr_pages;

        return state->ram && state->not_ram;
}

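/*
 * Returns 1 if the whole (non-ISA) range is System RAM, 0 if none of it is,
 * and -1 if the range mixes RAM and non-RAM pages.
 */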
static int pat_pagerange_is_ram(resource_size_t start, resource_size_t end)
{
        int ret = 0;
        unsigned long start_pfn = start >> PAGE_SHIFT;
        unsigned long end_pfn = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
        struct pagerange_state state = {start_pfn, 0, 0};

        /*
         * For legacy reasons, physical address ranges in the legacy ISA
         * region are tracked as non-RAM. This allows users of /dev/mem to
         * map portions of the legacy ISA region, even when some of those
         * portions are listed (or not listed at all) with different e820
         * types (RAM/reserved/...).
         */
        if (start_pfn < ISA_END_ADDRESS >> PAGE_SHIFT)
                start_pfn = ISA_END_ADDRESS >> PAGE_SHIFT;

        if (start_pfn < end_pfn) {
                ret = walk_system_ram_range(start_pfn, end_pfn - start_pfn,
                                &state, pagerange_is_ram_callback);
        }

        return (ret > 0) ? -1 : (state.ram ? 1 : 0);
}

/*
 * For RAM pages, we use page flags to mark the pages with the appropriate
 * memory type. Here we do two passes:
 * - Find the memtype of all the pages in the range, looking for any conflicts.
 * - In case of no conflicts, set the new memtype for the pages in the range.
 */
static int reserve_ram_pages_type(u64 start, u64 end,
                                  enum page_cache_mode req_type,
                                  enum page_cache_mode *new_type)
{
        struct page *page;
        u64 pfn;

        if (req_type == _PAGE_CACHE_MODE_UC) {
                /* We do not support strong UC */
                WARN_ON_ONCE(1);
                req_type = _PAGE_CACHE_MODE_UC_MINUS;
        }

        for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) {
                enum page_cache_mode type;

                page = pfn_to_page(pfn);
                type = get_page_memtype(page);
                if (type != -1) {
                        pr_info("reserve_ram_pages_type failed [mem %#010Lx-%#010Lx], track 0x%x, req 0x%x\n",
                                start, end - 1, type, req_type);
                        if (new_type)
                                *new_type = type;

                        return -EBUSY;
                }
        }

        if (new_type)
                *new_type = req_type;

        for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) {
                page = pfn_to_page(pfn);
                set_page_memtype(page, req_type);
        }
        return 0;
}

static int free_ram_pages_type(u64 start, u64 end)
{
        struct page *page;
        u64 pfn;

        for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) {
                page = pfn_to_page(pfn);
                set_page_memtype(page, -1);
        }
        return 0;
}

/*
 * req_type typically has one of the following values:
 * - _PAGE_CACHE_MODE_WB
 * - _PAGE_CACHE_MODE_WC
 * - _PAGE_CACHE_MODE_UC_MINUS
 * - _PAGE_CACHE_MODE_UC
 *
 * If new_type is NULL, the function returns an error if it cannot reserve the
 * region with req_type. If new_type is non-NULL, the function returns the
 * actually reserved type in *new_type on success, and a negative value on any
 * error.
 */
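/*
 * RAM pages are tracked with page flags (see reserve_ram_pages_type()); all
 * other physical address ranges are tracked in the memtype rbtree, which is
 * protected by memtype_lock.
 */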
int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_type,
                    enum page_cache_mode *new_type)
{
        struct memtype *new;
        enum page_cache_mode actual_type;
        int is_range_ram;
        int err = 0;

        BUG_ON(start >= end); /* end is exclusive */

        if (!pat_enabled) {
                /* This is identical to page table setting without PAT */
                if (new_type) {
                        if (req_type == _PAGE_CACHE_MODE_WC)
                                *new_type = _PAGE_CACHE_MODE_UC_MINUS;
                        else
                                *new_type = req_type;
                }
                return 0;
        }

        /* Low ISA region is always mapped WB in page table. No need to track */
        if (x86_platform.is_untracked_pat_range(start, end)) {
                if (new_type)
                        *new_type = _PAGE_CACHE_MODE_WB;
                return 0;
        }

        /*
         * Call mtrr_lookup to get the type hint. This is an
         * optimization for /dev/mem mmap'ers into WB memory (BIOS
         * tools and ACPI tools). Use WB request for WB memory and use
         * UC_MINUS otherwise.
         */
        actual_type = pat_x_mtrr_type(start, end, req_type);

        if (new_type)
                *new_type = actual_type;

        is_range_ram = pat_pagerange_is_ram(start, end);
        if (is_range_ram == 1) {
                err = reserve_ram_pages_type(start, end, req_type, new_type);
                return err;
        } else if (is_range_ram < 0) {
                return -EINVAL;
        }

        new = kzalloc(sizeof(struct memtype), GFP_KERNEL);
        if (!new)
                return -ENOMEM;

        new->start = start;
        new->end   = end;
        new->type  = actual_type;

        spin_lock(&memtype_lock);

        err = rbt_memtype_check_insert(new, new_type);
        if (err) {
                printk(KERN_INFO "reserve_memtype failed [mem %#010Lx-%#010Lx], track %s, req %s\n",
                       start, end - 1,
                       cattr_name(new->type), cattr_name(req_type));
                kfree(new);
                spin_unlock(&memtype_lock);

                return err;
        }

        spin_unlock(&memtype_lock);

        dprintk("reserve_memtype added [mem %#010Lx-%#010Lx], track %s, req %s, ret %s\n",
                start, end - 1, cattr_name(new->type), cattr_name(req_type),
                new_type ? cattr_name(*new_type) : "-");

        return err;
}

int free_memtype(u64 start, u64 end)
{
        int err = -EINVAL;
        int is_range_ram;
        struct memtype *entry;

        if (!pat_enabled)
                return 0;

        /* Low ISA region is always mapped WB. No need to track */
        if (x86_platform.is_untracked_pat_range(start, end))
                return 0;

        is_range_ram = pat_pagerange_is_ram(start, end);
        if (is_range_ram == 1) {
                err = free_ram_pages_type(start, end);
                return err;
        } else if (is_range_ram < 0) {
                return -EINVAL;
        }

        spin_lock(&memtype_lock);
        entry = rbt_memtype_erase(start, end);
        spin_unlock(&memtype_lock);

        if (!entry) {
                printk(KERN_INFO "%s:%d freeing invalid memtype [mem %#010Lx-%#010Lx]\n",
                       current->comm, current->pid, start, end - 1);
                return -EINVAL;
        }

        kfree(entry);

        dprintk("free_memtype request [mem %#010Lx-%#010Lx]\n", start, end - 1);

        return 0;
}

/**
 * lookup_memtype - Looks up the memory type for a physical address
 * @paddr: physical address whose memory type needs to be looked up
 *
 * Only to be called when PAT is enabled
 *
 * Returns _PAGE_CACHE_MODE_WB, _PAGE_CACHE_MODE_WC, _PAGE_CACHE_MODE_UC_MINUS
 * or _PAGE_CACHE_MODE_UC
 */
static enum page_cache_mode lookup_memtype(u64 paddr)
{
        enum page_cache_mode rettype = _PAGE_CACHE_MODE_WB;
        struct memtype *entry;

        if (x86_platform.is_untracked_pat_range(paddr, paddr + PAGE_SIZE))
                return rettype;

        if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) {
                struct page *page;

                page = pfn_to_page(paddr >> PAGE_SHIFT);
                rettype = get_page_memtype(page);
                /*
                 * -1 from get_page_memtype() implies RAM page is in its
                 * default state and not reserved, and hence of type WB
                 */
                if (rettype == -1)
                        rettype = _PAGE_CACHE_MODE_WB;

                return rettype;
        }

        spin_lock(&memtype_lock);

        entry = rbt_memtype_lookup(paddr);
        if (entry != NULL)
                rettype = entry->type;
        else
                rettype = _PAGE_CACHE_MODE_UC_MINUS;

        spin_unlock(&memtype_lock);
        return rettype;
}

/**
 * io_reserve_memtype - Request a memory type mapping for a region of memory
 * @start: start (physical address) of the region
 * @end: end (physical address) of the region
 * @type: A pointer to memtype, with requested type. On success, requested
 * or any other compatible type that was available for the region is returned
 *
 * On success, returns 0
 * On failure, returns non-zero
 */
int io_reserve_memtype(resource_size_t start, resource_size_t end,
                       enum page_cache_mode *type)
{
        resource_size_t size = end - start;
        enum page_cache_mode req_type = *type;
        enum page_cache_mode new_type;
        int ret;

        WARN_ON_ONCE(iomem_map_sanity_check(start, size));

        ret = reserve_memtype(start, end, req_type, &new_type);
        if (ret)
                goto out_err;

        if (!is_new_memtype_allowed(start, size, req_type, new_type))
                goto out_free;

        if (kernel_map_sync_memtype(start, size, new_type) < 0)
                goto out_free;

        *type = new_type;

        return 0;

out_free:
        free_memtype(start, end);
        ret = -EBUSY;
out_err:
        return ret;
}

/**
 * io_free_memtype - Release a memory type mapping for a region of memory
 * @start: start (physical address) of the region
 * @end: end (physical address) of the region
 */
void io_free_memtype(resource_size_t start, resource_size_t end)
{
        free_memtype(start, end);
}

pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
                              unsigned long size, pgprot_t vma_prot)
{
        return vma_prot;
}

#ifdef CONFIG_STRICT_DEVMEM
/* This check is done in drivers/char/mem.c in case of STRICT_DEVMEM */
static inline int range_is_allowed(unsigned long pfn, unsigned long size)
{
        return 1;
}
#else
/* This check is needed to avoid cache aliasing when PAT is enabled */
static inline int range_is_allowed(unsigned long pfn, unsigned long size)
{
        u64 from = ((u64)pfn) << PAGE_SHIFT;
        u64 to = from + size;
        u64 cursor = from;

        if (!pat_enabled)
                return 1;

        while (cursor < to) {
                if (!devmem_is_allowed(pfn)) {
                        printk(KERN_INFO "Program %s tried to access /dev/mem between [mem %#010Lx-%#010Lx]\n",
                               current->comm, from, to - 1);
                        return 0;
                }
                cursor += PAGE_SIZE;
                pfn++;
        }
        return 1;
}
#endif /* CONFIG_STRICT_DEVMEM */

int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
                                 unsigned long size, pgprot_t *vma_prot)
{
        enum page_cache_mode pcm = _PAGE_CACHE_MODE_WB;

        if (!range_is_allowed(pfn, size))
                return 0;

        if (file->f_flags & O_DSYNC)
                pcm = _PAGE_CACHE_MODE_UC_MINUS;

#ifdef CONFIG_X86_32
        /*
         * On the PPro and successors, the MTRRs are used to set
         * memory types for physical addresses outside main memory,
         * so blindly setting UC or PWT on those pages is wrong.
         * For Pentiums and earlier, the surround logic should disable
         * caching for the high addresses through the KEN pin, but
         * we maintain the tradition of paranoia in this code.
         */
        if (!pat_enabled &&
            !(boot_cpu_has(X86_FEATURE_MTRR) ||
              boot_cpu_has(X86_FEATURE_K6_MTRR) ||
              boot_cpu_has(X86_FEATURE_CYRIX_ARR) ||
              boot_cpu_has(X86_FEATURE_CENTAUR_MCR)) &&
            (pfn << PAGE_SHIFT) >= __pa(high_memory)) {
                pcm = _PAGE_CACHE_MODE_UC;
        }
#endif

        *vma_prot = __pgprot((pgprot_val(*vma_prot) & ~_PAGE_CACHE_MASK) |
                             cachemode2protval(pcm));
        return 1;
}

/*
 * Change the memory type for the physical address range in kernel identity
 * mapping space if that range is a part of identity map.
 */
int kernel_map_sync_memtype(u64 base, unsigned long size,
                            enum page_cache_mode pcm)
{
        unsigned long id_sz;

        if (base > __pa(high_memory-1))
                return 0;

        /*
         * Some areas in the middle of the kernel identity range
         * are not mapped, like the PCI space.
         */
        if (!page_is_ram(base >> PAGE_SHIFT))
                return 0;

        id_sz = (__pa(high_memory-1) <= base + size) ?
                                __pa(high_memory) - base :
                                size;

        if (ioremap_change_attr((unsigned long)__va(base), id_sz, pcm) < 0) {
                printk(KERN_INFO "%s:%d ioremap_change_attr failed %s for [mem %#010Lx-%#010Lx]\n",
                       current->comm, current->pid,
                       cattr_name(pcm),
                       base, (unsigned long long)(base + size-1));
                return -EINVAL;
        }
        return 0;
}

/*
 * Internal interface to reserve a range of physical memory with prot.
 * Reserves non-RAM regions only; after a successful reserve_memtype, this
 * function also keeps the identity mapping (if any) in sync with the new prot.
 */
static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
                             int strict_prot)
{
        int is_ram = 0;
        int ret;
        enum page_cache_mode want_pcm = pgprot2cachemode(*vma_prot);
        enum page_cache_mode pcm = want_pcm;

        is_ram = pat_pagerange_is_ram(paddr, paddr + size);

        /*
         * reserve_pfn_range() for RAM pages. We do not refcount to keep
         * track of number of mappings of RAM pages. We can assert that
         * the type requested matches the type of first page in the range.
         */
        if (is_ram) {
                if (!pat_enabled)
                        return 0;

                pcm = lookup_memtype(paddr);
                if (want_pcm != pcm) {
                        printk(KERN_WARNING "%s:%d map pfn RAM range req %s for [mem %#010Lx-%#010Lx], got %s\n",
                               current->comm, current->pid,
                               cattr_name(want_pcm),
                               (unsigned long long)paddr,
                               (unsigned long long)(paddr + size - 1),
                               cattr_name(pcm));
                        *vma_prot = __pgprot((pgprot_val(*vma_prot) &
                                             (~_PAGE_CACHE_MASK)) |
                                             cachemode2protval(pcm));
                }
                return 0;
        }

        ret = reserve_memtype(paddr, paddr + size, want_pcm, &pcm);
        if (ret)
                return ret;

        if (pcm != want_pcm) {
                if (strict_prot ||
                    !is_new_memtype_allowed(paddr, size, want_pcm, pcm)) {
                        free_memtype(paddr, paddr + size);
                        printk(KERN_ERR "%s:%d map pfn expected mapping type %s for [mem %#010Lx-%#010Lx], got %s\n",
                               current->comm, current->pid,
                               cattr_name(want_pcm),
                               (unsigned long long)paddr,
                               (unsigned long long)(paddr + size - 1),
                               cattr_name(pcm));
                        return -EINVAL;
                }
                /*
                 * We allow returning a different type than the one requested
                 * in the non-strict case.
                 */
                *vma_prot = __pgprot((pgprot_val(*vma_prot) &
                                     (~_PAGE_CACHE_MASK)) |
                                     cachemode2protval(pcm));
        }

        if (kernel_map_sync_memtype(paddr, size, pcm) < 0) {
                free_memtype(paddr, paddr + size);
                return -EINVAL;
        }
        return 0;
}

/*
 * Internal interface to free a range of physical memory.
 * Frees non-RAM regions only.
 */
static void free_pfn_range(u64 paddr, unsigned long size)
{
        int is_ram;

        is_ram = pat_pagerange_is_ram(paddr, paddr + size);
        if (is_ram == 0)
                free_memtype(paddr, paddr + size);
}

/*
 * track_pfn_copy is called when a vma covering a pfnmap gets copied through
 * copy_page_range().
 *
 * If the vma has a linear pfn mapping for the entire range, we get the prot
 * from the pte and reserve the entire vma range with a single
 * reserve_pfn_range call.
 */
int track_pfn_copy(struct vm_area_struct *vma)
{
        resource_size_t paddr;
        unsigned long prot;
        unsigned long vma_size = vma->vm_end - vma->vm_start;
        pgprot_t pgprot;

        if (vma->vm_flags & VM_PAT) {
                /*
                 * reserve the whole chunk covered by vma. We need the
                 * starting address and protection from pte.
                 */
                if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) {
                        WARN_ON_ONCE(1);
                        return -EINVAL;
                }
                pgprot = __pgprot(prot);
                return reserve_pfn_range(paddr, vma_size, &pgprot, 1);
        }

        return 0;
}

/*
 * prot is passed in as a parameter for the new mapping. If the vma has a
 * linear pfn mapping for the entire range, reserve the entire vma range with
 * a single reserve_pfn_range call.
 */
int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
                    unsigned long pfn, unsigned long addr, unsigned long size)
{
        resource_size_t paddr = (resource_size_t)pfn << PAGE_SHIFT;
        enum page_cache_mode pcm;

        /* reserve the whole chunk starting from paddr */
        if (addr == vma->vm_start && size == (vma->vm_end - vma->vm_start)) {
                int ret;

                ret = reserve_pfn_range(paddr, size, prot, 0);
                if (!ret)
                        vma->vm_flags |= VM_PAT;
                return ret;
        }

        if (!pat_enabled)
                return 0;

        /*
         * For anything smaller than the vma size we set prot based on the
         * lookup.
         */
        pcm = lookup_memtype(paddr);

        /* Check memtype for the remaining pages */
        while (size > PAGE_SIZE) {
                size -= PAGE_SIZE;
                paddr += PAGE_SIZE;
                if (pcm != lookup_memtype(paddr))
                        return -EINVAL;
        }

        *prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) |
                         cachemode2protval(pcm));

        return 0;
}

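/*
 * track_pfn_insert is called for single-pfn insertions (e.g. via
 * vm_insert_pfn): with PAT enabled, derive the cache attribute for *prot
 * from the tracked memtype of the target pfn.
 */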
int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
                     unsigned long pfn)
{
        enum page_cache_mode pcm;

        if (!pat_enabled)
                return 0;

        /* Set prot based on lookup */
        pcm = lookup_memtype((resource_size_t)pfn << PAGE_SHIFT);
        *prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) |
                         cachemode2protval(pcm));

        return 0;
}

/*
 * untrack_pfn is called while unmapping a pfnmap for a region.
 * untrack can be called for a specific region indicated by pfn and size,
 * or for the entire vma (in which case pfn and size are zero).
 */
void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
                 unsigned long size)
{
        resource_size_t paddr;
        unsigned long prot;

        if (!(vma->vm_flags & VM_PAT))
                return;

        /* free the chunk starting from pfn or the whole chunk */
        paddr = (resource_size_t)pfn << PAGE_SHIFT;
        if (!paddr && !size) {
                if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) {
                        WARN_ON_ONCE(1);
                        return;
                }

                size = vma->vm_end - vma->vm_start;
        }
        free_pfn_range(paddr, size);
        vma->vm_flags &= ~VM_PAT;
}

pgprot_t pgprot_writecombine(pgprot_t prot)
{
        if (pat_enabled)
                return __pgprot(pgprot_val(prot) |
                                cachemode2protval(_PAGE_CACHE_MODE_WC));
        else
                return pgprot_noncached(prot);
}
EXPORT_SYMBOL_GPL(pgprot_writecombine);

#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT)

static struct memtype *memtype_get_idx(loff_t pos)
{
        struct memtype *print_entry;
        int ret;

        print_entry = kzalloc(sizeof(struct memtype), GFP_KERNEL);
        if (!print_entry)
                return NULL;

        spin_lock(&memtype_lock);
        ret = rbt_memtype_copy_nth_element(print_entry, pos);
        spin_unlock(&memtype_lock);

        if (!ret) {
                return print_entry;
        } else {
                kfree(print_entry);
                return NULL;
        }
}

static void *memtype_seq_start(struct seq_file *seq, loff_t *pos)
{
        if (*pos == 0) {
                ++*pos;
                seq_puts(seq, "PAT memtype list:\n");
        }

        return memtype_get_idx(*pos);
}

static void *memtype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
        ++*pos;
        return memtype_get_idx(*pos);
}

static void memtype_seq_stop(struct seq_file *seq, void *v)
{
}

static int memtype_seq_show(struct seq_file *seq, void *v)
{
        struct memtype *print_entry = (struct memtype *)v;

        seq_printf(seq, "%s @ 0x%Lx-0x%Lx\n", cattr_name(print_entry->type),
                   print_entry->start, print_entry->end);
        kfree(print_entry);

        return 0;
}

static const struct seq_operations memtype_seq_ops = {
        .start = memtype_seq_start,
        .next  = memtype_seq_next,
        .stop  = memtype_seq_stop,
        .show  = memtype_seq_show,
};

static int memtype_seq_open(struct inode *inode, struct file *file)
{
        return seq_open(file, &memtype_seq_ops);
}

static const struct file_operations memtype_fops = {
        .open    = memtype_seq_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = seq_release,
};

static int __init pat_memtype_list_init(void)
{
        if (pat_enabled) {
                debugfs_create_file("pat_memtype_list", S_IRUSR,
                                    arch_debugfs_dir, NULL, &memtype_fops);
        }
        return 0;
}
late_initcall(pat_memtype_list_init);

#endif /* CONFIG_DEBUG_FS && CONFIG_X86_PAT */