memremap.h

#ifndef _LINUX_MEMREMAP_H_
#define _LINUX_MEMREMAP_H_
#include <linux/mm.h>
#include <linux/ioport.h>
#include <linux/percpu-refcount.h>

#include <asm/pgtable.h>

struct resource;
struct device;

/**
 * struct vmem_altmap - pre-allocated storage for vmemmap_populate
 * @base_pfn: base of the entire dev_pagemap mapping
 * @reserve: pages mapped, but reserved for driver use (relative to @base_pfn)
 * @free: free pages set aside in the mapping for memmap storage
 * @align: pages reserved to meet allocation alignments
 * @alloc: track pages consumed, private to vmemmap_populate()
 */
struct vmem_altmap {
        const unsigned long base_pfn;
        const unsigned long reserve;
        unsigned long free;
        unsigned long align;
        unsigned long alloc;
};

unsigned long vmem_altmap_offset(struct vmem_altmap *altmap);
void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns);

#ifdef CONFIG_ZONE_DEVICE
struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start);
#else
static inline struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start)
{
        return NULL;
}
#endif
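
/*
 * Illustrative sketch (not part of the original header): how a caller, e.g.
 * a pmem-style driver, might describe a device range whose leading pages are
 * handed to vmemmap_populate() to store that range's own struct pages. The
 * function name and the nr_memmap_pages parameter are hypothetical; only the
 * struct vmem_altmap fields and vmem_altmap_offset() come from above
 * (PHYS_PFN() is assumed from <linux/pfn.h>).
 */
static inline unsigned long example_first_usable_pfn(phys_addr_t base,
                unsigned long nr_memmap_pages)
{
        struct vmem_altmap altmap = {
                .base_pfn = PHYS_PFN(base),  /* first pfn of the device range */
                .reserve  = 0,               /* nothing held back for the driver */
                .free     = nr_memmap_pages, /* pages set aside for memmap storage */
        };

        /*
         * vmem_altmap_offset() reports @reserve + @free, i.e. how many
         * leading pfns are consumed before ordinary data pages can start.
         */
        return altmap.base_pfn + vmem_altmap_offset(&altmap);
}
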
/*
 * Specialize ZONE_DEVICE memory into multiple types, each having a different
 * usage.
 *
 * MEMORY_DEVICE_HOST:
 * Persistent device memory (pmem): the struct pages might be allocated in a
 * different kind of memory and the architecture might want to perform special
 * actions. It is similar to regular memory, in that the CPU can access it
 * transparently. However, it is likely to have different bandwidth and latency
 * than regular memory. See Documentation/nvdimm/nvdimm.txt for more
 * information.
 *
 * MEMORY_DEVICE_PRIVATE:
 * Device memory that is not directly addressable by the CPU: the CPU can
 * neither read nor write private memory. In this case, we do still have
 * struct pages backing the device memory. Doing so simplifies the
 * implementation, but it is important to remember that there are certain
 * points at which the struct page must be treated as an opaque object rather
 * than a "normal" struct page.
 *
 * A more complete discussion of unaddressable memory may be found in
 * include/linux/hmm.h and Documentation/vm/hmm.txt.
 */
enum memory_type {
        MEMORY_DEVICE_HOST = 0,
        MEMORY_DEVICE_PRIVATE,
};
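
/*
 * Illustrative sketch (not part of the original header): what the two types
 * above mean to code holding a struct page. MEMORY_DEVICE_HOST (pmem) pages
 * can be read and written by the CPU like ordinary memory, while
 * MEMORY_DEVICE_PRIVATE pages must first be migrated back to system memory.
 * The helper name is hypothetical; is_zone_device_page() and page->pgmap are
 * assumed from <linux/mm.h> and <linux/mm_types.h>.
 */
static inline bool example_cpu_can_touch(const struct page *page)
{
        if (!is_zone_device_page(page))
                return true;                             /* ordinary system RAM */
        return page->pgmap->type == MEMORY_DEVICE_HOST;  /* pmem is CPU-visible */
}
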
/*
 * For MEMORY_DEVICE_PRIVATE we use ZONE_DEVICE and extend it with two
 * callbacks:
 *   page_fault()
 *   page_free()
 *
 * Additional notes about MEMORY_DEVICE_PRIVATE may be found in
 * include/linux/hmm.h and Documentation/vm/hmm.txt. There is also a brief
 * explanation in include/linux/memory_hotplug.h.
 *
 * The page_fault() callback must migrate the page back, from device memory to
 * system memory, so that the CPU can access it. This might fail for various
 * reasons (device issues, the device has been unplugged, ...). When such an
 * error condition happens, the page_fault() callback must return
 * VM_FAULT_SIGBUS and set the CPU page table entry to "poisoned".
 *
 * Note that because memory cgroup charges are transferred to the device
 * memory, this should never fail due to memory restrictions. However,
 * allocation of a regular system page might still fail because we are out of
 * memory. If that happens, the page_fault() callback must return VM_FAULT_OOM.
 *
 * The page_fault() callback can also try to migrate back multiple pages in
 * one chunk, as an optimization. It must, however, prioritize the faulting
 * address over all the others.
 *
 * The page_free() callback is called once the page refcount reaches 1
 * (ZONE_DEVICE pages never reach a refcount of 0 unless there is a refcount
 * bug; this allows the device driver to implement its own memory management).
 *
 * An illustrative sketch of such callbacks follows the typedefs below.
 */
typedef int (*dev_page_fault_t)(struct vm_area_struct *vma,
                                unsigned long addr,
                                const struct page *page,
                                unsigned int flags,
                                pmd_t *pmdp);
typedef void (*dev_page_free_t)(struct page *page, void *data);
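
/*
 * Illustrative sketch (not part of the original header): the shape of driver
 * callbacks matching the typedefs above. The actual migration and release
 * steps are driver specific (see include/linux/hmm.h), so they are reduced
 * to the hypothetical helpers example_migrate_to_ram() and
 * example_devmem_release(); only the signatures and the VM_FAULT_SIGBUS
 * contract come from the comment above.
 */
int example_migrate_to_ram(struct vm_area_struct *vma, unsigned long addr,
                           const struct page *page, unsigned int flags,
                           pmd_t *pmdp);
void example_devmem_release(void *data, struct page *page);

static inline int example_devmem_fault(struct vm_area_struct *vma,
                                       unsigned long addr,
                                       const struct page *page,
                                       unsigned int flags,
                                       pmd_t *pmdp)
{
        /*
         * Copy the faulting device page back to a freshly allocated system
         * page and update the CPU page tables (hypothetical helper).
         */
        if (example_migrate_to_ram(vma, addr, page, flags, pmdp))
                return VM_FAULT_SIGBUS; /* migration failed: poison the entry */
        return 0;                       /* assumed: 0 reports success */
}

static inline void example_devmem_free(struct page *page, void *data)
{
        /*
         * Refcount reached 1: hand the page back to the driver's own
         * allocator, reached through the @data cookie (hypothetical helper).
         */
        example_devmem_release(data, page);
}
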
/**
 * struct dev_pagemap - metadata for ZONE_DEVICE mappings
 * @page_fault: callback when the CPU faults on an unaddressable device page
 * @page_free: free page callback when page refcount reaches 1
 * @altmap: pre-allocated/reserved memory for vmemmap allocations
 * @res: physical address range covered by @ref
 * @ref: reference count that pins the devm_memremap_pages() mapping
 * @dev: host device of the mapping for debug
 * @data: private data pointer for page_free()
 * @type: memory type: see enum memory_type above
 */
struct dev_pagemap {
        dev_page_fault_t page_fault;
        dev_page_free_t page_free;
        struct vmem_altmap *altmap;
        const struct resource *res;
        struct percpu_ref *ref;
        struct device *dev;
        void *data;
        enum memory_type type;
};

#ifdef CONFIG_ZONE_DEVICE
void *devm_memremap_pages(struct device *dev, struct resource *res,
                struct percpu_ref *ref, struct vmem_altmap *altmap);
struct dev_pagemap *find_dev_pagemap(resource_size_t phys);

static inline bool is_zone_device_page(const struct page *page);

static inline bool is_device_private_page(const struct page *page)
{
        return is_zone_device_page(page) &&
                page->pgmap->type == MEMORY_DEVICE_PRIVATE;
}
#else
static inline void *devm_memremap_pages(struct device *dev,
                struct resource *res, struct percpu_ref *ref,
                struct vmem_altmap *altmap)
{
        /*
         * Fail attempts to call devm_memremap_pages() without
         * ZONE_DEVICE support enabled; this requires callers to fall
         * back to plain devm_memremap() based on config.
         */
        WARN_ON_ONCE(1);
        return ERR_PTR(-ENXIO);
}

static inline struct dev_pagemap *find_dev_pagemap(resource_size_t phys)
{
        return NULL;
}

static inline bool is_device_private_page(const struct page *page)
{
        return false;
}
#endif
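
/*
 * Illustrative sketch (not part of the original header): how a hypothetical
 * driver probe might map a device range, falling back to plain
 * devm_memremap() when CONFIG_ZONE_DEVICE is disabled, as the stub above
 * requires. devm_memremap() and MEMREMAP_WB are assumed from <linux/io.h>,
 * IS_ERR() from <linux/err.h>; the function name is hypothetical.
 */
static inline void *example_map_device_range(struct device *dev,
                struct resource *res, struct percpu_ref *ref)
{
        void *addr;

        if (IS_ENABLED(CONFIG_ZONE_DEVICE))
                /* struct pages are created for the range: pfn_to_page() works */
                addr = devm_memremap_pages(dev, res, ref, NULL);
        else
                /* plain mapping: no struct pages behind it */
                addr = devm_memremap(dev, res->start, resource_size(res),
                                     MEMREMAP_WB);

        return IS_ERR(addr) ? NULL : addr;
}
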
/**
 * get_dev_pagemap() - take a new live reference on the dev_pagemap for @pfn
 * @pfn: page frame number to look up a page_map for
 * @pgmap: optional known pgmap that already has a reference
 *
 * @pgmap allows the overhead of a lookup to be bypassed when @pfn lands in the
 * same mapping. A usage sketch follows put_dev_pagemap() below.
 */
static inline struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
                struct dev_pagemap *pgmap)
{
        const struct resource *res = pgmap ? pgmap->res : NULL;
        resource_size_t phys = PFN_PHYS(pfn);

        /*
         * In the cached case we're already holding a live reference, so
         * we can simply do a blind increment.
         */
        if (res && phys >= res->start && phys <= res->end) {
                percpu_ref_get(pgmap->ref);
                return pgmap;
        }

        /* fall back to slow path lookup */
        rcu_read_lock();
        pgmap = find_dev_pagemap(phys);
        if (pgmap && !percpu_ref_tryget_live(pgmap->ref))
                pgmap = NULL;
        rcu_read_unlock();

        return pgmap;
}

static inline void put_dev_pagemap(struct dev_pagemap *pgmap)
{
        if (pgmap)
                percpu_ref_put(pgmap->ref);
}
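
/*
 * Illustrative sketch (not part of the original header): walking a run of
 * pfns while reusing the previous dev_pagemap as a lookup hint, so that
 * neighbouring pfns take the cached fast path above. A reference is held
 * across each get_dev_pagemap() call, as the blind-increment fast path
 * expects. The walker and its visit() callback are hypothetical.
 */
static inline void example_walk_device_pfns(unsigned long pfn,
                unsigned long nr_pages,
                void (*visit)(struct page *page))
{
        struct dev_pagemap *pgmap = NULL;
        unsigned long i;

        for (i = 0; i < nr_pages; i++) {
                struct dev_pagemap *hint = pgmap;

                pgmap = get_dev_pagemap(pfn + i, hint); /* takes its own ref */
                if (hint)
                        put_dev_pagemap(hint);          /* drop last iteration's ref */
                if (!pgmap)
                        break;                          /* pfn is not device memory */
                visit(pfn_to_page(pfn + i));
        }
        if (pgmap)
                put_dev_pagemap(pgmap);
}
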
#endif /* _LINUX_MEMREMAP_H_ */