/* util.c */

#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/compiler.h>
#include <linux/export.h>
#include <linux/err.h>
#include <linux/sched.h>
#include <linux/security.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/mman.h>
#include <linux/hugetlb.h>
#include <asm/uaccess.h>

#include "internal.h"

#define CREATE_TRACE_POINTS
#include <trace/events/kmem.h>
/**
 * kstrdup - allocate space for and copy an existing string
 * @s: the string to duplicate
 * @gfp: the GFP mask used in the kmalloc() call when allocating memory
 */
char *kstrdup(const char *s, gfp_t gfp)
{
        size_t len;
        char *buf;

        if (!s)
                return NULL;

        len = strlen(s) + 1;
        buf = kmalloc_track_caller(len, gfp);
        if (buf)
                memcpy(buf, s, len);
        return buf;
}
EXPORT_SYMBOL(kstrdup);
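
/*
 * Illustrative sketch (editor's example, not part of the original file):
 * a typical caller takes a private copy of a caller-supplied string and
 * releases it with kfree() when done.  The function and variable names
 * below are hypothetical.
 */
static __maybe_unused int example_kstrdup_usage(const char *src)
{
        char *copy = kstrdup(src, GFP_KERNEL);

        if (!copy)
                return -ENOMEM;
        /* ... use the private, writable copy ... */
        kfree(copy);
        return 0;
}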

/**
 * kstrndup - allocate space for and copy an existing string
 * @s: the string to duplicate
 * @max: read at most @max chars from @s
 * @gfp: the GFP mask used in the kmalloc() call when allocating memory
 */
char *kstrndup(const char *s, size_t max, gfp_t gfp)
{
        size_t len;
        char *buf;

        if (!s)
                return NULL;

        len = strnlen(s, max);
        buf = kmalloc_track_caller(len + 1, gfp);
        if (buf) {
                memcpy(buf, s, len);
                buf[len] = '\0';
        }
        return buf;
}
EXPORT_SYMBOL(kstrndup);

/**
 * kmemdup - duplicate region of memory
 *
 * @src: memory region to duplicate
 * @len: memory region length
 * @gfp: GFP mask to use
 */
void *kmemdup(const void *src, size_t len, gfp_t gfp)
{
        void *p;

        p = kmalloc_track_caller(len, gfp);
        if (p)
                memcpy(p, src, len);
        return p;
}
EXPORT_SYMBOL(kmemdup);
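
/*
 * Illustrative sketch (editor's example, not part of the original file):
 * kmemdup() is the binary-safe counterpart of kstrdup(); it copies exactly
 * @len bytes, so it suits buffers that may contain embedded NULs.  The
 * function name below is hypothetical.
 */
static __maybe_unused void *example_kmemdup_usage(const void *blob, size_t len)
{
        /* Returns a private copy of @blob, or NULL on allocation failure. */
        return kmemdup(blob, len, GFP_KERNEL);
}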

/**
 * memdup_user - duplicate memory region from user space
 *
 * @src: source address in user space
 * @len: number of bytes to copy
 *
 * Returns an ERR_PTR() on failure.
 */
void *memdup_user(const void __user *src, size_t len)
{
        void *p;

        /*
         * Always use GFP_KERNEL, since copy_from_user() can sleep and
         * cause pagefault, which makes it pointless to use GFP_NOFS
         * or GFP_ATOMIC.
         */
        p = kmalloc_track_caller(len, GFP_KERNEL);
        if (!p)
                return ERR_PTR(-ENOMEM);

        if (copy_from_user(p, src, len)) {
                kfree(p);
                return ERR_PTR(-EFAULT);
        }

        return p;
}
EXPORT_SYMBOL(memdup_user);
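
/*
 * Illustrative sketch (editor's example, not part of the original file):
 * unlike the kmalloc()-style helpers above, memdup_user() reports failure
 * with an ERR_PTR(), so callers must test with IS_ERR() rather than
 * checking for NULL.  The function name below is hypothetical.
 */
static __maybe_unused int example_memdup_user_usage(const void __user *ubuf,
                                                    size_t len)
{
        void *kbuf = memdup_user(ubuf, len);

        if (IS_ERR(kbuf))
                return PTR_ERR(kbuf);
        /* ... parse the kernel-space copy ... */
        kfree(kbuf);
        return 0;
}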

static __always_inline void *__do_krealloc(const void *p, size_t new_size,
                                           gfp_t flags)
{
        void *ret;
        size_t ks = 0;

        if (p)
                ks = ksize(p);

        if (ks >= new_size)
                return (void *)p;

        ret = kmalloc_track_caller(new_size, flags);
        if (ret && p)
                memcpy(ret, p, ks);

        return ret;
}

/**
 * __krealloc - like krealloc() but don't free @p.
 * @p: object to reallocate memory for.
 * @new_size: how many bytes of memory are required.
 * @flags: the type of memory to allocate.
 *
 * This function is like krealloc() except it never frees the originally
 * allocated buffer. Use this if you don't want to free the buffer immediately
 * like, for example, with RCU.
 */
void *__krealloc(const void *p, size_t new_size, gfp_t flags)
{
        if (unlikely(!new_size))
                return ZERO_SIZE_PTR;

        return __do_krealloc(p, new_size, flags);
}
EXPORT_SYMBOL(__krealloc);

/**
 * krealloc - reallocate memory. The contents will remain unchanged.
 * @p: object to reallocate memory for.
 * @new_size: how many bytes of memory are required.
 * @flags: the type of memory to allocate.
 *
 * The contents of the object pointed to are preserved up to the
 * lesser of the new and old sizes. If @p is %NULL, krealloc()
 * behaves exactly like kmalloc(). If @new_size is 0 and @p is not a
 * %NULL pointer, the object pointed to is freed.
 */
void *krealloc(const void *p, size_t new_size, gfp_t flags)
{
        void *ret;

        if (unlikely(!new_size)) {
                kfree(p);
                return ZERO_SIZE_PTR;
        }

        ret = __do_krealloc(p, new_size, flags);
        if (ret && p != ret)
                kfree(p);

        return ret;
}
EXPORT_SYMBOL(krealloc);
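
/*
 * Illustrative sketch (editor's example, not part of the original file):
 * as with userspace realloc(), the result must be assigned to a temporary
 * first, because on failure krealloc() returns NULL and leaves the old
 * buffer allocated.  The function name below is hypothetical.
 */
static __maybe_unused int example_krealloc_usage(void)
{
        char *buf, *tmp;

        buf = kmalloc(16, GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        tmp = krealloc(buf, 32, GFP_KERNEL);
        if (!tmp) {
                kfree(buf);     /* old buffer is still ours to free */
                return -ENOMEM;
        }
        buf = tmp;
        /* ... use the enlarged buffer ... */
        kfree(buf);
        return 0;
}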

/**
 * kzfree - like kfree but zero memory
 * @p: object to free memory of
 *
 * The memory of the object @p points to is zeroed before it is freed.
 * If @p is %NULL, kzfree() does nothing.
 *
 * Note: this function zeroes the whole allocated buffer which can be a good
 * deal bigger than the requested buffer size passed to kmalloc(). So be
 * careful when using this function in performance sensitive code.
 */
void kzfree(const void *p)
{
        size_t ks;
        void *mem = (void *)p;

        if (unlikely(ZERO_OR_NULL_PTR(mem)))
                return;
        ks = ksize(mem);
        memset(mem, 0, ks);
        kfree(mem);
}
EXPORT_SYMBOL(kzfree);
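
/*
 * Illustrative sketch (editor's example, not part of the original file):
 * kzfree() is intended for buffers holding sensitive data such as keys,
 * where the contents should not linger in freed memory.  The function and
 * variable names below are hypothetical.
 */
static __maybe_unused void example_kzfree_usage(void)
{
        u8 *key = kmalloc(32, GFP_KERNEL);

        if (!key)
                return;
        /* ... fill @key with secret material and use it ... */
        kzfree(key);    /* wipes the whole allocation before freeing it */
}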

/*
 * strndup_user - duplicate an existing string from user space
 * @s: The string to duplicate
 * @n: Maximum number of bytes to copy, including the trailing NUL.
 */
char *strndup_user(const char __user *s, long n)
{
        char *p;
        long length;

        length = strnlen_user(s, n);

        if (!length)
                return ERR_PTR(-EFAULT);

        if (length > n)
                return ERR_PTR(-EINVAL);

        p = memdup_user(s, length);

        if (IS_ERR(p))
                return p;

        p[length - 1] = '\0';

        return p;
}
EXPORT_SYMBOL(strndup_user);
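
/*
 * Illustrative sketch (editor's example, not part of the original file):
 * a bounded, NUL-terminated kernel copy of a user-supplied string, again
 * reporting failure via ERR_PTR().  The function name and the PAGE_SIZE
 * bound below are hypothetical choices.
 */
static __maybe_unused int example_strndup_user_usage(const char __user *uname)
{
        char *name = strndup_user(uname, PAGE_SIZE);

        if (IS_ERR(name))
                return PTR_ERR(name);
        /* ... @name is a NUL-terminated copy in kernel memory ... */
        kfree(name);
        return 0;
}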

void __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
                struct vm_area_struct *prev, struct rb_node *rb_parent)
{
        struct vm_area_struct *next;

        vma->vm_prev = prev;
        if (prev) {
                next = prev->vm_next;
                prev->vm_next = vma;
        } else {
                mm->mmap = vma;
                if (rb_parent)
                        next = rb_entry(rb_parent,
                                        struct vm_area_struct, vm_rb);
                else
                        next = NULL;
        }
        vma->vm_next = next;
        if (next)
                next->vm_prev = vma;
}

/* Check if the vma is being used as a stack by this task */
static int vm_is_stack_for_task(struct task_struct *t,
                                struct vm_area_struct *vma)
{
        return (vma->vm_start <= KSTK_ESP(t) && vma->vm_end >= KSTK_ESP(t));
}

/*
 * Check if the vma is being used as a stack.
 * If in_group is non-zero, check in the entire thread group or else
 * just check in the current task. Returns the pid of the task that
 * the vma is stack for.
 */
pid_t vm_is_stack(struct task_struct *task,
                  struct vm_area_struct *vma, int in_group)
{
        pid_t ret = 0;

        if (vm_is_stack_for_task(task, vma))
                return task->pid;

        if (in_group) {
                struct task_struct *t;

                rcu_read_lock();
                if (!pid_alive(task))
                        goto done;

                t = task;
                do {
                        if (vm_is_stack_for_task(t, vma)) {
                                ret = t->pid;
                                goto done;
                        }
                } while_each_thread(task, t);
done:
                rcu_read_unlock();
        }

        return ret;
}

#if defined(CONFIG_MMU) && !defined(HAVE_ARCH_PICK_MMAP_LAYOUT)
void arch_pick_mmap_layout(struct mm_struct *mm)
{
        mm->mmap_base = TASK_UNMAPPED_BASE;
        mm->get_unmapped_area = arch_get_unmapped_area;
}
#endif

/*
 * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall
 * back to the regular GUP.
 * If the architecture does not support this function, simply return with no
 * pages pinned.
 */
int __weak __get_user_pages_fast(unsigned long start,
                                 int nr_pages, int write, struct page **pages)
{
        return 0;
}
EXPORT_SYMBOL_GPL(__get_user_pages_fast);

/**
 * get_user_pages_fast() - pin user pages in memory
 * @start: starting user address
 * @nr_pages: number of pages from start to pin
 * @write: whether pages will be written to
 * @pages: array that receives pointers to the pages pinned.
 *         Should be at least nr_pages long.
 *
 * Returns number of pages pinned. This may be fewer than the number
 * requested. If nr_pages is 0 or negative, returns 0. If no pages
 * were pinned, returns -errno.
 *
 * get_user_pages_fast provides equivalent functionality to get_user_pages,
 * operating on current and current->mm, with force=0 and vma=NULL. However
 * unlike get_user_pages, it must be called without mmap_sem held.
 *
 * get_user_pages_fast may take mmap_sem and page table locks, so no
 * assumptions can be made about lack of locking. get_user_pages_fast is to be
 * implemented in a way that is advantageous (vs get_user_pages()) when the
 * user memory area is already faulted in and present in ptes. However if the
 * pages have to be faulted in, it may turn out to be slightly slower so
 * callers need to carefully consider what to use. On many architectures,
 * get_user_pages_fast simply falls back to get_user_pages.
 */
int __weak get_user_pages_fast(unsigned long start,
                               int nr_pages, int write, struct page **pages)
{
        struct mm_struct *mm = current->mm;
        int ret;

        down_read(&mm->mmap_sem);
        ret = get_user_pages(current, mm, start, nr_pages,
                             write, 0, pages, NULL);
        up_read(&mm->mmap_sem);

        return ret;
}
EXPORT_SYMBOL_GPL(get_user_pages_fast);
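
/*
 * Illustrative sketch (editor's example, not part of the original file):
 * pin a single page for read access without holding mmap_sem, then drop
 * the reference with put_page() once the data has been consumed.  The
 * function name below is hypothetical.
 */
static __maybe_unused int example_gup_fast_usage(unsigned long uaddr)
{
        struct page *page;
        int ret;

        ret = get_user_pages_fast(uaddr, 1, 0, &page);
        if (ret != 1)
                return ret < 0 ? ret : -EFAULT;
        /* ... access the page, e.g. via kmap_atomic()/kunmap_atomic() ... */
        put_page(page);
        return 0;
}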

unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr,
        unsigned long len, unsigned long prot,
        unsigned long flag, unsigned long pgoff)
{
        unsigned long ret;
        struct mm_struct *mm = current->mm;
        unsigned long populate;

        ret = security_mmap_file(file, prot, flag);
        if (!ret) {
                down_write(&mm->mmap_sem);
                ret = do_mmap_pgoff(file, addr, len, prot, flag, pgoff,
                                    &populate);
                up_write(&mm->mmap_sem);
                if (populate)
                        mm_populate(ret, populate);
        }
        return ret;
}

unsigned long vm_mmap(struct file *file, unsigned long addr,
        unsigned long len, unsigned long prot,
        unsigned long flag, unsigned long offset)
{
        if (unlikely(offset + PAGE_ALIGN(len) < offset))
                return -EINVAL;
        if (unlikely(offset & ~PAGE_MASK))
                return -EINVAL;

        return vm_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT);
}
EXPORT_SYMBOL(vm_mmap);
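
/*
 * Illustrative sketch (editor's example, not part of the original file):
 * map the first page of an already-opened file read-only into the current
 * task's address space, letting the kernel choose the address.  On failure
 * a negative errno is encoded in the returned value.  The function name
 * below is hypothetical.
 */
static __maybe_unused unsigned long example_vm_mmap_usage(struct file *file)
{
        return vm_mmap(file, 0, PAGE_SIZE, PROT_READ, MAP_SHARED, 0);
}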

struct address_space *page_mapping(struct page *page)
{
        struct address_space *mapping = page->mapping;

        /* This happens if someone calls flush_dcache_page on slab page */
        if (unlikely(PageSlab(page)))
                return NULL;

        if (unlikely(PageSwapCache(page))) {
                swp_entry_t entry;

                entry.val = page_private(page);
                mapping = swap_address_space(entry);
        } else if ((unsigned long)mapping & PAGE_MAPPING_ANON)
                mapping = NULL;
        return mapping;
}

int overcommit_ratio_handler(struct ctl_table *table, int write,
                             void __user *buffer, size_t *lenp,
                             loff_t *ppos)
{
        int ret;

        ret = proc_dointvec(table, write, buffer, lenp, ppos);
        if (ret == 0 && write)
                sysctl_overcommit_kbytes = 0;
        return ret;
}

int overcommit_kbytes_handler(struct ctl_table *table, int write,
                              void __user *buffer, size_t *lenp,
                              loff_t *ppos)
{
        int ret;

        ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
        if (ret == 0 && write)
                sysctl_overcommit_ratio = 0;
        return ret;
}

/*
 * Committed memory limit enforced when OVERCOMMIT_NEVER policy is used
 */
unsigned long vm_commit_limit(void)
{
        unsigned long allowed;

        if (sysctl_overcommit_kbytes)
                allowed = sysctl_overcommit_kbytes >> (PAGE_SHIFT - 10);
        else
                allowed = ((totalram_pages - hugetlb_total_pages())
                           * sysctl_overcommit_ratio / 100);
        allowed += total_swap_pages;

        return allowed;
}
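
/*
 * Worked example (editor's note, numbers are hypothetical): with 4 KiB
 * pages, 4 GiB of RAM (1048576 pages), no hugetlb pages, the default
 * overcommit_ratio of 50 and 1 GiB of swap (262144 pages), the limit is
 *
 *      1048576 * 50 / 100 + 262144 = 786432 pages, i.e. 3 GiB.
 *
 * Setting overcommit_kbytes instead expresses the RAM-derived part as an
 * absolute number of kilobytes, converted to pages by the shift above.
 */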

/* Tracepoints definitions. */
EXPORT_TRACEPOINT_SYMBOL(kmalloc);
EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc);
EXPORT_TRACEPOINT_SYMBOL(kmalloc_node);
EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc_node);
EXPORT_TRACEPOINT_SYMBOL(kfree);
EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free);