xdp_umem.c

// SPDX-License-Identifier: GPL-2.0
/* XDP user-space packet buffer
 * Copyright(c) 2018 Intel Corporation.
 */

#include <linux/init.h>
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/sched/task.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/bpf.h>
#include <linux/mm.h>

#include "xdp_umem.h"
#include "xsk_queue.h"

#define XDP_UMEM_MIN_CHUNK_SIZE 2048

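/* Add an XDP socket to the umem's RCU-protected list of bound sockets. */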
void xdp_add_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
{
        unsigned long flags;

        spin_lock_irqsave(&umem->xsk_list_lock, flags);
        list_add_rcu(&xs->list, &umem->xsk_list);
        spin_unlock_irqrestore(&umem->xsk_list_lock, flags);
}

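/* Remove an XDP socket from the umem's socket list. For zero-copy umems,
 * wait for RCU readers of the list to finish before returning.
 */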
void xdp_del_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
{
        unsigned long flags;

        if (xs->dev) {
                spin_lock_irqsave(&umem->xsk_list_lock, flags);
                list_del_rcu(&xs->list);
                spin_unlock_irqrestore(&umem->xsk_list_lock, flags);

                if (umem->zc)
                        synchronize_net();
        }
}

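/* Bind the umem to a device/queue pair. Zero-copy is attempted when the
 * driver implements both ndo_bpf and ndo_xsk_async_xmit; otherwise the
 * bind falls back to copy mode, or fails if XDP_ZEROCOPY was requested
 * explicitly.
 */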
int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
                        u32 queue_id, u16 flags)
{
        bool force_zc, force_copy;
        struct netdev_bpf bpf;
        int err;

        force_zc = flags & XDP_ZEROCOPY;
        force_copy = flags & XDP_COPY;

        if (force_zc && force_copy)
                return -EINVAL;

        if (force_copy)
                return 0;

        dev_hold(dev);

        if (dev->netdev_ops->ndo_bpf && dev->netdev_ops->ndo_xsk_async_xmit) {
                bpf.command = XDP_QUERY_XSK_UMEM;

                rtnl_lock();
                err = dev->netdev_ops->ndo_bpf(dev, &bpf);
                rtnl_unlock();

                if (err) {
                        dev_put(dev);
                        return force_zc ? -ENOTSUPP : 0;
                }

                bpf.command = XDP_SETUP_XSK_UMEM;
                bpf.xsk.umem = umem;
                bpf.xsk.queue_id = queue_id;

                rtnl_lock();
                err = dev->netdev_ops->ndo_bpf(dev, &bpf);
                rtnl_unlock();

                if (err) {
                        dev_put(dev);
                        return force_zc ? err : 0; /* fail or fallback */
                }

                umem->dev = dev;
                umem->queue_id = queue_id;
                umem->zc = true;
                return 0;
        }

        dev_put(dev);
        return force_zc ? -ENOTSUPP : 0; /* fail or fallback */
}

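/* Tear down the driver side of a zero-copy binding and release the
 * device reference taken in xdp_umem_assign_dev().
 */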
static void xdp_umem_clear_dev(struct xdp_umem *umem)
{
        struct netdev_bpf bpf;
        int err;

        if (umem->dev) {
                bpf.command = XDP_SETUP_XSK_UMEM;
                bpf.xsk.umem = NULL;
                bpf.xsk.queue_id = umem->queue_id;

                rtnl_lock();
                err = umem->dev->netdev_ops->ndo_bpf(umem->dev, &bpf);
                rtnl_unlock();

                if (err)
                        WARN(1, "failed to disable umem!\n");

                dev_put(umem->dev);
                umem->dev = NULL;
        }
}

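/* Mark the pinned pages dirty and drop the references taken by
 * get_user_pages().
 */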
static void xdp_umem_unpin_pages(struct xdp_umem *umem)
{
        unsigned int i;

        for (i = 0; i < umem->npgs; i++) {
                struct page *page = umem->pgs[i];

                set_page_dirty_lock(page);
                put_page(page);
        }

        kfree(umem->pgs);
        umem->pgs = NULL;
}

static void xdp_umem_unaccount_pages(struct xdp_umem *umem)
{
        if (umem->user) {
                atomic_long_sub(umem->npgs, &umem->user->locked_vm);
                free_uid(umem->user);
        }
}

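/* Final teardown of a umem: detach it from the device, destroy the fill
 * and completion rings, unpin and unaccount the user pages, and free the
 * umem itself.
 */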
static void xdp_umem_release(struct xdp_umem *umem)
{
        struct task_struct *task;
        struct mm_struct *mm;

        xdp_umem_clear_dev(umem);

        if (umem->fq) {
                xskq_destroy(umem->fq);
                umem->fq = NULL;
        }

        if (umem->cq) {
                xskq_destroy(umem->cq);
                umem->cq = NULL;
        }

        xdp_umem_unpin_pages(umem);

        task = get_pid_task(umem->pid, PIDTYPE_PID);
        put_pid(umem->pid);
        if (!task)
                goto out;
        mm = get_task_mm(task);
        put_task_struct(task);
        if (!mm)
                goto out;

        mmput(mm);
        kfree(umem->pages);
        umem->pages = NULL;

        xdp_umem_unaccount_pages(umem);
out:
        kfree(umem);
}

static void xdp_umem_release_deferred(struct work_struct *work)
{
        struct xdp_umem *umem = container_of(work, struct xdp_umem, work);

        xdp_umem_release(umem);
}

void xdp_get_umem(struct xdp_umem *umem)
{
        refcount_inc(&umem->users);
}

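/* Drop a reference on the umem. The last reference schedules the actual
 * release on a workqueue so it can run in process context.
 */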
void xdp_put_umem(struct xdp_umem *umem)
{
        if (!umem)
                return;

        if (refcount_dec_and_test(&umem->users)) {
                INIT_WORK(&umem->work, xdp_umem_release_deferred);
                schedule_work(&umem->work);
        }
}

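/* Pin the user memory area with get_user_pages(). A partial pin counts
 * as failure and whatever was pinned is released again.
 */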
static int xdp_umem_pin_pages(struct xdp_umem *umem)
{
        unsigned int gup_flags = FOLL_WRITE;
        long npgs;
        int err;

        umem->pgs = kcalloc(umem->npgs, sizeof(*umem->pgs),
                            GFP_KERNEL | __GFP_NOWARN);
        if (!umem->pgs)
                return -ENOMEM;

        down_write(&current->mm->mmap_sem);
        npgs = get_user_pages(umem->address, umem->npgs,
                              gup_flags, &umem->pgs[0], NULL);
        up_write(&current->mm->mmap_sem);

        if (npgs != umem->npgs) {
                if (npgs >= 0) {
                        umem->npgs = npgs;
                        err = -ENOMEM;
                        goto out_pin;
                }
                err = npgs;
                goto out_pgs;
        }
        return 0;

out_pin:
        xdp_umem_unpin_pages(umem);
out_pgs:
        kfree(umem->pgs);
        umem->pgs = NULL;
        return err;
}

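/* Charge the pinned pages against the user's RLIMIT_MEMLOCK, unless the
 * caller has CAP_IPC_LOCK. locked_vm is updated with a cmpxchg loop so
 * concurrent registrations cannot overshoot the limit.
 */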
static int xdp_umem_account_pages(struct xdp_umem *umem)
{
        unsigned long lock_limit, new_npgs, old_npgs;

        if (capable(CAP_IPC_LOCK))
                return 0;

        lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
        umem->user = get_uid(current_user());

        do {
                old_npgs = atomic_long_read(&umem->user->locked_vm);
                new_npgs = old_npgs + umem->npgs;
                if (new_npgs > lock_limit) {
                        free_uid(umem->user);
                        umem->user = NULL;
                        return -ENOBUFS;
                }
        } while (atomic_long_cmpxchg(&umem->user->locked_vm, old_npgs,
                                     new_npgs) != old_npgs);
        return 0;
}

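/* Validate an XDP_UMEM_REG request and set up the umem: check chunk size,
 * alignment and headroom, then account, pin and map the user pages.
 */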
static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
{
        u32 chunk_size = mr->chunk_size, headroom = mr->headroom;
        unsigned int chunks, chunks_per_page;
        u64 addr = mr->addr, size = mr->len;
        int size_chk, err, i;

        if (chunk_size < XDP_UMEM_MIN_CHUNK_SIZE || chunk_size > PAGE_SIZE) {
                /* Strictly speaking we could support this, if:
                 * - huge pages, or
                 * - using an IOMMU, or
                 * - making sure the memory area is consecutive
                 * but for now, we simply say "computer says no".
                 */
                return -EINVAL;
        }

        if (!is_power_of_2(chunk_size))
                return -EINVAL;

        if (!PAGE_ALIGNED(addr)) {
                /* Memory area has to be page size aligned. For
                 * simplicity, this might change in the future.
                 */
                return -EINVAL;
        }

        if ((addr + size) < addr)
                return -EINVAL;

        chunks = (unsigned int)div_u64(size, chunk_size);
        if (chunks == 0)
                return -EINVAL;

        chunks_per_page = PAGE_SIZE / chunk_size;
        if (chunks < chunks_per_page || chunks % chunks_per_page)
                return -EINVAL;

        headroom = ALIGN(headroom, 64);

        size_chk = chunk_size - headroom - XDP_PACKET_HEADROOM;
        if (size_chk < 0)
                return -EINVAL;

        umem->pid = get_task_pid(current, PIDTYPE_PID);
        umem->address = (unsigned long)addr;
        umem->props.chunk_mask = ~((u64)chunk_size - 1);
        umem->props.size = size;
        umem->headroom = headroom;
        umem->chunk_size_nohr = chunk_size - headroom;
        umem->npgs = size / PAGE_SIZE;
        umem->pgs = NULL;
        umem->user = NULL;
        INIT_LIST_HEAD(&umem->xsk_list);
        spin_lock_init(&umem->xsk_list_lock);

        refcount_set(&umem->users, 1);

        err = xdp_umem_account_pages(umem);
        if (err)
                goto out;

        err = xdp_umem_pin_pages(umem);
        if (err)
                goto out_account;

        umem->pages = kcalloc(umem->npgs, sizeof(*umem->pages), GFP_KERNEL);
        if (!umem->pages) {
                err = -ENOMEM;
                goto out_account;
        }

        for (i = 0; i < umem->npgs; i++)
                umem->pages[i].addr = page_address(umem->pgs[i]);

        return 0;

out_account:
        xdp_umem_unaccount_pages(umem);
out:
        put_pid(umem->pid);
        return err;
}

struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr)
{
        struct xdp_umem *umem;
        int err;

        umem = kzalloc(sizeof(*umem), GFP_KERNEL);
        if (!umem)
                return ERR_PTR(-ENOMEM);

        err = xdp_umem_reg(umem, mr);
        if (err) {
                kfree(umem);
                return ERR_PTR(err);
        }

        return umem;
}

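/* A umem can only be bound once both its fill and completion rings have
 * been created.
 */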
bool xdp_umem_validate_queues(struct xdp_umem *umem)
{
        return umem->fq && umem->cq;
}