mmu_rb.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363
  1. /*
  2. * Copyright(c) 2016 - 2017 Intel Corporation.
  3. *
  4. * This file is provided under a dual BSD/GPLv2 license. When using or
  5. * redistributing this file, you may do so under either license.
  6. *
  7. * GPL LICENSE SUMMARY
  8. *
  9. * This program is free software; you can redistribute it and/or modify
  10. * it under the terms of version 2 of the GNU General Public License as
  11. * published by the Free Software Foundation.
  12. *
  13. * This program is distributed in the hope that it will be useful, but
  14. * WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16. * General Public License for more details.
  17. *
  18. * BSD LICENSE
  19. *
  20. * Redistribution and use in source and binary forms, with or without
  21. * modification, are permitted provided that the following conditions
  22. * are met:
  23. *
  24. * - Redistributions of source code must retain the above copyright
  25. * notice, this list of conditions and the following disclaimer.
  26. * - Redistributions in binary form must reproduce the above copyright
  27. * notice, this list of conditions and the following disclaimer in
  28. * the documentation and/or other materials provided with the
  29. * distribution.
  30. * - Neither the name of Intel Corporation nor the names of its
  31. * contributors may be used to endorse or promote products derived
  32. * from this software without specific prior written permission.
  33. *
  34. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  35. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  36. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  37. * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  38. * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  39. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  40. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  41. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  42. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  43. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  44. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  45. *
  46. */
  47. #include <linux/list.h>
  48. #include <linux/rculist.h>
  49. #include <linux/mmu_notifier.h>
  50. #include <linux/interval_tree_generic.h>
  51. #include "mmu_rb.h"
  52. #include "trace.h"
  53. struct mmu_rb_handler {
  54. struct mmu_notifier mn;
  55. struct rb_root_cached root;
  56. void *ops_arg;
  57. spinlock_t lock; /* protect the RB tree */
  58. struct mmu_rb_ops *ops;
  59. struct mm_struct *mm;
  60. struct list_head lru_list;
  61. struct work_struct del_work;
  62. struct list_head del_list;
  63. struct workqueue_struct *wq;
  64. };
  65. static unsigned long mmu_node_start(struct mmu_rb_node *);
  66. static unsigned long mmu_node_last(struct mmu_rb_node *);
  67. static inline void mmu_notifier_range_start(struct mmu_notifier *,
  68. struct mm_struct *,
  69. unsigned long, unsigned long);
  70. static void mmu_notifier_mem_invalidate(struct mmu_notifier *,
  71. struct mm_struct *,
  72. unsigned long, unsigned long);
  73. static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *,
  74. unsigned long, unsigned long);
  75. static void do_remove(struct mmu_rb_handler *handler,
  76. struct list_head *del_list);
  77. static void handle_remove(struct work_struct *work);
  78. static const struct mmu_notifier_ops mn_opts = {
  79. .invalidate_range_start = mmu_notifier_range_start,
  80. };
  81. INTERVAL_TREE_DEFINE(struct mmu_rb_node, node, unsigned long, __last,
  82. mmu_node_start, mmu_node_last, static, __mmu_int_rb);
  83. static unsigned long mmu_node_start(struct mmu_rb_node *node)
  84. {
  85. return node->addr & PAGE_MASK;
  86. }
  87. static unsigned long mmu_node_last(struct mmu_rb_node *node)
  88. {
  89. return PAGE_ALIGN(node->addr + node->len) - 1;
  90. }
  91. int hfi1_mmu_rb_register(void *ops_arg, struct mm_struct *mm,
  92. struct mmu_rb_ops *ops,
  93. struct workqueue_struct *wq,
  94. struct mmu_rb_handler **handler)
  95. {
  96. struct mmu_rb_handler *handlr;
  97. int ret;
  98. handlr = kmalloc(sizeof(*handlr), GFP_KERNEL);
  99. if (!handlr)
  100. return -ENOMEM;
  101. handlr->root = RB_ROOT_CACHED;
  102. handlr->ops = ops;
  103. handlr->ops_arg = ops_arg;
  104. INIT_HLIST_NODE(&handlr->mn.hlist);
  105. spin_lock_init(&handlr->lock);
  106. handlr->mn.ops = &mn_opts;
  107. handlr->mm = mm;
  108. INIT_WORK(&handlr->del_work, handle_remove);
  109. INIT_LIST_HEAD(&handlr->del_list);
  110. INIT_LIST_HEAD(&handlr->lru_list);
  111. handlr->wq = wq;
  112. ret = mmu_notifier_register(&handlr->mn, handlr->mm);
  113. if (ret) {
  114. kfree(handlr);
  115. return ret;
  116. }
  117. *handler = handlr;
  118. return 0;
  119. }
  120. void hfi1_mmu_rb_unregister(struct mmu_rb_handler *handler)
  121. {
  122. struct mmu_rb_node *rbnode;
  123. struct rb_node *node;
  124. unsigned long flags;
  125. struct list_head del_list;
  126. /* Unregister first so we don't get any more notifications. */
  127. mmu_notifier_unregister(&handler->mn, handler->mm);
  128. /*
  129. * Make sure the wq delete handler is finished running. It will not
  130. * be triggered once the mmu notifiers are unregistered above.
  131. */
  132. flush_work(&handler->del_work);
  133. INIT_LIST_HEAD(&del_list);
  134. spin_lock_irqsave(&handler->lock, flags);
  135. while ((node = rb_first_cached(&handler->root))) {
  136. rbnode = rb_entry(node, struct mmu_rb_node, node);
  137. rb_erase_cached(node, &handler->root);
  138. /* move from LRU list to delete list */
  139. list_move(&rbnode->list, &del_list);
  140. }
  141. spin_unlock_irqrestore(&handler->lock, flags);
  142. do_remove(handler, &del_list);
  143. kfree(handler);
  144. }
  145. int hfi1_mmu_rb_insert(struct mmu_rb_handler *handler,
  146. struct mmu_rb_node *mnode)
  147. {
  148. struct mmu_rb_node *node;
  149. unsigned long flags;
  150. int ret = 0;
  151. trace_hfi1_mmu_rb_insert(mnode->addr, mnode->len);
  152. spin_lock_irqsave(&handler->lock, flags);
  153. node = __mmu_rb_search(handler, mnode->addr, mnode->len);
  154. if (node) {
  155. ret = -EINVAL;
  156. goto unlock;
  157. }
  158. __mmu_int_rb_insert(mnode, &handler->root);
  159. list_add(&mnode->list, &handler->lru_list);
  160. ret = handler->ops->insert(handler->ops_arg, mnode);
  161. if (ret) {
  162. __mmu_int_rb_remove(mnode, &handler->root);
  163. list_del(&mnode->list); /* remove from LRU list */
  164. }
  165. unlock:
  166. spin_unlock_irqrestore(&handler->lock, flags);
  167. return ret;
  168. }
  169. /* Caller must hold handler lock */
  170. static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler,
  171. unsigned long addr,
  172. unsigned long len)
  173. {
  174. struct mmu_rb_node *node = NULL;
  175. trace_hfi1_mmu_rb_search(addr, len);
  176. if (!handler->ops->filter) {
  177. node = __mmu_int_rb_iter_first(&handler->root, addr,
  178. (addr + len) - 1);
  179. } else {
  180. for (node = __mmu_int_rb_iter_first(&handler->root, addr,
  181. (addr + len) - 1);
  182. node;
  183. node = __mmu_int_rb_iter_next(node, addr,
  184. (addr + len) - 1)) {
  185. if (handler->ops->filter(node, addr, len))
  186. return node;
  187. }
  188. }
  189. return node;
  190. }
  191. bool hfi1_mmu_rb_remove_unless_exact(struct mmu_rb_handler *handler,
  192. unsigned long addr, unsigned long len,
  193. struct mmu_rb_node **rb_node)
  194. {
  195. struct mmu_rb_node *node;
  196. unsigned long flags;
  197. bool ret = false;
  198. spin_lock_irqsave(&handler->lock, flags);
  199. node = __mmu_rb_search(handler, addr, len);
  200. if (node) {
  201. if (node->addr == addr && node->len == len)
  202. goto unlock;
  203. __mmu_int_rb_remove(node, &handler->root);
  204. list_del(&node->list); /* remove from LRU list */
  205. ret = true;
  206. }
  207. unlock:
  208. spin_unlock_irqrestore(&handler->lock, flags);
  209. *rb_node = node;
  210. return ret;
  211. }
  212. void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg)
  213. {
  214. struct mmu_rb_node *rbnode, *ptr;
  215. struct list_head del_list;
  216. unsigned long flags;
  217. bool stop = false;
  218. INIT_LIST_HEAD(&del_list);
  219. spin_lock_irqsave(&handler->lock, flags);
  220. list_for_each_entry_safe_reverse(rbnode, ptr, &handler->lru_list,
  221. list) {
  222. if (handler->ops->evict(handler->ops_arg, rbnode, evict_arg,
  223. &stop)) {
  224. __mmu_int_rb_remove(rbnode, &handler->root);
  225. /* move from LRU list to delete list */
  226. list_move(&rbnode->list, &del_list);
  227. }
  228. if (stop)
  229. break;
  230. }
  231. spin_unlock_irqrestore(&handler->lock, flags);
  232. while (!list_empty(&del_list)) {
  233. rbnode = list_first_entry(&del_list, struct mmu_rb_node, list);
  234. list_del(&rbnode->list);
  235. handler->ops->remove(handler->ops_arg, rbnode);
  236. }
  237. }
  238. /*
  239. * It is up to the caller to ensure that this function does not race with the
  240. * mmu invalidate notifier which may be calling the users remove callback on
  241. * 'node'.
  242. */
  243. void hfi1_mmu_rb_remove(struct mmu_rb_handler *handler,
  244. struct mmu_rb_node *node)
  245. {
  246. unsigned long flags;
  247. /* Validity of handler and node pointers has been checked by caller. */
  248. trace_hfi1_mmu_rb_remove(node->addr, node->len);
  249. spin_lock_irqsave(&handler->lock, flags);
  250. __mmu_int_rb_remove(node, &handler->root);
  251. list_del(&node->list); /* remove from LRU list */
  252. spin_unlock_irqrestore(&handler->lock, flags);
  253. handler->ops->remove(handler->ops_arg, node);
  254. }
  255. static inline void mmu_notifier_range_start(struct mmu_notifier *mn,
  256. struct mm_struct *mm,
  257. unsigned long start,
  258. unsigned long end)
  259. {
  260. mmu_notifier_mem_invalidate(mn, mm, start, end);
  261. }
  262. static void mmu_notifier_mem_invalidate(struct mmu_notifier *mn,
  263. struct mm_struct *mm,
  264. unsigned long start, unsigned long end)
  265. {
  266. struct mmu_rb_handler *handler =
  267. container_of(mn, struct mmu_rb_handler, mn);
  268. struct rb_root_cached *root = &handler->root;
  269. struct mmu_rb_node *node, *ptr = NULL;
  270. unsigned long flags;
  271. bool added = false;
  272. spin_lock_irqsave(&handler->lock, flags);
  273. for (node = __mmu_int_rb_iter_first(root, start, end - 1);
  274. node; node = ptr) {
  275. /* Guard against node removal. */
  276. ptr = __mmu_int_rb_iter_next(node, start, end - 1);
  277. trace_hfi1_mmu_mem_invalidate(node->addr, node->len);
  278. if (handler->ops->invalidate(handler->ops_arg, node)) {
  279. __mmu_int_rb_remove(node, root);
  280. /* move from LRU list to delete list */
  281. list_move(&node->list, &handler->del_list);
  282. added = true;
  283. }
  284. }
  285. spin_unlock_irqrestore(&handler->lock, flags);
  286. if (added)
  287. queue_work(handler->wq, &handler->del_work);
  288. }
  289. /*
  290. * Call the remove function for the given handler and the list. This
  291. * is expected to be called with a delete list extracted from handler.
  292. * The caller should not be holding the handler lock.
  293. */
  294. static void do_remove(struct mmu_rb_handler *handler,
  295. struct list_head *del_list)
  296. {
  297. struct mmu_rb_node *node;
  298. while (!list_empty(del_list)) {
  299. node = list_first_entry(del_list, struct mmu_rb_node, list);
  300. list_del(&node->list);
  301. handler->ops->remove(handler->ops_arg, node);
  302. }
  303. }
  304. /*
  305. * Work queue function to remove all nodes that have been queued up to
  306. * be removed. The key feature is that mm->mmap_sem is not being held
  307. * and the remove callback can sleep while taking it, if needed.
  308. */
  309. static void handle_remove(struct work_struct *work)
  310. {
  311. struct mmu_rb_handler *handler = container_of(work,
  312. struct mmu_rb_handler,
  313. del_work);
  314. struct list_head del_list;
  315. unsigned long flags;
  316. /* remove anything that is queued to get removed */
  317. spin_lock_irqsave(&handler->lock, flags);
  318. list_replace_init(&handler->del_list, &del_list);
  319. spin_unlock_irqrestore(&handler->lock, flags);
  320. do_remove(handler, &del_list);
  321. }