  1. /*
  2. * Copyright (C) 2001 Momchil Velikov
  3. * Portions Copyright (C) 2001 Christoph Hellwig
  4. * Copyright (C) 2005 SGI, Christoph Lameter
  5. * Copyright (C) 2006 Nick Piggin
  6. * Copyright (C) 2012 Konstantin Khlebnikov
  7. * Copyright (C) 2016 Intel, Matthew Wilcox
  8. * Copyright (C) 2016 Intel, Ross Zwisler
  9. *
  10. * This program is free software; you can redistribute it and/or
  11. * modify it under the terms of the GNU General Public License as
  12. * published by the Free Software Foundation; either version 2, or (at
  13. * your option) any later version.
  14. *
  15. * This program is distributed in the hope that it will be useful, but
  16. * WITHOUT ANY WARRANTY; without even the implied warranty of
  17. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  18. * General Public License for more details.
  19. *
  20. * You should have received a copy of the GNU General Public License
  21. * along with this program; if not, write to the Free Software
  22. * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  23. */
  24. #include <linux/bitmap.h>
  25. #include <linux/bitops.h>
  26. #include <linux/bug.h>
  27. #include <linux/cpu.h>
  28. #include <linux/errno.h>
  29. #include <linux/export.h>
  30. #include <linux/idr.h>
  31. #include <linux/init.h>
  32. #include <linux/kernel.h>
  33. #include <linux/kmemleak.h>
  34. #include <linux/percpu.h>
  35. #include <linux/preempt.h> /* in_interrupt() */
  36. #include <linux/radix-tree.h>
  37. #include <linux/rcupdate.h>
  38. #include <linux/slab.h>
  39. #include <linux/string.h>
  40. #include <linux/xarray.h>
  41. /*
  42. * Radix tree node cache.
  43. */
  44. struct kmem_cache *radix_tree_node_cachep;
  45. /*
  46. * The radix tree is variable-height, so an insert operation not only has
  47. * to build the branch to its corresponding item, it also has to build the
  48. * branch to existing items if the size has to be increased (by
  49. * radix_tree_extend).
  50. *
  51. * The worst case is a zero height tree with just a single item at index 0,
  52. * and then inserting an item at index ULONG_MAX. This requires 2 new branches
  53. * of RADIX_TREE_MAX_PATH size to be created, with only the root node shared.
  54. * Hence:
  55. */
  56. #define RADIX_TREE_PRELOAD_SIZE (RADIX_TREE_MAX_PATH * 2 - 1)
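/*
 * Worked example (editor's note): with the default RADIX_TREE_MAP_SHIFT of 6
 * on a 64-bit machine, RADIX_TREE_MAX_PATH is DIV_ROUND_UP(64, 6) = 11, so
 * each CPU preloads at most 2 * 11 - 1 = 21 nodes.  The exact figure depends
 * on the configured RADIX_TREE_MAP_SHIFT.
 */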
  57. /*
  58. * The IDR does not have to be as high as the radix tree since it uses
  59. * signed integers, not unsigned longs.
  60. */
  61. #define IDR_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(int) - 1)
  62. #define IDR_MAX_PATH (DIV_ROUND_UP(IDR_INDEX_BITS, \
  63. RADIX_TREE_MAP_SHIFT))
  64. #define IDR_PRELOAD_SIZE (IDR_MAX_PATH * 2 - 1)
  65. /*
  66. * The IDA is even shorter since it uses a bitmap at the last level.
  67. */
  68. #define IDA_INDEX_BITS (8 * sizeof(int) - 1 - ilog2(IDA_BITMAP_BITS))
  69. #define IDA_MAX_PATH (DIV_ROUND_UP(IDA_INDEX_BITS, \
  70. RADIX_TREE_MAP_SHIFT))
  71. #define IDA_PRELOAD_SIZE (IDA_MAX_PATH * 2 - 1)
  72. /*
  73. * Per-cpu pool of preloaded nodes
  74. */
  75. struct radix_tree_preload {
  76. unsigned nr;
  77. /* nodes->parent points to next preallocated node */
  78. struct radix_tree_node *nodes;
  79. };
  80. static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, };
  81. static inline struct radix_tree_node *entry_to_node(void *ptr)
  82. {
  83. return (void *)((unsigned long)ptr & ~RADIX_TREE_INTERNAL_NODE);
  84. }
  85. static inline void *node_to_entry(void *ptr)
  86. {
  87. return (void *)((unsigned long)ptr | RADIX_TREE_INTERNAL_NODE);
  88. }
  89. #define RADIX_TREE_RETRY XA_RETRY_ENTRY
  90. static inline unsigned long
  91. get_slot_offset(const struct radix_tree_node *parent, void __rcu **slot)
  92. {
  93. return parent ? slot - parent->slots : 0;
  94. }
  95. static unsigned int radix_tree_descend(const struct radix_tree_node *parent,
  96. struct radix_tree_node **nodep, unsigned long index)
  97. {
  98. unsigned int offset = (index >> parent->shift) & RADIX_TREE_MAP_MASK;
  99. void __rcu **entry = rcu_dereference_raw(parent->slots[offset]);
  100. *nodep = (void *)entry;
  101. return offset;
  102. }
  103. static inline gfp_t root_gfp_mask(const struct radix_tree_root *root)
  104. {
  105. return root->xa_flags & (__GFP_BITS_MASK & ~GFP_ZONEMASK);
  106. }
  107. static inline void tag_set(struct radix_tree_node *node, unsigned int tag,
  108. int offset)
  109. {
  110. __set_bit(offset, node->tags[tag]);
  111. }
  112. static inline void tag_clear(struct radix_tree_node *node, unsigned int tag,
  113. int offset)
  114. {
  115. __clear_bit(offset, node->tags[tag]);
  116. }
  117. static inline int tag_get(const struct radix_tree_node *node, unsigned int tag,
  118. int offset)
  119. {
  120. return test_bit(offset, node->tags[tag]);
  121. }
  122. static inline void root_tag_set(struct radix_tree_root *root, unsigned tag)
  123. {
  124. root->xa_flags |= (__force gfp_t)(1 << (tag + ROOT_TAG_SHIFT));
  125. }
  126. static inline void root_tag_clear(struct radix_tree_root *root, unsigned tag)
  127. {
  128. root->xa_flags &= (__force gfp_t)~(1 << (tag + ROOT_TAG_SHIFT));
  129. }
  130. static inline void root_tag_clear_all(struct radix_tree_root *root)
  131. {
  132. root->xa_flags &= (__force gfp_t)((1 << ROOT_TAG_SHIFT) - 1);
  133. }
  134. static inline int root_tag_get(const struct radix_tree_root *root, unsigned tag)
  135. {
  136. return (__force int)root->xa_flags & (1 << (tag + ROOT_TAG_SHIFT));
  137. }
  138. static inline unsigned root_tags_get(const struct radix_tree_root *root)
  139. {
  140. return (__force unsigned)root->xa_flags >> ROOT_TAG_SHIFT;
  141. }
  142. static inline bool is_idr(const struct radix_tree_root *root)
  143. {
  144. return !!(root->xa_flags & ROOT_IS_IDR);
  145. }
  146. /*
  147. * Returns 1 if any slot in the node has this tag set.
  148. * Otherwise returns 0.
  149. */
  150. static inline int any_tag_set(const struct radix_tree_node *node,
  151. unsigned int tag)
  152. {
  153. unsigned idx;
  154. for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) {
  155. if (node->tags[tag][idx])
  156. return 1;
  157. }
  158. return 0;
  159. }
  160. static inline void all_tag_set(struct radix_tree_node *node, unsigned int tag)
  161. {
  162. bitmap_fill(node->tags[tag], RADIX_TREE_MAP_SIZE);
  163. }
  164. /**
  165. * radix_tree_find_next_bit - find the next set bit in a memory region
  166. *
  167. * @node: the node whose tag bitmap is searched
  168. * @tag: the tag index whose bitmap is searched
  169. * @offset: the bit number to start searching at
  170. *
  171. * Unrollable variant of find_next_bit() for constant size arrays.
  172. * Tail bits from RADIX_TREE_MAP_SIZE to roundup(RADIX_TREE_MAP_SIZE, BITS_PER_LONG) must be zero.
  173. * Returns the next set bit's offset, or RADIX_TREE_MAP_SIZE if none is found.
  174. */
  175. static __always_inline unsigned long
  176. radix_tree_find_next_bit(struct radix_tree_node *node, unsigned int tag,
  177. unsigned long offset)
  178. {
  179. const unsigned long *addr = node->tags[tag];
  180. if (offset < RADIX_TREE_MAP_SIZE) {
  181. unsigned long tmp;
  182. addr += offset / BITS_PER_LONG;
  183. tmp = *addr >> (offset % BITS_PER_LONG);
  184. if (tmp)
  185. return __ffs(tmp) + offset;
  186. offset = (offset + BITS_PER_LONG) & ~(BITS_PER_LONG - 1);
  187. while (offset < RADIX_TREE_MAP_SIZE) {
  188. tmp = *++addr;
  189. if (tmp)
  190. return __ffs(tmp) + offset;
  191. offset += BITS_PER_LONG;
  192. }
  193. }
  194. return RADIX_TREE_MAP_SIZE;
  195. }
  196. static unsigned int iter_offset(const struct radix_tree_iter *iter)
  197. {
  198. return iter->index & RADIX_TREE_MAP_MASK;
  199. }
  200. /*
  201. * The maximum index which can be stored in a radix tree
  202. */
  203. static inline unsigned long shift_maxindex(unsigned int shift)
  204. {
  205. return (RADIX_TREE_MAP_SIZE << shift) - 1;
  206. }
  207. static inline unsigned long node_maxindex(const struct radix_tree_node *node)
  208. {
  209. return shift_maxindex(node->shift);
  210. }
  211. static unsigned long next_index(unsigned long index,
  212. const struct radix_tree_node *node,
  213. unsigned long offset)
  214. {
  215. return (index & ~node_maxindex(node)) + (offset << node->shift);
  216. }
  217. /*
  218. * This assumes that the caller has performed appropriate preallocation, and
  219. * that the caller has pinned this thread of control to the current CPU.
  220. */
  221. static struct radix_tree_node *
  222. radix_tree_node_alloc(gfp_t gfp_mask, struct radix_tree_node *parent,
  223. struct radix_tree_root *root,
  224. unsigned int shift, unsigned int offset,
  225. unsigned int count, unsigned int nr_values)
  226. {
  227. struct radix_tree_node *ret = NULL;
  228. /*
  229. * Preload code isn't irq safe and it doesn't make sense to use
  230. * preloading during an interrupt anyway as all the allocations have
  231. * to be atomic. So just do normal allocation when in interrupt.
  232. */
  233. if (!gfpflags_allow_blocking(gfp_mask) && !in_interrupt()) {
  234. struct radix_tree_preload *rtp;
  235. /*
  236. * Even if the caller has preloaded, try to allocate from the
  237. * cache first for the new node to get accounted to the memory
  238. * cgroup.
  239. */
  240. ret = kmem_cache_alloc(radix_tree_node_cachep,
  241. gfp_mask | __GFP_NOWARN);
  242. if (ret)
  243. goto out;
  244. /*
  245. * Provided the caller has preloaded here, we will always
  246. * succeed in getting a node here (and never reach
  247. * kmem_cache_alloc)
  248. */
  249. rtp = this_cpu_ptr(&radix_tree_preloads);
  250. if (rtp->nr) {
  251. ret = rtp->nodes;
  252. rtp->nodes = ret->parent;
  253. rtp->nr--;
  254. }
  255. /*
  256. * Update the allocation stack trace as this is more useful
  257. * for debugging.
  258. */
  259. kmemleak_update_trace(ret);
  260. goto out;
  261. }
  262. ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
  263. out:
  264. BUG_ON(radix_tree_is_internal_node(ret));
  265. if (ret) {
  266. ret->shift = shift;
  267. ret->offset = offset;
  268. ret->count = count;
  269. ret->nr_values = nr_values;
  270. ret->parent = parent;
  271. ret->array = root;
  272. }
  273. return ret;
  274. }
  275. void radix_tree_node_rcu_free(struct rcu_head *head)
  276. {
  277. struct radix_tree_node *node =
  278. container_of(head, struct radix_tree_node, rcu_head);
  279. /*
  280. * Must only free zeroed nodes into the slab. We can be left with
  281. * non-NULL entries by radix_tree_free_nodes, so clear the entries
  282. * and tags here.
  283. */
  284. memset(node->slots, 0, sizeof(node->slots));
  285. memset(node->tags, 0, sizeof(node->tags));
  286. INIT_LIST_HEAD(&node->private_list);
  287. kmem_cache_free(radix_tree_node_cachep, node);
  288. }
  289. static inline void
  290. radix_tree_node_free(struct radix_tree_node *node)
  291. {
  292. call_rcu(&node->rcu_head, radix_tree_node_rcu_free);
  293. }
  294. /*
  295. * Load up this CPU's radix_tree_node buffer with sufficient objects to
  296. * ensure that the addition of a single element in the tree cannot fail. On
  297. * success, return zero, with preemption disabled. On error, return -ENOMEM
  298. * with preemption not disabled.
  299. *
  300. * To make use of this facility, the radix tree must be initialised without
  301. * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE().
  302. */
  303. static __must_check int __radix_tree_preload(gfp_t gfp_mask, unsigned nr)
  304. {
  305. struct radix_tree_preload *rtp;
  306. struct radix_tree_node *node;
  307. int ret = -ENOMEM;
  308. /*
  309. * Nodes preloaded by one cgroup can be used by another cgroup, so
  310. * they should never be accounted to any particular memory cgroup.
  311. */
  312. gfp_mask &= ~__GFP_ACCOUNT;
  313. preempt_disable();
  314. rtp = this_cpu_ptr(&radix_tree_preloads);
  315. while (rtp->nr < nr) {
  316. preempt_enable();
  317. node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
  318. if (node == NULL)
  319. goto out;
  320. preempt_disable();
  321. rtp = this_cpu_ptr(&radix_tree_preloads);
  322. if (rtp->nr < nr) {
  323. node->parent = rtp->nodes;
  324. rtp->nodes = node;
  325. rtp->nr++;
  326. } else {
  327. kmem_cache_free(radix_tree_node_cachep, node);
  328. }
  329. }
  330. ret = 0;
  331. out:
  332. return ret;
  333. }
  334. /*
  335. * Load up this CPU's radix_tree_node buffer with sufficient objects to
  336. * ensure that the addition of a single element in the tree cannot fail. On
  337. * success, return zero, with preemption disabled. On error, return -ENOMEM
  338. * with preemption not disabled.
  339. *
  340. * To make use of this facility, the radix tree must be initialised without
  341. * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE().
  342. */
  343. int radix_tree_preload(gfp_t gfp_mask)
  344. {
  345. /* Warn on non-sensical use... */
  346. WARN_ON_ONCE(!gfpflags_allow_blocking(gfp_mask));
  347. return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE);
  348. }
  349. EXPORT_SYMBOL(radix_tree_preload);
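/*
 * Illustrative usage sketch (editor's example, not part of the original
 * file): the usual pattern pairs radix_tree_preload() with a caller-provided
 * lock, so that for a tree initialised with a non-blocking gfp mask the
 * insertion itself never needs to allocate memory under the lock.  The
 * function and parameter names are hypothetical; <linux/spinlock.h> is
 * assumed to be available.
 */
static int example_preload_insert(struct radix_tree_root *root,
                                  spinlock_t *lock, unsigned long index,
                                  void *item)
{
        int error = radix_tree_preload(GFP_KERNEL);

        if (error)
                return error;
        spin_lock(lock);
        error = radix_tree_insert(root, index, item);
        spin_unlock(lock);
        radix_tree_preload_end();       /* re-enables preemption */
        return error;
}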
  350. /*
  351. * The same as above function, except we don't guarantee preloading happens.
  352. * We do it, if we decide it helps. On success, return zero with preemption
  353. * disabled. On error, return -ENOMEM with preemption not disabled.
  354. */
  355. int radix_tree_maybe_preload(gfp_t gfp_mask)
  356. {
  357. if (gfpflags_allow_blocking(gfp_mask))
  358. return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE);
  359. /* Preloading doesn't help anything with this gfp mask, skip it */
  360. preempt_disable();
  361. return 0;
  362. }
  363. EXPORT_SYMBOL(radix_tree_maybe_preload);
  364. static unsigned radix_tree_load_root(const struct radix_tree_root *root,
  365. struct radix_tree_node **nodep, unsigned long *maxindex)
  366. {
  367. struct radix_tree_node *node = rcu_dereference_raw(root->xa_head);
  368. *nodep = node;
  369. if (likely(radix_tree_is_internal_node(node))) {
  370. node = entry_to_node(node);
  371. *maxindex = node_maxindex(node);
  372. return node->shift + RADIX_TREE_MAP_SHIFT;
  373. }
  374. *maxindex = 0;
  375. return 0;
  376. }
  377. /*
  378. * Extend a radix tree so it can store key @index.
  379. */
  380. static int radix_tree_extend(struct radix_tree_root *root, gfp_t gfp,
  381. unsigned long index, unsigned int shift)
  382. {
  383. void *entry;
  384. unsigned int maxshift;
  385. int tag;
  386. /* Figure out what the shift should be. */
  387. maxshift = shift;
  388. while (index > shift_maxindex(maxshift))
  389. maxshift += RADIX_TREE_MAP_SHIFT;
  390. entry = rcu_dereference_raw(root->xa_head);
  391. if (!entry && (!is_idr(root) || root_tag_get(root, IDR_FREE)))
  392. goto out;
  393. do {
  394. struct radix_tree_node *node = radix_tree_node_alloc(gfp, NULL,
  395. root, shift, 0, 1, 0);
  396. if (!node)
  397. return -ENOMEM;
  398. if (is_idr(root)) {
  399. all_tag_set(node, IDR_FREE);
  400. if (!root_tag_get(root, IDR_FREE)) {
  401. tag_clear(node, IDR_FREE, 0);
  402. root_tag_set(root, IDR_FREE);
  403. }
  404. } else {
  405. /* Propagate the aggregated tag info to the new child */
  406. for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
  407. if (root_tag_get(root, tag))
  408. tag_set(node, tag, 0);
  409. }
  410. }
  411. BUG_ON(shift > BITS_PER_LONG);
  412. if (radix_tree_is_internal_node(entry)) {
  413. entry_to_node(entry)->parent = node;
  414. } else if (xa_is_value(entry)) {
  415. /* Moving a value entry root->xa_head to a node */
  416. node->nr_values = 1;
  417. }
  418. /*
  419. * entry was already in the radix tree, so we do not need
  420. * rcu_assign_pointer here
  421. */
  422. node->slots[0] = (void __rcu *)entry;
  423. entry = node_to_entry(node);
  424. rcu_assign_pointer(root->xa_head, entry);
  425. shift += RADIX_TREE_MAP_SHIFT;
  426. } while (shift <= maxshift);
  427. out:
  428. return maxshift + RADIX_TREE_MAP_SHIFT;
  429. }
  430. /**
  431. * radix_tree_shrink - shrink radix tree to minimum height
  432. * @root: radix tree root
  433. */
  434. static inline bool radix_tree_shrink(struct radix_tree_root *root)
  435. {
  436. bool shrunk = false;
  437. for (;;) {
  438. struct radix_tree_node *node = rcu_dereference_raw(root->xa_head);
  439. struct radix_tree_node *child;
  440. if (!radix_tree_is_internal_node(node))
  441. break;
  442. node = entry_to_node(node);
  443. /*
  444. * If the candidate node has more than one child, or its child
  445. * is not at the leftmost slot, we cannot shrink.
  446. */
  447. if (node->count != 1)
  448. break;
  449. child = rcu_dereference_raw(node->slots[0]);
  450. if (!child)
  451. break;
  452. /*
  453. * For an IDR, we must not shrink entry 0 into the root in
  454. * case somebody calls idr_replace() with a pointer that
  455. * appears to be an internal entry
  456. */
  457. if (!node->shift && is_idr(root))
  458. break;
  459. if (radix_tree_is_internal_node(child))
  460. entry_to_node(child)->parent = NULL;
  461. /*
  462. * We don't need rcu_assign_pointer(), since we are simply
  463. * moving the node from one part of the tree to another: if it
  464. * was safe to dereference the old pointer to it
  465. * (node->slots[0]), it will be safe to dereference the new
  466. * one (root->xa_head) as far as dependent read barriers go.
  467. */
  468. root->xa_head = (void __rcu *)child;
  469. if (is_idr(root) && !tag_get(node, IDR_FREE, 0))
  470. root_tag_clear(root, IDR_FREE);
  471. /*
  472. * We have a dilemma here. The node's slot[0] must not be
  473. * NULLed in case there are concurrent lookups expecting to
  474. * find the item. However if this was a bottom-level node,
  475. * then it may be subject to the slot pointer being visible
  476. * to callers dereferencing it. If item corresponding to
  477. * slot[0] is subsequently deleted, these callers would expect
  478. * their slot to become empty sooner or later.
  479. *
  480. * For example, lockless pagecache will look up a slot, deref
  481. * the page pointer, and if the page has 0 refcount it means it
  482. * was concurrently deleted from pagecache so try the deref
  483. * again. Fortunately there is already a requirement for logic
  484. * to retry the entire slot lookup -- the indirect pointer
  485. * problem (replacing direct root node with an indirect pointer
  486. * also results in a stale slot). So tag the slot as indirect
  487. * to force callers to retry.
  488. */
  489. node->count = 0;
  490. if (!radix_tree_is_internal_node(child)) {
  491. node->slots[0] = (void __rcu *)RADIX_TREE_RETRY;
  492. }
  493. WARN_ON_ONCE(!list_empty(&node->private_list));
  494. radix_tree_node_free(node);
  495. shrunk = true;
  496. }
  497. return shrunk;
  498. }
  499. static bool delete_node(struct radix_tree_root *root,
  500. struct radix_tree_node *node)
  501. {
  502. bool deleted = false;
  503. do {
  504. struct radix_tree_node *parent;
  505. if (node->count) {
  506. if (node_to_entry(node) ==
  507. rcu_dereference_raw(root->xa_head))
  508. deleted |= radix_tree_shrink(root);
  509. return deleted;
  510. }
  511. parent = node->parent;
  512. if (parent) {
  513. parent->slots[node->offset] = NULL;
  514. parent->count--;
  515. } else {
  516. /*
  517. * Shouldn't the tags already have all been cleared
  518. * by the caller?
  519. */
  520. if (!is_idr(root))
  521. root_tag_clear_all(root);
  522. root->xa_head = NULL;
  523. }
  524. WARN_ON_ONCE(!list_empty(&node->private_list));
  525. radix_tree_node_free(node);
  526. deleted = true;
  527. node = parent;
  528. } while (node);
  529. return deleted;
  530. }
  531. /**
  532. * __radix_tree_create - create a slot in a radix tree
  533. * @root: radix tree root
  534. * @index: index key
  535. * @nodep: returns node
  536. * @slotp: returns slot
  537. *
  538. * Create, if necessary, and return the node and slot for an item
  539. * at position @index in the radix tree @root.
  540. *
  541. * Until there is more than one item in the tree, no nodes are
  542. * allocated and @root->xa_head is used as a direct slot instead of
  543. * pointing to a node, in which case *@nodep will be NULL.
  544. *
  545. * Returns -ENOMEM, or 0 for success.
  546. */
  547. static int __radix_tree_create(struct radix_tree_root *root,
  548. unsigned long index, struct radix_tree_node **nodep,
  549. void __rcu ***slotp)
  550. {
  551. struct radix_tree_node *node = NULL, *child;
  552. void __rcu **slot = (void __rcu **)&root->xa_head;
  553. unsigned long maxindex;
  554. unsigned int shift, offset = 0;
  555. unsigned long max = index;
  556. gfp_t gfp = root_gfp_mask(root);
  557. shift = radix_tree_load_root(root, &child, &maxindex);
  558. /* Make sure the tree is high enough. */
  559. if (max > maxindex) {
  560. int error = radix_tree_extend(root, gfp, max, shift);
  561. if (error < 0)
  562. return error;
  563. shift = error;
  564. child = rcu_dereference_raw(root->xa_head);
  565. }
  566. while (shift > 0) {
  567. shift -= RADIX_TREE_MAP_SHIFT;
  568. if (child == NULL) {
  569. /* Have to add a child node. */
  570. child = radix_tree_node_alloc(gfp, node, root, shift,
  571. offset, 0, 0);
  572. if (!child)
  573. return -ENOMEM;
  574. rcu_assign_pointer(*slot, node_to_entry(child));
  575. if (node)
  576. node->count++;
  577. } else if (!radix_tree_is_internal_node(child))
  578. break;
  579. /* Go a level down */
  580. node = entry_to_node(child);
  581. offset = radix_tree_descend(node, &child, index);
  582. slot = &node->slots[offset];
  583. }
  584. if (nodep)
  585. *nodep = node;
  586. if (slotp)
  587. *slotp = slot;
  588. return 0;
  589. }
  590. /*
  591. * Free any nodes below this node. The tree is presumed to not need
  592. * shrinking, and any user data in the tree is presumed to not need a
  593. * destructor called on it. If we need to add a destructor, we can
  594. * add that functionality later. Note that we may not clear tags or
  595. * slots from the tree as an RCU walker may still have a pointer into
  596. * this subtree. We could replace the entries with RADIX_TREE_RETRY,
  597. * but we'll still have to clear those in rcu_free.
  598. */
  599. static void radix_tree_free_nodes(struct radix_tree_node *node)
  600. {
  601. unsigned offset = 0;
  602. struct radix_tree_node *child = entry_to_node(node);
  603. for (;;) {
  604. void *entry = rcu_dereference_raw(child->slots[offset]);
  605. if (xa_is_node(entry) && child->shift) {
  606. child = entry_to_node(entry);
  607. offset = 0;
  608. continue;
  609. }
  610. offset++;
  611. while (offset == RADIX_TREE_MAP_SIZE) {
  612. struct radix_tree_node *old = child;
  613. offset = child->offset + 1;
  614. child = child->parent;
  615. WARN_ON_ONCE(!list_empty(&old->private_list));
  616. radix_tree_node_free(old);
  617. if (old == entry_to_node(node))
  618. return;
  619. }
  620. }
  621. }
  622. static inline int insert_entries(struct radix_tree_node *node,
  623. void __rcu **slot, void *item, bool replace)
  624. {
  625. if (*slot)
  626. return -EEXIST;
  627. rcu_assign_pointer(*slot, item);
  628. if (node) {
  629. node->count++;
  630. if (xa_is_value(item))
  631. node->nr_values++;
  632. }
  633. return 1;
  634. }
  635. /**
  636. * radix_tree_insert - insert into a radix tree
  637. * @root: radix tree root
  638. * @index: index key
  639. * @item: item to insert
  640. *
  641. * Insert an item into the radix tree at position @index.
  642. */
  643. int radix_tree_insert(struct radix_tree_root *root, unsigned long index,
  644. void *item)
  645. {
  646. struct radix_tree_node *node;
  647. void __rcu **slot;
  648. int error;
  649. BUG_ON(radix_tree_is_internal_node(item));
  650. error = __radix_tree_create(root, index, &node, &slot);
  651. if (error)
  652. return error;
  653. error = insert_entries(node, slot, item, false);
  654. if (error < 0)
  655. return error;
  656. if (node) {
  657. unsigned offset = get_slot_offset(node, slot);
  658. BUG_ON(tag_get(node, 0, offset));
  659. BUG_ON(tag_get(node, 1, offset));
  660. BUG_ON(tag_get(node, 2, offset));
  661. } else {
  662. BUG_ON(root_tags_get(root));
  663. }
  664. return 0;
  665. }
  666. EXPORT_SYMBOL(radix_tree_insert);
  667. /**
  668. * __radix_tree_lookup - lookup an item in a radix tree
  669. * @root: radix tree root
  670. * @index: index key
  671. * @nodep: returns node
  672. * @slotp: returns slot
  673. *
  674. * Lookup and return the item at position @index in the radix
  675. * tree @root.
  676. *
  677. * Until there is more than one item in the tree, no nodes are
  678. * allocated and @root->xa_head is used as a direct slot instead of
  679. * pointing to a node, in which case *@nodep will be NULL.
  680. */
  681. void *__radix_tree_lookup(const struct radix_tree_root *root,
  682. unsigned long index, struct radix_tree_node **nodep,
  683. void __rcu ***slotp)
  684. {
  685. struct radix_tree_node *node, *parent;
  686. unsigned long maxindex;
  687. void __rcu **slot;
  688. restart:
  689. parent = NULL;
  690. slot = (void __rcu **)&root->xa_head;
  691. radix_tree_load_root(root, &node, &maxindex);
  692. if (index > maxindex)
  693. return NULL;
  694. while (radix_tree_is_internal_node(node)) {
  695. unsigned offset;
  696. parent = entry_to_node(node);
  697. offset = radix_tree_descend(parent, &node, index);
  698. slot = parent->slots + offset;
  699. if (node == RADIX_TREE_RETRY)
  700. goto restart;
  701. if (parent->shift == 0)
  702. break;
  703. }
  704. if (nodep)
  705. *nodep = parent;
  706. if (slotp)
  707. *slotp = slot;
  708. return node;
  709. }
  710. /**
  711. * radix_tree_lookup_slot - lookup a slot in a radix tree
  712. * @root: radix tree root
  713. * @index: index key
  714. *
  715. * Returns: the slot corresponding to the position @index in the
  716. * radix tree @root. This is useful for update-if-exists operations.
  717. *
  718. * This function can be called under rcu_read_lock iff the slot is not
  719. * modified by radix_tree_replace_slot, otherwise it must be called
  720. * exclusive from other writers. Any dereference of the slot must be done
  721. * using radix_tree_deref_slot.
  722. */
  723. void __rcu **radix_tree_lookup_slot(const struct radix_tree_root *root,
  724. unsigned long index)
  725. {
  726. void __rcu **slot;
  727. if (!__radix_tree_lookup(root, index, NULL, &slot))
  728. return NULL;
  729. return slot;
  730. }
  731. EXPORT_SYMBOL(radix_tree_lookup_slot);
  732. /**
  733. * radix_tree_lookup - perform lookup operation on a radix tree
  734. * @root: radix tree root
  735. * @index: index key
  736. *
  737. * Lookup the item at the position @index in the radix tree @root.
  738. *
  739. * This function can be called under rcu_read_lock, however the caller
  740. * must manage lifetimes of leaf nodes (eg. RCU may also be used to free
  741. * them safely). No RCU barriers are required to access or modify the
  742. * returned item, however.
  743. */
  744. void *radix_tree_lookup(const struct radix_tree_root *root, unsigned long index)
  745. {
  746. return __radix_tree_lookup(root, index, NULL, NULL);
  747. }
  748. EXPORT_SYMBOL(radix_tree_lookup);
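/*
 * Illustrative usage sketch (editor's example, not part of the original
 * file): a lookup under rcu_read_lock().  As documented above, the caller
 * must ensure the returned item cannot be freed while it is still in use,
 * e.g. by taking a reference before dropping the RCU read lock.  The
 * function name is hypothetical.
 */
static void *example_lookup(const struct radix_tree_root *root,
                            unsigned long index)
{
        void *item;

        rcu_read_lock();
        item = radix_tree_lookup(root, index);
        rcu_read_unlock();
        return item;
}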
  749. static void replace_slot(void __rcu **slot, void *item,
  750. struct radix_tree_node *node, int count, int values)
  751. {
  752. if (node && (count || values)) {
  753. node->count += count;
  754. node->nr_values += values;
  755. }
  756. rcu_assign_pointer(*slot, item);
  757. }
  758. static bool node_tag_get(const struct radix_tree_root *root,
  759. const struct radix_tree_node *node,
  760. unsigned int tag, unsigned int offset)
  761. {
  762. if (node)
  763. return tag_get(node, tag, offset);
  764. return root_tag_get(root, tag);
  765. }
  766. /*
  767. * IDR users want to be able to store NULL in the tree, so if the slot isn't
  768. * free, don't adjust the count, even if it's transitioning between NULL and
  769. * non-NULL. For the IDA, we mark slots as being IDR_FREE while they still
  770. * have empty bits, but it only stores NULL in slots when they're being
  771. * deleted.
  772. */
  773. static int calculate_count(struct radix_tree_root *root,
  774. struct radix_tree_node *node, void __rcu **slot,
  775. void *item, void *old)
  776. {
  777. if (is_idr(root)) {
  778. unsigned offset = get_slot_offset(node, slot);
  779. bool free = node_tag_get(root, node, IDR_FREE, offset);
  780. if (!free)
  781. return 0;
  782. if (!old)
  783. return 1;
  784. }
  785. return !!item - !!old;
  786. }
  787. /**
  788. * __radix_tree_replace - replace item in a slot
  789. * @root: radix tree root
  790. * @node: pointer to tree node
  791. * @slot: pointer to slot in @node
  792. * @item: new item to store in the slot.
  793. *
  794. * For use with __radix_tree_lookup(). Caller must hold tree write locked
  795. * across slot lookup and replacement.
  796. */
  797. void __radix_tree_replace(struct radix_tree_root *root,
  798. struct radix_tree_node *node,
  799. void __rcu **slot, void *item)
  800. {
  801. void *old = rcu_dereference_raw(*slot);
  802. int values = !!xa_is_value(item) - !!xa_is_value(old);
  803. int count = calculate_count(root, node, slot, item, old);
  804. /*
  805. * This function supports replacing value entries and
  806. * deleting entries, but that needs accounting against the
  807. * node unless the slot is root->xa_head.
  808. */
  809. WARN_ON_ONCE(!node && (slot != (void __rcu **)&root->xa_head) &&
  810. (count || values));
  811. replace_slot(slot, item, node, count, values);
  812. if (!node)
  813. return;
  814. delete_node(root, node);
  815. }
  816. /**
  817. * radix_tree_replace_slot - replace item in a slot
  818. * @root: radix tree root
  819. * @slot: pointer to slot
  820. * @item: new item to store in the slot.
  821. *
  822. * For use with radix_tree_lookup_slot() and
  823. * radix_tree_gang_lookup_tag_slot(). Caller must hold tree write locked
  824. * across slot lookup and replacement.
  825. *
  826. * NOTE: This cannot be used to switch between non-entries (empty slots),
  827. * regular entries, and value entries, as that requires accounting
  828. * inside the radix tree node. When switching from one type of entry or
  829. * deleting, use __radix_tree_lookup() and __radix_tree_replace() or
  830. * radix_tree_iter_replace().
  831. */
  832. void radix_tree_replace_slot(struct radix_tree_root *root,
  833. void __rcu **slot, void *item)
  834. {
  835. __radix_tree_replace(root, NULL, slot, item);
  836. }
  837. EXPORT_SYMBOL(radix_tree_replace_slot);
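/*
 * Illustrative usage sketch (editor's example, not part of the original
 * file): update-if-exists built from radix_tree_lookup_slot() and
 * radix_tree_replace_slot().  The caller is assumed to hold the tree
 * write-locked across both calls, and @new_item must be a regular entry
 * (not NULL or a value entry), per the note above.  Names are hypothetical.
 */
static int example_update(struct radix_tree_root *root, unsigned long index,
                          void *new_item)
{
        void __rcu **slot = radix_tree_lookup_slot(root, index);

        if (!slot)
                return -ENOENT;
        radix_tree_replace_slot(root, slot, new_item);
        return 0;
}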
  838. /**
  839. * radix_tree_iter_replace - replace item in a slot
  840. * @root: radix tree root
  * @iter: iterator state
  841. * @slot: pointer to slot
  842. * @item: new item to store in the slot.
  843. *
  844. * For use with radix_tree_for_each_slot().
  845. * Caller must hold tree write locked.
  846. */
  847. void radix_tree_iter_replace(struct radix_tree_root *root,
  848. const struct radix_tree_iter *iter,
  849. void __rcu **slot, void *item)
  850. {
  851. __radix_tree_replace(root, iter->node, slot, item);
  852. }
  853. static void node_tag_set(struct radix_tree_root *root,
  854. struct radix_tree_node *node,
  855. unsigned int tag, unsigned int offset)
  856. {
  857. while (node) {
  858. if (tag_get(node, tag, offset))
  859. return;
  860. tag_set(node, tag, offset);
  861. offset = node->offset;
  862. node = node->parent;
  863. }
  864. if (!root_tag_get(root, tag))
  865. root_tag_set(root, tag);
  866. }
  867. /**
  868. * radix_tree_tag_set - set a tag on a radix tree node
  869. * @root: radix tree root
  870. * @index: index key
  871. * @tag: tag index
  872. *
  873. * Set the search tag (which must be < RADIX_TREE_MAX_TAGS)
  874. * corresponding to @index in the radix tree, from
  875. * the root all the way down to the leaf node.
  876. *
  877. * Returns the address of the tagged item. Setting a tag on a not-present
  878. * item is a bug.
  879. */
  880. void *radix_tree_tag_set(struct radix_tree_root *root,
  881. unsigned long index, unsigned int tag)
  882. {
  883. struct radix_tree_node *node, *parent;
  884. unsigned long maxindex;
  885. radix_tree_load_root(root, &node, &maxindex);
  886. BUG_ON(index > maxindex);
  887. while (radix_tree_is_internal_node(node)) {
  888. unsigned offset;
  889. parent = entry_to_node(node);
  890. offset = radix_tree_descend(parent, &node, index);
  891. BUG_ON(!node);
  892. if (!tag_get(parent, tag, offset))
  893. tag_set(parent, tag, offset);
  894. }
  895. /* set the root's tag bit */
  896. if (!root_tag_get(root, tag))
  897. root_tag_set(root, tag);
  898. return node;
  899. }
  900. EXPORT_SYMBOL(radix_tree_tag_set);
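/*
 * Illustrative usage sketch (editor's example, not part of the original
 * file): tagging an existing entry with tag 0 (chosen arbitrarily) and
 * checking the tag afterwards.  The index is assumed to be occupied, since
 * tagging a not-present item is a bug.  The function name is hypothetical.
 */
static void example_mark_entry(struct radix_tree_root *root,
                               unsigned long index)
{
        radix_tree_tag_set(root, index, 0);
        WARN_ON(!radix_tree_tag_get(root, index, 0));
}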
  901. static void node_tag_clear(struct radix_tree_root *root,
  902. struct radix_tree_node *node,
  903. unsigned int tag, unsigned int offset)
  904. {
  905. while (node) {
  906. if (!tag_get(node, tag, offset))
  907. return;
  908. tag_clear(node, tag, offset);
  909. if (any_tag_set(node, tag))
  910. return;
  911. offset = node->offset;
  912. node = node->parent;
  913. }
  914. /* clear the root's tag bit */
  915. if (root_tag_get(root, tag))
  916. root_tag_clear(root, tag);
  917. }
  918. /**
  919. * radix_tree_tag_clear - clear a tag on a radix tree node
  920. * @root: radix tree root
  921. * @index: index key
  922. * @tag: tag index
  923. *
  924. * Clear the search tag (which must be < RADIX_TREE_MAX_TAGS)
  925. * corresponding to @index in the radix tree. If this causes
  926. * the leaf node to have no tags set then clear the tag in the
  927. * next-to-leaf node, etc.
  928. *
  929. * Returns the address of the tagged item on success, else NULL, i.e. it
  930. * has the same return value and semantics as radix_tree_lookup().
  931. */
  932. void *radix_tree_tag_clear(struct radix_tree_root *root,
  933. unsigned long index, unsigned int tag)
  934. {
  935. struct radix_tree_node *node, *parent;
  936. unsigned long maxindex;
  937. int uninitialized_var(offset);
  938. radix_tree_load_root(root, &node, &maxindex);
  939. if (index > maxindex)
  940. return NULL;
  941. parent = NULL;
  942. while (radix_tree_is_internal_node(node)) {
  943. parent = entry_to_node(node);
  944. offset = radix_tree_descend(parent, &node, index);
  945. }
  946. if (node)
  947. node_tag_clear(root, parent, tag, offset);
  948. return node;
  949. }
  950. EXPORT_SYMBOL(radix_tree_tag_clear);
  951. /**
  952. * radix_tree_iter_tag_clear - clear a tag on the current iterator entry
  953. * @root: radix tree root
  954. * @iter: iterator state
  955. * @tag: tag to clear
  956. */
  957. void radix_tree_iter_tag_clear(struct radix_tree_root *root,
  958. const struct radix_tree_iter *iter, unsigned int tag)
  959. {
  960. node_tag_clear(root, iter->node, tag, iter_offset(iter));
  961. }
  962. /**
  963. * radix_tree_tag_get - get a tag on a radix tree node
  964. * @root: radix tree root
  965. * @index: index key
  966. * @tag: tag index (< RADIX_TREE_MAX_TAGS)
  967. *
  968. * Return values:
  969. *
  970. * 0: tag not present or not set
  971. * 1: tag set
  972. *
  973. * Note that the return value of this function may not be relied on, even if
  974. * the RCU lock is held, unless tag modification and node deletion are excluded
  975. * from concurrency.
  976. */
  977. int radix_tree_tag_get(const struct radix_tree_root *root,
  978. unsigned long index, unsigned int tag)
  979. {
  980. struct radix_tree_node *node, *parent;
  981. unsigned long maxindex;
  982. if (!root_tag_get(root, tag))
  983. return 0;
  984. radix_tree_load_root(root, &node, &maxindex);
  985. if (index > maxindex)
  986. return 0;
  987. while (radix_tree_is_internal_node(node)) {
  988. unsigned offset;
  989. parent = entry_to_node(node);
  990. offset = radix_tree_descend(parent, &node, index);
  991. if (!tag_get(parent, tag, offset))
  992. return 0;
  993. if (node == RADIX_TREE_RETRY)
  994. break;
  995. }
  996. return 1;
  997. }
  998. EXPORT_SYMBOL(radix_tree_tag_get);
  999. /* Construct iter->tags bit-mask from node->tags[tag] array */
  1000. static void set_iter_tags(struct radix_tree_iter *iter,
  1001. struct radix_tree_node *node, unsigned offset,
  1002. unsigned tag)
  1003. {
  1004. unsigned tag_long = offset / BITS_PER_LONG;
  1005. unsigned tag_bit = offset % BITS_PER_LONG;
  1006. if (!node) {
  1007. iter->tags = 1;
  1008. return;
  1009. }
  1010. iter->tags = node->tags[tag][tag_long] >> tag_bit;
  1011. /* This never happens if RADIX_TREE_TAG_LONGS == 1 */
  1012. if (tag_long < RADIX_TREE_TAG_LONGS - 1) {
  1013. /* Pick tags from next element */
  1014. if (tag_bit)
  1015. iter->tags |= node->tags[tag][tag_long + 1] <<
  1016. (BITS_PER_LONG - tag_bit);
  1017. /* Clip chunk size, here only BITS_PER_LONG tags */
  1018. iter->next_index = __radix_tree_iter_add(iter, BITS_PER_LONG);
  1019. }
  1020. }
  1021. void __rcu **radix_tree_iter_resume(void __rcu **slot,
  1022. struct radix_tree_iter *iter)
  1023. {
  1024. slot++;
  1025. iter->index = __radix_tree_iter_add(iter, 1);
  1026. iter->next_index = iter->index;
  1027. iter->tags = 0;
  1028. return NULL;
  1029. }
  1030. EXPORT_SYMBOL(radix_tree_iter_resume);
  1031. /**
  1032. * radix_tree_next_chunk - find next chunk of slots for iteration
  1033. *
  1034. * @root: radix tree root
  1035. * @iter: iterator state
  1036. * @flags: RADIX_TREE_ITER_* flags and tag index
  1037. * Returns: pointer to chunk first slot, or NULL if iteration is over
  1038. */
  1039. void __rcu **radix_tree_next_chunk(const struct radix_tree_root *root,
  1040. struct radix_tree_iter *iter, unsigned flags)
  1041. {
  1042. unsigned tag = flags & RADIX_TREE_ITER_TAG_MASK;
  1043. struct radix_tree_node *node, *child;
  1044. unsigned long index, offset, maxindex;
  1045. if ((flags & RADIX_TREE_ITER_TAGGED) && !root_tag_get(root, tag))
  1046. return NULL;
  1047. /*
  1048. * Catch next_index overflow after ~0UL. iter->index never overflows
  1049. * during iterating; it can be zero only at the beginning.
  1050. * And we cannot overflow iter->next_index in a single step,
  1051. * because RADIX_TREE_MAP_SHIFT < BITS_PER_LONG.
  1052. *
  1053. * This condition also used by radix_tree_next_slot() to stop
  1054. * contiguous iterating, and forbid switching to the next chunk.
  1055. */
  1056. index = iter->next_index;
  1057. if (!index && iter->index)
  1058. return NULL;
  1059. restart:
  1060. radix_tree_load_root(root, &child, &maxindex);
  1061. if (index > maxindex)
  1062. return NULL;
  1063. if (!child)
  1064. return NULL;
  1065. if (!radix_tree_is_internal_node(child)) {
  1066. /* Single-slot tree */
  1067. iter->index = index;
  1068. iter->next_index = maxindex + 1;
  1069. iter->tags = 1;
  1070. iter->node = NULL;
  1071. return (void __rcu **)&root->xa_head;
  1072. }
  1073. do {
  1074. node = entry_to_node(child);
  1075. offset = radix_tree_descend(node, &child, index);
  1076. if ((flags & RADIX_TREE_ITER_TAGGED) ?
  1077. !tag_get(node, tag, offset) : !child) {
  1078. /* Hole detected */
  1079. if (flags & RADIX_TREE_ITER_CONTIG)
  1080. return NULL;
  1081. if (flags & RADIX_TREE_ITER_TAGGED)
  1082. offset = radix_tree_find_next_bit(node, tag,
  1083. offset + 1);
  1084. else
  1085. while (++offset < RADIX_TREE_MAP_SIZE) {
  1086. void *slot = rcu_dereference_raw(
  1087. node->slots[offset]);
  1088. if (slot)
  1089. break;
  1090. }
  1091. index &= ~node_maxindex(node);
  1092. index += offset << node->shift;
  1093. /* Overflow after ~0UL */
  1094. if (!index)
  1095. return NULL;
  1096. if (offset == RADIX_TREE_MAP_SIZE)
  1097. goto restart;
  1098. child = rcu_dereference_raw(node->slots[offset]);
  1099. }
  1100. if (!child)
  1101. goto restart;
  1102. if (child == RADIX_TREE_RETRY)
  1103. break;
  1104. } while (node->shift && radix_tree_is_internal_node(child));
  1105. /* Update the iterator state */
  1106. iter->index = (index &~ node_maxindex(node)) | offset;
  1107. iter->next_index = (index | node_maxindex(node)) + 1;
  1108. iter->node = node;
  1109. if (flags & RADIX_TREE_ITER_TAGGED)
  1110. set_iter_tags(iter, node, offset, tag);
  1111. return node->slots + offset;
  1112. }
  1113. EXPORT_SYMBOL(radix_tree_next_chunk);
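/*
 * Illustrative usage sketch (editor's example, not part of the original
 * file): radix_tree_next_chunk() is normally used via the iterator macros.
 * A read-side walk that merely counts present slots might look like this;
 * the function name is hypothetical.
 */
static unsigned long example_count_entries(const struct radix_tree_root *root)
{
        struct radix_tree_iter iter;
        void __rcu **slot;
        unsigned long count = 0;

        rcu_read_lock();
        radix_tree_for_each_slot(slot, root, &iter, 0)
                count++;
        rcu_read_unlock();
        return count;
}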
  1114. /**
  1115. * radix_tree_gang_lookup - perform multiple lookup on a radix tree
  1116. * @root: radix tree root
  1117. * @results: where the results of the lookup are placed
  1118. * @first_index: start the lookup from this key
  1119. * @max_items: place up to this many items at *results
  1120. *
  1121. * Performs an index-ascending scan of the tree for present items. Places
  1122. * them at *@results and returns the number of items which were placed at
  1123. * *@results.
  1124. *
  1125. * The implementation is naive.
  1126. *
  1127. * Like radix_tree_lookup, radix_tree_gang_lookup may be called under
  1128. * rcu_read_lock. In this case, rather than the returned results being
  1129. * an atomic snapshot of the tree at a single point in time, the
  1130. * semantics of an RCU protected gang lookup are as though multiple
  1131. * radix_tree_lookups have been issued in individual locks, and results
  1132. * stored in 'results'.
  1133. */
  1134. unsigned int
  1135. radix_tree_gang_lookup(const struct radix_tree_root *root, void **results,
  1136. unsigned long first_index, unsigned int max_items)
  1137. {
  1138. struct radix_tree_iter iter;
  1139. void __rcu **slot;
  1140. unsigned int ret = 0;
  1141. if (unlikely(!max_items))
  1142. return 0;
  1143. radix_tree_for_each_slot(slot, root, &iter, first_index) {
  1144. results[ret] = rcu_dereference_raw(*slot);
  1145. if (!results[ret])
  1146. continue;
  1147. if (radix_tree_is_internal_node(results[ret])) {
  1148. slot = radix_tree_iter_retry(&iter);
  1149. continue;
  1150. }
  1151. if (++ret == max_items)
  1152. break;
  1153. }
  1154. return ret;
  1155. }
  1156. EXPORT_SYMBOL(radix_tree_gang_lookup);
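/*
 * Illustrative usage sketch (editor's example, not part of the original
 * file): fetching up to 16 present entries starting at index 0 under RCU.
 * A real caller would take references on, or otherwise consume, the results
 * before dropping the read lock.  The function name is hypothetical.
 */
static unsigned int example_gang_lookup(const struct radix_tree_root *root)
{
        void *results[16];
        unsigned int found;

        rcu_read_lock();
        found = radix_tree_gang_lookup(root, results, 0, ARRAY_SIZE(results));
        rcu_read_unlock();
        return found;
}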
  1157. /**
  1158. * radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree
  1159. * based on a tag
  1160. * @root: radix tree root
  1161. * @results: where the results of the lookup are placed
  1162. * @first_index: start the lookup from this key
  1163. * @max_items: place up to this many items at *results
  1164. * @tag: the tag index (< RADIX_TREE_MAX_TAGS)
  1165. *
  1166. * Performs an index-ascending scan of the tree for present items which
  1167. * have the tag indexed by @tag set. Places the items at *@results and
  1168. * returns the number of items which were placed at *@results.
  1169. */
  1170. unsigned int
  1171. radix_tree_gang_lookup_tag(const struct radix_tree_root *root, void **results,
  1172. unsigned long first_index, unsigned int max_items,
  1173. unsigned int tag)
  1174. {
  1175. struct radix_tree_iter iter;
  1176. void __rcu **slot;
  1177. unsigned int ret = 0;
  1178. if (unlikely(!max_items))
  1179. return 0;
  1180. radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) {
  1181. results[ret] = rcu_dereference_raw(*slot);
  1182. if (!results[ret])
  1183. continue;
  1184. if (radix_tree_is_internal_node(results[ret])) {
  1185. slot = radix_tree_iter_retry(&iter);
  1186. continue;
  1187. }
  1188. if (++ret == max_items)
  1189. break;
  1190. }
  1191. return ret;
  1192. }
  1193. EXPORT_SYMBOL(radix_tree_gang_lookup_tag);
  1194. /**
  1195. * radix_tree_gang_lookup_tag_slot - perform multiple slot lookup on a
  1196. * radix tree based on a tag
  1197. * @root: radix tree root
  1198. * @results: where the results of the lookup are placed
  1199. * @first_index: start the lookup from this key
  1200. * @max_items: place up to this many items at *results
  1201. * @tag: the tag index (< RADIX_TREE_MAX_TAGS)
  1202. *
  1203. * Performs an index-ascending scan of the tree for present items which
  1204. * have the tag indexed by @tag set. Places the slots at *@results and
  1205. * returns the number of slots which were placed at *@results.
  1206. */
  1207. unsigned int
  1208. radix_tree_gang_lookup_tag_slot(const struct radix_tree_root *root,
  1209. void __rcu ***results, unsigned long first_index,
  1210. unsigned int max_items, unsigned int tag)
  1211. {
  1212. struct radix_tree_iter iter;
  1213. void __rcu **slot;
  1214. unsigned int ret = 0;
  1215. if (unlikely(!max_items))
  1216. return 0;
  1217. radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) {
  1218. results[ret] = slot;
  1219. if (++ret == max_items)
  1220. break;
  1221. }
  1222. return ret;
  1223. }
  1224. EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot);
  1225. static bool __radix_tree_delete(struct radix_tree_root *root,
  1226. struct radix_tree_node *node, void __rcu **slot)
  1227. {
  1228. void *old = rcu_dereference_raw(*slot);
  1229. int values = xa_is_value(old) ? -1 : 0;
  1230. unsigned offset = get_slot_offset(node, slot);
  1231. int tag;
  1232. if (is_idr(root))
  1233. node_tag_set(root, node, IDR_FREE, offset);
  1234. else
  1235. for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
  1236. node_tag_clear(root, node, tag, offset);
  1237. replace_slot(slot, NULL, node, -1, values);
  1238. return node && delete_node(root, node);
  1239. }
  1240. /**
  1241. * radix_tree_iter_delete - delete the entry at this iterator position
  1242. * @root: radix tree root
  1243. * @iter: iterator state
  1244. * @slot: pointer to slot
  1245. *
  1246. * Delete the entry at the position currently pointed to by the iterator.
  1247. * This may result in the current node being freed; if it is, the iterator
  1248. * is advanced so that it will not reference the freed memory. This
  1249. * function may be called without any locking if there are no other threads
  1250. * which can access this tree.
  1251. */
  1252. void radix_tree_iter_delete(struct radix_tree_root *root,
  1253. struct radix_tree_iter *iter, void __rcu **slot)
  1254. {
  1255. if (__radix_tree_delete(root, iter->node, slot))
  1256. iter->index = iter->next_index;
  1257. }
  1258. EXPORT_SYMBOL(radix_tree_iter_delete);
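/*
 * Illustrative usage sketch (editor's example, not part of the original
 * file): emptying a tree by deleting every entry while iterating, relying
 * on radix_tree_iter_delete() to keep the iterator valid when nodes are
 * freed.  The caller is assumed to hold the tree exclusively; the function
 * name is hypothetical.
 */
static void example_clear_tree(struct radix_tree_root *root)
{
        struct radix_tree_iter iter;
        void __rcu **slot;

        radix_tree_for_each_slot(slot, root, &iter, 0)
                radix_tree_iter_delete(root, &iter, slot);
}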
  1259. /**
  1260. * radix_tree_delete_item - delete an item from a radix tree
  1261. * @root: radix tree root
  1262. * @index: index key
  1263. * @item: expected item
  1264. *
  1265. * Remove @item at @index from the radix tree rooted at @root.
  1266. *
  1267. * Return: the deleted entry, or %NULL if it was not present
  1268. * or the entry at the given @index was not @item.
  1269. */
  1270. void *radix_tree_delete_item(struct radix_tree_root *root,
  1271. unsigned long index, void *item)
  1272. {
  1273. struct radix_tree_node *node = NULL;
  1274. void __rcu **slot = NULL;
  1275. void *entry;
  1276. entry = __radix_tree_lookup(root, index, &node, &slot);
  1277. if (!slot)
  1278. return NULL;
  1279. if (!entry && (!is_idr(root) || node_tag_get(root, node, IDR_FREE,
  1280. get_slot_offset(node, slot))))
  1281. return NULL;
  1282. if (item && entry != item)
  1283. return NULL;
  1284. __radix_tree_delete(root, node, slot);
  1285. return entry;
  1286. }
  1287. EXPORT_SYMBOL(radix_tree_delete_item);
  1288. /**
  1289. * radix_tree_delete - delete an entry from a radix tree
  1290. * @root: radix tree root
  1291. * @index: index key
  1292. *
  1293. * Remove the entry at @index from the radix tree rooted at @root.
  1294. *
  1295. * Return: The deleted entry, or %NULL if it was not present.
  1296. */
  1297. void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
  1298. {
  1299. return radix_tree_delete_item(root, index, NULL);
  1300. }
  1301. EXPORT_SYMBOL(radix_tree_delete);
  1302. /**
  1303. * radix_tree_tagged - test whether any items in the tree are tagged
  1304. * @root: radix tree root
  1305. * @tag: tag to test
  1306. */
  1307. int radix_tree_tagged(const struct radix_tree_root *root, unsigned int tag)
  1308. {
  1309. return root_tag_get(root, tag);
  1310. }
  1311. EXPORT_SYMBOL(radix_tree_tagged);
  1312. /**
  1313. * idr_preload - preload for idr_alloc()
  1314. * @gfp_mask: allocation mask to use for preloading
  1315. *
  1316. * Preallocate memory to use for the next call to idr_alloc(). This function
  1317. * returns with preemption disabled. It will be enabled by idr_preload_end().
  1318. */
  1319. void idr_preload(gfp_t gfp_mask)
  1320. {
  1321. if (__radix_tree_preload(gfp_mask, IDR_PRELOAD_SIZE))
  1322. preempt_disable();
  1323. }
  1324. EXPORT_SYMBOL(idr_preload);
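/*
 * Illustrative usage sketch (editor's example, not part of the original
 * file): the idr_preload()/idr_alloc() pattern.  GFP_NOWAIT is used under
 * the lock because the preload already guarantees a preallocated node.
 * Names are hypothetical; <linux/spinlock.h> is assumed to be available.
 */
static int example_idr_alloc(struct idr *idr, spinlock_t *lock, void *ptr)
{
        int id;

        idr_preload(GFP_KERNEL);
        spin_lock(lock);
        id = idr_alloc(idr, ptr, 0, 0, GFP_NOWAIT);
        spin_unlock(lock);
        idr_preload_end();      /* re-enables preemption */
        return id;
}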
  1325. void __rcu **idr_get_free(struct radix_tree_root *root,
  1326. struct radix_tree_iter *iter, gfp_t gfp,
  1327. unsigned long max)
  1328. {
  1329. struct radix_tree_node *node = NULL, *child;
  1330. void __rcu **slot = (void __rcu **)&root->xa_head;
  1331. unsigned long maxindex, start = iter->next_index;
  1332. unsigned int shift, offset = 0;
  1333. grow:
  1334. shift = radix_tree_load_root(root, &child, &maxindex);
  1335. if (!radix_tree_tagged(root, IDR_FREE))
  1336. start = max(start, maxindex + 1);
  1337. if (start > max)
  1338. return ERR_PTR(-ENOSPC);
  1339. if (start > maxindex) {
  1340. int error = radix_tree_extend(root, gfp, start, shift);
  1341. if (error < 0)
  1342. return ERR_PTR(error);
  1343. shift = error;
  1344. child = rcu_dereference_raw(root->xa_head);
  1345. }
  1346. if (start == 0 && shift == 0)
  1347. shift = RADIX_TREE_MAP_SHIFT;
  1348. while (shift) {
  1349. shift -= RADIX_TREE_MAP_SHIFT;
  1350. if (child == NULL) {
  1351. /* Have to add a child node. */
  1352. child = radix_tree_node_alloc(gfp, node, root, shift,
  1353. offset, 0, 0);
  1354. if (!child)
  1355. return ERR_PTR(-ENOMEM);
  1356. all_tag_set(child, IDR_FREE);
  1357. rcu_assign_pointer(*slot, node_to_entry(child));
  1358. if (node)
  1359. node->count++;
  1360. } else if (!radix_tree_is_internal_node(child))
  1361. break;
  1362. node = entry_to_node(child);
  1363. offset = radix_tree_descend(node, &child, start);
  1364. if (!tag_get(node, IDR_FREE, offset)) {
  1365. offset = radix_tree_find_next_bit(node, IDR_FREE,
  1366. offset + 1);
  1367. start = next_index(start, node, offset);
  1368. if (start > max)
  1369. return ERR_PTR(-ENOSPC);
  1370. while (offset == RADIX_TREE_MAP_SIZE) {
  1371. offset = node->offset + 1;
  1372. node = node->parent;
  1373. if (!node)
  1374. goto grow;
  1375. shift = node->shift;
  1376. }
  1377. child = rcu_dereference_raw(node->slots[offset]);
  1378. }
  1379. slot = &node->slots[offset];
  1380. }
  1381. iter->index = start;
  1382. if (node)
  1383. iter->next_index = 1 + min(max, (start | node_maxindex(node)));
  1384. else
  1385. iter->next_index = 1;
  1386. iter->node = node;
  1387. set_iter_tags(iter, node, offset, IDR_FREE);
  1388. return slot;
  1389. }
  1390. /**
  1391. * idr_destroy - release all internal memory from an IDR
  1392. * @idr: idr handle
  1393. *
  1394. * After this function is called, the IDR is empty, and may be reused or
  1395. * the data structure containing it may be freed.
  1396. *
  1397. * A typical clean-up sequence for objects stored in an idr tree will use
  1398. * idr_for_each() to free all objects, if necessary, then idr_destroy() to
  1399. * free the memory used to keep track of those objects.
  1400. */
  1401. void idr_destroy(struct idr *idr)
  1402. {
  1403. struct radix_tree_node *node = rcu_dereference_raw(idr->idr_rt.xa_head);
  1404. if (radix_tree_is_internal_node(node))
  1405. radix_tree_free_nodes(node);
  1406. idr->idr_rt.xa_head = NULL;
  1407. root_tag_set(&idr->idr_rt, IDR_FREE);
  1408. }
  1409. EXPORT_SYMBOL(idr_destroy);
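/*
 * Illustrative usage sketch (editor's example, not part of the original
 * file): the clean-up sequence described above -- free every object with
 * idr_for_each(), then release the tree's own memory with idr_destroy().
 * The helpers are hypothetical and the stored objects are assumed to have
 * been allocated with kmalloc().
 */
static int example_free_one(int id, void *p, void *data)
{
        kfree(p);
        return 0;
}

static void example_idr_cleanup(struct idr *idr)
{
        idr_for_each(idr, example_free_one, NULL);
        idr_destroy(idr);
}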
  1410. static void
  1411. radix_tree_node_ctor(void *arg)
  1412. {
  1413. struct radix_tree_node *node = arg;
  1414. memset(node, 0, sizeof(*node));
  1415. INIT_LIST_HEAD(&node->private_list);
  1416. }
  1417. static int radix_tree_cpu_dead(unsigned int cpu)
  1418. {
  1419. struct radix_tree_preload *rtp;
  1420. struct radix_tree_node *node;
  1421. /* Free per-cpu pool of preloaded nodes */
  1422. rtp = &per_cpu(radix_tree_preloads, cpu);
  1423. while (rtp->nr) {
  1424. node = rtp->nodes;
  1425. rtp->nodes = node->parent;
  1426. kmem_cache_free(radix_tree_node_cachep, node);
  1427. rtp->nr--;
  1428. }
  1429. return 0;
  1430. }
  1431. void __init radix_tree_init(void)
  1432. {
  1433. int ret;
  1434. BUILD_BUG_ON(RADIX_TREE_MAX_TAGS + __GFP_BITS_SHIFT > 32);
  1435. BUILD_BUG_ON(ROOT_IS_IDR & ~GFP_ZONEMASK);
  1436. BUILD_BUG_ON(XA_CHUNK_SIZE > 255);
  1437. radix_tree_node_cachep = kmem_cache_create("radix_tree_node",
  1438. sizeof(struct radix_tree_node), 0,
  1439. SLAB_PANIC | SLAB_RECLAIM_ACCOUNT,
  1440. radix_tree_node_ctor);
  1441. ret = cpuhp_setup_state_nocalls(CPUHP_RADIX_DEAD, "lib/radix:dead",
  1442. NULL, radix_tree_cpu_dead);
  1443. WARN_ON(ret < 0);
  1444. }