inet_fragment.c 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226
  1. /*
  2. * inet fragments management
  3. *
  4. * This program is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU General Public License
  6. * as published by the Free Software Foundation; either version
  7. * 2 of the License, or (at your option) any later version.
  8. *
  9. * Authors: Pavel Emelyanov <xemul@openvz.org>
  10. * Started as consolidation of ipv4/ip_fragment.c,
  11. * ipv6/reassembly.c and ipv6 nf conntrack reassembly
  12. */
  13. #include <linux/list.h>
  14. #include <linux/spinlock.h>
  15. #include <linux/module.h>
  16. #include <linux/timer.h>
  17. #include <linux/mm.h>
  18. #include <linux/random.h>
  19. #include <linux/skbuff.h>
  20. #include <linux/rtnetlink.h>
  21. #include <linux/slab.h>
  22. #include <linux/rhashtable.h>
  23. #include <net/sock.h>
  24. #include <net/inet_frag.h>
  25. #include <net/inet_ecn.h>
  26. /* Given the OR values of all fragments, apply RFC 3168 5.3 requirements
  27. * Value : 0xff if frame should be dropped.
  28. * 0 or INET_ECN_CE value, to be ORed in to final iph->tos field
  29. */
  30. const u8 ip_frag_ecn_table[16] = {
  31. /* at least one fragment had CE, and others ECT_0 or ECT_1 */
  32. [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = INET_ECN_CE,
  33. [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = INET_ECN_CE,
  34. [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = INET_ECN_CE,
  35. /* invalid combinations : drop frame */
  36. [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE] = 0xff,
  37. [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0] = 0xff,
  38. [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_1] = 0xff,
  39. [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff,
  40. [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = 0xff,
  41. [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = 0xff,
  42. [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff,
  43. };
  44. EXPORT_SYMBOL(ip_frag_ecn_table);
  45. int inet_frags_init(struct inet_frags *f)
  46. {
  47. f->frags_cachep = kmem_cache_create(f->frags_cache_name, f->qsize, 0, 0,
  48. NULL);
  49. if (!f->frags_cachep)
  50. return -ENOMEM;
  51. return 0;
  52. }
  53. EXPORT_SYMBOL(inet_frags_init);
  54. void inet_frags_fini(struct inet_frags *f)
  55. {
  56. /* We must wait that all inet_frag_destroy_rcu() have completed. */
  57. rcu_barrier();
  58. kmem_cache_destroy(f->frags_cachep);
  59. f->frags_cachep = NULL;
  60. }
  61. EXPORT_SYMBOL(inet_frags_fini);
  62. static void inet_frags_free_cb(void *ptr, void *arg)
  63. {
  64. struct inet_frag_queue *fq = ptr;
  65. /* If we can not cancel the timer, it means this frag_queue
  66. * is already disappearing, we have nothing to do.
  67. * Otherwise, we own a refcount until the end of this function.
  68. */
  69. if (!del_timer(&fq->timer))
  70. return;
  71. spin_lock_bh(&fq->lock);
  72. if (!(fq->flags & INET_FRAG_COMPLETE)) {
  73. fq->flags |= INET_FRAG_COMPLETE;
  74. refcount_dec(&fq->refcnt);
  75. }
  76. spin_unlock_bh(&fq->lock);
  77. inet_frag_put(fq);
  78. }
  79. void inet_frags_exit_net(struct netns_frags *nf)
  80. {
  81. nf->high_thresh = 0; /* prevent creation of new frags */
  82. rhashtable_free_and_destroy(&nf->rhashtable, inet_frags_free_cb, NULL);
  83. }
  84. EXPORT_SYMBOL(inet_frags_exit_net);
  85. void inet_frag_kill(struct inet_frag_queue *fq)
  86. {
  87. if (del_timer(&fq->timer))
  88. refcount_dec(&fq->refcnt);
  89. if (!(fq->flags & INET_FRAG_COMPLETE)) {
  90. struct netns_frags *nf = fq->net;
  91. fq->flags |= INET_FRAG_COMPLETE;
  92. rhashtable_remove_fast(&nf->rhashtable, &fq->node, nf->f->rhash_params);
  93. refcount_dec(&fq->refcnt);
  94. }
  95. }
  96. EXPORT_SYMBOL(inet_frag_kill);
  97. static void inet_frag_destroy_rcu(struct rcu_head *head)
  98. {
  99. struct inet_frag_queue *q = container_of(head, struct inet_frag_queue,
  100. rcu);
  101. struct inet_frags *f = q->net->f;
  102. if (f->destructor)
  103. f->destructor(q);
  104. kmem_cache_free(f->frags_cachep, q);
  105. }
  106. void inet_frag_destroy(struct inet_frag_queue *q)
  107. {
  108. struct sk_buff *fp;
  109. struct netns_frags *nf;
  110. unsigned int sum, sum_truesize = 0;
  111. struct inet_frags *f;
  112. WARN_ON(!(q->flags & INET_FRAG_COMPLETE));
  113. WARN_ON(del_timer(&q->timer) != 0);
  114. /* Release all fragment data. */
  115. fp = q->fragments;
  116. nf = q->net;
  117. f = nf->f;
  118. if (fp) {
  119. do {
  120. struct sk_buff *xp = fp->next;
  121. sum_truesize += fp->truesize;
  122. kfree_skb(fp);
  123. fp = xp;
  124. } while (fp);
  125. } else {
  126. sum_truesize = inet_frag_rbtree_purge(&q->rb_fragments);
  127. }
  128. sum = sum_truesize + f->qsize;
  129. call_rcu(&q->rcu, inet_frag_destroy_rcu);
  130. sub_frag_mem_limit(nf, sum);
  131. }
  132. EXPORT_SYMBOL(inet_frag_destroy);
  133. static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
  134. struct inet_frags *f,
  135. void *arg)
  136. {
  137. struct inet_frag_queue *q;
  138. q = kmem_cache_zalloc(f->frags_cachep, GFP_ATOMIC);
  139. if (!q)
  140. return NULL;
  141. q->net = nf;
  142. f->constructor(q, arg);
  143. add_frag_mem_limit(nf, f->qsize);
  144. timer_setup(&q->timer, f->frag_expire, 0);
  145. spin_lock_init(&q->lock);
  146. refcount_set(&q->refcnt, 3);
  147. return q;
  148. }
  149. static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
  150. void *arg,
  151. struct inet_frag_queue **prev)
  152. {
  153. struct inet_frags *f = nf->f;
  154. struct inet_frag_queue *q;
  155. q = inet_frag_alloc(nf, f, arg);
  156. if (!q) {
  157. *prev = ERR_PTR(-ENOMEM);
  158. return NULL;
  159. }
  160. mod_timer(&q->timer, jiffies + nf->timeout);
  161. *prev = rhashtable_lookup_get_insert_key(&nf->rhashtable, &q->key,
  162. &q->node, f->rhash_params);
  163. if (*prev) {
  164. q->flags |= INET_FRAG_COMPLETE;
  165. inet_frag_kill(q);
  166. inet_frag_destroy(q);
  167. return NULL;
  168. }
  169. return q;
  170. }
  171. /* TODO : call from rcu_read_lock() and no longer use refcount_inc_not_zero() */
  172. struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key)
  173. {
  174. struct inet_frag_queue *fq = NULL, *prev;
  175. if (!nf->high_thresh || frag_mem_limit(nf) > nf->high_thresh)
  176. return NULL;
  177. rcu_read_lock();
  178. prev = rhashtable_lookup(&nf->rhashtable, key, nf->f->rhash_params);
  179. if (!prev)
  180. fq = inet_frag_create(nf, key, &prev);
  181. if (prev && !IS_ERR(prev)) {
  182. fq = prev;
  183. if (!refcount_inc_not_zero(&fq->refcnt))
  184. fq = NULL;
  185. }
  186. rcu_read_unlock();
  187. return fq;
  188. }
  189. EXPORT_SYMBOL(inet_frag_find);