cls_tcindex.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634
  1. /*
  2. * net/sched/cls_tcindex.c Packet classifier for skb->tc_index
  3. *
  4. * Written 1998,1999 by Werner Almesberger, EPFL ICA
  5. */
  6. #include <linux/module.h>
  7. #include <linux/types.h>
  8. #include <linux/kernel.h>
  9. #include <linux/skbuff.h>
  10. #include <linux/errno.h>
  11. #include <linux/slab.h>
  12. #include <net/act_api.h>
  13. #include <net/netlink.h>
  14. #include <net/pkt_cls.h>
  15. /*
  16. * Passing parameters to the root seems to be done more awkwardly than really
  17. * necessary. At least, u32 doesn't seem to use such dirty hacks. To be
  18. * verified. FIXME.
  19. */
  /* Index ranges below this size get a directly indexed ("perfect") table
   * when no explicit hash size was configured.
   */
  #define PERFECT_HASH_THRESHOLD  64
  /* Bucket count used otherwise; optimized for diffserv. */
  #define DEFAULT_HASH_SIZE       64
  22. struct tcindex_filter_result {
  23. struct tcf_exts exts;
  24. struct tcf_result res;
  25. struct rcu_head rcu;
  26. };
  27. struct tcindex_filter {
  28. u16 key;
  29. struct tcindex_filter_result result;
  30. struct tcindex_filter __rcu *next;
  31. struct rcu_head rcu;
  32. };
  33. struct tcindex_data {
  34. struct tcindex_filter_result *perfect; /* perfect hash; NULL if none */
  35. struct tcindex_filter __rcu **h; /* imperfect hash; */
  36. struct tcf_proto *tp;
  37. u16 mask; /* AND key with mask */
  38. u32 shift; /* shift ANDed key to the right */
  39. u32 hash; /* hash table size; 0 if undefined */
  40. u32 alloc_hash; /* allocated size */
  41. u32 fall_through; /* 0: only classify if explicit match */
  42. struct rcu_head rcu;
  43. };
  44. static inline int tcindex_filter_is_set(struct tcindex_filter_result *r)
  45. {
  46. return tcf_exts_has_actions(&r->exts) || r->res.classid;
  47. }
  48. static struct tcindex_filter_result *tcindex_lookup(struct tcindex_data *p,
  49. u16 key)
  50. {
  51. if (p->perfect) {
  52. struct tcindex_filter_result *f = p->perfect + key;
  53. return tcindex_filter_is_set(f) ? f : NULL;
  54. } else if (p->h) {
  55. struct tcindex_filter __rcu **fp;
  56. struct tcindex_filter *f;
  57. fp = &p->h[key % p->hash];
  58. for (f = rcu_dereference_bh_rtnl(*fp);
  59. f;
  60. fp = &f->next, f = rcu_dereference_bh_rtnl(*fp))
  61. if (f->key == key)
  62. return &f->result;
  63. }
  64. return NULL;
  65. }
  66. static int tcindex_classify(struct sk_buff *skb, const struct tcf_proto *tp,
  67. struct tcf_result *res)
  68. {
  69. struct tcindex_data *p = rcu_dereference_bh(tp->root);
  70. struct tcindex_filter_result *f;
  71. int key = (skb->tc_index & p->mask) >> p->shift;
  72. pr_debug("tcindex_classify(skb %p,tp %p,res %p),p %p\n",
  73. skb, tp, res, p);
  74. f = tcindex_lookup(p, key);
  75. if (!f) {
  76. if (!p->fall_through)
  77. return -1;
  78. res->classid = TC_H_MAKE(TC_H_MAJ(tp->q->handle), key);
  79. res->class = 0;
  80. pr_debug("alg 0x%x\n", res->classid);
  81. return 0;
  82. }
  83. *res = f->res;
  84. pr_debug("map 0x%x\n", res->classid);
  85. return tcf_exts_exec(skb, &f->exts, res);
  86. }
  87. static void *tcindex_get(struct tcf_proto *tp, u32 handle)
  88. {
  89. struct tcindex_data *p = rtnl_dereference(tp->root);
  90. struct tcindex_filter_result *r;
  91. pr_debug("tcindex_get(tp %p,handle 0x%08x)\n", tp, handle);
  92. if (p->perfect && handle >= p->alloc_hash)
  93. return NULL;
  94. r = tcindex_lookup(p, handle);
  95. return r && tcindex_filter_is_set(r) ? r : NULL;
  96. }
  97. static int tcindex_init(struct tcf_proto *tp)
  98. {
  99. struct tcindex_data *p;
  100. pr_debug("tcindex_init(tp %p)\n", tp);
  101. p = kzalloc(sizeof(struct tcindex_data), GFP_KERNEL);
  102. if (!p)
  103. return -ENOMEM;
  104. p->mask = 0xffff;
  105. p->hash = DEFAULT_HASH_SIZE;
  106. p->fall_through = 1;
  107. rcu_assign_pointer(tp->root, p);
  108. return 0;
  109. }
  110. static void tcindex_destroy_rexts(struct rcu_head *head)
  111. {
  112. struct tcindex_filter_result *r;
  113. r = container_of(head, struct tcindex_filter_result, rcu);
  114. tcf_exts_destroy(&r->exts);
  115. }
  116. static void tcindex_destroy_fexts(struct rcu_head *head)
  117. {
  118. struct tcindex_filter *f = container_of(head, struct tcindex_filter,
  119. rcu);
  120. tcf_exts_destroy(&f->result.exts);
  121. kfree(f);
  122. }
  123. static int tcindex_delete(struct tcf_proto *tp, void *arg, bool *last)
  124. {
  125. struct tcindex_data *p = rtnl_dereference(tp->root);
  126. struct tcindex_filter_result *r = arg;
  127. struct tcindex_filter __rcu **walk;
  128. struct tcindex_filter *f = NULL;
  129. pr_debug("tcindex_delete(tp %p,arg %p),p %p\n", tp, arg, p);
  130. if (p->perfect) {
  131. if (!r->res.class)
  132. return -ENOENT;
  133. } else {
  134. int i;
  135. for (i = 0; i < p->hash; i++) {
  136. walk = p->h + i;
  137. for (f = rtnl_dereference(*walk); f;
  138. walk = &f->next, f = rtnl_dereference(*walk)) {
  139. if (&f->result == r)
  140. goto found;
  141. }
  142. }
  143. return -ENOENT;
  144. found:
  145. rcu_assign_pointer(*walk, rtnl_dereference(f->next));
  146. }
  147. tcf_unbind_filter(tp, &r->res);
  148. /* all classifiers are required to call tcf_exts_destroy() after rcu
  149. * grace period, since converted-to-rcu actions are relying on that
  150. * in cleanup() callback
  151. */
  152. if (f)
  153. call_rcu(&f->rcu, tcindex_destroy_fexts);
  154. else
  155. call_rcu(&r->rcu, tcindex_destroy_rexts);
  156. *last = false;
  157. return 0;
  158. }
  159. static int tcindex_destroy_element(struct tcf_proto *tp,
  160. void *arg, struct tcf_walker *walker)
  161. {
  162. bool last;
  163. return tcindex_delete(tp, arg, &last);
  164. }
  165. static void __tcindex_destroy(struct rcu_head *head)
  166. {
  167. struct tcindex_data *p = container_of(head, struct tcindex_data, rcu);
  168. kfree(p->perfect);
  169. kfree(p->h);
  170. kfree(p);
  171. }
  172. static inline int
  173. valid_perfect_hash(struct tcindex_data *p)
  174. {
  175. return p->hash > (p->mask >> p->shift);
  176. }
  177. static const struct nla_policy tcindex_policy[TCA_TCINDEX_MAX + 1] = {
  178. [TCA_TCINDEX_HASH] = { .type = NLA_U32 },
  179. [TCA_TCINDEX_MASK] = { .type = NLA_U16 },
  180. [TCA_TCINDEX_SHIFT] = { .type = NLA_U32 },
  181. [TCA_TCINDEX_FALL_THROUGH] = { .type = NLA_U32 },
  182. [TCA_TCINDEX_CLASSID] = { .type = NLA_U32 },
  183. };
  184. static int tcindex_filter_result_init(struct tcindex_filter_result *r)
  185. {
  186. memset(r, 0, sizeof(*r));
  187. return tcf_exts_init(&r->exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
  188. }
  189. static void __tcindex_partial_destroy(struct rcu_head *head)
  190. {
  191. struct tcindex_data *p = container_of(head, struct tcindex_data, rcu);
  192. kfree(p->perfect);
  193. kfree(p);
  194. }
  195. static void tcindex_free_perfect_hash(struct tcindex_data *cp)
  196. {
  197. int i;
  198. for (i = 0; i < cp->hash; i++)
  199. tcf_exts_destroy(&cp->perfect[i].exts);
  200. kfree(cp->perfect);
  201. }
  202. static int tcindex_alloc_perfect_hash(struct tcindex_data *cp)
  203. {
  204. int i, err = 0;
  205. cp->perfect = kcalloc(cp->hash, sizeof(struct tcindex_filter_result),
  206. GFP_KERNEL);
  207. if (!cp->perfect)
  208. return -ENOMEM;
  209. for (i = 0; i < cp->hash; i++) {
  210. err = tcf_exts_init(&cp->perfect[i].exts,
  211. TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
  212. if (err < 0)
  213. goto errout;
  214. }
  215. return 0;
  216. errout:
  217. tcindex_free_perfect_hash(cp);
  218. return err;
  219. }
  220. static int
  221. tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
  222. u32 handle, struct tcindex_data *p,
  223. struct tcindex_filter_result *r, struct nlattr **tb,
  224. struct nlattr *est, bool ovr)
  225. {
  226. struct tcindex_filter_result new_filter_result, *old_r = r;
  227. struct tcindex_filter_result cr;
  228. struct tcindex_data *cp = NULL, *oldp;
  229. struct tcindex_filter *f = NULL; /* make gcc behave */
  230. int err, balloc = 0;
  231. struct tcf_exts e;
  232. err = tcf_exts_init(&e, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
  233. if (err < 0)
  234. return err;
  235. err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
  236. if (err < 0)
  237. goto errout;
  238. err = -ENOMEM;
  239. /* tcindex_data attributes must look atomic to classifier/lookup so
  240. * allocate new tcindex data and RCU assign it onto root. Keeping
  241. * perfect hash and hash pointers from old data.
  242. */
  243. cp = kzalloc(sizeof(*cp), GFP_KERNEL);
  244. if (!cp)
  245. goto errout;
  246. cp->mask = p->mask;
  247. cp->shift = p->shift;
  248. cp->hash = p->hash;
  249. cp->alloc_hash = p->alloc_hash;
  250. cp->fall_through = p->fall_through;
  251. cp->tp = tp;
  252. if (p->perfect) {
  253. int i;
  254. if (tcindex_alloc_perfect_hash(cp) < 0)
  255. goto errout;
  256. for (i = 0; i < cp->hash; i++)
  257. cp->perfect[i].res = p->perfect[i].res;
  258. balloc = 1;
  259. }
  260. cp->h = p->h;
  261. err = tcindex_filter_result_init(&new_filter_result);
  262. if (err < 0)
  263. goto errout1;
  264. err = tcindex_filter_result_init(&cr);
  265. if (err < 0)
  266. goto errout1;
  267. if (old_r)
  268. cr.res = r->res;
  269. if (tb[TCA_TCINDEX_HASH])
  270. cp->hash = nla_get_u32(tb[TCA_TCINDEX_HASH]);
  271. if (tb[TCA_TCINDEX_MASK])
  272. cp->mask = nla_get_u16(tb[TCA_TCINDEX_MASK]);
  273. if (tb[TCA_TCINDEX_SHIFT])
  274. cp->shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]);
  275. err = -EBUSY;
  276. /* Hash already allocated, make sure that we still meet the
  277. * requirements for the allocated hash.
  278. */
  279. if (cp->perfect) {
  280. if (!valid_perfect_hash(cp) ||
  281. cp->hash > cp->alloc_hash)
  282. goto errout_alloc;
  283. } else if (cp->h && cp->hash != cp->alloc_hash) {
  284. goto errout_alloc;
  285. }
  286. err = -EINVAL;
  287. if (tb[TCA_TCINDEX_FALL_THROUGH])
  288. cp->fall_through = nla_get_u32(tb[TCA_TCINDEX_FALL_THROUGH]);
  289. if (!cp->hash) {
  290. /* Hash not specified, use perfect hash if the upper limit
  291. * of the hashing index is below the threshold.
  292. */
  293. if ((cp->mask >> cp->shift) < PERFECT_HASH_THRESHOLD)
  294. cp->hash = (cp->mask >> cp->shift) + 1;
  295. else
  296. cp->hash = DEFAULT_HASH_SIZE;
  297. }
  298. if (!cp->perfect && !cp->h)
  299. cp->alloc_hash = cp->hash;
  300. /* Note: this could be as restrictive as if (handle & ~(mask >> shift))
  301. * but then, we'd fail handles that may become valid after some future
  302. * mask change. While this is extremely unlikely to ever matter,
  303. * the check below is safer (and also more backwards-compatible).
  304. */
  305. if (cp->perfect || valid_perfect_hash(cp))
  306. if (handle >= cp->alloc_hash)
  307. goto errout_alloc;
  308. err = -ENOMEM;
  309. if (!cp->perfect && !cp->h) {
  310. if (valid_perfect_hash(cp)) {
  311. if (tcindex_alloc_perfect_hash(cp) < 0)
  312. goto errout_alloc;
  313. balloc = 1;
  314. } else {
  315. struct tcindex_filter __rcu **hash;
  316. hash = kcalloc(cp->hash,
  317. sizeof(struct tcindex_filter *),
  318. GFP_KERNEL);
  319. if (!hash)
  320. goto errout_alloc;
  321. cp->h = hash;
  322. balloc = 2;
  323. }
  324. }
  325. if (cp->perfect)
  326. r = cp->perfect + handle;
  327. else
  328. r = tcindex_lookup(cp, handle) ? : &new_filter_result;
  329. if (r == &new_filter_result) {
  330. f = kzalloc(sizeof(*f), GFP_KERNEL);
  331. if (!f)
  332. goto errout_alloc;
  333. f->key = handle;
  334. f->next = NULL;
  335. err = tcindex_filter_result_init(&f->result);
  336. if (err < 0) {
  337. kfree(f);
  338. goto errout_alloc;
  339. }
  340. }
  341. if (tb[TCA_TCINDEX_CLASSID]) {
  342. cr.res.classid = nla_get_u32(tb[TCA_TCINDEX_CLASSID]);
  343. tcf_bind_filter(tp, &cr.res, base);
  344. }
  345. if (old_r)
  346. tcf_exts_change(&r->exts, &e);
  347. else
  348. tcf_exts_change(&cr.exts, &e);
  349. if (old_r && old_r != r) {
  350. err = tcindex_filter_result_init(old_r);
  351. if (err < 0) {
  352. kfree(f);
  353. goto errout_alloc;
  354. }
  355. }
  356. oldp = p;
  357. r->res = cr.res;
  358. rcu_assign_pointer(tp->root, cp);
  359. if (r == &new_filter_result) {
  360. struct tcindex_filter *nfp;
  361. struct tcindex_filter __rcu **fp;
  362. tcf_exts_change(&f->result.exts, &r->exts);
  363. fp = cp->h + (handle % cp->hash);
  364. for (nfp = rtnl_dereference(*fp);
  365. nfp;
  366. fp = &nfp->next, nfp = rtnl_dereference(*fp))
  367. ; /* nothing */
  368. rcu_assign_pointer(*fp, f);
  369. }
  370. if (oldp)
  371. call_rcu(&oldp->rcu, __tcindex_partial_destroy);
  372. return 0;
  373. errout_alloc:
  374. if (balloc == 1)
  375. tcindex_free_perfect_hash(cp);
  376. else if (balloc == 2)
  377. kfree(cp->h);
  378. errout1:
  379. tcf_exts_destroy(&cr.exts);
  380. tcf_exts_destroy(&new_filter_result.exts);
  381. errout:
  382. kfree(cp);
  383. tcf_exts_destroy(&e);
  384. return err;
  385. }
  386. static int
  387. tcindex_change(struct net *net, struct sk_buff *in_skb,
  388. struct tcf_proto *tp, unsigned long base, u32 handle,
  389. struct nlattr **tca, void **arg, bool ovr)
  390. {
  391. struct nlattr *opt = tca[TCA_OPTIONS];
  392. struct nlattr *tb[TCA_TCINDEX_MAX + 1];
  393. struct tcindex_data *p = rtnl_dereference(tp->root);
  394. struct tcindex_filter_result *r = *arg;
  395. int err;
  396. pr_debug("tcindex_change(tp %p,handle 0x%08x,tca %p,arg %p),opt %p,"
  397. "p %p,r %p,*arg %p\n",
  398. tp, handle, tca, arg, opt, p, r, arg ? *arg : NULL);
  399. if (!opt)
  400. return 0;
  401. err = nla_parse_nested(tb, TCA_TCINDEX_MAX, opt, tcindex_policy, NULL);
  402. if (err < 0)
  403. return err;
  404. return tcindex_set_parms(net, tp, base, handle, p, r, tb,
  405. tca[TCA_RATE], ovr);
  406. }
  407. static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker)
  408. {
  409. struct tcindex_data *p = rtnl_dereference(tp->root);
  410. struct tcindex_filter *f, *next;
  411. int i;
  412. pr_debug("tcindex_walk(tp %p,walker %p),p %p\n", tp, walker, p);
  413. if (p->perfect) {
  414. for (i = 0; i < p->hash; i++) {
  415. if (!p->perfect[i].res.class)
  416. continue;
  417. if (walker->count >= walker->skip) {
  418. if (walker->fn(tp, p->perfect + i, walker) < 0) {
  419. walker->stop = 1;
  420. return;
  421. }
  422. }
  423. walker->count++;
  424. }
  425. }
  426. if (!p->h)
  427. return;
  428. for (i = 0; i < p->hash; i++) {
  429. for (f = rtnl_dereference(p->h[i]); f; f = next) {
  430. next = rtnl_dereference(f->next);
  431. if (walker->count >= walker->skip) {
  432. if (walker->fn(tp, &f->result, walker) < 0) {
  433. walker->stop = 1;
  434. return;
  435. }
  436. }
  437. walker->count++;
  438. }
  439. }
  440. }
  441. static void tcindex_destroy(struct tcf_proto *tp)
  442. {
  443. struct tcindex_data *p = rtnl_dereference(tp->root);
  444. struct tcf_walker walker;
  445. pr_debug("tcindex_destroy(tp %p),p %p\n", tp, p);
  446. walker.count = 0;
  447. walker.skip = 0;
  448. walker.fn = tcindex_destroy_element;
  449. tcindex_walk(tp, &walker);
  450. call_rcu(&p->rcu, __tcindex_destroy);
  451. }
  452. static int tcindex_dump(struct net *net, struct tcf_proto *tp, void *fh,
  453. struct sk_buff *skb, struct tcmsg *t)
  454. {
  455. struct tcindex_data *p = rtnl_dereference(tp->root);
  456. struct tcindex_filter_result *r = fh;
  457. struct nlattr *nest;
  458. pr_debug("tcindex_dump(tp %p,fh %p,skb %p,t %p),p %p,r %p\n",
  459. tp, fh, skb, t, p, r);
  460. pr_debug("p->perfect %p p->h %p\n", p->perfect, p->h);
  461. nest = nla_nest_start(skb, TCA_OPTIONS);
  462. if (nest == NULL)
  463. goto nla_put_failure;
  464. if (!fh) {
  465. t->tcm_handle = ~0; /* whatever ... */
  466. if (nla_put_u32(skb, TCA_TCINDEX_HASH, p->hash) ||
  467. nla_put_u16(skb, TCA_TCINDEX_MASK, p->mask) ||
  468. nla_put_u32(skb, TCA_TCINDEX_SHIFT, p->shift) ||
  469. nla_put_u32(skb, TCA_TCINDEX_FALL_THROUGH, p->fall_through))
  470. goto nla_put_failure;
  471. nla_nest_end(skb, nest);
  472. } else {
  473. if (p->perfect) {
  474. t->tcm_handle = r - p->perfect;
  475. } else {
  476. struct tcindex_filter *f;
  477. struct tcindex_filter __rcu **fp;
  478. int i;
  479. t->tcm_handle = 0;
  480. for (i = 0; !t->tcm_handle && i < p->hash; i++) {
  481. fp = &p->h[i];
  482. for (f = rtnl_dereference(*fp);
  483. !t->tcm_handle && f;
  484. fp = &f->next, f = rtnl_dereference(*fp)) {
  485. if (&f->result == r)
  486. t->tcm_handle = f->key;
  487. }
  488. }
  489. }
  490. pr_debug("handle = %d\n", t->tcm_handle);
  491. if (r->res.class &&
  492. nla_put_u32(skb, TCA_TCINDEX_CLASSID, r->res.classid))
  493. goto nla_put_failure;
  494. if (tcf_exts_dump(skb, &r->exts) < 0)
  495. goto nla_put_failure;
  496. nla_nest_end(skb, nest);
  497. if (tcf_exts_dump_stats(skb, &r->exts) < 0)
  498. goto nla_put_failure;
  499. }
  500. return skb->len;
  501. nla_put_failure:
  502. nla_nest_cancel(skb, nest);
  503. return -1;
  504. }
  505. static struct tcf_proto_ops cls_tcindex_ops __read_mostly = {
  506. .kind = "tcindex",
  507. .classify = tcindex_classify,
  508. .init = tcindex_init,
  509. .destroy = tcindex_destroy,
  510. .get = tcindex_get,
  511. .change = tcindex_change,
  512. .delete = tcindex_delete,
  513. .walk = tcindex_walk,
  514. .dump = tcindex_dump,
  515. .owner = THIS_MODULE,
  516. };
  517. static int __init init_tcindex(void)
  518. {
  519. return register_tcf_proto_ops(&cls_tcindex_ops);
  520. }
  521. static void __exit exit_tcindex(void)
  522. {
  523. unregister_tcf_proto_ops(&cls_tcindex_ops);
  524. }
  525. module_init(init_tcindex)
  526. module_exit(exit_tcindex)
  527. MODULE_LICENSE("GPL");