/*
 * net/sched/cls_api.c	Packet classifier API.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Changes:
 *
 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
 *
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/kmod.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>

/* The list of all installed classifier types */
static LIST_HEAD(tcf_proto_base);

/* Protects list of registered TC modules. It is pure SMP lock. */
static DEFINE_RWLOCK(cls_mod_lock);

/* Find classifier type by string name */
static const struct tcf_proto_ops *tcf_proto_lookup_ops(const char *kind)
{
	const struct tcf_proto_ops *t, *res = NULL;

	if (kind) {
		read_lock(&cls_mod_lock);
		list_for_each_entry(t, &tcf_proto_base, head) {
			if (strcmp(kind, t->kind) == 0) {
				if (try_module_get(t->owner))
					res = t;
				break;
			}
		}
		read_unlock(&cls_mod_lock);
	}
	return res;
}

/* Register(unregister) new classifier type */

int register_tcf_proto_ops(struct tcf_proto_ops *ops)
{
	struct tcf_proto_ops *t;
	int rc = -EEXIST;

	write_lock(&cls_mod_lock);
	list_for_each_entry(t, &tcf_proto_base, head)
		if (!strcmp(ops->kind, t->kind))
			goto out;

	list_add_tail(&ops->head, &tcf_proto_base);
	rc = 0;
out:
	write_unlock(&cls_mod_lock);
	return rc;
}
EXPORT_SYMBOL(register_tcf_proto_ops);
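
/* Ordered workqueue used via tcf_queue_work() for deferred filter
 * destruction; unregister_tcf_proto_ops() flushes it so that a module's
 * pending work items complete before the module can go away.
 */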
static struct workqueue_struct *tc_filter_wq;

int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
{
	struct tcf_proto_ops *t;
	int rc = -ENOENT;

	/* Wait for outstanding call_rcu()s, if any, from a
	 * tcf_proto_ops's destroy() handler.
	 */
	rcu_barrier();
	flush_workqueue(tc_filter_wq);

	write_lock(&cls_mod_lock);
	list_for_each_entry(t, &tcf_proto_base, head) {
		if (t == ops) {
			list_del(&t->head);
			rc = 0;
			break;
		}
	}
	write_unlock(&cls_mod_lock);
	return rc;
}
EXPORT_SYMBOL(unregister_tcf_proto_ops);
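
/* Defer work to the ordered tc filter workqueue; returns false if @work
 * was already queued, true otherwise.
 */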
bool tcf_queue_work(struct work_struct *work)
{
	return queue_work(tc_filter_wq, work);
}
EXPORT_SYMBOL(tcf_queue_work);

/* Select new prio value from the range, managed by kernel. */
static inline u32 tcf_auto_prio(struct tcf_proto *tp)
{
	u32 first = TC_H_MAKE(0xC0000000U, 0U);

	if (tp)
		first = tp->prio - 1;

	return TC_H_MAJ(first);
}
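
/* Allocate and initialize a new proto-tcf. A failed lookup may trigger a
 * module autoload, which drops the RTNL lock; -EAGAIN is then returned so
 * the caller replays the whole request. Errors come back as ERR_PTR().
 */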
static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
					  u32 prio, u32 parent, struct Qdisc *q,
					  struct tcf_chain *chain)
{
	struct tcf_proto *tp;
	int err;

	tp = kzalloc(sizeof(*tp), GFP_KERNEL);
	if (!tp)
		return ERR_PTR(-ENOBUFS);

	err = -ENOENT;
	tp->ops = tcf_proto_lookup_ops(kind);
	if (!tp->ops) {
#ifdef CONFIG_MODULES
		rtnl_unlock();
		request_module("cls_%s", kind);
		rtnl_lock();
		tp->ops = tcf_proto_lookup_ops(kind);
		/* We dropped the RTNL semaphore in order to perform
		 * the module load. So, even if we succeeded in loading
		 * the module we have to replay the request. We indicate
		 * this using -EAGAIN.
		 */
		if (tp->ops) {
			module_put(tp->ops->owner);
			err = -EAGAIN;
		} else {
			err = -ENOENT;
		}
#endif
		goto errout;
	}
	tp->classify = tp->ops->classify;
	tp->protocol = protocol;
	tp->prio = prio;
	tp->classid = parent;
	tp->q = q;
	tp->chain = chain;

	err = tp->ops->init(tp);
	if (err) {
		module_put(tp->ops->owner);
		goto errout;
	}
	return tp;

errout:
	kfree(tp);
	return ERR_PTR(err);
}

static void tcf_proto_destroy(struct tcf_proto *tp)
{
	tp->ops->destroy(tp);
	module_put(tp->ops->owner);
	kfree_rcu(tp, rcu);
}
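
/* Create a chain with the given index and link it on the block's
 * chain_list; the caller owns the initial reference.
 */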
static struct tcf_chain *tcf_chain_create(struct tcf_block *block,
					  u32 chain_index)
{
	struct tcf_chain *chain;

	chain = kzalloc(sizeof(*chain), GFP_KERNEL);
	if (!chain)
		return NULL;
	list_add_tail(&chain->list, &block->chain_list);
	chain->block = block;
	chain->index = chain_index;
	chain->refcnt = 1;
	return chain;
}

static void tcf_chain_head_change(struct tcf_chain *chain,
				  struct tcf_proto *tp_head)
{
	if (chain->chain_head_change)
		chain->chain_head_change(tp_head,
					 chain->chain_head_change_priv);
}

static void tcf_chain_flush(struct tcf_chain *chain)
{
	struct tcf_proto *tp = rtnl_dereference(chain->filter_chain);

	tcf_chain_head_change(chain, NULL);
	while (tp) {
		RCU_INIT_POINTER(chain->filter_chain, tp->next);
		tcf_proto_destroy(tp);
		tp = rtnl_dereference(chain->filter_chain);
		tcf_chain_put(chain);
	}
}

static void tcf_chain_destroy(struct tcf_chain *chain)
{
	list_del(&chain->list);
	kfree(chain);
}

static void tcf_chain_hold(struct tcf_chain *chain)
{
	++chain->refcnt;
}
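
/* Look up a chain by index, taking a reference; optionally create it when
 * it does not exist yet. Caller is expected to hold RTNL.
 */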
struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
				bool create)
{
	struct tcf_chain *chain;

	list_for_each_entry(chain, &block->chain_list, list) {
		if (chain->index == chain_index) {
			tcf_chain_hold(chain);
			return chain;
		}
	}
	return create ? tcf_chain_create(block, chain_index) : NULL;
}
EXPORT_SYMBOL(tcf_chain_get);

void tcf_chain_put(struct tcf_chain *chain)
{
	if (--chain->refcnt == 0)
		tcf_chain_destroy(chain);
}
EXPORT_SYMBOL(tcf_chain_put);
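
/* Propagate block bind/unbind to the device through
 * ndo_setup_tc(TC_SETUP_BLOCK); drivers without the hook are skipped.
 */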
static void tcf_block_offload_cmd(struct tcf_block *block, struct Qdisc *q,
				  struct tcf_block_ext_info *ei,
				  enum tc_block_command command)
{
	struct net_device *dev = q->dev_queue->dev;
	struct tc_block_offload bo = {};

	if (!dev->netdev_ops->ndo_setup_tc)
		return;
	bo.command = command;
	bo.binder_type = ei->binder_type;
	bo.block = block;
	dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
}

static void tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
				   struct tcf_block_ext_info *ei)
{
	tcf_block_offload_cmd(block, q, ei, TC_BLOCK_BIND);
}

static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q,
				     struct tcf_block_ext_info *ei)
{
	tcf_block_offload_cmd(block, q, ei, TC_BLOCK_UNBIND);
}
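
/* Create a block for a qdisc, including the always-present chain 0, wire
 * up the head-change callback and offer the block to the driver for
 * offload binding.
 */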
int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
		      struct tcf_block_ext_info *ei)
{
	struct tcf_block *block = kzalloc(sizeof(*block), GFP_KERNEL);
	struct tcf_chain *chain;
	int err;

	if (!block)
		return -ENOMEM;
	INIT_LIST_HEAD(&block->chain_list);
	INIT_LIST_HEAD(&block->cb_list);

	/* Create chain 0 by default, it has to be always present. */
	chain = tcf_chain_create(block, 0);
	if (!chain) {
		err = -ENOMEM;
		goto err_chain_create;
	}
	WARN_ON(!ei->chain_head_change);
	chain->chain_head_change = ei->chain_head_change;
	chain->chain_head_change_priv = ei->chain_head_change_priv;
	block->net = qdisc_net(q);
	block->q = q;
	tcf_block_offload_bind(block, q, ei);
	*p_block = block;
	return 0;

err_chain_create:
	kfree(block);
	return err;
}
EXPORT_SYMBOL(tcf_block_get_ext);

static void tcf_chain_head_change_dflt(struct tcf_proto *tp_head, void *priv)
{
	struct tcf_proto __rcu **p_filter_chain = priv;

	rcu_assign_pointer(*p_filter_chain, tp_head);
}

int tcf_block_get(struct tcf_block **p_block,
		  struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q)
{
	struct tcf_block_ext_info ei = {
		.chain_head_change = tcf_chain_head_change_dflt,
		.chain_head_change_priv = p_filter_chain,
	};

	WARN_ON(!p_filter_chain);
	return tcf_block_get_ext(p_block, q, &ei);
}
EXPORT_SYMBOL(tcf_block_get);
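
/* Final stage of block teardown, run from tc_filter_wq once pending RCU
 * callbacks have finished: drop the remaining chain references under RTNL
 * and free the block itself.
 */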
static void tcf_block_put_final(struct work_struct *work)
{
	struct tcf_block *block = container_of(work, struct tcf_block, work);
	struct tcf_chain *chain, *tmp;

	rtnl_lock();
	/* At this point, all the chains should have refcnt == 1. */
	list_for_each_entry_safe(chain, tmp, &block->chain_list, list)
		tcf_chain_put(chain);
	rtnl_unlock();
	kfree(block);
}

/* XXX: Standalone actions are not allowed to jump to any chain, and bound
 * actions should be all removed after flushing.
 */
void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
		       struct tcf_block_ext_info *ei)
{
	struct tcf_chain *chain;

	if (!block)
		return;
	/* Hold a refcnt for all chains, except 0, so that they don't disappear
	 * while we are iterating.
	 */
	list_for_each_entry(chain, &block->chain_list, list)
		if (chain->index)
			tcf_chain_hold(chain);

	list_for_each_entry(chain, &block->chain_list, list)
		tcf_chain_flush(chain);

	tcf_block_offload_unbind(block, q, ei);

	INIT_WORK(&block->work, tcf_block_put_final);
	/* Wait for existing RCU callbacks to cool down, make sure their works
	 * have been queued before this. We can not flush pending works here
	 * because we are holding the RTNL lock.
	 */
	rcu_barrier();
	tcf_queue_work(&block->work);
}
EXPORT_SYMBOL(tcf_block_put_ext);

void tcf_block_put(struct tcf_block *block)
{
	struct tcf_block_ext_info ei = {0, };

	tcf_block_put_ext(block, block->q, &ei);
}
EXPORT_SYMBOL(tcf_block_put);
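
/* Offload subscribers register per-block callbacks; each one is
 * represented by a tcf_block_cb entry on block->cb_list and is invoked
 * through tcf_block_cb_call().
 */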
struct tcf_block_cb {
	struct list_head list;
	tc_setup_cb_t *cb;
	void *cb_ident;
	void *cb_priv;
	unsigned int refcnt;
};

void *tcf_block_cb_priv(struct tcf_block_cb *block_cb)
{
	return block_cb->cb_priv;
}
EXPORT_SYMBOL(tcf_block_cb_priv);

struct tcf_block_cb *tcf_block_cb_lookup(struct tcf_block *block,
					 tc_setup_cb_t *cb, void *cb_ident)
{
	struct tcf_block_cb *block_cb;

	list_for_each_entry(block_cb, &block->cb_list, list)
		if (block_cb->cb == cb && block_cb->cb_ident == cb_ident)
			return block_cb;
	return NULL;
}
EXPORT_SYMBOL(tcf_block_cb_lookup);

void tcf_block_cb_incref(struct tcf_block_cb *block_cb)
{
	block_cb->refcnt++;
}
EXPORT_SYMBOL(tcf_block_cb_incref);

unsigned int tcf_block_cb_decref(struct tcf_block_cb *block_cb)
{
	return --block_cb->refcnt;
}
EXPORT_SYMBOL(tcf_block_cb_decref);

struct tcf_block_cb *__tcf_block_cb_register(struct tcf_block *block,
					     tc_setup_cb_t *cb, void *cb_ident,
					     void *cb_priv)
{
	struct tcf_block_cb *block_cb;

	block_cb = kzalloc(sizeof(*block_cb), GFP_KERNEL);
	if (!block_cb)
		return NULL;
	block_cb->cb = cb;
	block_cb->cb_ident = cb_ident;
	block_cb->cb_priv = cb_priv;
	list_add(&block_cb->list, &block->cb_list);
	return block_cb;
}
EXPORT_SYMBOL(__tcf_block_cb_register);

int tcf_block_cb_register(struct tcf_block *block,
			  tc_setup_cb_t *cb, void *cb_ident,
			  void *cb_priv)
{
	struct tcf_block_cb *block_cb;

	block_cb = __tcf_block_cb_register(block, cb, cb_ident, cb_priv);
	return block_cb ? 0 : -ENOMEM;
}
EXPORT_SYMBOL(tcf_block_cb_register);

void __tcf_block_cb_unregister(struct tcf_block_cb *block_cb)
{
	list_del(&block_cb->list);
	kfree(block_cb);
}
EXPORT_SYMBOL(__tcf_block_cb_unregister);

void tcf_block_cb_unregister(struct tcf_block *block,
			     tc_setup_cb_t *cb, void *cb_ident)
{
	struct tcf_block_cb *block_cb;

	block_cb = tcf_block_cb_lookup(block, cb, cb_ident);
	if (!block_cb)
		return;
	__tcf_block_cb_unregister(block_cb);
}
EXPORT_SYMBOL(tcf_block_cb_unregister);
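
/* Invoke every callback registered on the block. Returns the number of
 * callbacks that succeeded or, when @err_stop is set, the first error.
 */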
static int tcf_block_cb_call(struct tcf_block *block, enum tc_setup_type type,
			     void *type_data, bool err_stop)
{
	struct tcf_block_cb *block_cb;
	int ok_count = 0;
	int err;

	list_for_each_entry(block_cb, &block->cb_list, list) {
		err = block_cb->cb(type, type_data, block_cb->cb_priv);
		if (err) {
			if (err_stop)
				return err;
		} else {
			ok_count++;
		}
	}
	return ok_count;
}

/* Main classifier routine: scans classifier chain attached
 * to this qdisc, (optionally) tests for protocol and asks
 * specific classifiers.
 */
int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
		 struct tcf_result *res, bool compat_mode)
{
	__be16 protocol = tc_skb_protocol(skb);
#ifdef CONFIG_NET_CLS_ACT
	const int max_reclassify_loop = 4;
	const struct tcf_proto *orig_tp = tp;
	const struct tcf_proto *first_tp;
	int limit = 0;

reclassify:
#endif
	for (; tp; tp = rcu_dereference_bh(tp->next)) {
		int err;

		if (tp->protocol != protocol &&
		    tp->protocol != htons(ETH_P_ALL))
			continue;

		err = tp->classify(skb, tp, res);
#ifdef CONFIG_NET_CLS_ACT
		if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode)) {
			first_tp = orig_tp;
			goto reset;
		} else if (unlikely(TC_ACT_EXT_CMP(err, TC_ACT_GOTO_CHAIN))) {
			first_tp = res->goto_tp;
			goto reset;
		}
#endif
		if (err >= 0)
			return err;
	}

	return TC_ACT_UNSPEC; /* signal: continue lookup */
#ifdef CONFIG_NET_CLS_ACT
reset:
	if (unlikely(limit++ >= max_reclassify_loop)) {
		net_notice_ratelimited("%s: reclassify loop, rule prio %u, protocol %02x\n",
				       tp->q->ops->id, tp->prio & 0xffff,
				       ntohs(tp->protocol));
		return TC_ACT_SHOT;
	}

	tp = first_tp;
	protocol = tc_skb_protocol(skb);
	goto reclassify;
#endif
}
EXPORT_SYMBOL(tcf_classify);
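
/* Cursor into a filter chain: pprev is the link to update on insertion or
 * removal, next is the element that follows it.
 */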
struct tcf_chain_info {
	struct tcf_proto __rcu **pprev;
	struct tcf_proto __rcu *next;
};

static struct tcf_proto *tcf_chain_tp_prev(struct tcf_chain_info *chain_info)
{
	return rtnl_dereference(*chain_info->pprev);
}

static void tcf_chain_tp_insert(struct tcf_chain *chain,
				struct tcf_chain_info *chain_info,
				struct tcf_proto *tp)
{
	if (*chain_info->pprev == chain->filter_chain)
		tcf_chain_head_change(chain, tp);
	RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain_info));
	rcu_assign_pointer(*chain_info->pprev, tp);
	tcf_chain_hold(chain);
}

static void tcf_chain_tp_remove(struct tcf_chain *chain,
				struct tcf_chain_info *chain_info,
				struct tcf_proto *tp)
{
	struct tcf_proto *next = rtnl_dereference(chain_info->next);

	if (tp == chain->filter_chain)
		tcf_chain_head_change(chain, next);
	RCU_INIT_POINTER(*chain_info->pprev, next);
	tcf_chain_put(chain);
}
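
/* Filters on a chain are kept sorted by ascending prio; the search also
 * records the insertion point in @chain_info for a later insert or remove.
 */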
static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
					   struct tcf_chain_info *chain_info,
					   u32 protocol, u32 prio,
					   bool prio_allocate)
{
	struct tcf_proto **pprev;
	struct tcf_proto *tp;

	/* Check the chain for existence of proto-tcf with this priority */
	for (pprev = &chain->filter_chain;
	     (tp = rtnl_dereference(*pprev)); pprev = &tp->next) {
		if (tp->prio >= prio) {
			if (tp->prio == prio) {
				if (prio_allocate ||
				    (tp->protocol != protocol && protocol))
					return ERR_PTR(-EINVAL);
			} else {
				tp = NULL;
			}
			break;
		}
	}
	chain_info->pprev = pprev;
	chain_info->next = tp ? tp->next : NULL;
	return tp;
}
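
/* Fill a netlink message with a single filter node; with a NULL @fh only
 * the tcm header is set up, otherwise the classifier's dump callback adds
 * its own attributes.
 */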
static int tcf_fill_node(struct net *net, struct sk_buff *skb,
			 struct tcf_proto *tp, struct Qdisc *q, u32 parent,
			 void *fh, u32 portid, u32 seq, u16 flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb_tail_pointer(skb);

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = parent;
	tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol);
	if (nla_put_string(skb, TCA_KIND, tp->ops->kind))
		goto nla_put_failure;
	if (nla_put_u32(skb, TCA_CHAIN, tp->chain->index))
		goto nla_put_failure;
	if (!fh) {
		tcm->tcm_handle = 0;
	} else {
		if (tp->ops->dump && tp->ops->dump(net, tp, fh, skb, tcm) < 0)
			goto nla_put_failure;
	}
	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static int tfilter_notify(struct net *net, struct sk_buff *oskb,
			  struct nlmsghdr *n, struct tcf_proto *tp,
			  struct Qdisc *q, u32 parent,
			  void *fh, int event, bool unicast)
{
	struct sk_buff *skb;
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tcf_fill_node(net, skb, tp, q, parent, fh, portid, n->nlmsg_seq,
			  n->nlmsg_flags, event) <= 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	if (unicast)
		return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);

	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
			      n->nlmsg_flags & NLM_F_ECHO);
}

static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
			      struct nlmsghdr *n, struct tcf_proto *tp,
			      struct Qdisc *q, u32 parent,
			      void *fh, bool unicast, bool *last)
{
	struct sk_buff *skb;
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
	int err;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tcf_fill_node(net, skb, tp, q, parent, fh, portid, n->nlmsg_seq,
			  n->nlmsg_flags, RTM_DELTFILTER) <= 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	err = tp->ops->delete(tp, fh, last);
	if (err) {
		kfree_skb(skb);
		return err;
	}

	if (unicast)
		return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);

	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
			      n->nlmsg_flags & NLM_F_ECHO);
}
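
/* Send a notification for every filter on the chain; used when a whole
 * chain is flushed.
 */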
static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
				 struct Qdisc *q, u32 parent,
				 struct nlmsghdr *n,
				 struct tcf_chain *chain, int event)
{
	struct tcf_proto *tp;

	for (tp = rtnl_dereference(chain->filter_chain);
	     tp; tp = rtnl_dereference(tp->next))
		tfilter_notify(net, oskb, n, tp, q, parent, 0, event, false);
}

/* Add/change/delete/get a filter node */

static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
			  struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tca[TCA_MAX + 1];
	struct tcmsg *t;
	u32 protocol;
	u32 prio;
	bool prio_allocate;
	u32 parent;
	u32 chain_index;
	struct net_device *dev;
	struct Qdisc *q;
	struct tcf_chain_info chain_info;
	struct tcf_chain *chain = NULL;
	struct tcf_block *block;
	struct tcf_proto *tp;
	const struct Qdisc_class_ops *cops;
	unsigned long cl;
	void *fh;
	int err;
	int tp_created;

	if ((n->nlmsg_type != RTM_GETTFILTER) &&
	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

replay:
	tp_created = 0;

	err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL, extack);
	if (err < 0)
		return err;

	t = nlmsg_data(n);
	protocol = TC_H_MIN(t->tcm_info);
	prio = TC_H_MAJ(t->tcm_info);
	prio_allocate = false;
	parent = t->tcm_parent;
	cl = 0;

	if (prio == 0) {
		switch (n->nlmsg_type) {
		case RTM_DELTFILTER:
			if (protocol || t->tcm_handle || tca[TCA_KIND])
				return -ENOENT;
			break;
		case RTM_NEWTFILTER:
			/* If no priority is provided by the user,
			 * we allocate one.
			 */
			if (n->nlmsg_flags & NLM_F_CREATE) {
				prio = TC_H_MAKE(0x80000000U, 0U);
				prio_allocate = true;
				break;
			}
			/* fall-through */
		default:
			return -ENOENT;
		}
	}

	/* Find head of filter chain. */

	/* Find link */
	dev = __dev_get_by_index(net, t->tcm_ifindex);
	if (dev == NULL)
		return -ENODEV;

	/* Find qdisc */
	if (!parent) {
		q = dev->qdisc;
		parent = q->handle;
	} else {
		q = qdisc_lookup(dev, TC_H_MAJ(t->tcm_parent));
		if (q == NULL)
			return -EINVAL;
	}

	/* Is it classful? */
	cops = q->ops->cl_ops;
	if (!cops)
		return -EINVAL;

	if (!cops->tcf_block)
		return -EOPNOTSUPP;

	/* Do we search for filter, attached to class? */
	if (TC_H_MIN(parent)) {
		cl = cops->find(q, parent);
		if (cl == 0)
			return -ENOENT;
	}

	/* And the last stroke */
	block = cops->tcf_block(q, cl);
	if (!block) {
		err = -EINVAL;
		goto errout;
	}

	chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
	if (chain_index > TC_ACT_EXT_VAL_MASK) {
		err = -EINVAL;
		goto errout;
	}
	chain = tcf_chain_get(block, chain_index,
			      n->nlmsg_type == RTM_NEWTFILTER);
	if (!chain) {
		err = n->nlmsg_type == RTM_NEWTFILTER ? -ENOMEM : -EINVAL;
		goto errout;
	}

	if (n->nlmsg_type == RTM_DELTFILTER && prio == 0) {
		tfilter_notify_chain(net, skb, q, parent, n,
				     chain, RTM_DELTFILTER);
		tcf_chain_flush(chain);
		err = 0;
		goto errout;
	}

	tp = tcf_chain_tp_find(chain, &chain_info, protocol,
			       prio, prio_allocate);
	if (IS_ERR(tp)) {
		err = PTR_ERR(tp);
		goto errout;
	}

	if (tp == NULL) {
		/* Proto-tcf does not exist, create new one */

		if (tca[TCA_KIND] == NULL || !protocol) {
			err = -EINVAL;
			goto errout;
		}

		if (n->nlmsg_type != RTM_NEWTFILTER ||
		    !(n->nlmsg_flags & NLM_F_CREATE)) {
			err = -ENOENT;
			goto errout;
		}

		if (prio_allocate)
			prio = tcf_auto_prio(tcf_chain_tp_prev(&chain_info));

		tp = tcf_proto_create(nla_data(tca[TCA_KIND]),
				      protocol, prio, parent, q, chain);
		if (IS_ERR(tp)) {
			err = PTR_ERR(tp);
			goto errout;
		}
		tp_created = 1;
	} else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
		err = -EINVAL;
		goto errout;
	}

	fh = tp->ops->get(tp, t->tcm_handle);

	if (!fh) {
		if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) {
			tcf_chain_tp_remove(chain, &chain_info, tp);
			tfilter_notify(net, skb, n, tp, q, parent, fh,
				       RTM_DELTFILTER, false);
			tcf_proto_destroy(tp);
			err = 0;
			goto errout;
		}

		if (n->nlmsg_type != RTM_NEWTFILTER ||
		    !(n->nlmsg_flags & NLM_F_CREATE)) {
			err = -ENOENT;
			goto errout;
		}
	} else {
		bool last;

		switch (n->nlmsg_type) {
		case RTM_NEWTFILTER:
			if (n->nlmsg_flags & NLM_F_EXCL) {
				if (tp_created)
					tcf_proto_destroy(tp);
				err = -EEXIST;
				goto errout;
			}
			break;
		case RTM_DELTFILTER:
			err = tfilter_del_notify(net, skb, n, tp, q, parent,
						 fh, false, &last);
			if (err)
				goto errout;
			if (last) {
				tcf_chain_tp_remove(chain, &chain_info, tp);
				tcf_proto_destroy(tp);
			}
			goto errout;
		case RTM_GETTFILTER:
			err = tfilter_notify(net, skb, n, tp, q, parent, fh,
					     RTM_NEWTFILTER, true);
			goto errout;
		default:
			err = -EINVAL;
			goto errout;
		}
	}

	err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
			      n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE :
							      TCA_ACT_REPLACE);
	if (err == 0) {
		if (tp_created)
			tcf_chain_tp_insert(chain, &chain_info, tp);
		tfilter_notify(net, skb, n, tp, q, parent, fh,
			       RTM_NEWTFILTER, false);
	} else {
		if (tp_created)
			tcf_proto_destroy(tp);
	}

errout:
	if (chain)
		tcf_chain_put(chain);
	if (err == -EAGAIN)
		/* Replay the request. */
		goto replay;
	return err;
}

struct tcf_dump_args {
	struct tcf_walker w;
	struct sk_buff *skb;
	struct netlink_callback *cb;
	struct Qdisc *q;
	u32 parent;
};

static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
{
	struct tcf_dump_args *a = (void *)arg;
	struct net *net = sock_net(a->skb->sk);

	return tcf_fill_node(net, a->skb, tp, a->q, a->parent,
			     n, NETLINK_CB(a->cb->skb).portid,
			     a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
			     RTM_NEWTFILTER);
}

static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
			   struct sk_buff *skb, struct netlink_callback *cb,
			   long index_start, long *p_index)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = nlmsg_data(cb->nlh);
	struct tcf_dump_args arg;
	struct tcf_proto *tp;

	for (tp = rtnl_dereference(chain->filter_chain);
	     tp; tp = rtnl_dereference(tp->next), (*p_index)++) {
		if (*p_index < index_start)
			continue;
		if (TC_H_MAJ(tcm->tcm_info) &&
		    TC_H_MAJ(tcm->tcm_info) != tp->prio)
			continue;
		if (TC_H_MIN(tcm->tcm_info) &&
		    TC_H_MIN(tcm->tcm_info) != tp->protocol)
			continue;
		if (*p_index > index_start)
			memset(&cb->args[1], 0,
			       sizeof(cb->args) - sizeof(cb->args[0]));
		if (cb->args[1] == 0) {
			if (tcf_fill_node(net, skb, tp, q, parent, 0,
					  NETLINK_CB(cb->skb).portid,
					  cb->nlh->nlmsg_seq, NLM_F_MULTI,
					  RTM_NEWTFILTER) <= 0)
				return false;

			cb->args[1] = 1;
		}
		if (!tp->ops->walk)
			continue;
		arg.w.fn = tcf_node_dump;
		arg.skb = skb;
		arg.cb = cb;
		arg.q = q;
		arg.parent = parent;
		arg.w.stop = 0;
		arg.w.skip = cb->args[1] - 1;
		arg.w.count = 0;
		tp->ops->walk(tp, &arg.w);
		cb->args[1] = arg.w.count + 1;
		if (arg.w.stop)
			return false;
	}
	return true;
}

/* called with RTNL */
static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	struct Qdisc *q;
	struct tcf_block *block;
	struct tcf_chain *chain;
	struct tcmsg *tcm = nlmsg_data(cb->nlh);
	unsigned long cl = 0;
	const struct Qdisc_class_ops *cops;
	long index_start;
	long index;
	u32 parent;
	int err;

	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
		return skb->len;

	err = nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, NULL, NULL);
	if (err)
		return err;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return skb->len;

	parent = tcm->tcm_parent;
	if (!parent) {
		q = dev->qdisc;
		parent = q->handle;
	} else {
		q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
	}
	if (!q)
		goto out;
	cops = q->ops->cl_ops;
	if (!cops)
		goto out;
	if (!cops->tcf_block)
		goto out;
	if (TC_H_MIN(tcm->tcm_parent)) {
		cl = cops->find(q, tcm->tcm_parent);
		if (cl == 0)
			goto out;
	}
	block = cops->tcf_block(q, cl);
	if (!block)
		goto out;

	index_start = cb->args[0];
	index = 0;

	list_for_each_entry(chain, &block->chain_list, list) {
		if (tca[TCA_CHAIN] &&
		    nla_get_u32(tca[TCA_CHAIN]) != chain->index)
			continue;
		if (!tcf_chain_dump(chain, q, parent, skb, cb,
				    index_start, &index))
			break;
	}

	cb->args[0] = index;

out:
	return skb->len;
}
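
/* Release all actions held by @exts and free the action array. */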
void tcf_exts_destroy(struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
	LIST_HEAD(actions);

	ASSERT_RTNL();
	tcf_exts_to_list(exts, &actions);
	tcf_action_destroy(&actions, TCA_ACT_UNBIND);
	kfree(exts->actions);
	exts->nr_actions = 0;
#endif
}
EXPORT_SYMBOL(tcf_exts_destroy);
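
/* Parse the action attributes in @tb into @exts, binding the actions; the
 * old single-police attribute keeps working through TCA_OLD_COMPAT.
 */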
int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
		      struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr)
{
#ifdef CONFIG_NET_CLS_ACT
	{
		struct tc_action *act;

		if (exts->police && tb[exts->police]) {
			act = tcf_action_init_1(net, tp, tb[exts->police],
						rate_tlv, "police", ovr,
						TCA_ACT_BIND);
			if (IS_ERR(act))
				return PTR_ERR(act);

			act->type = exts->type = TCA_OLD_COMPAT;
			exts->actions[0] = act;
			exts->nr_actions = 1;
		} else if (exts->action && tb[exts->action]) {
			LIST_HEAD(actions);
			int err, i = 0;

			err = tcf_action_init(net, tp, tb[exts->action],
					      rate_tlv, NULL, ovr, TCA_ACT_BIND,
					      &actions);
			if (err)
				return err;
			list_for_each_entry(act, &actions, list)
				exts->actions[i++] = act;
			exts->nr_actions = i;
		}
		exts->net = net;
	}
#else
	if ((exts->action && tb[exts->action]) ||
	    (exts->police && tb[exts->police]))
		return -EOPNOTSUPP;
#endif

	return 0;
}
EXPORT_SYMBOL(tcf_exts_validate);

void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src)
{
#ifdef CONFIG_NET_CLS_ACT
	struct tcf_exts old = *dst;

	*dst = *src;
	tcf_exts_destroy(&old);
#endif
}
EXPORT_SYMBOL(tcf_exts_change);

#ifdef CONFIG_NET_CLS_ACT
static struct tc_action *tcf_exts_first_act(struct tcf_exts *exts)
{
	if (exts->nr_actions == 0)
		return NULL;
	else
		return exts->actions[0];
}
#endif

int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
	struct nlattr *nest;

	if (exts->action && tcf_exts_has_actions(exts)) {
		/*
		 * again for backward compatible mode - we want
		 * to work with both old and new modes of entering
		 * tc data even if iproute2 was newer - jhs
		 */
		if (exts->type != TCA_OLD_COMPAT) {
			LIST_HEAD(actions);

			nest = nla_nest_start(skb, exts->action);
			if (nest == NULL)
				goto nla_put_failure;

			tcf_exts_to_list(exts, &actions);
			if (tcf_action_dump(skb, &actions, 0, 0) < 0)
				goto nla_put_failure;
			nla_nest_end(skb, nest);
		} else if (exts->police) {
			struct tc_action *act = tcf_exts_first_act(exts);

			nest = nla_nest_start(skb, exts->police);
			if (nest == NULL || !act)
				goto nla_put_failure;
			if (tcf_action_dump_old(skb, act, 0, 0) < 0)
				goto nla_put_failure;
			nla_nest_end(skb, nest);
		}
	}
	return 0;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
#else
	return 0;
#endif
}
EXPORT_SYMBOL(tcf_exts_dump);

int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
	struct tc_action *a = tcf_exts_first_act(exts);

	if (a != NULL && tcf_action_copy_stats(skb, a, 1) < 0)
		return -1;
#endif
	return 0;
}
EXPORT_SYMBOL(tcf_exts_dump_stats);
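
/* Mirror a setup command to the egress device of every action that
 * exposes one via ->get_dev(), for egress-device based offload.
 */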
static int tc_exts_setup_cb_egdev_call(struct tcf_exts *exts,
				       enum tc_setup_type type,
				       void *type_data, bool err_stop)
{
	int ok_count = 0;
#ifdef CONFIG_NET_CLS_ACT
	const struct tc_action *a;
	struct net_device *dev;
	int i, ret;

	if (!tcf_exts_has_actions(exts))
		return 0;

	for (i = 0; i < exts->nr_actions; i++) {
		a = exts->actions[i];
		if (!a->ops->get_dev)
			continue;
		dev = a->ops->get_dev(a);
		if (!dev)
			continue;
		ret = tc_setup_cb_egdev_call(dev, type, type_data, err_stop);
		if (ret < 0)
			return ret;
		ok_count += ret;
	}
#endif
	return ok_count;
}
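
/* Run a setup command through both the block callbacks and the egdev
 * callbacks of the exts' actions; the combined success count is returned,
 * or the first error when @err_stop is set.
 */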
int tc_setup_cb_call(struct tcf_block *block, struct tcf_exts *exts,
		     enum tc_setup_type type, void *type_data, bool err_stop)
{
	int ok_count;
	int ret;

	ret = tcf_block_cb_call(block, type, type_data, err_stop);
	if (ret < 0)
		return ret;
	ok_count = ret;

	if (!exts)
		return ok_count;
	ret = tc_exts_setup_cb_egdev_call(exts, type, type_data, err_stop);
	if (ret < 0)
		return ret;
	ok_count += ret;

	return ok_count;
}
EXPORT_SYMBOL(tc_setup_cb_call);

static int __init tc_filter_init(void)
{
	tc_filter_wq = alloc_ordered_workqueue("tc_filter_workqueue", 0);
	if (!tc_filter_wq)
		return -ENOMEM;

	rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_ctl_tfilter, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_ctl_tfilter,
		      tc_dump_tfilter, 0);

	return 0;
}

subsys_initcall(tc_filter_init);