cls_api.c 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262
  1. /*
  2. * net/sched/cls_api.c Packet classifier API.
  3. *
  4. * This program is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU General Public License
  6. * as published by the Free Software Foundation; either version
  7. * 2 of the License, or (at your option) any later version.
  8. *
  9. * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  10. *
  11. * Changes:
  12. *
  13. * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
  14. *
  15. */
  16. #include <linux/module.h>
  17. #include <linux/types.h>
  18. #include <linux/kernel.h>
  19. #include <linux/string.h>
  20. #include <linux/errno.h>
  21. #include <linux/err.h>
  22. #include <linux/skbuff.h>
  23. #include <linux/init.h>
  24. #include <linux/kmod.h>
  25. #include <linux/slab.h>
  26. #include <net/net_namespace.h>
  27. #include <net/sock.h>
  28. #include <net/netlink.h>
  29. #include <net/pkt_sched.h>
  30. #include <net/pkt_cls.h>
  31. /* The list of all installed classifier types */
  32. static LIST_HEAD(tcf_proto_base);
  33. /* Protects list of registered TC modules. It is pure SMP lock. */
  34. static DEFINE_RWLOCK(cls_mod_lock);
  35. /* Find classifier type by string name */
  36. static const struct tcf_proto_ops *tcf_proto_lookup_ops(const char *kind)
  37. {
  38. const struct tcf_proto_ops *t, *res = NULL;
  39. if (kind) {
  40. read_lock(&cls_mod_lock);
  41. list_for_each_entry(t, &tcf_proto_base, head) {
  42. if (strcmp(kind, t->kind) == 0) {
  43. if (try_module_get(t->owner))
  44. res = t;
  45. break;
  46. }
  47. }
  48. read_unlock(&cls_mod_lock);
  49. }
  50. return res;
  51. }
  52. /* Register(unregister) new classifier type */
  53. int register_tcf_proto_ops(struct tcf_proto_ops *ops)
  54. {
  55. struct tcf_proto_ops *t;
  56. int rc = -EEXIST;
  57. write_lock(&cls_mod_lock);
  58. list_for_each_entry(t, &tcf_proto_base, head)
  59. if (!strcmp(ops->kind, t->kind))
  60. goto out;
  61. list_add_tail(&ops->head, &tcf_proto_base);
  62. rc = 0;
  63. out:
  64. write_unlock(&cls_mod_lock);
  65. return rc;
  66. }
  67. EXPORT_SYMBOL(register_tcf_proto_ops);
  68. static struct workqueue_struct *tc_filter_wq;
  69. int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
  70. {
  71. struct tcf_proto_ops *t;
  72. int rc = -ENOENT;
  73. /* Wait for outstanding call_rcu()s, if any, from a
  74. * tcf_proto_ops's destroy() handler.
  75. */
  76. rcu_barrier();
  77. flush_workqueue(tc_filter_wq);
  78. write_lock(&cls_mod_lock);
  79. list_for_each_entry(t, &tcf_proto_base, head) {
  80. if (t == ops) {
  81. list_del(&t->head);
  82. rc = 0;
  83. break;
  84. }
  85. }
  86. write_unlock(&cls_mod_lock);
  87. return rc;
  88. }
  89. EXPORT_SYMBOL(unregister_tcf_proto_ops);
  90. bool tcf_queue_work(struct work_struct *work)
  91. {
  92. return queue_work(tc_filter_wq, work);
  93. }
  94. EXPORT_SYMBOL(tcf_queue_work);
  95. /* Select new prio value from the range, managed by kernel. */
  96. static inline u32 tcf_auto_prio(struct tcf_proto *tp)
  97. {
  98. u32 first = TC_H_MAKE(0xC0000000U, 0U);
  99. if (tp)
  100. first = tp->prio - 1;
  101. return TC_H_MAJ(first);
  102. }
  103. static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
  104. u32 prio, u32 parent, struct Qdisc *q,
  105. struct tcf_chain *chain)
  106. {
  107. struct tcf_proto *tp;
  108. int err;
  109. tp = kzalloc(sizeof(*tp), GFP_KERNEL);
  110. if (!tp)
  111. return ERR_PTR(-ENOBUFS);
  112. err = -ENOENT;
  113. tp->ops = tcf_proto_lookup_ops(kind);
  114. if (!tp->ops) {
  115. #ifdef CONFIG_MODULES
  116. rtnl_unlock();
  117. request_module("cls_%s", kind);
  118. rtnl_lock();
  119. tp->ops = tcf_proto_lookup_ops(kind);
  120. /* We dropped the RTNL semaphore in order to perform
  121. * the module load. So, even if we succeeded in loading
  122. * the module we have to replay the request. We indicate
  123. * this using -EAGAIN.
  124. */
  125. if (tp->ops) {
  126. module_put(tp->ops->owner);
  127. err = -EAGAIN;
  128. } else {
  129. err = -ENOENT;
  130. }
  131. goto errout;
  132. #endif
  133. }
  134. tp->classify = tp->ops->classify;
  135. tp->protocol = protocol;
  136. tp->prio = prio;
  137. tp->classid = parent;
  138. tp->q = q;
  139. tp->chain = chain;
  140. err = tp->ops->init(tp);
  141. if (err) {
  142. module_put(tp->ops->owner);
  143. goto errout;
  144. }
  145. return tp;
  146. errout:
  147. kfree(tp);
  148. return ERR_PTR(err);
  149. }
/* Tear down one classifier instance: run the type-specific destroy
 * hook, drop the module reference taken at creation time, and free
 * the tcf_proto only after an RCU grace period — concurrent readers
 * may still be walking this proto via tp->next.
 */
static void tcf_proto_destroy(struct tcf_proto *tp)
{
	tp->ops->destroy(tp);
	module_put(tp->ops->owner);
	kfree_rcu(tp, rcu);
}
  156. static struct tcf_chain *tcf_chain_create(struct tcf_block *block,
  157. u32 chain_index)
  158. {
  159. struct tcf_chain *chain;
  160. chain = kzalloc(sizeof(*chain), GFP_KERNEL);
  161. if (!chain)
  162. return NULL;
  163. list_add_tail(&chain->list, &block->chain_list);
  164. chain->block = block;
  165. chain->index = chain_index;
  166. chain->refcnt = 1;
  167. return chain;
  168. }
  169. static void tcf_chain_head_change(struct tcf_chain *chain,
  170. struct tcf_proto *tp_head)
  171. {
  172. if (chain->chain_head_change)
  173. chain->chain_head_change(tp_head,
  174. chain->chain_head_change_priv);
  175. }
/* Destroy every filter on @chain. The head-change callback is fired
 * first with NULL so fast-path users stop seeing the list before the
 * protos are torn down.
 */
static void tcf_chain_flush(struct tcf_chain *chain)
{
	struct tcf_proto *tp = rtnl_dereference(chain->filter_chain);

	tcf_chain_head_change(chain, NULL);
	while (tp) {
		RCU_INIT_POINTER(chain->filter_chain, tp->next);
		tcf_proto_destroy(tp);
		tp = rtnl_dereference(chain->filter_chain);
		/* Drop the chain reference that was taken for this
		 * proto when it was inserted (tcf_chain_tp_insert).
		 */
		tcf_chain_put(chain);
	}
}
/* Free @chain after its refcount hit zero (called from tcf_chain_put).
 * The owning block's lifetime is tied to its chain list: the last
 * chain to go frees the block as well.
 */
static void tcf_chain_destroy(struct tcf_chain *chain)
{
	struct tcf_block *block = chain->block;

	list_del(&chain->list);
	kfree(chain);
	/* Last chain out frees the block itself. */
	if (list_empty(&block->chain_list))
		kfree(block);
}
  195. static void tcf_chain_hold(struct tcf_chain *chain)
  196. {
  197. ++chain->refcnt;
  198. }
  199. struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
  200. bool create)
  201. {
  202. struct tcf_chain *chain;
  203. list_for_each_entry(chain, &block->chain_list, list) {
  204. if (chain->index == chain_index) {
  205. tcf_chain_hold(chain);
  206. return chain;
  207. }
  208. }
  209. return create ? tcf_chain_create(block, chain_index) : NULL;
  210. }
  211. EXPORT_SYMBOL(tcf_chain_get);
  212. void tcf_chain_put(struct tcf_chain *chain)
  213. {
  214. if (--chain->refcnt == 0)
  215. tcf_chain_destroy(chain);
  216. }
  217. EXPORT_SYMBOL(tcf_chain_put);
  218. static void tcf_block_offload_cmd(struct tcf_block *block, struct Qdisc *q,
  219. struct tcf_block_ext_info *ei,
  220. enum tc_block_command command)
  221. {
  222. struct net_device *dev = q->dev_queue->dev;
  223. struct tc_block_offload bo = {};
  224. if (!dev->netdev_ops->ndo_setup_tc)
  225. return;
  226. bo.command = command;
  227. bo.binder_type = ei->binder_type;
  228. bo.block = block;
  229. dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
  230. }
  231. static void tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
  232. struct tcf_block_ext_info *ei)
  233. {
  234. tcf_block_offload_cmd(block, q, ei, TC_BLOCK_BIND);
  235. }
  236. static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q,
  237. struct tcf_block_ext_info *ei)
  238. {
  239. tcf_block_offload_cmd(block, q, ei, TC_BLOCK_UNBIND);
  240. }
/* Allocate a new tcf_block for qdisc @q and bind it to the device
 * offload path. Chain 0 is created eagerly because it must always be
 * present. On success the block is returned through @p_block; on
 * failure the partially-built block is freed.
 */
int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
		      struct tcf_block_ext_info *ei)
{
	struct tcf_block *block = kzalloc(sizeof(*block), GFP_KERNEL);
	struct tcf_chain *chain;
	int err;

	if (!block)
		return -ENOMEM;
	INIT_LIST_HEAD(&block->chain_list);
	INIT_LIST_HEAD(&block->cb_list);

	/* Create chain 0 by default, it has to be always present. */
	chain = tcf_chain_create(block, 0);
	if (!chain) {
		err = -ENOMEM;
		goto err_chain_create;
	}
	/* Callers are expected to always supply a head-change callback. */
	WARN_ON(!ei->chain_head_change);
	chain->chain_head_change = ei->chain_head_change;
	chain->chain_head_change_priv = ei->chain_head_change_priv;
	block->net = qdisc_net(q);
	block->q = q;
	tcf_block_offload_bind(block, q, ei);
	*p_block = block;
	return 0;

err_chain_create:
	kfree(block);
	return err;
}
EXPORT_SYMBOL(tcf_block_get_ext);
/* Default head-change callback used by tcf_block_get(): publish the
 * new head proto into the caller-provided RCU pointer.
 */
static void tcf_chain_head_change_dflt(struct tcf_proto *tp_head, void *priv)
{
	struct tcf_proto __rcu **p_filter_chain = priv;

	rcu_assign_pointer(*p_filter_chain, tp_head);
}
/* Convenience wrapper around tcf_block_get_ext() that wires the
 * block's chain-0 head straight into @p_filter_chain via the default
 * head-change callback.
 */
int tcf_block_get(struct tcf_block **p_block,
		  struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q)
{
	struct tcf_block_ext_info ei = {
		.chain_head_change = tcf_chain_head_change_dflt,
		.chain_head_change_priv = p_filter_chain,
	};

	WARN_ON(!p_filter_chain);
	return tcf_block_get_ext(p_block, q, &ei);
}
EXPORT_SYMBOL(tcf_block_get);
/* XXX: Standalone actions are not allowed to jump to any chain, and bound
 * actions should be all removed after flushing.
 */
void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
		       struct tcf_block_ext_info *ei)
{
	struct tcf_chain *chain, *tmp;

	/* Hold a refcnt for all chains, so that they don't disappear
	 * while we are iterating.
	 */
	if (!block)
		return;
	list_for_each_entry(chain, &block->chain_list, list)
		tcf_chain_hold(chain);

	/* Flushing drops the per-proto chain references, which may
	 * take each chain down to the single ref we hold above.
	 */
	list_for_each_entry(chain, &block->chain_list, list)
		tcf_chain_flush(chain);

	tcf_block_offload_unbind(block, q, ei);

	/* At this point, all the chains should have refcnt >= 1. */
	list_for_each_entry_safe(chain, tmp, &block->chain_list, list)
		tcf_chain_put(chain);

	/* Finally, put chain 0 and allow block to be freed. */
	chain = list_first_entry(&block->chain_list, struct tcf_chain, list);
	tcf_chain_put(chain);
}
EXPORT_SYMBOL(tcf_block_put_ext);
  311. void tcf_block_put(struct tcf_block *block)
  312. {
  313. struct tcf_block_ext_info ei = {0, };
  314. tcf_block_put_ext(block, block->q, &ei);
  315. }
  316. EXPORT_SYMBOL(tcf_block_put);
/* One registered per-block offload callback. */
struct tcf_block_cb {
	struct list_head list;	/* node on tcf_block->cb_list */
	tc_setup_cb_t *cb;	/* driver callback */
	void *cb_ident;		/* identity cookie for lookup */
	void *cb_priv;		/* opaque argument passed to cb */
	unsigned int refcnt;	/* managed via incref/decref helpers */
};
  324. void *tcf_block_cb_priv(struct tcf_block_cb *block_cb)
  325. {
  326. return block_cb->cb_priv;
  327. }
  328. EXPORT_SYMBOL(tcf_block_cb_priv);
  329. struct tcf_block_cb *tcf_block_cb_lookup(struct tcf_block *block,
  330. tc_setup_cb_t *cb, void *cb_ident)
  331. { struct tcf_block_cb *block_cb;
  332. list_for_each_entry(block_cb, &block->cb_list, list)
  333. if (block_cb->cb == cb && block_cb->cb_ident == cb_ident)
  334. return block_cb;
  335. return NULL;
  336. }
  337. EXPORT_SYMBOL(tcf_block_cb_lookup);
  338. void tcf_block_cb_incref(struct tcf_block_cb *block_cb)
  339. {
  340. block_cb->refcnt++;
  341. }
  342. EXPORT_SYMBOL(tcf_block_cb_incref);
  343. unsigned int tcf_block_cb_decref(struct tcf_block_cb *block_cb)
  344. {
  345. return --block_cb->refcnt;
  346. }
  347. EXPORT_SYMBOL(tcf_block_cb_decref);
  348. struct tcf_block_cb *__tcf_block_cb_register(struct tcf_block *block,
  349. tc_setup_cb_t *cb, void *cb_ident,
  350. void *cb_priv)
  351. {
  352. struct tcf_block_cb *block_cb;
  353. block_cb = kzalloc(sizeof(*block_cb), GFP_KERNEL);
  354. if (!block_cb)
  355. return NULL;
  356. block_cb->cb = cb;
  357. block_cb->cb_ident = cb_ident;
  358. block_cb->cb_priv = cb_priv;
  359. list_add(&block_cb->list, &block->cb_list);
  360. return block_cb;
  361. }
  362. EXPORT_SYMBOL(__tcf_block_cb_register);
  363. int tcf_block_cb_register(struct tcf_block *block,
  364. tc_setup_cb_t *cb, void *cb_ident,
  365. void *cb_priv)
  366. {
  367. struct tcf_block_cb *block_cb;
  368. block_cb = __tcf_block_cb_register(block, cb, cb_ident, cb_priv);
  369. return block_cb ? 0 : -ENOMEM;
  370. }
  371. EXPORT_SYMBOL(tcf_block_cb_register);
  372. void __tcf_block_cb_unregister(struct tcf_block_cb *block_cb)
  373. {
  374. list_del(&block_cb->list);
  375. kfree(block_cb);
  376. }
  377. EXPORT_SYMBOL(__tcf_block_cb_unregister);
  378. void tcf_block_cb_unregister(struct tcf_block *block,
  379. tc_setup_cb_t *cb, void *cb_ident)
  380. {
  381. struct tcf_block_cb *block_cb;
  382. block_cb = tcf_block_cb_lookup(block, cb, cb_ident);
  383. if (!block_cb)
  384. return;
  385. __tcf_block_cb_unregister(block_cb);
  386. }
  387. EXPORT_SYMBOL(tcf_block_cb_unregister);
  388. static int tcf_block_cb_call(struct tcf_block *block, enum tc_setup_type type,
  389. void *type_data, bool err_stop)
  390. {
  391. struct tcf_block_cb *block_cb;
  392. int ok_count = 0;
  393. int err;
  394. list_for_each_entry(block_cb, &block->cb_list, list) {
  395. err = block_cb->cb(type, type_data, block_cb->cb_priv);
  396. if (err) {
  397. if (err_stop)
  398. return err;
  399. } else {
  400. ok_count++;
  401. }
  402. }
  403. return ok_count;
  404. }
/* Main classifier routine: scans classifier chain attached
 * to this qdisc, (optionally) tests for protocol and asks
 * specific classifiers.
 *
 * Returns the classifier verdict, or TC_ACT_UNSPEC when no filter
 * matched. With NET_CLS_ACT, TC_ACT_RECLASSIFY and goto-chain results
 * restart the walk, bounded by max_reclassify_loop to avoid loops.
 */
int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
		 struct tcf_result *res, bool compat_mode)
{
	__be16 protocol = tc_skb_protocol(skb);
#ifdef CONFIG_NET_CLS_ACT
	const int max_reclassify_loop = 4;
	const struct tcf_proto *orig_tp = tp;
	const struct tcf_proto *first_tp;
	int limit = 0;

reclassify:
#endif
	for (; tp; tp = rcu_dereference_bh(tp->next)) {
		int err;

		/* Skip filters bound to a different protocol, unless
		 * the filter matches all protocols (ETH_P_ALL).
		 */
		if (tp->protocol != protocol &&
		    tp->protocol != htons(ETH_P_ALL))
			continue;

		err = tp->classify(skb, tp, res);
#ifdef CONFIG_NET_CLS_ACT
		if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode)) {
			/* Restart from the original chain head. */
			first_tp = orig_tp;
			goto reset;
		} else if (unlikely(TC_ACT_EXT_CMP(err, TC_ACT_GOTO_CHAIN))) {
			/* Jump to the chain the action selected. */
			first_tp = res->goto_tp;
			goto reset;
		}
#endif
		if (err >= 0)
			return err;
	}

	return TC_ACT_UNSPEC; /* signal: continue lookup */
#ifdef CONFIG_NET_CLS_ACT
reset:
	if (unlikely(limit++ >= max_reclassify_loop)) {
		net_notice_ratelimited("%s: reclassify loop, rule prio %u, protocol %02x\n",
				       tp->q->ops->id, tp->prio & 0xffff,
				       ntohs(tp->protocol));
		return TC_ACT_SHOT;
	}

	tp = first_tp;
	protocol = tc_skb_protocol(skb);
	goto reclassify;
#endif
}
EXPORT_SYMBOL(tcf_classify);
/* Cursor into a chain's filter list as computed by tcf_chain_tp_find():
 * the link to update for insert/remove, and the following proto.
 */
struct tcf_chain_info {
	struct tcf_proto __rcu **pprev;	/* link pointing at found position */
	struct tcf_proto __rcu *next;	/* proto after the found position */
};
/* Dereference the proto currently at the cursor position (RTNL held). */
static struct tcf_proto *tcf_chain_tp_prev(struct tcf_chain_info *chain_info)
{
	return rtnl_dereference(*chain_info->pprev);
}
/* Splice @tp into the chain at the position recorded in @chain_info.
 * tp->next must be set before the rcu_assign_pointer() publish so RCU
 * readers never see a half-linked node. Each linked proto holds one
 * chain reference (released in tcf_chain_tp_remove/tcf_chain_flush).
 */
static void tcf_chain_tp_insert(struct tcf_chain *chain,
				struct tcf_chain_info *chain_info,
				struct tcf_proto *tp)
{
	/* Inserting at the head: notify the chain owner of the new head. */
	if (*chain_info->pprev == chain->filter_chain)
		tcf_chain_head_change(chain, tp);
	RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain_info));
	rcu_assign_pointer(*chain_info->pprev, tp);
	tcf_chain_hold(chain);
}
/* Unsplice @tp from the chain at the position in @chain_info and drop
 * the chain reference the proto held. If @tp was the head, the owner
 * is notified of the new head first.
 */
static void tcf_chain_tp_remove(struct tcf_chain *chain,
				struct tcf_chain_info *chain_info,
				struct tcf_proto *tp)
{
	struct tcf_proto *next = rtnl_dereference(chain_info->next);

	if (tp == chain->filter_chain)
		tcf_chain_head_change(chain, next);
	RCU_INIT_POINTER(*chain_info->pprev, next);
	tcf_chain_put(chain);
}
/* Walk the prio-ordered filter list of @chain looking for a proto at
 * exactly @prio. Records the insertion cursor in @chain_info either
 * way. Returns the matching proto, NULL when @prio is free, or
 * ERR_PTR(-EINVAL) when @prio exists but conflicts (auto-allocated
 * prio collides, or protocol mismatch with a non-wildcard @protocol).
 */
static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
					   struct tcf_chain_info *chain_info,
					   u32 protocol, u32 prio,
					   bool prio_allocate)
{
	struct tcf_proto **pprev;
	struct tcf_proto *tp;

	/* Check the chain for existence of proto-tcf with this priority */
	for (pprev = &chain->filter_chain;
	     (tp = rtnl_dereference(*pprev)); pprev = &tp->next) {
		if (tp->prio >= prio) {
			if (tp->prio == prio) {
				if (prio_allocate ||
				    (tp->protocol != protocol && protocol))
					return ERR_PTR(-EINVAL);
			} else {
				/* Passed the slot: prio is unused. */
				tp = NULL;
			}
			break;
		}
	}
	chain_info->pprev = pprev;
	chain_info->next = tp ? tp->next : NULL;
	return tp;
}
/* Serialize one filter (or a whole tcf_proto when @fh is NULL) into a
 * netlink message on @skb. Returns skb->len on success, -1 on failure
 * with the partial message trimmed away.
 */
static int tcf_fill_node(struct net *net, struct sk_buff *skb,
			 struct tcf_proto *tp, struct Qdisc *q, u32 parent,
			 void *fh, u32 portid, u32 seq, u16 flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb_tail_pointer(skb);	/* rollback point */

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = parent;
	tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol);
	if (nla_put_string(skb, TCA_KIND, tp->ops->kind))
		goto nla_put_failure;
	if (nla_put_u32(skb, TCA_CHAIN, tp->chain->index))
		goto nla_put_failure;
	if (!fh) {
		/* No specific filter: message describes the proto itself. */
		tcm->tcm_handle = 0;
	} else {
		if (tp->ops->dump && tp->ops->dump(net, tp, fh, skb, tcm) < 0)
			goto nla_put_failure;
	}
	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
  540. static int tfilter_notify(struct net *net, struct sk_buff *oskb,
  541. struct nlmsghdr *n, struct tcf_proto *tp,
  542. struct Qdisc *q, u32 parent,
  543. void *fh, int event, bool unicast)
  544. {
  545. struct sk_buff *skb;
  546. u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
  547. skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
  548. if (!skb)
  549. return -ENOBUFS;
  550. if (tcf_fill_node(net, skb, tp, q, parent, fh, portid, n->nlmsg_seq,
  551. n->nlmsg_flags, event) <= 0) {
  552. kfree_skb(skb);
  553. return -EINVAL;
  554. }
  555. if (unicast)
  556. return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
  557. return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
  558. n->nlmsg_flags & NLM_F_ECHO);
  559. }
/* Delete filter @fh from @tp and notify listeners. The notification
 * message is filled BEFORE the delete because @fh becomes stale once
 * the delete succeeds. @last reports whether the proto is now empty.
 */
static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
			      struct nlmsghdr *n, struct tcf_proto *tp,
			      struct Qdisc *q, u32 parent,
			      void *fh, bool unicast, bool *last)
{
	struct sk_buff *skb;
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
	int err;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tcf_fill_node(net, skb, tp, q, parent, fh, portid, n->nlmsg_seq,
			  n->nlmsg_flags, RTM_DELTFILTER) <= 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	err = tp->ops->delete(tp, fh, last);
	if (err) {
		/* Delete failed: drop the prepared notification. */
		kfree_skb(skb);
		return err;
	}

	if (unicast)
		return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);

	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
			      n->nlmsg_flags & NLM_F_ECHO);
}
  586. static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
  587. struct Qdisc *q, u32 parent,
  588. struct nlmsghdr *n,
  589. struct tcf_chain *chain, int event)
  590. {
  591. struct tcf_proto *tp;
  592. for (tp = rtnl_dereference(chain->filter_chain);
  593. tp; tp = rtnl_dereference(tp->next))
  594. tfilter_notify(net, oskb, n, tp, q, parent, 0, event, false);
  595. }
/* Add/change/delete/get a filter node.
 *
 * Netlink doit handler for RTM_{NEW,DEL,GET}TFILTER. Resolves the
 * device, qdisc, class, block, chain, and tcf_proto named by the
 * request, then dispatches on the message type. May return -EAGAIN
 * internally (module autoload dropped RTNL) and replay the request.
 * Runs under RTNL.
 */
static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
			  struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tca[TCA_MAX + 1];
	struct tcmsg *t;
	u32 protocol;
	u32 prio;
	bool prio_allocate;
	u32 parent;
	u32 chain_index;
	struct net_device *dev;
	struct Qdisc *q;
	struct tcf_chain_info chain_info;
	struct tcf_chain *chain = NULL;
	struct tcf_block *block;
	struct tcf_proto *tp;
	const struct Qdisc_class_ops *cops;
	unsigned long cl;
	void *fh;
	int err;
	int tp_created;

	/* Only GET is allowed without CAP_NET_ADMIN. */
	if ((n->nlmsg_type != RTM_GETTFILTER) &&
	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

replay:
	tp_created = 0;

	err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL, extack);
	if (err < 0)
		return err;

	t = nlmsg_data(n);
	protocol = TC_H_MIN(t->tcm_info);
	prio = TC_H_MAJ(t->tcm_info);
	prio_allocate = false;
	parent = t->tcm_parent;
	cl = 0;

	if (prio == 0) {
		switch (n->nlmsg_type) {
		case RTM_DELTFILTER:
			/* prio==0 DEL means "flush the chain"; no
			 * per-filter selectors may be present.
			 */
			if (protocol || t->tcm_handle || tca[TCA_KIND])
				return -ENOENT;
			break;
		case RTM_NEWTFILTER:
			/* If no priority is provided by the user,
			 * we allocate one.
			 */
			if (n->nlmsg_flags & NLM_F_CREATE) {
				prio = TC_H_MAKE(0x80000000U, 0U);
				prio_allocate = true;
				break;
			}
			/* fall-through */
		default:
			return -ENOENT;
		}
	}

	/* Find head of filter chain. */

	/* Find link */
	dev = __dev_get_by_index(net, t->tcm_ifindex);
	if (dev == NULL)
		return -ENODEV;

	/* Find qdisc */
	if (!parent) {
		q = dev->qdisc;
		parent = q->handle;
	} else {
		q = qdisc_lookup(dev, TC_H_MAJ(t->tcm_parent));
		if (q == NULL)
			return -EINVAL;
	}

	/* Is it classful? */
	cops = q->ops->cl_ops;
	if (!cops)
		return -EINVAL;

	if (!cops->tcf_block)
		return -EOPNOTSUPP;

	/* Do we search for filter, attached to class? */
	if (TC_H_MIN(parent)) {
		cl = cops->find(q, parent);
		if (cl == 0)
			return -ENOENT;
	}

	/* And the last stroke */
	block = cops->tcf_block(q, cl, extack);
	if (!block) {
		err = -EINVAL;
		goto errout;
	}

	chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
	if (chain_index > TC_ACT_EXT_VAL_MASK) {
		err = -EINVAL;
		goto errout;
	}
	/* Only NEW may create a missing chain. */
	chain = tcf_chain_get(block, chain_index,
			      n->nlmsg_type == RTM_NEWTFILTER);
	if (!chain) {
		err = n->nlmsg_type == RTM_NEWTFILTER ? -ENOMEM : -EINVAL;
		goto errout;
	}

	if (n->nlmsg_type == RTM_DELTFILTER && prio == 0) {
		/* Flush the whole chain: notify first, then destroy. */
		tfilter_notify_chain(net, skb, q, parent, n,
				     chain, RTM_DELTFILTER);
		tcf_chain_flush(chain);
		err = 0;
		goto errout;
	}

	tp = tcf_chain_tp_find(chain, &chain_info, protocol,
			       prio, prio_allocate);
	if (IS_ERR(tp)) {
		err = PTR_ERR(tp);
		goto errout;
	}

	if (tp == NULL) {
		/* Proto-tcf does not exist, create new one */

		if (tca[TCA_KIND] == NULL || !protocol) {
			err = -EINVAL;
			goto errout;
		}

		if (n->nlmsg_type != RTM_NEWTFILTER ||
		    !(n->nlmsg_flags & NLM_F_CREATE)) {
			err = -ENOENT;
			goto errout;
		}

		if (prio_allocate)
			prio = tcf_auto_prio(tcf_chain_tp_prev(&chain_info));

		tp = tcf_proto_create(nla_data(tca[TCA_KIND]),
				      protocol, prio, parent, q, chain);
		if (IS_ERR(tp)) {
			err = PTR_ERR(tp);
			goto errout;
		}
		tp_created = 1;
	} else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
		/* Existing proto must match the requested kind. */
		err = -EINVAL;
		goto errout;
	}

	fh = tp->ops->get(tp, t->tcm_handle);

	if (!fh) {
		if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) {
			/* DEL of the whole (empty-handle) proto. */
			tcf_chain_tp_remove(chain, &chain_info, tp);
			tfilter_notify(net, skb, n, tp, q, parent, fh,
				       RTM_DELTFILTER, false);
			tcf_proto_destroy(tp);
			err = 0;
			goto errout;
		}

		if (n->nlmsg_type != RTM_NEWTFILTER ||
		    !(n->nlmsg_flags & NLM_F_CREATE)) {
			err = -ENOENT;
			goto errout;
		}
	} else {
		bool last;

		switch (n->nlmsg_type) {
		case RTM_NEWTFILTER:
			if (n->nlmsg_flags & NLM_F_EXCL) {
				if (tp_created)
					tcf_proto_destroy(tp);
				err = -EEXIST;
				goto errout;
			}
			break;
		case RTM_DELTFILTER:
			err = tfilter_del_notify(net, skb, n, tp, q, parent,
						 fh, false, &last);
			if (err)
				goto errout;
			if (last) {
				/* Proto is now empty: unlink and free it. */
				tcf_chain_tp_remove(chain, &chain_info, tp);
				tcf_proto_destroy(tp);
			}
			goto errout;
		case RTM_GETTFILTER:
			err = tfilter_notify(net, skb, n, tp, q, parent, fh,
					     RTM_NEWTFILTER, true);
			goto errout;
		default:
			err = -EINVAL;
			goto errout;
		}
	}

	err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
			      n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE);
	if (err == 0) {
		/* Only publish a freshly created proto on success. */
		if (tp_created)
			tcf_chain_tp_insert(chain, &chain_info, tp);
		tfilter_notify(net, skb, n, tp, q, parent, fh,
			       RTM_NEWTFILTER, false);
	} else {
		if (tp_created)
			tcf_proto_destroy(tp);
	}

errout:
	if (chain)
		tcf_chain_put(chain);
	if (err == -EAGAIN)
		/* Replay the request. */
		goto replay;
	return err;
}
/* Walker context for dumping all filters of one proto; the embedded
 * tcf_walker must be the first member (see tcf_node_dump's cast).
 */
struct tcf_dump_args {
	struct tcf_walker w;		/* generic walker, fn = tcf_node_dump */
	struct sk_buff *skb;		/* dump output skb */
	struct netlink_callback *cb;	/* netlink dump state */
	struct Qdisc *q;
	u32 parent;
};
  804. static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
  805. {
  806. struct tcf_dump_args *a = (void *)arg;
  807. struct net *net = sock_net(a->skb->sk);
  808. return tcf_fill_node(net, a->skb, tp, a->q, a->parent,
  809. n, NETLINK_CB(a->cb->skb).portid,
  810. a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
  811. RTM_NEWTFILTER);
  812. }
  813. static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
  814. struct sk_buff *skb, struct netlink_callback *cb,
  815. long index_start, long *p_index)
  816. {
  817. struct net *net = sock_net(skb->sk);
  818. struct tcmsg *tcm = nlmsg_data(cb->nlh);
  819. struct tcf_dump_args arg;
  820. struct tcf_proto *tp;
  821. for (tp = rtnl_dereference(chain->filter_chain);
  822. tp; tp = rtnl_dereference(tp->next), (*p_index)++) {
  823. if (*p_index < index_start)
  824. continue;
  825. if (TC_H_MAJ(tcm->tcm_info) &&
  826. TC_H_MAJ(tcm->tcm_info) != tp->prio)
  827. continue;
  828. if (TC_H_MIN(tcm->tcm_info) &&
  829. TC_H_MIN(tcm->tcm_info) != tp->protocol)
  830. continue;
  831. if (*p_index > index_start)
  832. memset(&cb->args[1], 0,
  833. sizeof(cb->args) - sizeof(cb->args[0]));
  834. if (cb->args[1] == 0) {
  835. if (tcf_fill_node(net, skb, tp, q, parent, 0,
  836. NETLINK_CB(cb->skb).portid,
  837. cb->nlh->nlmsg_seq, NLM_F_MULTI,
  838. RTM_NEWTFILTER) <= 0)
  839. return false;
  840. cb->args[1] = 1;
  841. }
  842. if (!tp->ops->walk)
  843. continue;
  844. arg.w.fn = tcf_node_dump;
  845. arg.skb = skb;
  846. arg.cb = cb;
  847. arg.q = q;
  848. arg.parent = parent;
  849. arg.w.stop = 0;
  850. arg.w.skip = cb->args[1] - 1;
  851. arg.w.count = 0;
  852. tp->ops->walk(tp, &arg.w);
  853. cb->args[1] = arg.w.count + 1;
  854. if (arg.w.stop)
  855. return false;
  856. }
  857. return true;
  858. }
/* Netlink dump handler for RTM_GETTFILTER; called with RTNL.
 * Resolves device/qdisc/class/block from the request, then dumps
 * every chain on the block (optionally restricted by TCA_CHAIN),
 * resuming from cb->args[0].
 */
static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	struct Qdisc *q;
	struct tcf_block *block;
	struct tcf_chain *chain;
	struct tcmsg *tcm = nlmsg_data(cb->nlh);
	unsigned long cl = 0;
	const struct Qdisc_class_ops *cops;
	long index_start;
	long index;
	u32 parent;
	int err;

	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
		return skb->len;

	err = nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, NULL, NULL);
	if (err)
		return err;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return skb->len;

	parent = tcm->tcm_parent;
	if (!parent) {
		q = dev->qdisc;
		parent = q->handle;
	} else {
		q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
	}
	if (!q)
		goto out;
	cops = q->ops->cl_ops;
	if (!cops)
		goto out;
	if (!cops->tcf_block)
		goto out;
	if (TC_H_MIN(tcm->tcm_parent)) {
		cl = cops->find(q, tcm->tcm_parent);
		if (cl == 0)
			goto out;
	}
	block = cops->tcf_block(q, cl, NULL);
	if (!block)
		goto out;

	index_start = cb->args[0];
	index = 0;

	list_for_each_entry(chain, &block->chain_list, list) {
		if (tca[TCA_CHAIN] &&
		    nla_get_u32(tca[TCA_CHAIN]) != chain->index)
			continue;
		if (!tcf_chain_dump(chain, q, parent, skb, cb,
				    index_start, &index))
			break;
	}

	/* Remember how far we got for the next dump invocation. */
	cb->args[0] = index;

out:
	return skb->len;
}
/* Release all actions bound to @exts and free its action array.
 * Must run under RTNL (asserted below); the actions are first collected
 * into a temporary list because tcf_action_destroy() consumes a list.
 */
void tcf_exts_destroy(struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
	LIST_HEAD(actions);

	ASSERT_RTNL();
	tcf_exts_to_list(exts, &actions);
	tcf_action_destroy(&actions, TCA_ACT_UNBIND);
	kfree(exts->actions);
	exts->nr_actions = 0;
#endif
}
EXPORT_SYMBOL(tcf_exts_destroy);
/* Parse and bind the actions carried in a filter change request into @exts.
 *
 * Supports both the legacy single-"police" attribute and the generic
 * action-list attribute; exts->police / exts->action hold the attribute
 * ids the classifier registered (0 when unused). Returns 0 on success or
 * a negative errno.
 *
 * NOTE(review): assumes exts->actions[] was sized by tcf_exts_init() to
 * hold every action tcf_action_init() can produce — confirm against
 * TCA_ACT_MAX_PRIO before relying on the unchecked i++ store below.
 */
int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
		      struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr)
{
#ifdef CONFIG_NET_CLS_ACT
	{
		struct tc_action *act;

		if (exts->police && tb[exts->police]) {
			/* Legacy compat path: exactly one police action. */
			act = tcf_action_init_1(net, tp, tb[exts->police],
						rate_tlv, "police", ovr,
						TCA_ACT_BIND);
			if (IS_ERR(act))
				return PTR_ERR(act);

			act->type = exts->type = TCA_OLD_COMPAT;
			exts->actions[0] = act;
			exts->nr_actions = 1;
		} else if (exts->action && tb[exts->action]) {
			LIST_HEAD(actions);
			int err, i = 0;

			err = tcf_action_init(net, tp, tb[exts->action],
					      rate_tlv, NULL, ovr, TCA_ACT_BIND,
					      &actions);
			if (err)
				return err;
			/* Move the freshly initialized actions into the
			 * flat array used everywhere else in this file.
			 */
			list_for_each_entry(act, &actions, list)
				exts->actions[i++] = act;
			exts->nr_actions = i;
		}
		exts->net = net;
	}
#else
	/* Actions are compiled out: reject any request that carries them. */
	if ((exts->action && tb[exts->action]) ||
	    (exts->police && tb[exts->police]))
		return -EOPNOTSUPP;
#endif

	return 0;
}
EXPORT_SYMBOL(tcf_exts_validate);
  968. void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src)
  969. {
  970. #ifdef CONFIG_NET_CLS_ACT
  971. struct tcf_exts old = *dst;
  972. *dst = *src;
  973. tcf_exts_destroy(&old);
  974. #endif
  975. }
  976. EXPORT_SYMBOL(tcf_exts_change);
  977. #ifdef CONFIG_NET_CLS_ACT
  978. static struct tc_action *tcf_exts_first_act(struct tcf_exts *exts)
  979. {
  980. if (exts->nr_actions == 0)
  981. return NULL;
  982. else
  983. return exts->actions[0];
  984. }
  985. #endif
/* Serialize the actions of @exts into netlink attributes on @skb.
 * Emits either the modern nested action list or, for TCA_OLD_COMPAT
 * entries, the legacy single-police attribute. Returns 0 on success,
 * -1 on failure (with the partially written nest cancelled).
 */
int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
	struct nlattr *nest;

	if (exts->action && tcf_exts_has_actions(exts)) {
		/*
		 * again for backward compatible mode - we want
		 * to work with both old and new modes of entering
		 * tc data even if iproute2 was newer - jhs
		 */
		if (exts->type != TCA_OLD_COMPAT) {
			LIST_HEAD(actions);

			nest = nla_nest_start(skb, exts->action);
			if (nest == NULL)
				goto nla_put_failure;
			/* tcf_action_dump() takes a list, not the array. */
			tcf_exts_to_list(exts, &actions);
			if (tcf_action_dump(skb, &actions, 0, 0) < 0)
				goto nla_put_failure;
			nla_nest_end(skb, nest);
		} else if (exts->police) {
			/* Legacy mode: dump only the first (police) action. */
			struct tc_action *act = tcf_exts_first_act(exts);

			nest = nla_nest_start(skb, exts->police);
			if (nest == NULL || !act)
				goto nla_put_failure;
			if (tcf_action_dump_old(skb, act, 0, 0) < 0)
				goto nla_put_failure;
			nla_nest_end(skb, nest);
		}
	}
	return 0;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
#else
	return 0;
#endif
}
EXPORT_SYMBOL(tcf_exts_dump);
  1024. int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts)
  1025. {
  1026. #ifdef CONFIG_NET_CLS_ACT
  1027. struct tc_action *a = tcf_exts_first_act(exts);
  1028. if (a != NULL && tcf_action_copy_stats(skb, a, 1) < 0)
  1029. return -1;
  1030. #endif
  1031. return 0;
  1032. }
  1033. EXPORT_SYMBOL(tcf_exts_dump_stats);
  1034. static int tc_exts_setup_cb_egdev_call(struct tcf_exts *exts,
  1035. enum tc_setup_type type,
  1036. void *type_data, bool err_stop)
  1037. {
  1038. int ok_count = 0;
  1039. #ifdef CONFIG_NET_CLS_ACT
  1040. const struct tc_action *a;
  1041. struct net_device *dev;
  1042. int i, ret;
  1043. if (!tcf_exts_has_actions(exts))
  1044. return 0;
  1045. for (i = 0; i < exts->nr_actions; i++) {
  1046. a = exts->actions[i];
  1047. if (!a->ops->get_dev)
  1048. continue;
  1049. dev = a->ops->get_dev(a);
  1050. if (!dev)
  1051. continue;
  1052. ret = tc_setup_cb_egdev_call(dev, type, type_data, err_stop);
  1053. if (ret < 0)
  1054. return ret;
  1055. ok_count += ret;
  1056. }
  1057. #endif
  1058. return ok_count;
  1059. }
  1060. int tc_setup_cb_call(struct tcf_block *block, struct tcf_exts *exts,
  1061. enum tc_setup_type type, void *type_data, bool err_stop)
  1062. {
  1063. int ok_count;
  1064. int ret;
  1065. ret = tcf_block_cb_call(block, type, type_data, err_stop);
  1066. if (ret < 0)
  1067. return ret;
  1068. ok_count = ret;
  1069. if (!exts)
  1070. return ok_count;
  1071. ret = tc_exts_setup_cb_egdev_call(exts, type, type_data, err_stop);
  1072. if (ret < 0)
  1073. return ret;
  1074. ok_count += ret;
  1075. return ok_count;
  1076. }
  1077. EXPORT_SYMBOL(tc_setup_cb_call);
/* Subsystem init: create the ordered workqueue used for deferred filter
 * work and register the rtnetlink handlers for filter add/delete/get.
 * Returns 0 on success or -ENOMEM if the workqueue cannot be allocated.
 */
static int __init tc_filter_init(void)
{
	tc_filter_wq = alloc_ordered_workqueue("tc_filter_workqueue", 0);
	if (!tc_filter_wq)
		return -ENOMEM;

	rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_ctl_tfilter, NULL, 0);
	/* GET additionally wires up the dump callback. */
	rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_ctl_tfilter,
		      tc_dump_tfilter, 0);

	return 0;
}
subsys_initcall(tc_filter_init);