fib_frontend.c 36 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529
  1. /*
  2. * INET An implementation of the TCP/IP protocol suite for the LINUX
  3. * operating system. INET is implemented using the BSD Socket
  4. * interface as the means of communication with the user level.
  5. *
  6. * IPv4 Forwarding Information Base: FIB frontend.
  7. *
  8. * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  9. *
  10. * This program is free software; you can redistribute it and/or
  11. * modify it under the terms of the GNU General Public License
  12. * as published by the Free Software Foundation; either version
  13. * 2 of the License, or (at your option) any later version.
  14. */
  15. #include <linux/module.h>
  16. #include <linux/uaccess.h>
  17. #include <linux/bitops.h>
  18. #include <linux/capability.h>
  19. #include <linux/types.h>
  20. #include <linux/kernel.h>
  21. #include <linux/mm.h>
  22. #include <linux/string.h>
  23. #include <linux/socket.h>
  24. #include <linux/sockios.h>
  25. #include <linux/errno.h>
  26. #include <linux/in.h>
  27. #include <linux/inet.h>
  28. #include <linux/inetdevice.h>
  29. #include <linux/netdevice.h>
  30. #include <linux/if_addr.h>
  31. #include <linux/if_arp.h>
  32. #include <linux/skbuff.h>
  33. #include <linux/cache.h>
  34. #include <linux/init.h>
  35. #include <linux/list.h>
  36. #include <linux/slab.h>
  37. #include <net/ip.h>
  38. #include <net/protocol.h>
  39. #include <net/route.h>
  40. #include <net/tcp.h>
  41. #include <net/sock.h>
  42. #include <net/arp.h>
  43. #include <net/ip_fib.h>
  44. #include <net/rtnetlink.h>
  45. #include <net/xfrm.h>
  46. #include <net/l3mdev.h>
  47. #include <net/lwtunnel.h>
  48. #include <trace/events/fib.h>
  49. #ifndef CONFIG_IP_MULTIPLE_TABLES
  50. static int __net_init fib4_rules_init(struct net *net)
  51. {
  52. struct fib_table *local_table, *main_table;
  53. main_table = fib_trie_table(RT_TABLE_MAIN, NULL);
  54. if (!main_table)
  55. return -ENOMEM;
  56. local_table = fib_trie_table(RT_TABLE_LOCAL, main_table);
  57. if (!local_table)
  58. goto fail;
  59. hlist_add_head_rcu(&local_table->tb_hlist,
  60. &net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
  61. hlist_add_head_rcu(&main_table->tb_hlist,
  62. &net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]);
  63. return 0;
  64. fail:
  65. fib_free_table(main_table);
  66. return -ENOMEM;
  67. }
/* Without CONFIG_IP_MULTIPLE_TABLES there can never be custom rules. */
static bool fib4_has_custom_rules(struct net *net)
{
	return false;
}
  72. #else
/* Look up table @id in @net, creating it if it does not exist yet.
 * id == 0 is treated as RT_TABLE_MAIN.  When the LOCAL table is first
 * created and no custom rules are in use, it is built as an alias of
 * MAIN so both share one trie.  Returns NULL on allocation failure.
 */
struct fib_table *fib_new_table(struct net *net, u32 id)
{
	struct fib_table *tb, *alias = NULL;
	unsigned int h;

	if (id == 0)
		id = RT_TABLE_MAIN;
	tb = fib_get_table(net, id);
	if (tb)
		return tb;

	if (id == RT_TABLE_LOCAL && !net->ipv4.fib_has_custom_rules)
		alias = fib_new_table(net, RT_TABLE_MAIN);

	tb = fib_trie_table(id, alias);
	if (!tb)
		return NULL;

	/* publish cached pointers to the common tables for fast lookup */
	switch (id) {
	case RT_TABLE_MAIN:
		rcu_assign_pointer(net->ipv4.fib_main, tb);
		break;
	case RT_TABLE_DEFAULT:
		rcu_assign_pointer(net->ipv4.fib_default, tb);
		break;
	default:
		break;
	}

	h = id & (FIB_TABLE_HASHSZ - 1);
	hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]);
	return tb;
}
EXPORT_SYMBOL_GPL(fib_new_table);
  102. /* caller must hold either rtnl or rcu read lock */
  103. struct fib_table *fib_get_table(struct net *net, u32 id)
  104. {
  105. struct fib_table *tb;
  106. struct hlist_head *head;
  107. unsigned int h;
  108. if (id == 0)
  109. id = RT_TABLE_MAIN;
  110. h = id & (FIB_TABLE_HASHSZ - 1);
  111. head = &net->ipv4.fib_table_hash[h];
  112. hlist_for_each_entry_rcu(tb, head, tb_hlist) {
  113. if (tb->tb_id == id)
  114. return tb;
  115. }
  116. return NULL;
  117. }
/* True once this namespace has installed any non-default FIB rule. */
static bool fib4_has_custom_rules(struct net *net)
{
	return net->ipv4.fib_has_custom_rules;
}
  122. #endif /* CONFIG_IP_MULTIPLE_TABLES */
/* Swap @old for @new in the table hash (and in the cached main/default
 * pointers when policy routing is enabled).  RCU readers see either the
 * old or the new table at all times, never neither.
 */
static void fib_replace_table(struct net *net, struct fib_table *old,
			      struct fib_table *new)
{
#ifdef CONFIG_IP_MULTIPLE_TABLES
	switch (new->tb_id) {
	case RT_TABLE_MAIN:
		rcu_assign_pointer(net->ipv4.fib_main, new);
		break;
	case RT_TABLE_DEFAULT:
		rcu_assign_pointer(net->ipv4.fib_default, new);
		break;
	default:
		break;
	}
#endif
	/* replace the old table in the hlist */
	hlist_replace_rcu(&old->tb_hlist, &new->tb_hlist);
}
/* Undo the LOCAL/MAIN table merge: give LOCAL its own trie again and
 * flush externally-offloaded entries from MAIN.  Returns 0 when no work
 * was needed or on success, -ENOMEM if the unmerged trie could not be
 * allocated.
 */
int fib_unmerge(struct net *net)
{
	struct fib_table *old, *new, *main_table;

	/* attempt to fetch local table if it has been allocated */
	old = fib_get_table(net, RT_TABLE_LOCAL);
	if (!old)
		return 0;

	new = fib_trie_unmerge(old);
	if (!new)
		return -ENOMEM;

	/* table is already unmerged */
	if (new == old)
		return 0;

	/* replace merged table with clean table */
	fib_replace_table(net, old, new);
	fib_free_table(old);

	/* attempt to fetch main table if it has been allocated */
	main_table = fib_get_table(net, RT_TABLE_MAIN);
	if (!main_table)
		return 0;

	/* flush local entries from main table */
	fib_table_flush_external(main_table);
	return 0;
}
  165. static void fib_flush(struct net *net)
  166. {
  167. int flushed = 0;
  168. unsigned int h;
  169. for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
  170. struct hlist_head *head = &net->ipv4.fib_table_hash[h];
  171. struct hlist_node *tmp;
  172. struct fib_table *tb;
  173. hlist_for_each_entry_safe(tb, tmp, head, tb_hlist)
  174. flushed += fib_table_flush(net, tb);
  175. }
  176. if (flushed)
  177. rt_cache_flush(net);
  178. }
/*
 * Find address type as if only "dev" was present in the system. If
 * on_dev is NULL then all interfaces are taken into consideration.
 * Returns an RTN_* route type (RTN_BROADCAST, RTN_MULTICAST,
 * RTN_UNICAST, RTN_LOCAL, ...).
 */
static inline unsigned int __inet_dev_addr_type(struct net *net,
						const struct net_device *dev,
						__be32 addr, u32 tb_id)
{
	struct flowi4 fl4 = { .daddr = addr };
	struct fib_result res;
	unsigned int ret = RTN_BROADCAST;
	struct fib_table *table;

	/* zeronet / limited broadcast / multicast never need a lookup */
	if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
		return RTN_BROADCAST;
	if (ipv4_is_multicast(addr))
		return RTN_MULTICAST;

	rcu_read_lock();

	table = fib_get_table(net, tb_id);
	if (table) {
		ret = RTN_UNICAST;
		if (!fib_table_lookup(table, &fl4, &res, FIB_LOOKUP_NOREF)) {
			/* only report the route's type if it matches @dev */
			if (!dev || dev == res.fi->fib_dev)
				ret = res.type;
		}
	}

	rcu_read_unlock();
	return ret;
}
/* Address type of @addr looked up in table @tb_id, any interface. */
unsigned int inet_addr_type_table(struct net *net, __be32 addr, u32 tb_id)
{
	return __inet_dev_addr_type(net, NULL, addr, tb_id);
}
EXPORT_SYMBOL(inet_addr_type_table);
/* Address type of @addr in the LOCAL table, any interface. */
unsigned int inet_addr_type(struct net *net, __be32 addr)
{
	return __inet_dev_addr_type(net, NULL, addr, RT_TABLE_LOCAL);
}
EXPORT_SYMBOL(inet_addr_type);
/* Address type of @addr as seen from @dev, using the device's L3
 * master (VRF) table when it has one, otherwise the LOCAL table.
 */
unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
				__be32 addr)
{
	u32 rt_table = l3mdev_fib_table(dev) ? : RT_TABLE_LOCAL;

	return __inet_dev_addr_type(net, dev, addr, rt_table);
}
EXPORT_SYMBOL(inet_dev_addr_type);
/* inet_addr_type with dev == NULL but using the table from a dev
 * if one is associated
 */
unsigned int inet_addr_type_dev_table(struct net *net,
				      const struct net_device *dev,
				      __be32 addr)
{
	/* pick the device's L3 master table, else fall back to LOCAL */
	u32 rt_table = l3mdev_fib_table(dev) ? : RT_TABLE_LOCAL;

	return __inet_dev_addr_type(net, NULL, addr, rt_table);
}
EXPORT_SYMBOL(inet_addr_type_dev_table);
/* Compute the "specific destination" for a received skb: the local
 * address a reply to this packet should use.
 */
__be32 fib_compute_spec_dst(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	struct in_device *in_dev;
	struct fib_result res;
	struct rtable *rt;
	struct net *net;
	int scope;

	rt = skb_rtable(skb);
	/* packets routed straight to us keep their destination address */
	if ((rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST | RTCF_LOCAL)) ==
	    RTCF_LOCAL)
		return ip_hdr(skb)->daddr;

	in_dev = __in_dev_get_rcu(dev);

	net = dev_net(dev);

	scope = RT_SCOPE_UNIVERSE;
	if (!ipv4_is_zeronet(ip_hdr(skb)->saddr)) {
		bool vmark = in_dev && IN_DEV_SRC_VMARK(in_dev);
		struct flowi4 fl4 = {
			.flowi4_iif = LOOPBACK_IFINDEX,
			.flowi4_oif = l3mdev_master_ifindex_rcu(dev),
			.daddr = ip_hdr(skb)->saddr,
			.flowi4_tos = RT_TOS(ip_hdr(skb)->tos),
			.flowi4_scope = scope,
			.flowi4_mark = vmark ? skb->mark : 0,
		};
		/* route back towards the sender to learn our preferred
		 * source address for that path
		 */
		if (!fib_lookup(net, &fl4, &res, 0))
			return FIB_RES_PREFSRC(net, res);
	} else {
		/* sender used 0.0.0.0: restrict to link-scope addresses */
		scope = RT_SCOPE_LINK;
	}
	return inet_select_addr(dev, ip_hdr(skb)->saddr, scope);
}
  267. bool fib_info_nh_uses_dev(struct fib_info *fi, const struct net_device *dev)
  268. {
  269. bool dev_match = false;
  270. #ifdef CONFIG_IP_ROUTE_MULTIPATH
  271. int ret;
  272. for (ret = 0; ret < fi->fib_nhs; ret++) {
  273. struct fib_nh *nh = &fi->fib_nh[ret];
  274. if (nh->nh_dev == dev) {
  275. dev_match = true;
  276. break;
  277. } else if (l3mdev_master_ifindex_rcu(nh->nh_dev) == dev->ifindex) {
  278. dev_match = true;
  279. break;
  280. }
  281. }
  282. #else
  283. if (fi->fib_nh[0].nh_dev == dev)
  284. dev_match = true;
  285. #endif
  286. return dev_match;
  287. }
  288. EXPORT_SYMBOL_GPL(fib_info_nh_uses_dev);
/* Given (packet source, input interface) and optional (dst, oif, tos):
 * - (main) check, that source is valid i.e. not broadcast or our local
 * address.
 * - figure out what "logical" interface this packet arrived
 * and calculate "specific destination" address.
 * - check, that packet arrived from expected physical interface.
 * called with rcu_read_lock()
 *
 * Returns 1 if the reverse path resolves to a host-scope nexthop,
 * 0 if acceptable, -EINVAL for an invalid source, -EXDEV when the
 * reverse-path filter (@rpf) rejects the packet.
 */
static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
				 u8 tos, int oif, struct net_device *dev,
				 int rpf, struct in_device *idev, u32 *itag)
{
	struct net *net = dev_net(dev);
	struct flow_keys flkeys;
	int ret, no_addr;
	struct fib_result res;
	struct flowi4 fl4;
	bool dev_match;

	fl4.flowi4_oif = 0;
	fl4.flowi4_iif = l3mdev_master_ifindex_rcu(dev);
	if (!fl4.flowi4_iif)
		fl4.flowi4_iif = oif ? : LOOPBACK_IFINDEX;
	/* reverse lookup: swap src and dst to route back to the sender */
	fl4.daddr = src;
	fl4.saddr = dst;
	fl4.flowi4_tos = tos;
	fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
	fl4.flowi4_tun_key.tun_id = 0;
	fl4.flowi4_flags = 0;
	fl4.flowi4_uid = sock_net_uid(net, NULL);

	no_addr = idev->ifa_list == NULL;

	fl4.flowi4_mark = IN_DEV_SRC_VMARK(idev) ? skb->mark : 0;
	if (!fib4_rules_early_flow_dissect(net, skb, &fl4, &flkeys)) {
		fl4.flowi4_proto = 0;
		fl4.fl4_sport = 0;
		fl4.fl4_dport = 0;
	}

	if (fib_lookup(net, &fl4, &res, 0))
		goto last_resort;
	if (res.type != RTN_UNICAST &&
	    (res.type != RTN_LOCAL || !IN_DEV_ACCEPT_LOCAL(idev)))
		goto e_inval;
	fib_combine_itag(itag, &res);

	dev_match = fib_info_nh_uses_dev(res.fi, dev);
	if (dev_match) {
		ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
		return ret;
	}
	if (no_addr)
		goto last_resort;
	if (rpf == 1)
		goto e_rpf;
	/* loose rpf: retry the lookup constrained to the ingress device */
	fl4.flowi4_oif = dev->ifindex;

	ret = 0;
	if (fib_lookup(net, &fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE) == 0) {
		if (res.type == RTN_UNICAST)
			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
	}
	return ret;

last_resort:
	if (rpf)
		goto e_rpf;
	*itag = 0;
	return 0;

e_inval:
	return -EINVAL;
e_rpf:
	return -EXDEV;
}
/* Ignore rp_filter for packets protected by IPsec. */
int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
			u8 tos, int oif, struct net_device *dev,
			struct in_device *idev, u32 *itag)
{
	int r = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(idev);
	struct net *net = dev_net(dev);

	/* fast path: no rp_filter, no tclassid users, and no risk of
	 * sending a redirect back out the same interface
	 */
	if (!r && !fib_num_tclassid_users(net) &&
	    (dev->ifindex != oif || !IN_DEV_TX_REDIRECTS(idev))) {
		if (IN_DEV_ACCEPT_LOCAL(idev))
			goto ok;
		/* with custom local routes in place, checking local addresses
		 * only will be too optimistic, with custom rules, checking
		 * local addresses only can be too strict, e.g. due to vrf
		 */
		if (net->ipv4.fib_has_custom_local_routes ||
		    fib4_has_custom_rules(net))
			goto full_check;
		/* the source must not be one of our own addresses */
		if (inet_lookup_ifaddr_rcu(net, src))
			return -EINVAL;

ok:
		*itag = 0;
		return 0;
	}

full_check:
	return __fib_validate_source(skb, src, dst, tos, oif, dev, r, idev, itag);
}
  384. static inline __be32 sk_extract_addr(struct sockaddr *addr)
  385. {
  386. return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
  387. }
  388. static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
  389. {
  390. struct nlattr *nla;
  391. nla = (struct nlattr *) ((char *) mx + len);
  392. nla->nla_type = type;
  393. nla->nla_len = nla_attr_size(4);
  394. *(u32 *) nla_data(nla) = value;
  395. return len + nla_total_size(4);
  396. }
/* Convert a legacy SIOCADDRT/SIOCDELRT struct rtentry into the common
 * struct fib_config used by the FIB core.  Returns 0 or a negative
 * errno.  On success, cfg->fc_mx may point to a kmalloc'ed metrics
 * buffer which the caller must kfree().
 */
static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
				 struct fib_config *cfg)
{
	__be32 addr;
	int plen;

	memset(cfg, 0, sizeof(*cfg));
	cfg->fc_nlinfo.nl_net = net;

	if (rt->rt_dst.sa_family != AF_INET)
		return -EAFNOSUPPORT;

	/*
	 * Check mask for validity:
	 * a) it must be contiguous.
	 * b) destination must have all host bits clear.
	 * c) if application forgot to set correct family (AF_INET),
	 * reject request unless it is absolutely clear i.e.
	 * both family and mask are zero.
	 */
	plen = 32;
	addr = sk_extract_addr(&rt->rt_dst);
	if (!(rt->rt_flags & RTF_HOST)) {
		__be32 mask = sk_extract_addr(&rt->rt_genmask);

		if (rt->rt_genmask.sa_family != AF_INET) {
			if (mask || rt->rt_genmask.sa_family)
				return -EAFNOSUPPORT;
		}

		if (bad_mask(mask, addr))
			return -EINVAL;

		plen = inet_mask_len(mask);
	}

	cfg->fc_dst_len = plen;
	cfg->fc_dst = addr;

	if (cmd != SIOCDELRT) {
		cfg->fc_nlflags = NLM_F_CREATE;
		cfg->fc_protocol = RTPROT_BOOT;
	}

	if (rt->rt_metric)
		/* legacy metric is offset by one; 0 means "unset" */
		cfg->fc_priority = rt->rt_metric - 1;

	if (rt->rt_flags & RTF_REJECT) {
		cfg->fc_scope = RT_SCOPE_HOST;
		cfg->fc_type = RTN_UNREACHABLE;
		return 0;
	}

	cfg->fc_scope = RT_SCOPE_NOWHERE;
	cfg->fc_type = RTN_UNICAST;

	if (rt->rt_dev) {
		char *colon;
		struct net_device *dev;
		char devname[IFNAMSIZ];

		if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
			return -EFAULT;

		devname[IFNAMSIZ-1] = 0;
		colon = strchr(devname, ':');
		if (colon)
			*colon = 0;
		dev = __dev_get_by_name(net, devname);
		if (!dev)
			return -ENODEV;
		cfg->fc_oif = dev->ifindex;
		cfg->fc_table = l3mdev_fib_table(dev);
		if (colon) {
			/* "eth0:1"-style alias: take the preferred source
			 * address from the matching labelled ifaddr
			 */
			struct in_ifaddr *ifa;
			struct in_device *in_dev = __in_dev_get_rtnl(dev);

			if (!in_dev)
				return -ENODEV;
			*colon = ':';
			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
				if (strcmp(ifa->ifa_label, devname) == 0)
					break;
			if (!ifa)
				return -ENODEV;
			cfg->fc_prefsrc = ifa->ifa_local;
		}
	}

	addr = sk_extract_addr(&rt->rt_gateway);
	if (rt->rt_gateway.sa_family == AF_INET && addr) {
		unsigned int addr_type;

		cfg->fc_gw = addr;
		addr_type = inet_addr_type_table(net, addr, cfg->fc_table);
		if (rt->rt_flags & RTF_GATEWAY &&
		    addr_type == RTN_UNICAST)
			cfg->fc_scope = RT_SCOPE_UNIVERSE;
	}

	if (cmd == SIOCDELRT)
		return 0;

	if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
		return -EINVAL;

	if (cfg->fc_scope == RT_SCOPE_NOWHERE)
		cfg->fc_scope = RT_SCOPE_LINK;

	if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
		struct nlattr *mx;
		int len = 0;

		/* room for up to three u32 metrics; freed by the caller */
		mx = kcalloc(3, nla_total_size(4), GFP_KERNEL);
		if (!mx)
			return -ENOMEM;

		if (rt->rt_flags & RTF_MTU)
			len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);

		if (rt->rt_flags & RTF_WINDOW)
			len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);

		if (rt->rt_flags & RTF_IRTT)
			len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);

		cfg->fc_mx = mx;
		cfg->fc_mx_len = len;
	}

	return 0;
}
  502. /*
  503. * Handle IP routing ioctl calls.
  504. * These are used to manipulate the routing tables
  505. */
  506. int ip_rt_ioctl(struct net *net, unsigned int cmd, struct rtentry *rt)
  507. {
  508. struct fib_config cfg;
  509. int err;
  510. switch (cmd) {
  511. case SIOCADDRT: /* Add a route */
  512. case SIOCDELRT: /* Delete a route */
  513. if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
  514. return -EPERM;
  515. rtnl_lock();
  516. err = rtentry_to_fib_config(net, cmd, rt, &cfg);
  517. if (err == 0) {
  518. struct fib_table *tb;
  519. if (cmd == SIOCDELRT) {
  520. tb = fib_get_table(net, cfg.fc_table);
  521. if (tb)
  522. err = fib_table_delete(net, tb, &cfg,
  523. NULL);
  524. else
  525. err = -ESRCH;
  526. } else {
  527. tb = fib_new_table(net, cfg.fc_table);
  528. if (tb)
  529. err = fib_table_insert(net, tb,
  530. &cfg, NULL);
  531. else
  532. err = -ENOBUFS;
  533. }
  534. /* allocated by rtentry_to_fib_config() */
  535. kfree(cfg.fc_mx);
  536. }
  537. rtnl_unlock();
  538. return err;
  539. }
  540. return -EINVAL;
  541. }
/* Netlink attribute policy for RTM_{NEW,DEL,GET}ROUTE over AF_INET. */
const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = {
	[RTA_DST]		= { .type = NLA_U32 },
	[RTA_SRC]		= { .type = NLA_U32 },
	[RTA_IIF]		= { .type = NLA_U32 },
	[RTA_OIF]		= { .type = NLA_U32 },
	[RTA_GATEWAY]		= { .type = NLA_U32 },
	[RTA_PRIORITY]		= { .type = NLA_U32 },
	[RTA_PREFSRC]		= { .type = NLA_U32 },
	[RTA_METRICS]		= { .type = NLA_NESTED },
	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
	[RTA_FLOW]		= { .type = NLA_U32 },
	[RTA_ENCAP_TYPE]	= { .type = NLA_U16 },
	[RTA_ENCAP]		= { .type = NLA_NESTED },
	[RTA_UID]		= { .type = NLA_U32 },
	[RTA_MARK]		= { .type = NLA_U32 },
	[RTA_TABLE]		= { .type = NLA_U32 },
	[RTA_IP_PROTO]		= { .type = NLA_U8 },
	[RTA_SPORT]		= { .type = NLA_U16 },
	[RTA_DPORT]		= { .type = NLA_U16 },
};
/* Parse an RTM_NEWROUTE/RTM_DELROUTE netlink message into a
 * struct fib_config.  Validates the message against rtm_ipv4_policy
 * first.  Returns 0 or a negative errno (with @extack set on some
 * failures).  Note cfg->fc_mx/fc_mp/fc_encap point into the original
 * message, not into freshly allocated memory.
 */
static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
			     struct nlmsghdr *nlh, struct fib_config *cfg,
			     struct netlink_ext_ack *extack)
{
	struct nlattr *attr;
	int err, remaining;
	struct rtmsg *rtm;

	err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy,
			     extack);
	if (err < 0)
		goto errout;

	memset(cfg, 0, sizeof(*cfg));

	/* copy the fixed-size rtmsg header fields */
	rtm = nlmsg_data(nlh);
	cfg->fc_dst_len = rtm->rtm_dst_len;
	cfg->fc_tos = rtm->rtm_tos;
	cfg->fc_table = rtm->rtm_table;
	cfg->fc_protocol = rtm->rtm_protocol;
	cfg->fc_scope = rtm->rtm_scope;
	cfg->fc_type = rtm->rtm_type;
	cfg->fc_flags = rtm->rtm_flags;
	cfg->fc_nlflags = nlh->nlmsg_flags;

	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
	cfg->fc_nlinfo.nlh = nlh;
	cfg->fc_nlinfo.nl_net = net;

	if (cfg->fc_type > RTN_MAX) {
		NL_SET_ERR_MSG(extack, "Invalid route type");
		err = -EINVAL;
		goto errout;
	}

	/* walk the variable-length attributes */
	nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
		switch (nla_type(attr)) {
		case RTA_DST:
			cfg->fc_dst = nla_get_be32(attr);
			break;
		case RTA_OIF:
			cfg->fc_oif = nla_get_u32(attr);
			break;
		case RTA_GATEWAY:
			cfg->fc_gw = nla_get_be32(attr);
			break;
		case RTA_PRIORITY:
			cfg->fc_priority = nla_get_u32(attr);
			break;
		case RTA_PREFSRC:
			cfg->fc_prefsrc = nla_get_be32(attr);
			break;
		case RTA_METRICS:
			cfg->fc_mx = nla_data(attr);
			cfg->fc_mx_len = nla_len(attr);
			break;
		case RTA_MULTIPATH:
			/* vet any nested encap types before accepting */
			err = lwtunnel_valid_encap_type_attr(nla_data(attr),
							     nla_len(attr),
							     extack);
			if (err < 0)
				goto errout;
			cfg->fc_mp = nla_data(attr);
			cfg->fc_mp_len = nla_len(attr);
			break;
		case RTA_FLOW:
			cfg->fc_flow = nla_get_u32(attr);
			break;
		case RTA_TABLE:
			/* overrides rtm_table from the header */
			cfg->fc_table = nla_get_u32(attr);
			break;
		case RTA_ENCAP:
			cfg->fc_encap = attr;
			break;
		case RTA_ENCAP_TYPE:
			cfg->fc_encap_type = nla_get_u16(attr);
			err = lwtunnel_valid_encap_type(cfg->fc_encap_type,
							extack);
			if (err < 0)
				goto errout;
			break;
		}
	}
	return 0;
errout:
	return err;
}
  643. static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
  644. struct netlink_ext_ack *extack)
  645. {
  646. struct net *net = sock_net(skb->sk);
  647. struct fib_config cfg;
  648. struct fib_table *tb;
  649. int err;
  650. err = rtm_to_fib_config(net, skb, nlh, &cfg, extack);
  651. if (err < 0)
  652. goto errout;
  653. tb = fib_get_table(net, cfg.fc_table);
  654. if (!tb) {
  655. NL_SET_ERR_MSG(extack, "FIB table does not exist");
  656. err = -ESRCH;
  657. goto errout;
  658. }
  659. err = fib_table_delete(net, tb, &cfg, extack);
  660. errout:
  661. return err;
  662. }
  663. static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
  664. struct netlink_ext_ack *extack)
  665. {
  666. struct net *net = sock_net(skb->sk);
  667. struct fib_config cfg;
  668. struct fib_table *tb;
  669. int err;
  670. err = rtm_to_fib_config(net, skb, nlh, &cfg, extack);
  671. if (err < 0)
  672. goto errout;
  673. tb = fib_new_table(net, cfg.fc_table);
  674. if (!tb) {
  675. err = -ENOBUFS;
  676. goto errout;
  677. }
  678. err = fib_table_insert(net, tb, &cfg, extack);
  679. if (!err && cfg.fc_type == RTN_LOCAL)
  680. net->ipv4.fib_has_custom_local_routes = true;
  681. errout:
  682. return err;
  683. }
/* Strictly validate an RTM_GETROUTE dump request header and attributes
 * and fill @filter from them.  Only RTA_TABLE and RTA_OIF attributes
 * are accepted; anything else is rejected with -EINVAL.  Sets
 * NLM_F_DUMP_FILTERED on @cb when any filter field was supplied.
 */
int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh,
			  struct fib_dump_filter *filter,
			  struct netlink_callback *cb)
{
	struct netlink_ext_ack *extack = cb->extack;
	struct nlattr *tb[RTA_MAX + 1];
	struct rtmsg *rtm;
	int err, i;

	ASSERT_RTNL();

	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
		NL_SET_ERR_MSG(extack, "Invalid header for FIB dump request");
		return -EINVAL;
	}

	/* selector fields make no sense for a full dump */
	rtm = nlmsg_data(nlh);
	if (rtm->rtm_dst_len || rtm->rtm_src_len || rtm->rtm_tos ||
	    rtm->rtm_scope) {
		NL_SET_ERR_MSG(extack, "Invalid values in header for FIB dump request");
		return -EINVAL;
	}

	if (rtm->rtm_flags & ~(RTM_F_CLONED | RTM_F_PREFIX)) {
		NL_SET_ERR_MSG(extack, "Invalid flags for FIB dump request");
		return -EINVAL;
	}

	filter->dump_all_families = (rtm->rtm_family == AF_UNSPEC);
	filter->flags    = rtm->rtm_flags;
	filter->protocol = rtm->rtm_protocol;
	filter->rt_type  = rtm->rtm_type;
	filter->table_id = rtm->rtm_table;

	err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
				 rtm_ipv4_policy, extack);
	if (err < 0)
		return err;

	for (i = 0; i <= RTA_MAX; ++i) {
		int ifindex;

		if (!tb[i])
			continue;

		switch (i) {
		case RTA_TABLE:
			/* attribute overrides rtm_table from the header */
			filter->table_id = nla_get_u32(tb[i]);
			break;
		case RTA_OIF:
			ifindex = nla_get_u32(tb[i]);
			filter->dev = __dev_get_by_index(net, ifindex);
			if (!filter->dev)
				return -ENODEV;
			break;
		default:
			NL_SET_ERR_MSG(extack, "Unsupported attribute in dump request");
			return -EINVAL;
		}
	}

	if (filter->flags || filter->protocol || filter->rt_type ||
	    filter->table_id || filter->dev) {
		filter->filter_set = 1;
		cb->answer_flags = NLM_F_DUMP_FILTERED;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(ip_valid_fib_dump_req);
/* Dump all IPv4 FIB tables to a netlink socket.  Resumable: the hash
 * bucket index and table index within the bucket are carried in
 * cb->args[0] and cb->args[1] between invocations; cb->args[2+] hold
 * per-table resume state and are cleared when moving to a new table.
 */
static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
{
	const struct nlmsghdr *nlh = cb->nlh;
	struct net *net = sock_net(skb->sk);
	struct fib_dump_filter filter = {};
	unsigned int h, s_h;
	unsigned int e = 0, s_e;
	struct fib_table *tb;
	struct hlist_head *head;
	int dumped = 0, err;

	if (cb->strict_check) {
		err = ip_valid_fib_dump_req(net, nlh, &filter, cb);
		if (err < 0)
			return err;
	} else if (nlmsg_len(nlh) >= sizeof(struct rtmsg)) {
		struct rtmsg *rtm = nlmsg_data(nlh);

		filter.flags = rtm->rtm_flags & (RTM_F_PREFIX | RTM_F_CLONED);
	}

	/* fib entries are never clones and ipv4 does not use prefix flag */
	if (filter.flags & (RTM_F_PREFIX | RTM_F_CLONED))
		return skb->len;

	/* single-table dump when a table filter was supplied */
	if (filter.table_id) {
		tb = fib_get_table(net, filter.table_id);
		if (!tb) {
			if (filter.dump_all_families)
				return skb->len;

			NL_SET_ERR_MSG(cb->extack, "ipv4: FIB table does not exist");
			return -ENOENT;
		}

		err = fib_table_dump(tb, skb, cb, &filter);
		return skb->len ? : err;
	}

	s_h = cb->args[0];
	s_e = cb->args[1];

	rcu_read_lock();

	for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
		e = 0;
		head = &net->ipv4.fib_table_hash[h];
		hlist_for_each_entry_rcu(tb, head, tb_hlist) {
			/* skip tables already dumped in a prior call */
			if (e < s_e)
				goto next;
			if (dumped)
				memset(&cb->args[2], 0, sizeof(cb->args) -
						 2 * sizeof(cb->args[0]));
			err = fib_table_dump(tb, skb, cb, &filter);
			if (err < 0) {
				if (likely(skb->len))
					goto out;

				goto out_err;
			}
			dumped = 1;
next:
			e++;
		}
	}
out:
	err = skb->len;
out_err:
	rcu_read_unlock();

	/* record resume position for the next invocation */
	cb->args[1] = e;
	cb->args[0] = h;

	return err;
}
/* Prepare and feed intra-kernel routing request.
 * Really, it should be netlink message, but :-( netlink
 * can be not configured, so that we feed it directly
 * to fib engine. It is legal, because all events occur
 * only when netlink is already locked.
 *
 * @cmd is RTM_NEWROUTE or RTM_DELROUTE; @type selects the target table
 * (unicast routes go to MAIN, everything else to LOCAL) unless the
 * device has an L3 master table.
 */
static void fib_magic(int cmd, int type, __be32 dst, int dst_len,
		      struct in_ifaddr *ifa, u32 rt_priority)
{
	struct net *net = dev_net(ifa->ifa_dev->dev);
	u32 tb_id = l3mdev_fib_table(ifa->ifa_dev->dev);
	struct fib_table *tb;
	struct fib_config cfg = {
		.fc_protocol = RTPROT_KERNEL,
		.fc_type = type,
		.fc_dst = dst,
		.fc_dst_len = dst_len,
		.fc_priority = rt_priority,
		.fc_prefsrc = ifa->ifa_local,
		.fc_oif = ifa->ifa_dev->dev->ifindex,
		.fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
		.fc_nlinfo = {
			.nl_net = net,
		},
	};

	if (!tb_id)
		tb_id = (type == RTN_UNICAST) ? RT_TABLE_MAIN : RT_TABLE_LOCAL;

	tb = fib_new_table(net, tb_id);
	if (!tb)
		return;

	cfg.fc_table = tb->tb_id;

	if (type != RTN_LOCAL)
		cfg.fc_scope = RT_SCOPE_LINK;
	else
		cfg.fc_scope = RT_SCOPE_HOST;

	if (cmd == RTM_NEWROUTE)
		fib_table_insert(net, tb, &cfg, NULL);
	else
		fib_table_delete(net, tb, &cfg, NULL);
}
/* Install the kernel routes implied by a newly added interface address:
 * a /32 local route for the address itself and, for a primary address
 * on an up interface, the covering prefix route plus the subnet
 * broadcast routes.
 */
void fib_add_ifaddr(struct in_ifaddr *ifa)
{
	struct in_device *in_dev = ifa->ifa_dev;
	struct net_device *dev = in_dev->dev;
	struct in_ifaddr *prim = ifa;
	__be32 mask = ifa->ifa_mask;
	__be32 addr = ifa->ifa_local;
	__be32 prefix = ifa->ifa_address & mask;

	/* A secondary address relies on its primary (for fc_prefsrc);
	 * locate the primary covering the same prefix.
	 */
	if (ifa->ifa_flags & IFA_F_SECONDARY) {
		prim = inet_ifa_byprefix(in_dev, prefix, mask);
		if (!prim) {
			pr_warn("%s: bug: prim == NULL\n", __func__);
			return;
		}
	}

	fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim, 0);

	if (!(dev->flags & IFF_UP))
		return;

	/* Add broadcast address, if it is explicitly assigned. */
	if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
		fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32,
			  prim, 0);

	/* Only a primary address with a real (non-zeronet, non-host)
	 * prefix installs the subnet routes.
	 */
	if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags & IFA_F_SECONDARY) &&
	    (prefix != addr || ifa->ifa_prefixlen < 32)) {
		if (!(ifa->ifa_flags & IFA_F_NOPREFIXROUTE))
			fib_magic(RTM_NEWROUTE,
				  dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
				  prefix, ifa->ifa_prefixlen, prim,
				  ifa->ifa_rt_priority);

		/* Add network specific broadcasts, when it takes a sense */
		if (ifa->ifa_prefixlen < 31) {
			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32,
				  prim, 0);
			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix | ~mask,
				  32, prim, 0);
		}
	}
}
/* Change the metric of an address's prefix route.  The new route is
 * added before the old one is deleted so the prefix stays reachable
 * throughout the swap.
 */
void fib_modify_prefix_metric(struct in_ifaddr *ifa, u32 new_metric)
{
	__be32 prefix = ifa->ifa_address & ifa->ifa_mask;
	struct in_device *in_dev = ifa->ifa_dev;
	struct net_device *dev = in_dev->dev;

	/* Nothing to do when no prefix route exists for this address:
	 * interface down, secondary or no-prefixroute address, zeronet
	 * prefix, or a host (/32) address.
	 */
	if (!(dev->flags & IFF_UP) ||
	    ifa->ifa_flags & (IFA_F_SECONDARY | IFA_F_NOPREFIXROUTE) ||
	    ipv4_is_zeronet(prefix) ||
	    prefix == ifa->ifa_local || ifa->ifa_prefixlen == 32)
		return;

	/* add the new */
	fib_magic(RTM_NEWROUTE,
		  dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
		  prefix, ifa->ifa_prefixlen, ifa, new_metric);

	/* delete the old */
	fib_magic(RTM_DELROUTE,
		  dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
		  prefix, ifa->ifa_prefixlen, ifa, ifa->ifa_rt_priority);
}
/* Delete primary or secondary address.
 * Optionally, on secondary address promotion consider the addresses
 * from subnet iprim as deleted, even if they are in device list.
 * In this case the secondary ifa can be in device list.
 */
void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim)
{
	struct in_device *in_dev = ifa->ifa_dev;
	struct net_device *dev = in_dev->dev;
	struct in_ifaddr *ifa1;
	struct in_ifaddr *prim = ifa, *prim1 = NULL;
	__be32 brd = ifa->ifa_address | ~ifa->ifa_mask;	/* subnet broadcast */
	__be32 any = ifa->ifa_address & ifa->ifa_mask;	/* subnet address */
/* Bits in 'ok': a set bit means some surviving address still needs the
 * corresponding route, so it must NOT be deleted below.
 */
#define LOCAL_OK	1
#define BRD_OK		2
#define BRD0_OK		4
#define BRD1_OK		8
	unsigned int ok = 0;
	int subnet = 0;		/* Primary network */
	int gone = 1;		/* Address is missing */
	int same_prefsrc = 0;	/* Another primary with same IP */

	if (ifa->ifa_flags & IFA_F_SECONDARY) {
		prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
		if (!prim) {
			/* if the device has been deleted, we don't perform
			 * address promotion
			 */
			if (!in_dev->dead)
				pr_warn("%s: bug: prim == NULL\n", __func__);
			return;
		}
		if (iprim && iprim != prim) {
			pr_warn("%s: bug: iprim != prim\n", __func__);
			return;
		}
	} else if (!ipv4_is_zeronet(any) &&
		   (any != ifa->ifa_local || ifa->ifa_prefixlen < 32)) {
		/* Primary address: drop the prefix route now; broadcast
		 * routes are reconsidered once we know what survives.
		 */
		if (!(ifa->ifa_flags & IFA_F_NOPREFIXROUTE))
			fib_magic(RTM_DELROUTE,
				  dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
				  any, ifa->ifa_prefixlen, prim, 0);
		subnet = 1;
	}

	if (in_dev->dead)
		goto no_promotions;

	/* Deletion is more complicated than add.
	 * We should take care of not to delete too much :-)
	 *
	 * Scan address list to be sure that addresses are really gone.
	 */
	for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
		if (ifa1 == ifa) {
			/* promotion, keep the IP */
			gone = 0;
			continue;
		}
		/* Ignore IFAs from our subnet */
		if (iprim && ifa1->ifa_mask == iprim->ifa_mask &&
		    inet_ifa_match(ifa1->ifa_address, iprim))
			continue;

		/* Ignore ifa1 if it uses different primary IP (prefsrc) */
		if (ifa1->ifa_flags & IFA_F_SECONDARY) {
			/* Another address from our subnet? */
			if (ifa1->ifa_mask == prim->ifa_mask &&
			    inet_ifa_match(ifa1->ifa_address, prim))
				prim1 = prim;
			else {
				/* We reached the secondaries, so
				 * same_prefsrc should be determined.
				 */
				if (!same_prefsrc)
					continue;
				/* Search new prim1 if ifa1 is not
				 * using the current prim1
				 */
				if (!prim1 ||
				    ifa1->ifa_mask != prim1->ifa_mask ||
				    !inet_ifa_match(ifa1->ifa_address, prim1))
					prim1 = inet_ifa_byprefix(in_dev,
							ifa1->ifa_address,
							ifa1->ifa_mask);
				if (!prim1)
					continue;
				if (prim1->ifa_local != prim->ifa_local)
					continue;
			}
		} else {
			if (prim->ifa_local != ifa1->ifa_local)
				continue;
			prim1 = ifa1;
			if (prim != prim1)
				same_prefsrc = 1;
		}
		/* ifa1 shares our prefsrc: record which of our routes it
		 * still keeps alive.
		 */
		if (ifa->ifa_local == ifa1->ifa_local)
			ok |= LOCAL_OK;
		if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
			ok |= BRD_OK;
		if (brd == ifa1->ifa_broadcast)
			ok |= BRD1_OK;
		if (any == ifa1->ifa_broadcast)
			ok |= BRD0_OK;
		/* primary has network specific broadcasts */
		if (prim1 == ifa1 && ifa1->ifa_prefixlen < 31) {
			__be32 brd1 = ifa1->ifa_address | ~ifa1->ifa_mask;
			__be32 any1 = ifa1->ifa_address & ifa1->ifa_mask;

			if (!ipv4_is_zeronet(any1)) {
				if (ifa->ifa_broadcast == brd1 ||
				    ifa->ifa_broadcast == any1)
					ok |= BRD_OK;
				if (brd == brd1 || brd == any1)
					ok |= BRD1_OK;
				if (any == brd1 || any == any1)
					ok |= BRD0_OK;
			}
		}
	}

no_promotions:
	/* Delete only the routes no surviving address still requires */
	if (!(ok & BRD_OK))
		fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32,
			  prim, 0);
	if (subnet && ifa->ifa_prefixlen < 31) {
		if (!(ok & BRD1_OK))
			fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32,
				  prim, 0);
		if (!(ok & BRD0_OK))
			fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32,
				  prim, 0);
	}
	if (!(ok & LOCAL_OK)) {
		unsigned int addr_type;

		fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim, 0);

		/* Check, that this local address finally disappeared. */
		addr_type = inet_addr_type_dev_table(dev_net(dev), dev,
						     ifa->ifa_local);
		if (gone && addr_type != RTN_LOCAL) {
			/* And the last, but not the least thing.
			 * We must flush stray FIB entries.
			 *
			 * First of all, we scan fib_info list searching
			 * for stray nexthop entries, then ignite fib_flush.
			 */
			if (fib_sync_down_addr(dev, ifa->ifa_local))
				fib_flush(dev_net(dev));
		}
	}
#undef LOCAL_OK
#undef BRD_OK
#undef BRD0_OK
#undef BRD1_OK
}
  1053. static void nl_fib_lookup(struct net *net, struct fib_result_nl *frn)
  1054. {
  1055. struct fib_result res;
  1056. struct flowi4 fl4 = {
  1057. .flowi4_mark = frn->fl_mark,
  1058. .daddr = frn->fl_addr,
  1059. .flowi4_tos = frn->fl_tos,
  1060. .flowi4_scope = frn->fl_scope,
  1061. };
  1062. struct fib_table *tb;
  1063. rcu_read_lock();
  1064. tb = fib_get_table(net, frn->tb_id_in);
  1065. frn->err = -ENOENT;
  1066. if (tb) {
  1067. local_bh_disable();
  1068. frn->tb_id = tb->tb_id;
  1069. frn->err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF);
  1070. if (!frn->err) {
  1071. frn->prefixlen = res.prefixlen;
  1072. frn->nh_sel = res.nh_sel;
  1073. frn->type = res.type;
  1074. frn->scope = res.scope;
  1075. }
  1076. local_bh_enable();
  1077. }
  1078. rcu_read_unlock();
  1079. }
/* Input handler for the NETLINK_FIB_LOOKUP kernel socket: validate the
 * request, run the lookup, and unicast the filled-in fib_result_nl
 * back to the requesting portid.
 */
static void nl_fib_input(struct sk_buff *skb)
{
	struct net *net;
	struct fib_result_nl *frn;
	struct nlmsghdr *nlh;
	u32 portid;

	net = sock_net(skb->sk);
	nlh = nlmsg_hdr(skb);
	/* Reject truncated or undersized requests */
	if (skb->len < nlmsg_total_size(sizeof(*frn)) ||
	    skb->len < nlh->nlmsg_len ||
	    nlmsg_len(nlh) < sizeof(*frn))
		return;

	/* Clone so the reply can be written without modifying the
	 * original (possibly shared) buffer.
	 */
	skb = netlink_skb_clone(skb, GFP_KERNEL);
	if (!skb)
		return;
	nlh = nlmsg_hdr(skb);

	/* The lookup result is written in place over the request data */
	frn = (struct fib_result_nl *) nlmsg_data(nlh);
	nl_fib_lookup(net, frn);

	portid = NETLINK_CB(skb).portid;      /* netlink portid */
	NETLINK_CB(skb).portid = 0;           /* from kernel */
	NETLINK_CB(skb).dst_group = 0;        /* unicast */
	netlink_unicast(net->ipv4.fibnl, skb, portid, MSG_DONTWAIT);
}
  1103. static int __net_init nl_fib_lookup_init(struct net *net)
  1104. {
  1105. struct sock *sk;
  1106. struct netlink_kernel_cfg cfg = {
  1107. .input = nl_fib_input,
  1108. };
  1109. sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, &cfg);
  1110. if (!sk)
  1111. return -EAFNOSUPPORT;
  1112. net->ipv4.fibnl = sk;
  1113. return 0;
  1114. }
  1115. static void nl_fib_lookup_exit(struct net *net)
  1116. {
  1117. netlink_kernel_release(net->ipv4.fibnl);
  1118. net->ipv4.fibnl = NULL;
  1119. }
  1120. static void fib_disable_ip(struct net_device *dev, unsigned long event,
  1121. bool force)
  1122. {
  1123. if (fib_sync_down_dev(dev, event, force))
  1124. fib_flush(dev_net(dev));
  1125. else
  1126. rt_cache_flush(dev_net(dev));
  1127. arp_ifdown(dev);
  1128. }
/* inetaddr notifier: keep the FIB in sync when an IPv4 address is
 * added to (NETDEV_UP) or removed from (NETDEV_DOWN) an interface.
 */
static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
	struct net_device *dev = ifa->ifa_dev->dev;
	struct net *net = dev_net(dev);

	switch (event) {
	case NETDEV_UP:
		fib_add_ifaddr(ifa);
#ifdef CONFIG_IP_ROUTE_MULTIPATH
		fib_sync_up(dev, RTNH_F_DEAD);
#endif
		/* invalidate cached source-address selection */
		atomic_inc(&net->ipv4.dev_addr_genid);
		rt_cache_flush(dev_net(dev));
		break;
	case NETDEV_DOWN:
		fib_del_ifaddr(ifa, NULL);
		atomic_inc(&net->ipv4.dev_addr_genid);
		if (!ifa->ifa_dev->ifa_list) {
			/* Last address was deleted from this interface.
			 * Disable IP.
			 */
			fib_disable_ip(dev, event, true);
		} else {
			rt_cache_flush(dev_net(dev));
		}
		break;
	}
	return NOTIFY_DONE;
}
/* netdevice notifier: react to device unregister, up/down transitions,
 * carrier changes, MTU changes, and L3 master (VRF) enslavement.
 * ptr's concrete notifier-info type depends on the event.
 */
static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct netdev_notifier_changeupper_info *upper_info = ptr;
	struct netdev_notifier_info_ext *info_ext = ptr;
	struct in_device *in_dev;
	struct net *net = dev_net(dev);
	unsigned int flags;

	if (event == NETDEV_UNREGISTER) {
		fib_disable_ip(dev, event, true);
		rt_flush_dev(dev);
		return NOTIFY_DONE;
	}

	in_dev = __in_dev_get_rtnl(dev);
	if (!in_dev)
		return NOTIFY_DONE;

	switch (event) {
	case NETDEV_UP:
		/* Re-install routes for every address on the device */
		for_ifa(in_dev) {
			fib_add_ifaddr(ifa);
		} endfor_ifa(in_dev);
#ifdef CONFIG_IP_ROUTE_MULTIPATH
		fib_sync_up(dev, RTNH_F_DEAD);
#endif
		atomic_inc(&net->ipv4.dev_addr_genid);
		rt_cache_flush(net);
		break;
	case NETDEV_DOWN:
		fib_disable_ip(dev, event, false);
		break;
	case NETDEV_CHANGE:
		/* Carrier change: toggle the LINKDOWN nexthop flag */
		flags = dev_get_flags(dev);
		if (flags & (IFF_RUNNING | IFF_LOWER_UP))
			fib_sync_up(dev, RTNH_F_LINKDOWN);
		else
			fib_sync_down_dev(dev, event, false);
		rt_cache_flush(net);
		break;
	case NETDEV_CHANGEMTU:
		fib_sync_mtu(dev, info_ext->ext.mtu);
		rt_cache_flush(net);
		break;
	case NETDEV_CHANGEUPPER:
		upper_info = ptr;
		/* flush all routes if dev is linked to or unlinked from
		 * an L3 master device (e.g., VRF)
		 */
		if (upper_info->upper_dev &&
		    netif_is_l3_master(upper_info->upper_dev))
			fib_disable_ip(dev, NETDEV_DOWN, true);
		break;
	}
	return NOTIFY_DONE;
}
/* Notifier registrations: address changes and device state changes */
static struct notifier_block fib_inetaddr_notifier = {
	.notifier_call = fib_inetaddr_event,
};

static struct notifier_block fib_netdev_notifier = {
	.notifier_call = fib_netdev_event,
};
/* Per-namespace FIB core setup: notifier chain, table hash array, and
 * policy rules.  Returns 0 or a negative errno, unwinding on failure.
 */
static int __net_init ip_fib_net_init(struct net *net)
{
	int err;
	size_t size = sizeof(struct hlist_head) * FIB_TABLE_HASHSZ;

	err = fib4_notifier_init(net);
	if (err)
		return err;

	/* Avoid false sharing : Use at least a full cache line */
	size = max_t(size_t, size, L1_CACHE_BYTES);

	net->ipv4.fib_table_hash = kzalloc(size, GFP_KERNEL);
	if (!net->ipv4.fib_table_hash) {
		err = -ENOMEM;
		goto err_table_hash_alloc;
	}

	err = fib4_rules_init(net);
	if (err < 0)
		goto err_rules_init;
	return 0;

err_rules_init:
	kfree(net->ipv4.fib_table_hash);
err_table_hash_alloc:
	fib4_notifier_exit(net);
	return err;
}
/* Per-namespace FIB core teardown; pairs with ip_fib_net_init() */
static void ip_fib_net_exit(struct net *net)
{
	int i;

	rtnl_lock();
#ifdef CONFIG_IP_MULTIPLE_TABLES
	/* Clear the cached main/default table pointers before freeing
	 * the tables themselves.
	 */
	RCU_INIT_POINTER(net->ipv4.fib_main, NULL);
	RCU_INIT_POINTER(net->ipv4.fib_default, NULL);
#endif
	/* Destroy the tables in reverse order to guarantee that the
	 * local table, ID 255, is destroyed before the main table, ID
	 * 254. This is necessary as the local table may contain
	 * references to data contained in the main table.
	 */
	for (i = FIB_TABLE_HASHSZ - 1; i >= 0; i--) {
		struct hlist_head *head = &net->ipv4.fib_table_hash[i];
		struct hlist_node *tmp;
		struct fib_table *tb;

		hlist_for_each_entry_safe(tb, tmp, head, tb_hlist) {
			hlist_del(&tb->tb_hlist);
			fib_table_flush(net, tb);
			fib_free_table(tb);
		}
	}
#ifdef CONFIG_IP_MULTIPLE_TABLES
	fib4_rules_exit(net);
#endif
	rtnl_unlock();
	kfree(net->ipv4.fib_table_hash);
	fib4_notifier_exit(net);
}
/* pernet init: FIB core, FIB-lookup netlink socket, /proc entries.
 * Each failure unwinds the steps that already succeeded.
 */
static int __net_init fib_net_init(struct net *net)
{
	int error;

#ifdef CONFIG_IP_ROUTE_CLASSID
	net->ipv4.fib_num_tclassid_users = 0;
#endif
	error = ip_fib_net_init(net);
	if (error < 0)
		goto out;
	error = nl_fib_lookup_init(net);
	if (error < 0)
		goto out_nlfl;
	error = fib_proc_init(net);
	if (error < 0)
		goto out_proc;
out:
	return error;

out_proc:
	nl_fib_lookup_exit(net);
out_nlfl:
	ip_fib_net_exit(net);
	goto out;
}
/* pernet exit: tear down in reverse order of fib_net_init() */
static void __net_exit fib_net_exit(struct net *net)
{
	fib_proc_exit(net);
	nl_fib_lookup_exit(net);
	ip_fib_net_exit(net);
}
/* Per-network-namespace init/exit hooks for the IPv4 FIB */
static struct pernet_operations fib_net_ops = {
	.init = fib_net_init,
	.exit = fib_net_exit,
};
/* One-time boot initialization of the IPv4 FIB subsystem */
void __init ip_fib_init(void)
{
	fib_trie_init();

	register_pernet_subsys(&fib_net_ops);

	register_netdevice_notifier(&fib_netdev_notifier);
	register_inetaddr_notifier(&fib_inetaddr_notifier);

	/* doit handlers for route add/delete, dumpit for RTM_GETROUTE */
	rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, 0);
	rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, 0);
	rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, 0);
}