af_mpls.c 49 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165
  1. #include <linux/types.h>
  2. #include <linux/skbuff.h>
  3. #include <linux/socket.h>
  4. #include <linux/sysctl.h>
  5. #include <linux/net.h>
  6. #include <linux/module.h>
  7. #include <linux/if_arp.h>
  8. #include <linux/ipv6.h>
  9. #include <linux/mpls.h>
  10. #include <linux/netconf.h>
  11. #include <linux/vmalloc.h>
  12. #include <linux/percpu.h>
  13. #include <net/ip.h>
  14. #include <net/dst.h>
  15. #include <net/sock.h>
  16. #include <net/arp.h>
  17. #include <net/ip_fib.h>
  18. #include <net/netevent.h>
  19. #include <net/netns/generic.h>
  20. #if IS_ENABLED(CONFIG_IPV6)
  21. #include <net/ipv6.h>
  22. #endif
  23. #include <net/addrconf.h>
  24. #include <net/nexthop.h>
  25. #include "internal.h"
/* Maximum number of labels to look ahead at when selecting a path of
 * a multipath route
 */
#define MAX_MP_SELECT_LABELS 4

/* Pseudo neighbour-table id meaning "no via address configured";
 * packets are then transmitted using the output device's own
 * link-layer address (see mpls_forward()).
 */
#define MPLS_NEIGH_TABLE_UNSPEC (NEIGH_LINK_TABLE + 1)

/* Integer bounds for proc/sysctl handlers - presumably referenced by
 * ctl_table entries later in this file; TODO confirm against full file.
 */
static int zero = 0;
static int one = 1;
static int label_limit = (1 << 20) - 1;	/* MPLS labels are 20-bit values */

/* Forward declaration: netlink RTM_NEWROUTE/RTM_DELROUTE notifier,
 * defined later in this file.
 */
static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt,
		       struct nlmsghdr *nlh, struct net *net, u32 portid,
		       unsigned int nlm_flags);
  37. static struct mpls_route *mpls_route_input_rcu(struct net *net, unsigned index)
  38. {
  39. struct mpls_route *rt = NULL;
  40. if (index < net->mpls.platform_labels) {
  41. struct mpls_route __rcu **platform_label =
  42. rcu_dereference(net->mpls.platform_label);
  43. rt = rcu_dereference(platform_label[index]);
  44. }
  45. return rt;
  46. }
  47. bool mpls_output_possible(const struct net_device *dev)
  48. {
  49. return dev && (dev->flags & IFF_UP) && netif_carrier_ok(dev);
  50. }
  51. EXPORT_SYMBOL_GPL(mpls_output_possible);
  52. static u8 *__mpls_nh_via(struct mpls_route *rt, struct mpls_nh *nh)
  53. {
  54. u8 *nh0_via = PTR_ALIGN((u8 *)&rt->rt_nh[rt->rt_nhn], VIA_ALEN_ALIGN);
  55. int nh_index = nh - rt->rt_nh;
  56. return nh0_via + rt->rt_max_alen * nh_index;
  57. }
/* const-qualified wrapper around __mpls_nh_via(): locate the via
 * address storage for @nh without granting the caller write access.
 */
static const u8 *mpls_nh_via(const struct mpls_route *rt,
			     const struct mpls_nh *nh)
{
	return __mpls_nh_via((struct mpls_route *)rt, (struct mpls_nh *)nh);
}
/* Bytes of MPLS shim headers that must be pushed when forwarding via
 * @nh: one 4-byte label stack entry per configured output label.
 */
static unsigned int mpls_nh_header_size(const struct mpls_nh *nh)
{
	/* The size of the layer 2.5 labels to be added for this route */
	return nh->nh_labels * sizeof(struct mpls_shim_hdr);
}
/* MTU to use when transmitting MPLS packets on @dev. */
unsigned int mpls_dev_mtu(const struct net_device *dev)
{
	/* The amount of data the layer 2 frame can hold */
	return dev->mtu;
}
EXPORT_SYMBOL_GPL(mpls_dev_mtu);
  74. bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
  75. {
  76. if (skb->len <= mtu)
  77. return false;
  78. if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
  79. return false;
  80. return true;
  81. }
  82. EXPORT_SYMBOL_GPL(mpls_pkt_too_big);
/* Account one transmitted unicast packet on @dev against the counters
 * matching skb->protocol: per-device MPLS stats for ETH_P_MPLS_UC,
 * the core IPv4/IPv6 SNMP MIBs for ETH_P_IP/ETH_P_IPV6 (the latter
 * only when IPv6 is built in).
 */
void mpls_stats_inc_outucastpkts(struct net_device *dev,
				 const struct sk_buff *skb)
{
	struct mpls_dev *mdev;

	if (skb->protocol == htons(ETH_P_MPLS_UC)) {
		mdev = mpls_dev_get(dev);
		if (mdev)
			MPLS_INC_STATS_LEN(mdev, skb->len,
					   tx_packets,
					   tx_bytes);
	} else if (skb->protocol == htons(ETH_P_IP)) {
		IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUT, skb->len);
#if IS_ENABLED(CONFIG_IPV6)
	} else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct inet6_dev *in6dev = __in6_dev_get(dev);

		if (in6dev)
			IP6_UPD_PO_STATS(dev_net(dev), in6dev,
					 IPSTATS_MIB_OUT, skb->len);
#endif
	}
}
EXPORT_SYMBOL_GPL(mpls_stats_inc_outucastpkts);
/* Compute a flow hash for multipath selection by walking up to
 * MAX_MP_SELECT_LABELS entries of @skb's label stack and, at the
 * bottom of stack, mixing in the IPv4/IPv6 addresses and protocol of
 * the payload when one is present.
 *
 * Each pskb_may_pull() guarantees the header bytes read below it are
 * in the linear area; if a pull fails we stop and hash what we have.
 */
static u32 mpls_multipath_hash(struct mpls_route *rt, struct sk_buff *skb)
{
	struct mpls_entry_decoded dec;
	unsigned int mpls_hdr_len = 0;
	struct mpls_shim_hdr *hdr;
	bool eli_seen = false;
	int label_index;
	u32 hash = 0;

	for (label_index = 0; label_index < MAX_MP_SELECT_LABELS;
	     label_index++) {
		mpls_hdr_len += sizeof(*hdr);
		if (!pskb_may_pull(skb, mpls_hdr_len))
			break;

		/* Read and decode the current label */
		hdr = mpls_hdr(skb) + label_index;
		dec = mpls_entry_decode(hdr);

		/* RFC6790 - reserved labels MUST NOT be used as keys
		 * for the load-balancing function
		 */
		if (likely(dec.label >= MPLS_LABEL_FIRST_UNRESERVED)) {
			hash = jhash_1word(dec.label, hash);

			/* The entropy label follows the entropy label
			 * indicator, so this means that the entropy
			 * label was just added to the hash - no need to
			 * go any deeper either in the label stack or in the
			 * payload
			 */
			if (eli_seen)
				break;
		} else if (dec.label == MPLS_LABEL_ENTROPY) {
			eli_seen = true;
		}

		if (!dec.bos)
			continue;

		/* found bottom label; does skb have room for a header? */
		if (pskb_may_pull(skb, mpls_hdr_len + sizeof(struct iphdr))) {
			const struct iphdr *v4hdr;

			v4hdr = (const struct iphdr *)(hdr + 1);
			if (v4hdr->version == 4) {
				hash = jhash_3words(ntohl(v4hdr->saddr),
						    ntohl(v4hdr->daddr),
						    v4hdr->protocol, hash);
			} else if (v4hdr->version == 6 &&
				   pskb_may_pull(skb, mpls_hdr_len +
						 sizeof(struct ipv6hdr))) {
				const struct ipv6hdr *v6hdr;

				v6hdr = (const struct ipv6hdr *)(hdr + 1);
				hash = __ipv6_addr_jhash(&v6hdr->saddr, hash);
				hash = __ipv6_addr_jhash(&v6hdr->daddr, hash);
				hash = jhash_1word(v6hdr->nexthdr, hash);
			}
		}

		break;
	}

	return hash;
}
  161. static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt,
  162. struct sk_buff *skb)
  163. {
  164. int alive = ACCESS_ONCE(rt->rt_nhn_alive);
  165. u32 hash = 0;
  166. int nh_index = 0;
  167. int n = 0;
  168. /* No need to look further into packet if there's only
  169. * one path
  170. */
  171. if (rt->rt_nhn == 1)
  172. goto out;
  173. if (alive <= 0)
  174. return NULL;
  175. hash = mpls_multipath_hash(rt, skb);
  176. nh_index = hash % alive;
  177. if (alive == rt->rt_nhn)
  178. goto out;
  179. for_nexthops(rt) {
  180. if (nh->nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN))
  181. continue;
  182. if (n == nh_index)
  183. return nh;
  184. n++;
  185. } endfor_nexthops(rt);
  186. out:
  187. return &rt->rt_nh[nh_index];
  188. }
/* Hand @skb back to IP after the last label was popped: set
 * skb->protocol and write the outgoing TTL/hop-limit, either
 * propagated from the MPLS header (@dec.ttl, already decremented by
 * the caller) or decremented in the IP header itself, per the
 * route/netns TTL-propagation policy.  Returns false when the payload
 * protocol cannot be determined or the packet is too short.
 */
static bool mpls_egress(struct net *net, struct mpls_route *rt,
			struct sk_buff *skb, struct mpls_entry_decoded dec)
{
	enum mpls_payload_type payload_type;
	bool success = false;

	/* The IPv4 code below accesses through the IPv4 header
	 * checksum, which is 12 bytes into the packet.
	 * The IPv6 code below accesses through the IPv6 hop limit
	 * which is 8 bytes into the packet.
	 *
	 * For all supported cases there should always be at least 12
	 * bytes of packet data present.  The IPv4 header is 20 bytes
	 * without options and the IPv6 header is always 40 bytes
	 * long.
	 */
	if (!pskb_may_pull(skb, 12))
		return false;

	payload_type = rt->rt_payload_type;
	if (payload_type == MPT_UNSPEC)
		/* IP version nibble doubles as the payload type:
		 * MPT_IPV4 == 4, MPT_IPV6 == 6 - TODO confirm the enum
		 * values in internal.h match this assumption.
		 */
		payload_type = ip_hdr(skb)->version;

	switch (payload_type) {
	case MPT_IPV4: {
		struct iphdr *hdr4 = ip_hdr(skb);
		u8 new_ttl;

		skb->protocol = htons(ETH_P_IP);

		/* If propagating TTL, take the decremented TTL from
		 * the incoming MPLS header, otherwise decrement the
		 * TTL, but only if not 0 to avoid underflow.
		 */
		if (rt->rt_ttl_propagate == MPLS_TTL_PROP_ENABLED ||
		    (rt->rt_ttl_propagate == MPLS_TTL_PROP_DEFAULT &&
		     net->mpls.ip_ttl_propagate))
			new_ttl = dec.ttl;
		else
			new_ttl = hdr4->ttl ? hdr4->ttl - 1 : 0;

		/* Incrementally patch the IPv4 header checksum for the
		 * TTL byte change instead of recomputing it.
		 */
		csum_replace2(&hdr4->check,
			      htons(hdr4->ttl << 8),
			      htons(new_ttl << 8));
		hdr4->ttl = new_ttl;
		success = true;
		break;
	}
	case MPT_IPV6: {
		struct ipv6hdr *hdr6 = ipv6_hdr(skb);

		skb->protocol = htons(ETH_P_IPV6);

		/* If propagating TTL, take the decremented TTL from
		 * the incoming MPLS header, otherwise decrement the
		 * hop limit, but only if not 0 to avoid underflow.
		 */
		if (rt->rt_ttl_propagate == MPLS_TTL_PROP_ENABLED ||
		    (rt->rt_ttl_propagate == MPLS_TTL_PROP_DEFAULT &&
		     net->mpls.ip_ttl_propagate))
			hdr6->hop_limit = dec.ttl;
		else if (hdr6->hop_limit)
			hdr6->hop_limit = hdr6->hop_limit - 1;
		success = true;
		break;
	}
	case MPT_UNSPEC:
		/* Should have decided which protocol it is by now */
		break;
	}

	return success;
}
/* Receive handler for ETH_P_MPLS_UC packets (registered via
 * mpls_packet_type below): decode the top label, look up the route,
 * pick a nexthop, pop/swap/push labels and transmit via neigh_xmit().
 * Always consumes @skb and returns 0 on success or NET_RX_DROP.
 */
static int mpls_forward(struct sk_buff *skb, struct net_device *dev,
			struct packet_type *pt, struct net_device *orig_dev)
{
	struct net *net = dev_net(dev);
	struct mpls_shim_hdr *hdr;
	struct mpls_route *rt;
	struct mpls_nh *nh;
	struct mpls_entry_decoded dec;
	struct net_device *out_dev;
	struct mpls_dev *out_mdev;
	struct mpls_dev *mdev;
	unsigned int hh_len;
	unsigned int new_header_size;
	unsigned int mtu;
	int err;

	/* Careful this entire function runs inside of an rcu critical section */

	mdev = mpls_dev_get(dev);
	if (!mdev)
		goto drop;

	MPLS_INC_STATS_LEN(mdev, skb->len, rx_packets,
			   rx_bytes);

	if (!mdev->input_enabled) {
		MPLS_INC_STATS(mdev, rx_dropped);
		goto drop;
	}

	/* Only forward packets addressed to us (not promiscuous pickups) */
	if (skb->pkt_type != PACKET_HOST)
		goto err;

	/* Get a private copy if the skb is shared before modifying it */
	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
		goto err;

	if (!pskb_may_pull(skb, sizeof(*hdr)))
		goto err;

	/* Read and decode the label */
	hdr = mpls_hdr(skb);
	dec = mpls_entry_decode(hdr);

	rt = mpls_route_input_rcu(net, dec.label);
	if (!rt) {
		MPLS_INC_STATS(mdev, rx_noroute);
		goto drop;
	}

	nh = mpls_select_multipath(rt, skb);
	if (!nh)
		goto err;

	/* Pop the label */
	skb_pull(skb, sizeof(*hdr));
	skb_reset_network_header(skb);

	skb_orphan(skb);

	if (skb_warn_if_lro(skb))
		goto err;

	skb_forward_csum(skb);

	/* Verify ttl is valid */
	if (dec.ttl <= 1)
		goto err;
	dec.ttl -= 1;

	/* Find the output device */
	out_dev = rcu_dereference(nh->nh_dev);
	if (!mpls_output_possible(out_dev))
		goto tx_err;

	/* Verify the destination can hold the packet */
	new_header_size = mpls_nh_header_size(nh);
	mtu = mpls_dev_mtu(out_dev);
	if (mpls_pkt_too_big(skb, mtu - new_header_size))
		goto tx_err;

	/* Reserve link-layer header room only for devices that build one */
	hh_len = LL_RESERVED_SPACE(out_dev);
	if (!out_dev->header_ops)
		hh_len = 0;

	/* Ensure there is enough space for the headers in the skb */
	if (skb_cow(skb, hh_len + new_header_size))
		goto tx_err;

	skb->dev = out_dev;
	skb->protocol = htons(ETH_P_MPLS_UC);

	if (unlikely(!new_header_size && dec.bos)) {
		/* Penultimate hop popping */
		if (!mpls_egress(dev_net(out_dev), rt, skb, dec))
			goto err;
	} else {
		bool bos;
		int i;

		skb_push(skb, new_header_size);
		skb_reset_network_header(skb);

		/* Push the new labels, innermost first so the bottom-of
		 * -stack bit lands on the last entry only.
		 */
		hdr = mpls_hdr(skb);
		bos = dec.bos;
		for (i = nh->nh_labels - 1; i >= 0; i--) {
			hdr[i] = mpls_entry_encode(nh->nh_label[i],
						   dec.ttl, 0, bos);
			bos = false;
		}
	}

	mpls_stats_inc_outucastpkts(out_dev, skb);

	/* If via wasn't specified then send out using device address */
	if (nh->nh_via_table == MPLS_NEIGH_TABLE_UNSPEC)
		err = neigh_xmit(NEIGH_LINK_TABLE, out_dev,
				 out_dev->dev_addr, skb);
	else
		err = neigh_xmit(nh->nh_via_table, out_dev,
				 mpls_nh_via(rt, nh), skb);
	if (err)
		net_dbg_ratelimited("%s: packet transmission failed: %d\n",
				    __func__, err);
	return 0;

tx_err:
	out_mdev = out_dev ? mpls_dev_get(out_dev) : NULL;
	if (out_mdev)
		MPLS_INC_STATS(out_mdev, tx_errors);
	goto drop;
err:
	MPLS_INC_STATS(mdev, rx_errors);
drop:
	kfree_skb(skb);
	return NET_RX_DROP;
}
/* Hook mpls_forward() into the receive path for ETH_P_MPLS_UC frames;
 * registration happens elsewhere in this file (not visible here).
 */
static struct packet_type mpls_packet_type __read_mostly = {
	.type = cpu_to_be16(ETH_P_MPLS_UC),
	.func = mpls_forward,
};
/* Netlink attribute validation policy for RTM_*ROUTE requests in
 * AF_MPLS: only these attribute types/sizes are accepted.
 */
static const struct nla_policy rtm_mpls_policy[RTA_MAX+1] = {
	[RTA_DST]		= { .type = NLA_U32 },
	[RTA_OIF]		= { .type = NLA_U32 },
	[RTA_TTL_PROPAGATE]	= { .type = NLA_U8 },
};
/* Parsed form of a netlink route request, handed to
 * mpls_route_add()/mpls_route_del().
 */
struct mpls_route_config {
	u32			rc_protocol;	/* routing protocol that installed the route */
	u32			rc_ifindex;	/* output interface, 0 = resolve from via */
	u8			rc_via_table;	/* neighbour table for the via address */
	u8			rc_via_alen;	/* length of rc_via in bytes */
	u8			rc_via[MAX_VIA_ALEN];	/* next-hop address */
	u32			rc_label;	/* incoming label (route key) */
	u8			rc_ttl_propagate;	/* MPLS_TTL_PROP_* policy */
	u8			rc_output_labels;	/* number of labels in rc_output_label */
	u32			rc_output_label[MAX_NEW_LABELS];	/* labels to push on output */
	u32			rc_nlflags;	/* NLM_F_* request flags */
	enum mpls_payload_type	rc_payload_type;
	struct nl_info		rc_nlinfo;	/* netns + notification context */
	struct rtnexthop	*rc_mp;		/* multipath nexthop blob, NULL if single path */
	int			rc_mp_len;	/* length of rc_mp in bytes */
};
/* Allocate a route with room for @num_nh nexthops plus one aligned
 * via-address slot of @max_alen (rounded up to VIA_ALEN_ALIGN) per
 * nexthop, laid out directly after rt_nh[] as expected by
 * __mpls_nh_via().  Returns NULL on allocation failure.
 */
static struct mpls_route *mpls_rt_alloc(int num_nh, u8 max_alen)
{
	u8 max_alen_aligned = ALIGN(max_alen, VIA_ALEN_ALIGN);
	struct mpls_route *rt;

	rt = kzalloc(ALIGN(sizeof(*rt) + num_nh * sizeof(*rt->rt_nh),
			   VIA_ALEN_ALIGN) +
		     num_nh * max_alen_aligned,
		     GFP_KERNEL);
	if (rt) {
		rt->rt_nhn = num_nh;
		rt->rt_nhn_alive = num_nh;
		rt->rt_max_alen = max_alen_aligned;
	}

	return rt;
}
  404. static void mpls_rt_free(struct mpls_route *rt)
  405. {
  406. if (rt)
  407. kfree_rcu(rt, rt_rcu);
  408. }
/* Emit an RTM_NEWROUTE/RTM_DELROUTE netlink notification for a route
 * change on label @index.  @old/@new describe the transition: both set
 * means replace, only @new means add, only @old means delete.
 */
static void mpls_notify_route(struct net *net, unsigned index,
			      struct mpls_route *old, struct mpls_route *new,
			      const struct nl_info *info)
{
	struct nlmsghdr *nlh = info ? info->nlh : NULL;
	unsigned portid = info ? info->portid : 0;
	int event = new ? RTM_NEWROUTE : RTM_DELROUTE;
	struct mpls_route *rt = new ? new : old;
	unsigned nlm_flags = (old && new) ? NLM_F_REPLACE : 0;
	/* Ignore reserved labels for now */
	if (rt && (index >= MPLS_LABEL_FIRST_UNRESERVED))
		rtmsg_lfib(event, index, rt, nlh, net, portid, nlm_flags);
}
/* Install @new (or NULL to delete) as the route for label @index,
 * notify listeners, and free the displaced route after an RCU grace
 * period.  Caller must hold RTNL; readers see the switch atomically
 * via rcu_assign_pointer().
 */
static void mpls_route_update(struct net *net, unsigned index,
			      struct mpls_route *new,
			      const struct nl_info *info)
{
	struct mpls_route __rcu **platform_label;
	struct mpls_route *rt;

	ASSERT_RTNL();

	platform_label = rtnl_dereference(net->mpls.platform_label);
	rt = rtnl_dereference(platform_label[index]);
	rcu_assign_pointer(platform_label[index], new);

	mpls_notify_route(net, index, rt, new, info);

	/* If we removed a route free it now */
	mpls_rt_free(rt);
}
  436. static unsigned find_free_label(struct net *net)
  437. {
  438. struct mpls_route __rcu **platform_label;
  439. size_t platform_labels;
  440. unsigned index;
  441. platform_label = rtnl_dereference(net->mpls.platform_label);
  442. platform_labels = net->mpls.platform_labels;
  443. for (index = MPLS_LABEL_FIRST_UNRESERVED; index < platform_labels;
  444. index++) {
  445. if (!rtnl_dereference(platform_label[index]))
  446. return index;
  447. }
  448. return LABEL_NOT_SPECIFIED;
  449. }
#if IS_ENABLED(CONFIG_INET)
/* Resolve the output device for an IPv4 via address by doing a FIB
 * lookup.  Returns the device with a reference held (caller releases),
 * or an ERR_PTR on lookup failure.
 */
static struct net_device *inet_fib_lookup_dev(struct net *net,
					      const void *addr)
{
	struct net_device *dev;
	struct rtable *rt;
	struct in_addr daddr;

	memcpy(&daddr, addr, sizeof(struct in_addr));
	rt = ip_route_output(net, daddr.s_addr, 0, 0, 0);
	if (IS_ERR(rt))
		return ERR_CAST(rt);

	dev = rt->dst.dev;
	dev_hold(dev);

	ip_rt_put(rt);

	return dev;
}
#else
/* IPv4 not built in: IPv4 via addresses cannot be resolved. */
static struct net_device *inet_fib_lookup_dev(struct net *net,
					      const void *addr)
{
	return ERR_PTR(-EAFNOSUPPORT);
}
#endif
#if IS_ENABLED(CONFIG_IPV6)
/* Resolve the output device for an IPv6 via address through the
 * ipv6_stub indirection (IPv6 may be a module).  Returns the device
 * with a reference held, or an ERR_PTR on failure.
 */
static struct net_device *inet6_fib_lookup_dev(struct net *net,
					       const void *addr)
{
	struct net_device *dev;
	struct dst_entry *dst;
	struct flowi6 fl6;
	int err;

	if (!ipv6_stub)
		return ERR_PTR(-EAFNOSUPPORT);

	memset(&fl6, 0, sizeof(fl6));
	memcpy(&fl6.daddr, addr, sizeof(struct in6_addr));
	err = ipv6_stub->ipv6_dst_lookup(net, NULL, &dst, &fl6);
	if (err)
		return ERR_PTR(err);

	dev = dst->dev;
	dev_hold(dev);
	dst_release(dst);

	return dev;
}
#else
/* IPv6 not built in: IPv6 via addresses cannot be resolved. */
static struct net_device *inet6_fib_lookup_dev(struct net *net,
					       const void *addr)
{
	return ERR_PTR(-EAFNOSUPPORT);
}
#endif
/* Determine the output device for nexthop @nh: by explicit ifindex
 * @oif when given, otherwise by a FIB lookup on the via address.
 * Returns the device (reference already dropped - see comment below)
 * or an ERR_PTR.
 */
static struct net_device *find_outdev(struct net *net,
				      struct mpls_route *rt,
				      struct mpls_nh *nh, int oif)
{
	struct net_device *dev = NULL;

	if (!oif) {
		switch (nh->nh_via_table) {
		case NEIGH_ARP_TABLE:
			dev = inet_fib_lookup_dev(net, mpls_nh_via(rt, nh));
			break;
		case NEIGH_ND_TABLE:
			dev = inet6_fib_lookup_dev(net, mpls_nh_via(rt, nh));
			break;
		case NEIGH_LINK_TABLE:
			/* no via address to resolve against */
			break;
		}
	} else {
		dev = dev_get_by_index(net, oif);
	}

	if (!dev)
		return ERR_PTR(-ENODEV);

	if (IS_ERR(dev))
		return dev;

	/* The caller is holding rtnl anyways, so release the dev reference */
	dev_put(dev);

	return dev;
}
/* Bind nexthop @nh to its output device (resolved via find_outdev()),
 * validate it is MPLS-capable and that a link-level via address
 * matches the device address length, then record the initial
 * DEAD/LINKDOWN state from the device flags.  Returns 0 or -errno.
 */
static int mpls_nh_assign_dev(struct net *net, struct mpls_route *rt,
			      struct mpls_nh *nh, int oif)
{
	struct net_device *dev = NULL;
	int err = -ENODEV;

	dev = find_outdev(net, rt, nh, oif);
	if (IS_ERR(dev)) {
		err = PTR_ERR(dev);
		dev = NULL;
		goto errout;
	}

	/* Ensure this is a supported device */
	err = -EINVAL;
	if (!mpls_dev_get(dev))
		goto errout;

	if ((nh->nh_via_table == NEIGH_LINK_TABLE) &&
	    (dev->addr_len != nh->nh_via_alen))
		goto errout;

	RCU_INIT_POINTER(nh->nh_dev, dev);

	if (!(dev->flags & IFF_UP)) {
		nh->nh_flags |= RTNH_F_DEAD;
	} else {
		unsigned int flags;

		flags = dev_get_flags(dev);
		if (!(flags & (IFF_RUNNING | IFF_LOWER_UP)))
			nh->nh_flags |= RTNH_F_LINKDOWN;
	}

	return 0;

errout:
	return err;
}
/* Populate the single nexthop of @rt from a parsed route request:
 * copy the output labels and via address, resolve the device, and
 * decrement rt_nhn_alive if the nexthop starts out dead/linkdown.
 * Returns 0 or -errno.
 */
static int mpls_nh_build_from_cfg(struct mpls_route_config *cfg,
				  struct mpls_route *rt)
{
	struct net *net = cfg->rc_nlinfo.nl_net;
	struct mpls_nh *nh = rt->rt_nh;
	int err;
	int i;

	if (!nh)
		return -ENOMEM;

	err = -EINVAL;
	/* Ensure only a supported number of labels are present */
	if (cfg->rc_output_labels > MAX_NEW_LABELS)
		goto errout;

	nh->nh_labels = cfg->rc_output_labels;
	for (i = 0; i < nh->nh_labels; i++)
		nh->nh_label[i] = cfg->rc_output_label[i];

	nh->nh_via_table = cfg->rc_via_table;
	memcpy(__mpls_nh_via(rt, nh), cfg->rc_via, cfg->rc_via_alen);
	nh->nh_via_alen = cfg->rc_via_alen;

	err = mpls_nh_assign_dev(net, rt, nh, cfg->rc_ifindex);
	if (err)
		goto errout;

	if (nh->nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN))
		rt->rt_nhn_alive--;

	return 0;

errout:
	return err;
}
/* Populate one multipath nexthop from raw netlink attributes:
 * optional RTA_NEWDST output labels, optional RTA_VIA address (absent
 * means transmit using the device address), then device resolution.
 * Returns 0 or -errno.
 */
static int mpls_nh_build(struct net *net, struct mpls_route *rt,
			 struct mpls_nh *nh, int oif, struct nlattr *via,
			 struct nlattr *newdst)
{
	int err = -ENOMEM;

	if (!nh)
		goto errout;

	if (newdst) {
		err = nla_get_labels(newdst, MAX_NEW_LABELS,
				     &nh->nh_labels, nh->nh_label);
		if (err)
			goto errout;
	}

	if (via) {
		err = nla_get_via(via, &nh->nh_via_alen, &nh->nh_via_table,
				  __mpls_nh_via(rt, nh));
		if (err)
			goto errout;
	} else {
		nh->nh_via_table = MPLS_NEIGH_TABLE_UNSPEC;
	}

	err = mpls_nh_assign_dev(net, rt, nh, oif);
	if (err)
		goto errout;

	return 0;

errout:
	return err;
}
/* Count the nexthops in a multipath blob @rtnh of @len bytes and
 * report the largest via address length seen through @max_via_alen
 * (needed to size the route allocation).  A NULL blob means a
 * single-path route using @cfg_via_alen.  Returns the nexthop count,
 * or 0 when the blob is malformed (trailing bytes left over).
 */
static int mpls_count_nexthops(struct rtnexthop *rtnh, int len,
			       u8 cfg_via_alen, u8 *max_via_alen)
{
	int nhs = 0;
	int remaining = len;

	if (!rtnh) {
		*max_via_alen = cfg_via_alen;
		return 1;
	}

	*max_via_alen = 0;
	while (rtnh_ok(rtnh, remaining)) {
		struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
		int attrlen;

		attrlen = rtnh_attrlen(rtnh);
		nla = nla_find(attrs, attrlen, RTA_VIA);
		if (nla && nla_len(nla) >=
		    offsetof(struct rtvia, rtvia_addr)) {
			int via_alen = nla_len(nla) -
				offsetof(struct rtvia, rtvia_addr);

			/* oversized via addresses are rejected later by
			 * nla_get_via(); don't let them inflate the
			 * allocation here
			 */
			if (via_alen <= MAX_VIA_ALEN)
				*max_via_alen = max_t(u16, *max_via_alen,
						      via_alen);
		}

		nhs++;
		rtnh = rtnh_next(rtnh, &remaining);
	}

	/* leftover implies invalid nexthop configuration, discard it */
	return remaining > 0 ? 0 : nhs;
}
/* Fill in all nexthops of @rt from the multipath blob in @cfg,
 * walking rt_nh[] (change_nexthops) and the rtnexthop list in
 * lockstep.  Weighted multipath and per-nexthop flags are rejected.
 * Updates rt_nhn to the number actually built.  Returns 0 or -errno.
 */
static int mpls_nh_build_multi(struct mpls_route_config *cfg,
			       struct mpls_route *rt)
{
	struct rtnexthop *rtnh = cfg->rc_mp;
	struct nlattr *nla_via, *nla_newdst;
	int remaining = cfg->rc_mp_len;
	int nhs = 0;
	int err = 0;

	change_nexthops(rt) {
		int attrlen;

		nla_via = NULL;
		nla_newdst = NULL;

		err = -EINVAL;
		if (!rtnh_ok(rtnh, remaining))
			goto errout;

		/* neither weighted multipath nor any flags
		 * are supported
		 */
		if (rtnh->rtnh_hops || rtnh->rtnh_flags)
			goto errout;

		attrlen = rtnh_attrlen(rtnh);
		if (attrlen > 0) {
			struct nlattr *attrs = rtnh_attrs(rtnh);

			nla_via = nla_find(attrs, attrlen, RTA_VIA);
			nla_newdst = nla_find(attrs, attrlen, RTA_NEWDST);
		}

		err = mpls_nh_build(cfg->rc_nlinfo.nl_net, rt, nh,
				    rtnh->rtnh_ifindex, nla_via, nla_newdst);
		if (err)
			goto errout;

		if (nh->nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN))
			rt->rt_nhn_alive--;

		rtnh = rtnh_next(rtnh, &remaining);
		nhs++;
	} endfor_nexthops(rt);

	rt->rt_nhn = nhs;

	return 0;

errout:
	return err;
}
/* Install a route from a parsed netlink request, honouring the
 * NLM_F_CREATE/EXCL/REPLACE/APPEND semantics.  Caller holds RTNL.
 * Picks a free label when none was specified and NLM_F_CREATE is set
 * (a full table yields LABEL_NOT_SPECIFIED, which fails the
 * platform_labels range check below).  Returns 0 or -errno.
 */
static int mpls_route_add(struct mpls_route_config *cfg)
{
	struct mpls_route __rcu **platform_label;
	struct net *net = cfg->rc_nlinfo.nl_net;
	struct mpls_route *rt, *old;
	int err = -EINVAL;
	u8 max_via_alen;
	unsigned index;
	int nhs;

	index = cfg->rc_label;

	/* If a label was not specified during insert pick one */
	if ((index == LABEL_NOT_SPECIFIED) &&
	    (cfg->rc_nlflags & NLM_F_CREATE)) {
		index = find_free_label(net);
	}

	/* Reserved labels may not be set */
	if (index < MPLS_LABEL_FIRST_UNRESERVED)
		goto errout;

	/* The full 20 bit range may not be supported. */
	if (index >= net->mpls.platform_labels)
		goto errout;

	/* Append makes no sense with mpls */
	err = -EOPNOTSUPP;
	if (cfg->rc_nlflags & NLM_F_APPEND)
		goto errout;

	err = -EEXIST;
	platform_label = rtnl_dereference(net->mpls.platform_label);
	old = rtnl_dereference(platform_label[index]);
	if ((cfg->rc_nlflags & NLM_F_EXCL) && old)
		goto errout;

	err = -EEXIST;
	if (!(cfg->rc_nlflags & NLM_F_REPLACE) && old)
		goto errout;

	err = -ENOENT;
	if (!(cfg->rc_nlflags & NLM_F_CREATE) && !old)
		goto errout;

	err = -EINVAL;
	nhs = mpls_count_nexthops(cfg->rc_mp, cfg->rc_mp_len,
				  cfg->rc_via_alen, &max_via_alen);
	if (nhs == 0)
		goto errout;

	err = -ENOMEM;
	rt = mpls_rt_alloc(nhs, max_via_alen);
	if (!rt)
		goto errout;

	rt->rt_protocol = cfg->rc_protocol;
	rt->rt_payload_type = cfg->rc_payload_type;
	rt->rt_ttl_propagate = cfg->rc_ttl_propagate;

	if (cfg->rc_mp)
		err = mpls_nh_build_multi(cfg, rt);
	else
		err = mpls_nh_build_from_cfg(cfg, rt);
	if (err)
		goto freert;

	mpls_route_update(net, index, rt, &cfg->rc_nlinfo);

	return 0;

freert:
	mpls_rt_free(rt);
errout:
	return err;
}
/* Remove the route for cfg->rc_label, if any.  Reserved and
 * out-of-range labels are rejected with -EINVAL.  Called under RTNL.
 */
static int mpls_route_del(struct mpls_route_config *cfg)
{
	struct net *net = cfg->rc_nlinfo.nl_net;
	unsigned index;
	int err = -EINVAL;

	index = cfg->rc_label;

	/* Reserved labels may not be removed */
	if (index < MPLS_LABEL_FIRST_UNRESERVED)
		goto errout;

	/* The full 20 bit range may not be supported */
	if (index >= net->mpls.platform_labels)
		goto errout;

	/* A NULL route clears the table slot and notifies listeners */
	mpls_route_update(net, index, NULL, &cfg->rc_nlinfo);

	err = 0;
errout:
	return err;
}
/* Sum the per-CPU MPLS counters of @mdev into @stats.  Each CPU's
 * snapshot is taken under its u64_stats sequence counter so the 64-bit
 * reads are consistent even on 32-bit hosts.
 */
static void mpls_get_stats(struct mpls_dev *mdev,
			   struct mpls_link_stats *stats)
{
	struct mpls_pcpu_stats *p;
	int i;

	memset(stats, 0, sizeof(*stats));

	for_each_possible_cpu(i) {
		struct mpls_link_stats local;
		unsigned int start;

		p = per_cpu_ptr(mdev->stats, i);
		do {
			/* retry if a writer raced with our snapshot */
			start = u64_stats_fetch_begin(&p->syncp);
			local = p->stats;
		} while (u64_stats_fetch_retry(&p->syncp, start));

		stats->rx_packets += local.rx_packets;
		stats->rx_bytes += local.rx_bytes;
		stats->tx_packets += local.tx_packets;
		stats->tx_bytes += local.tx_bytes;
		stats->rx_errors += local.rx_errors;
		stats->tx_errors += local.tx_errors;
		stats->rx_dropped += local.rx_dropped;
		stats->tx_dropped += local.tx_dropped;
		stats->rx_noroute += local.rx_noroute;
	}
}
/* rtnl_af_ops hook: emit the MPLS_STATS_LINK attribute for @dev.
 * Returns -ENODATA when the device has no MPLS state so the caller
 * skips the AF block entirely.
 */
static int mpls_fill_stats_af(struct sk_buff *skb,
			      const struct net_device *dev)
{
	struct mpls_link_stats *stats;
	struct mpls_dev *mdev;
	struct nlattr *nla;

	mdev = mpls_dev_get(dev);
	if (!mdev)
		return -ENODATA;

	nla = nla_reserve_64bit(skb, MPLS_STATS_LINK,
				sizeof(struct mpls_link_stats),
				MPLS_STATS_UNSPEC);
	if (!nla)
		return -EMSGSIZE;

	/* Fill the reserved attribute payload in place */
	stats = nla_data(nla);
	mpls_get_stats(mdev, stats);

	return 0;
}
  804. static size_t mpls_get_stats_af_size(const struct net_device *dev)
  805. {
  806. struct mpls_dev *mdev;
  807. mdev = mpls_dev_get(dev);
  808. if (!mdev)
  809. return 0;
  810. return nla_total_size_64bit(sizeof(struct mpls_link_stats));
  811. }
/* Fill one RTM_NEWNETCONF message for @mdev.  @type selects which
 * NETCONFA_* attribute to include; NETCONFA_ALL includes every one.
 * Returns 0 or -EMSGSIZE (message cancelled).
 */
static int mpls_netconf_fill_devconf(struct sk_buff *skb, struct mpls_dev *mdev,
				     u32 portid, u32 seq, int event,
				     unsigned int flags, int type)
{
	struct nlmsghdr *nlh;
	struct netconfmsg *ncm;
	bool all = false;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
			flags);
	if (!nlh)
		return -EMSGSIZE;

	if (type == NETCONFA_ALL)
		all = true;

	ncm = nlmsg_data(nlh);
	ncm->ncm_family = AF_MPLS;

	if (nla_put_s32(skb, NETCONFA_IFINDEX, mdev->dev->ifindex) < 0)
		goto nla_put_failure;

	if ((all || type == NETCONFA_INPUT) &&
	    nla_put_s32(skb, NETCONFA_INPUT,
			mdev->input_enabled) < 0)
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
  839. static int mpls_netconf_msgsize_devconf(int type)
  840. {
  841. int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
  842. + nla_total_size(4); /* NETCONFA_IFINDEX */
  843. bool all = false;
  844. if (type == NETCONFA_ALL)
  845. all = true;
  846. if (all || type == NETCONFA_INPUT)
  847. size += nla_total_size(4);
  848. return size;
  849. }
/* Broadcast an RTM_NEWNETCONF message for @type on the
 * RTNLGRP_MPLS_NETCONF group when a per-device setting changes.
 */
static void mpls_netconf_notify_devconf(struct net *net, int type,
					struct mpls_dev *mdev)
{
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(mpls_netconf_msgsize_devconf(type), GFP_KERNEL);
	if (!skb)
		goto errout;

	err = mpls_netconf_fill_devconf(skb, mdev, 0, 0, RTM_NEWNETCONF,
					0, type);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in mpls_netconf_msgsize_devconf() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}

	rtnl_notify(skb, net, 0, RTNLGRP_MPLS_NETCONF, NULL, GFP_KERNEL);
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_MPLS_NETCONF, err);
}
/* Netlink policy for MPLS RTM_GETNETCONF requests: only the device
 * ifindex is accepted.
 */
static const struct nla_policy devconf_mpls_policy[NETCONFA_MAX + 1] = {
	[NETCONFA_IFINDEX] = { .len = sizeof(int) },
};
/* RTM_GETNETCONF handler: look up the device named by NETCONFA_IFINDEX
 * and unicast its full MPLS netconf state back to the requester.
 */
static int mpls_netconf_get_devconf(struct sk_buff *in_skb,
				    struct nlmsghdr *nlh)
{
	struct net *net = sock_net(in_skb->sk);
	struct nlattr *tb[NETCONFA_MAX + 1];
	struct netconfmsg *ncm;
	struct net_device *dev;
	struct mpls_dev *mdev;
	struct sk_buff *skb;
	int ifindex;
	int err;

	err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
			  devconf_mpls_policy);
	if (err < 0)
		goto errout;

	err = -EINVAL;
	if (!tb[NETCONFA_IFINDEX])
		goto errout;

	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
	dev = __dev_get_by_index(net, ifindex);
	if (!dev)
		goto errout;

	/* NOTE(review): a missing device or missing mdev also returns
	 * -EINVAL here; -ENODEV would arguably be more precise — confirm
	 * against userspace expectations before changing.
	 */
	mdev = mpls_dev_get(dev);
	if (!mdev)
		goto errout;

	err = -ENOBUFS;
	skb = nlmsg_new(mpls_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
	if (!skb)
		goto errout;

	err = mpls_netconf_fill_devconf(skb, mdev,
					NETLINK_CB(in_skb).portid,
					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
					NETCONFA_ALL);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in mpls_netconf_msgsize_devconf() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}

	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
errout:
	return err;
}
/* RTM_GETNETCONF dump handler: walk the device index hash table and
 * emit one netconf record per MPLS-enabled device.  Resume state (hash
 * bucket and index within it) lives in cb->args[0]/[1].
 */
static int mpls_netconf_dump_devconf(struct sk_buff *skb,
				     struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct hlist_head *head;
	struct net_device *dev;
	struct mpls_dev *mdev;
	int idx, s_idx;
	int h, s_h;

	s_h = cb->args[0];
	s_idx = idx = cb->args[1];

	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
		idx = 0;
		head = &net->dev_index_head[h];
		rcu_read_lock();
		/* Record the generation so userspace can detect device
		 * list changes between dump chunks.
		 */
		cb->seq = net->dev_base_seq;
		hlist_for_each_entry_rcu(dev, head, index_hlist) {
			if (idx < s_idx)
				goto cont;
			mdev = mpls_dev_get(dev);
			if (!mdev)
				goto cont;
			if (mpls_netconf_fill_devconf(skb, mdev,
						      NETLINK_CB(cb->skb).portid,
						      cb->nlh->nlmsg_seq,
						      RTM_NEWNETCONF,
						      NLM_F_MULTI,
						      NETCONFA_ALL) < 0) {
				/* skb full: stop and resume here later */
				rcu_read_unlock();
				goto done;
			}
			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
			idx++;
		}
		rcu_read_unlock();
	}
done:
	cb->args[0] = h;
	cb->args[1] = idx;
	return skb->len;
}
/* Byte offset of @field within struct mpls_dev, encoded as a fake
 * pointer; mpls_dev_sysctl_register() rebases it onto the real mdev.
 */
#define MPLS_PERDEV_SYSCTL_OFFSET(field)	\
	(&((struct mpls_dev *)0)->field)

/* proc handler for the per-device MPLS sysctls.  After a successful
 * write that changed "input", broadcast a netconf notification.
 */
static int mpls_conf_proc(struct ctl_table *ctl, int write,
			  void __user *buffer,
			  size_t *lenp, loff_t *ppos)
{
	int oval = *(int *)ctl->data;
	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);

	if (write) {
		struct mpls_dev *mdev = ctl->extra1;
		/* NOTE(review): i is a difference of int pointers (units of
		 * sizeof(int)) but is compared against offsetof(), which is
		 * in bytes.  The two only agree while input_enabled sits at
		 * offset 0 of struct mpls_dev — confirm before adding more
		 * sysctl-backed fields.
		 */
		int i = (int *)ctl->data - (int *)mdev;
		struct net *net = ctl->extra2;
		int val = *(int *)ctl->data;

		if (i == offsetof(struct mpls_dev, input_enabled) &&
		    val != oval) {
			mpls_netconf_notify_devconf(net,
						    NETCONFA_INPUT,
						    mdev);
		}
	}

	return ret;
}
/* Template for the per-device sysctl tree (net/mpls/conf/<dev>).
 * .data holds an offset into struct mpls_dev until
 * mpls_dev_sysctl_register() rewrites it to an absolute pointer.
 */
static const struct ctl_table mpls_dev_table[] = {
	{
		.procname	= "input",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= mpls_conf_proc,
		.data		= MPLS_PERDEV_SYSCTL_OFFSET(input_enabled),
	},
	{ } /* sentinel */
};
/* Duplicate the sysctl template for @dev, rebase the offsets stored in
 * .data onto @mdev, and register the table under net/mpls/conf/<name>.
 * Returns 0 or -ENOBUFS.
 */
static int mpls_dev_sysctl_register(struct net_device *dev,
				    struct mpls_dev *mdev)
{
	char path[sizeof("net/mpls/conf/") + IFNAMSIZ];
	struct net *net = dev_net(dev);
	struct ctl_table *table;
	int i;

	table = kmemdup(&mpls_dev_table, sizeof(mpls_dev_table), GFP_KERNEL);
	if (!table)
		goto out;

	/* Table data contains only offsets relative to the base of
	 * the mdev at this point, so make them absolute.
	 */
	for (i = 0; i < ARRAY_SIZE(mpls_dev_table); i++) {
		table[i].data = (char *)mdev + (uintptr_t)table[i].data;
		table[i].extra1 = mdev; /* consumed by mpls_conf_proc() */
		table[i].extra2 = net;
	}

	snprintf(path, sizeof(path), "net/mpls/conf/%s", dev->name);

	mdev->sysctl = register_net_sysctl(dev_net(dev), path, table);
	if (!mdev->sysctl)
		goto free;

	return 0;

free:
	kfree(table);
out:
	return -ENOBUFS;
}
  1020. static void mpls_dev_sysctl_unregister(struct mpls_dev *mdev)
  1021. {
  1022. struct ctl_table *table;
  1023. table = mdev->sysctl->ctl_table_arg;
  1024. unregister_net_sysctl_table(mdev->sysctl);
  1025. kfree(table);
  1026. }
/* Allocate and attach MPLS per-device state to @dev: per-CPU stats,
 * the sysctl tree, and the dev->mpls_ptr back-pointer.  Runs under
 * RTNL.  Returns the new mpls_dev or an ERR_PTR().
 */
static struct mpls_dev *mpls_add_dev(struct net_device *dev)
{
	struct mpls_dev *mdev;
	int err = -ENOMEM;
	int i;

	ASSERT_RTNL();

	mdev = kzalloc(sizeof(*mdev), GFP_KERNEL);
	if (!mdev)
		return ERR_PTR(err);

	mdev->stats = alloc_percpu(struct mpls_pcpu_stats);
	if (!mdev->stats)
		goto free;

	for_each_possible_cpu(i) {
		struct mpls_pcpu_stats *mpls_stats;

		mpls_stats = per_cpu_ptr(mdev->stats, i);
		u64_stats_init(&mpls_stats->syncp);
	}

	err = mpls_dev_sysctl_register(dev, mdev);
	if (err)
		goto free;

	mdev->dev = dev;
	/* Publish: readers find mdev via the RCU-protected mpls_ptr */
	rcu_assign_pointer(dev->mpls_ptr, mdev);

	return mdev;

free:
	/* free_percpu(NULL) is a no-op on the stats-alloc failure path */
	free_percpu(mdev->stats);
	kfree(mdev);
	return ERR_PTR(err);
}
/* RCU callback: free the per-device MPLS state once no reader can
 * still hold a reference obtained via dev->mpls_ptr.
 */
static void mpls_dev_destroy_rcu(struct rcu_head *head)
{
	struct mpls_dev *mdev = container_of(head, struct mpls_dev, rcu);

	free_percpu(mdev->stats);
	kfree(mdev);
}
/* Mark every nexthop that points at @dev according to @event:
 * NETDEV_DOWN/UNREGISTER set RTNH_F_DEAD (falling through to also set
 * LINKDOWN); NETDEV_CHANGE sets RTNH_F_LINKDOWN only.  UNREGISTER also
 * drops the nexthop's device reference.  Runs under RTNL.
 */
static void mpls_ifdown(struct net_device *dev, int event)
{
	struct mpls_route __rcu **platform_label;
	struct net *net = dev_net(dev);
	unsigned index;

	platform_label = rtnl_dereference(net->mpls.platform_label);
	for (index = 0; index < net->mpls.platform_labels; index++) {
		struct mpls_route *rt = rtnl_dereference(platform_label[index]);

		if (!rt)
			continue;

		change_nexthops(rt) {
			if (rtnl_dereference(nh->nh_dev) != dev)
				continue;
			switch (event) {
			case NETDEV_DOWN:
			case NETDEV_UNREGISTER:
				nh->nh_flags |= RTNH_F_DEAD;
				/* fall through */
			case NETDEV_CHANGE:
				nh->nh_flags |= RTNH_F_LINKDOWN;
				/* NOTE(review): decrements unconditionally,
				 * without checking whether this nexthop was
				 * already flagged down — presumably relies on
				 * mpls_ifup() recomputing the count; verify
				 * repeated events cannot double-decrement.
				 */
				ACCESS_ONCE(rt->rt_nhn_alive) = rt->rt_nhn_alive - 1;
				break;
			}
			if (event == NETDEV_UNREGISTER)
				RCU_INIT_POINTER(nh->nh_dev, NULL);
		} endfor_nexthops(rt);
	}
}
/* Clear @nh_flags on every nexthop that points at @dev and recompute
 * each route's alive-nexthop count from scratch.  Runs under RTNL.
 */
static void mpls_ifup(struct net_device *dev, unsigned int nh_flags)
{
	struct mpls_route __rcu **platform_label;
	struct net *net = dev_net(dev);
	unsigned index;
	int alive;

	platform_label = rtnl_dereference(net->mpls.platform_label);
	for (index = 0; index < net->mpls.platform_labels; index++) {
		struct mpls_route *rt = rtnl_dereference(platform_label[index]);

		if (!rt)
			continue;

		alive = 0;
		change_nexthops(rt) {
			struct net_device *nh_dev =
				rtnl_dereference(nh->nh_dev);

			/* Nexthops with none of the cleared flags were
			 * already alive.
			 */
			if (!(nh->nh_flags & nh_flags)) {
				alive++;
				continue;
			}
			if (nh_dev != dev)
				continue;
			alive++;
			nh->nh_flags &= ~nh_flags;
		} endfor_nexthops(rt);

		ACCESS_ONCE(rt->rt_nhn_alive) = alive;
	}
}
/* netdevice notifier: create/destroy per-device MPLS state and keep
 * route nexthop DEAD/LINKDOWN flags in sync with device state.
 */
static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
			   void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct mpls_dev *mdev;
	unsigned int flags;

	if (event == NETDEV_REGISTER) {
		/* For now just support Ethernet, IPGRE, SIT and IPIP devices */
		if (dev->type == ARPHRD_ETHER ||
		    dev->type == ARPHRD_LOOPBACK ||
		    dev->type == ARPHRD_IPGRE ||
		    dev->type == ARPHRD_SIT ||
		    dev->type == ARPHRD_TUNNEL) {
			mdev = mpls_add_dev(dev);
			if (IS_ERR(mdev))
				return notifier_from_errno(PTR_ERR(mdev));
		}
		return NOTIFY_OK;
	}

	mdev = mpls_dev_get(dev);
	if (!mdev)
		return NOTIFY_OK;

	switch (event) {
	case NETDEV_DOWN:
		mpls_ifdown(dev, event);
		break;
	case NETDEV_UP:
		/* Clear LINKDOWN only when the carrier is actually up */
		flags = dev_get_flags(dev);
		if (flags & (IFF_RUNNING | IFF_LOWER_UP))
			mpls_ifup(dev, RTNH_F_DEAD | RTNH_F_LINKDOWN);
		else
			mpls_ifup(dev, RTNH_F_DEAD);
		break;
	case NETDEV_CHANGE:
		flags = dev_get_flags(dev);
		if (flags & (IFF_RUNNING | IFF_LOWER_UP))
			mpls_ifup(dev, RTNH_F_DEAD | RTNH_F_LINKDOWN);
		else
			mpls_ifdown(dev, event);
		break;
	case NETDEV_UNREGISTER:
		mpls_ifdown(dev, event);
		mdev = mpls_dev_get(dev);
		if (mdev) {
			mpls_dev_sysctl_unregister(mdev);
			RCU_INIT_POINTER(dev->mpls_ptr, NULL);
			/* Defer the free until RCU readers are done */
			call_rcu(&mdev->rcu, mpls_dev_destroy_rcu);
		}
		break;
	case NETDEV_CHANGENAME:
		mdev = mpls_dev_get(dev);
		if (mdev) {
			int err;

			/* Re-register the sysctl tree under the new name */
			mpls_dev_sysctl_unregister(mdev);
			err = mpls_dev_sysctl_register(dev, mdev);
			if (err)
				return notifier_from_errno(err);
		}
		break;
	}
	return NOTIFY_OK;
}
/* Registered at module init to track net_device lifetime events. */
static struct notifier_block mpls_dev_notifier = {
	.notifier_call = mpls_dev_notify,
};
/* Emit an RTA_VIA attribute: the address family mapped from the
 * neighbour table index, followed by the raw @alen-byte address.
 */
static int nla_put_via(struct sk_buff *skb,
		       u8 table, const void *addr, int alen)
{
	static const int table_to_family[NEIGH_NR_TABLES + 1] = {
		AF_INET, AF_INET6, AF_DECnet, AF_PACKET,
	};
	struct nlattr *nla;
	struct rtvia *via;
	int family = AF_UNSPEC;

	/* +2 covers the rtvia_family field preceding the address */
	nla = nla_reserve(skb, RTA_VIA, alen + 2);
	if (!nla)
		return -EMSGSIZE;

	/* Unknown tables fall back to AF_UNSPEC */
	if (table <= NEIGH_NR_TABLES)
		family = table_to_family[table];

	via = nla_data(nla);
	via->rtvia_family = family;
	memcpy(via->rtvia_addr, addr, alen);
	return 0;
}
/* Emit @labels MPLS labels as a stack of 4-byte shim entries in
 * attribute @attrtype.  The innermost (last) entry carries the
 * bottom-of-stack bit; TTL and TC are left clear.
 */
int nla_put_labels(struct sk_buff *skb, int attrtype,
		   u8 labels, const u32 label[])
{
	struct nlattr *nla;
	struct mpls_shim_hdr *nla_label;
	bool bos;
	int i;

	nla = nla_reserve(skb, attrtype, labels*4);
	if (!nla)
		return -EMSGSIZE;

	nla_label = nla_data(nla);
	bos = true;
	/* Walk from the bottom of the stack so only the last entry
	 * gets the BoS bit.
	 */
	for (i = labels - 1; i >= 0; i--) {
		nla_label[i] = mpls_entry_encode(label[i], 0, 0, bos);
		bos = false;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(nla_put_labels);
/* Parse a label-stack attribute into @label[].  Validates that the
 * payload is a multiple of 4 bytes, fits within @max_labels, has the
 * bottom-of-stack bit only on the last entry, has TTL/TC clear, and
 * contains no implicit-NULL label.  On success stores the label count
 * in *labels and returns 0; otherwise returns -EINVAL.
 */
int nla_get_labels(const struct nlattr *nla,
		   u32 max_labels, u8 *labels, u32 label[])
{
	unsigned len = nla_len(nla);
	unsigned nla_labels;
	struct mpls_shim_hdr *nla_label;
	bool bos;
	int i;

	/* len needs to be an even multiple of 4 (the label size) */
	if (len & 3)
		return -EINVAL;

	/* Limit the number of new labels allowed */
	nla_labels = len/4;
	if (nla_labels > max_labels)
		return -EINVAL;

	nla_label = nla_data(nla);
	bos = true;
	for (i = nla_labels - 1; i >= 0; i--, bos = false) {
		struct mpls_entry_decoded dec;

		dec = mpls_entry_decode(nla_label + i);

		/* Ensure the bottom of stack flag is properly set
		 * and ttl and tc are both clear.
		 */
		if ((dec.bos != bos) || dec.ttl || dec.tc)
			return -EINVAL;

		switch (dec.label) {
		case MPLS_LABEL_IMPLNULL:
			/* RFC3032: This is a label that an LSR may
			 * assign and distribute, but which never
			 * actually appears in the encapsulation.
			 */
			return -EINVAL;
		}

		label[i] = dec.label;
	}
	*labels = nla_labels;
	return 0;
}
EXPORT_SYMBOL_GPL(nla_get_labels);
/* Parse an RTA_VIA attribute: map the address family to a neighbour
 * table, validate the address length for that family, and copy the
 * address out.  Returns 0 or -EINVAL.
 */
int nla_get_via(const struct nlattr *nla, u8 *via_alen,
		u8 *via_table, u8 via_addr[])
{
	struct rtvia *via = nla_data(nla);
	int err = -EINVAL;
	int alen;

	/* Must at least contain the family field */
	if (nla_len(nla) < offsetof(struct rtvia, rtvia_addr))
		goto errout;
	alen = nla_len(nla) -
			offsetof(struct rtvia, rtvia_addr);
	if (alen > MAX_VIA_ALEN)
		goto errout;

	/* Validate the address family */
	switch (via->rtvia_family) {
	case AF_PACKET:
		*via_table = NEIGH_LINK_TABLE;
		break;
	case AF_INET:
		*via_table = NEIGH_ARP_TABLE;
		if (alen != 4)
			goto errout;
		break;
	case AF_INET6:
		*via_table = NEIGH_ND_TABLE;
		if (alen != 16)
			goto errout;
		break;
	default:
		/* Unsupported address family */
		goto errout;
	}

	memcpy(via_addr, via->rtvia_addr, alen);
	*via_alen = alen;
	err = 0;

errout:
	return err;
}
/* Translate an RTM_NEWROUTE/RTM_DELROUTE request into a
 * mpls_route_config.  Rejects anything that does not describe a plain
 * AF_MPLS unicast route in the main table.  Returns 0 or a negative
 * errno.
 */
static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh,
			       struct mpls_route_config *cfg)
{
	struct rtmsg *rtm;
	struct nlattr *tb[RTA_MAX+1];
	int index;
	int err;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_mpls_policy);
	if (err < 0)
		goto errout;

	err = -EINVAL;
	rtm = nlmsg_data(nlh);
	memset(cfg, 0, sizeof(*cfg));

	if (rtm->rtm_family != AF_MPLS)
		goto errout;
	if (rtm->rtm_dst_len != 20)	/* an MPLS label is 20 bits wide */
		goto errout;
	if (rtm->rtm_src_len != 0)
		goto errout;
	if (rtm->rtm_tos != 0)
		goto errout;
	if (rtm->rtm_table != RT_TABLE_MAIN)
		goto errout;
	/* Any value is acceptable for rtm_protocol */

	/* As mpls uses destination specific addresses
	 * (or source specific address in the case of multicast)
	 * all addresses have universal scope.
	 */
	if (rtm->rtm_scope != RT_SCOPE_UNIVERSE)
		goto errout;
	if (rtm->rtm_type != RTN_UNICAST)
		goto errout;
	if (rtm->rtm_flags != 0)
		goto errout;

	cfg->rc_label = LABEL_NOT_SPECIFIED;
	cfg->rc_protocol = rtm->rtm_protocol;
	cfg->rc_via_table = MPLS_NEIGH_TABLE_UNSPEC;
	cfg->rc_ttl_propagate = MPLS_TTL_PROP_DEFAULT;
	cfg->rc_nlflags = nlh->nlmsg_flags;
	cfg->rc_nlinfo.portid = NETLINK_CB(skb).portid;
	cfg->rc_nlinfo.nlh = nlh;
	cfg->rc_nlinfo.nl_net = sock_net(skb->sk);

	for (index = 0; index <= RTA_MAX; index++) {
		struct nlattr *nla = tb[index];

		if (!nla)
			continue;

		switch (index) {
		case RTA_OIF:
			cfg->rc_ifindex = nla_get_u32(nla);
			break;
		case RTA_NEWDST:
			if (nla_get_labels(nla, MAX_NEW_LABELS,
					   &cfg->rc_output_labels,
					   cfg->rc_output_label))
				goto errout;
			break;
		case RTA_DST:
		{
			u8 label_count;

			if (nla_get_labels(nla, 1, &label_count,
					   &cfg->rc_label))
				goto errout;

			/* Reserved labels may not be set */
			if (cfg->rc_label < MPLS_LABEL_FIRST_UNRESERVED)
				goto errout;

			break;
		}
		case RTA_VIA:
		{
			if (nla_get_via(nla, &cfg->rc_via_alen,
					&cfg->rc_via_table, cfg->rc_via))
				goto errout;
			break;
		}
		case RTA_MULTIPATH:
		{
			/* Raw rtnexthop array; decoded later by the
			 * multipath nexthop builder.
			 */
			cfg->rc_mp = nla_data(nla);
			cfg->rc_mp_len = nla_len(nla);
			break;
		}
		case RTA_TTL_PROPAGATE:
		{
			u8 ttl_propagate = nla_get_u8(nla);

			if (ttl_propagate > 1)
				goto errout;
			cfg->rc_ttl_propagate = ttl_propagate ?
				MPLS_TTL_PROP_ENABLED :
				MPLS_TTL_PROP_DISABLED;
			break;
		}
		default:
			/* Unsupported attribute */
			goto errout;
		}
	}

	err = 0;
errout:
	return err;
}
  1394. static int mpls_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
  1395. {
  1396. struct mpls_route_config cfg;
  1397. int err;
  1398. err = rtm_to_route_config(skb, nlh, &cfg);
  1399. if (err < 0)
  1400. return err;
  1401. return mpls_route_del(&cfg);
  1402. }
  1403. static int mpls_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
  1404. {
  1405. struct mpls_route_config cfg;
  1406. int err;
  1407. err = rtm_to_route_config(skb, nlh, &cfg);
  1408. if (err < 0)
  1409. return err;
  1410. return mpls_route_add(&cfg);
  1411. }
/* Fill one route message for @rt at @label.  Single-nexthop routes use
 * flat attributes; multipath routes use a nested RTA_MULTIPATH of
 * rtnexthop records.  Returns 0, or -EMSGSIZE after cancelling the
 * partially built message.
 */
static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event,
			   u32 label, struct mpls_route *rt, int flags)
{
	struct net_device *dev;
	struct nlmsghdr *nlh;
	struct rtmsg *rtm;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_MPLS;
	rtm->rtm_dst_len = 20;
	rtm->rtm_src_len = 0;
	rtm->rtm_tos = 0;
	rtm->rtm_table = RT_TABLE_MAIN;
	rtm->rtm_protocol = rt->rt_protocol;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	rtm->rtm_type = RTN_UNICAST;
	rtm->rtm_flags = 0;

	if (nla_put_labels(skb, RTA_DST, 1, &label))
		goto nla_put_failure;

	/* Only report TTL propagation when it overrides the default */
	if (rt->rt_ttl_propagate != MPLS_TTL_PROP_DEFAULT) {
		bool ttl_propagate =
			rt->rt_ttl_propagate == MPLS_TTL_PROP_ENABLED;

		if (nla_put_u8(skb, RTA_TTL_PROPAGATE,
			       ttl_propagate))
			goto nla_put_failure;
	}
	if (rt->rt_nhn == 1) {
		const struct mpls_nh *nh = rt->rt_nh;

		if (nh->nh_labels &&
		    nla_put_labels(skb, RTA_NEWDST, nh->nh_labels,
				   nh->nh_label))
			goto nla_put_failure;
		if (nh->nh_via_table != MPLS_NEIGH_TABLE_UNSPEC &&
		    nla_put_via(skb, nh->nh_via_table, mpls_nh_via(rt, nh),
				nh->nh_via_alen))
			goto nla_put_failure;
		dev = rtnl_dereference(nh->nh_dev);
		if (dev && nla_put_u32(skb, RTA_OIF, dev->ifindex))
			goto nla_put_failure;
		if (nh->nh_flags & RTNH_F_LINKDOWN)
			rtm->rtm_flags |= RTNH_F_LINKDOWN;
		if (nh->nh_flags & RTNH_F_DEAD)
			rtm->rtm_flags |= RTNH_F_DEAD;
	} else {
		struct rtnexthop *rtnh;
		struct nlattr *mp;
		int dead = 0;
		int linkdown = 0;

		mp = nla_nest_start(skb, RTA_MULTIPATH);
		if (!mp)
			goto nla_put_failure;

		for_nexthops(rt) {
			rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
			if (!rtnh)
				goto nla_put_failure;

			dev = rtnl_dereference(nh->nh_dev);
			if (dev)
				rtnh->rtnh_ifindex = dev->ifindex;
			if (nh->nh_flags & RTNH_F_LINKDOWN) {
				rtnh->rtnh_flags |= RTNH_F_LINKDOWN;
				linkdown++;
			}
			if (nh->nh_flags & RTNH_F_DEAD) {
				rtnh->rtnh_flags |= RTNH_F_DEAD;
				dead++;
			}

			if (nh->nh_labels && nla_put_labels(skb, RTA_NEWDST,
							    nh->nh_labels,
							    nh->nh_label))
				goto nla_put_failure;
			if (nh->nh_via_table != MPLS_NEIGH_TABLE_UNSPEC &&
			    nla_put_via(skb, nh->nh_via_table,
					mpls_nh_via(rt, nh),
					nh->nh_via_alen))
				goto nla_put_failure;

			/* length of rtnetlink header + attributes */
			rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
		} endfor_nexthops(rt);

		/* The route as a whole is down only if every path is */
		if (linkdown == rt->rt_nhn)
			rtm->rtm_flags |= RTNH_F_LINKDOWN;
		if (dead == rt->rt_nhn)
			rtm->rtm_flags |= RTNH_F_DEAD;

		nla_nest_end(skb, mp);
	}

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
/* RTM_GETROUTE dump: walk the platform label table from the last
 * reported index (cb->args[0]) and emit one message per in-use label.
 */
static int mpls_dump_routes(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct mpls_route __rcu **platform_label;
	size_t platform_labels;
	unsigned int index;

	ASSERT_RTNL();

	index = cb->args[0];
	/* Reserved labels are never dumped */
	if (index < MPLS_LABEL_FIRST_UNRESERVED)
		index = MPLS_LABEL_FIRST_UNRESERVED;

	platform_label = rtnl_dereference(net->mpls.platform_label);
	platform_labels = net->mpls.platform_labels;
	for (; index < platform_labels; index++) {
		struct mpls_route *rt;

		rt = rtnl_dereference(platform_label[index]);
		if (!rt)
			continue;

		/* skb full: stop and resume from this index next call */
		if (mpls_dump_route(skb, NETLINK_CB(cb->skb).portid,
				    cb->nlh->nlmsg_seq, RTM_NEWROUTE,
				    index, rt, NLM_F_MULTI) < 0)
			break;
	}
	cb->args[0] = index;

	return skb->len;
}
/* Upper bound on the netlink message size mpls_dump_route() needs for
 * @rt; must stay in sync with what that function emits (rtmsg_lfib()
 * WARNs on -EMSGSIZE).
 */
static inline size_t lfib_nlmsg_size(struct mpls_route *rt)
{
	size_t payload =
		NLMSG_ALIGN(sizeof(struct rtmsg))
		+ nla_total_size(4)	/* RTA_DST */
		+ nla_total_size(1);	/* RTA_TTL_PROPAGATE */

	if (rt->rt_nhn == 1) {
		struct mpls_nh *nh = rt->rt_nh;

		if (nh->nh_dev)
			payload += nla_total_size(4); /* RTA_OIF */
		if (nh->nh_via_table != MPLS_NEIGH_TABLE_UNSPEC) /* RTA_VIA */
			payload += nla_total_size(2 + nh->nh_via_alen);
		if (nh->nh_labels) /* RTA_NEWDST */
			payload += nla_total_size(nh->nh_labels * 4);
	} else {
		/* each nexthop is packed in an attribute */
		size_t nhsize = 0;

		for_nexthops(rt) {
			nhsize += nla_total_size(sizeof(struct rtnexthop));
			/* RTA_VIA */
			if (nh->nh_via_table != MPLS_NEIGH_TABLE_UNSPEC)
				nhsize += nla_total_size(2 + nh->nh_via_alen);
			if (nh->nh_labels)
				nhsize += nla_total_size(nh->nh_labels * 4);
		} endfor_nexthops(rt);
		/* nested attribute */
		payload += nla_total_size(nhsize);
	}

	return payload;
}
/* Notify RTNLGRP_MPLS_ROUTE listeners of a route change for @label.
 * @nlh, when non-NULL, supplies the originating request's sequence
 * number so the sender can correlate the echo.
 */
static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt,
		       struct nlmsghdr *nlh, struct net *net, u32 portid,
		       unsigned int nlm_flags)
{
	struct sk_buff *skb;
	u32 seq = nlh ? nlh->nlmsg_seq : 0;
	int err = -ENOBUFS;

	skb = nlmsg_new(lfib_nlmsg_size(rt), GFP_KERNEL);
	if (skb == NULL)
		goto errout;

	err = mpls_dump_route(skb, portid, seq, event, label, rt, nlm_flags);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in lfib_nlmsg_size */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, portid, RTNLGRP_MPLS_ROUTE, nlh, GFP_KERNEL);

	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_MPLS_ROUTE, err);
}
  1582. static int resize_platform_label_table(struct net *net, size_t limit)
  1583. {
  1584. size_t size = sizeof(struct mpls_route *) * limit;
  1585. size_t old_limit;
  1586. size_t cp_size;
  1587. struct mpls_route __rcu **labels = NULL, **old;
  1588. struct mpls_route *rt0 = NULL, *rt2 = NULL;
  1589. unsigned index;
  1590. if (size) {
  1591. labels = kzalloc(size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
  1592. if (!labels)
  1593. labels = vzalloc(size);
  1594. if (!labels)
  1595. goto nolabels;
  1596. }
  1597. /* In case the predefined labels need to be populated */
  1598. if (limit > MPLS_LABEL_IPV4NULL) {
  1599. struct net_device *lo = net->loopback_dev;
  1600. rt0 = mpls_rt_alloc(1, lo->addr_len);
  1601. if (!rt0)
  1602. goto nort0;
  1603. RCU_INIT_POINTER(rt0->rt_nh->nh_dev, lo);
  1604. rt0->rt_protocol = RTPROT_KERNEL;
  1605. rt0->rt_payload_type = MPT_IPV4;
  1606. rt0->rt_ttl_propagate = MPLS_TTL_PROP_DEFAULT;
  1607. rt0->rt_nh->nh_via_table = NEIGH_LINK_TABLE;
  1608. rt0->rt_nh->nh_via_alen = lo->addr_len;
  1609. memcpy(__mpls_nh_via(rt0, rt0->rt_nh), lo->dev_addr,
  1610. lo->addr_len);
  1611. }
  1612. if (limit > MPLS_LABEL_IPV6NULL) {
  1613. struct net_device *lo = net->loopback_dev;
  1614. rt2 = mpls_rt_alloc(1, lo->addr_len);
  1615. if (!rt2)
  1616. goto nort2;
  1617. RCU_INIT_POINTER(rt2->rt_nh->nh_dev, lo);
  1618. rt2->rt_protocol = RTPROT_KERNEL;
  1619. rt2->rt_payload_type = MPT_IPV6;
  1620. rt0->rt_ttl_propagate = MPLS_TTL_PROP_DEFAULT;
  1621. rt2->rt_nh->nh_via_table = NEIGH_LINK_TABLE;
  1622. rt2->rt_nh->nh_via_alen = lo->addr_len;
  1623. memcpy(__mpls_nh_via(rt2, rt2->rt_nh), lo->dev_addr,
  1624. lo->addr_len);
  1625. }
  1626. rtnl_lock();
  1627. /* Remember the original table */
  1628. old = rtnl_dereference(net->mpls.platform_label);
  1629. old_limit = net->mpls.platform_labels;
  1630. /* Free any labels beyond the new table */
  1631. for (index = limit; index < old_limit; index++)
  1632. mpls_route_update(net, index, NULL, NULL);
  1633. /* Copy over the old labels */
  1634. cp_size = size;
  1635. if (old_limit < limit)
  1636. cp_size = old_limit * sizeof(struct mpls_route *);
  1637. memcpy(labels, old, cp_size);
  1638. /* If needed set the predefined labels */
  1639. if ((old_limit <= MPLS_LABEL_IPV6NULL) &&
  1640. (limit > MPLS_LABEL_IPV6NULL)) {
  1641. RCU_INIT_POINTER(labels[MPLS_LABEL_IPV6NULL], rt2);
  1642. rt2 = NULL;
  1643. }
  1644. if ((old_limit <= MPLS_LABEL_IPV4NULL) &&
  1645. (limit > MPLS_LABEL_IPV4NULL)) {
  1646. RCU_INIT_POINTER(labels[MPLS_LABEL_IPV4NULL], rt0);
  1647. rt0 = NULL;
  1648. }
  1649. /* Update the global pointers */
  1650. net->mpls.platform_labels = limit;
  1651. rcu_assign_pointer(net->mpls.platform_label, labels);
  1652. rtnl_unlock();
  1653. mpls_rt_free(rt2);
  1654. mpls_rt_free(rt0);
  1655. if (old) {
  1656. synchronize_rcu();
  1657. kvfree(old);
  1658. }
  1659. return 0;
  1660. nort2:
  1661. mpls_rt_free(rt0);
  1662. nort0:
  1663. kvfree(labels);
  1664. nolabels:
  1665. return -ENOMEM;
  1666. }
/* proc handler for net.mpls.platform_labels: clamp the written value
 * between zero and label_limit via a shadow ctl_table, then resize the
 * label table on a successful write.
 */
static int mpls_platform_labels(struct ctl_table *table, int write,
				void __user *buffer, size_t *lenp, loff_t *ppos)
{
	struct net *net = table->data;
	int platform_labels = net->mpls.platform_labels;
	int ret;
	/* Shadow table so proc_dointvec_minmax() operates on a local
	 * copy with the clamp bounds attached.
	 */
	struct ctl_table tmp = {
		.procname	= table->procname,
		.data		= &platform_labels,
		.maxlen		= sizeof(int),
		.mode		= table->mode,
		.extra1		= &zero,
		.extra2		= &label_limit,
	};

	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);

	if (write && ret == 0)
		ret = resize_platform_label_table(net, platform_labels);

	return ret;
}
/* Byte offset of @field within struct net, encoded as a fake pointer;
 * mpls_net_init() rebases it onto the real struct net.
 */
#define MPLS_NS_SYSCTL_OFFSET(field)	\
	(&((struct net *)0)->field)

/* Template for the per-namespace sysctl tree (net/mpls). */
static const struct ctl_table mpls_table[] = {
	{
		.procname	= "platform_labels",
		/* NULL offset: patched to the struct net itself, which
		 * is what mpls_platform_labels() reads from .data.
		 */
		.data		= NULL,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= mpls_platform_labels,
	},
	{
		.procname	= "ip_ttl_propagate",
		.data		= MPLS_NS_SYSCTL_OFFSET(mpls.ip_ttl_propagate),
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &zero,
		.extra2		= &one,
	},
	{ } /* sentinel */
};
/* Per-namespace init: start with an empty label table, default TTL
 * propagation to enabled, and register the net/mpls sysctl tree with
 * its .data offsets rebased onto this namespace's struct net.
 */
static int mpls_net_init(struct net *net)
{
	struct ctl_table *table;
	int i;

	net->mpls.platform_labels = 0;
	net->mpls.platform_label = NULL;
	net->mpls.ip_ttl_propagate = 1;

	table = kmemdup(mpls_table, sizeof(mpls_table), GFP_KERNEL);
	if (table == NULL)
		return -ENOMEM;

	/* Table data contains only offsets relative to the base of
	 * the struct net at this point, so make them absolute.
	 * (The -1 skips the sentinel entry.)
	 */
	for (i = 0; i < ARRAY_SIZE(mpls_table) - 1; i++)
		table[i].data = (char *)net + (uintptr_t)table[i].data;

	net->mpls.ctl = register_net_sysctl(net, "net/mpls", table);
	if (net->mpls.ctl == NULL) {
		kfree(table);
		return -ENOMEM;
	}

	return 0;
}
  1729. static void mpls_net_exit(struct net *net)
  1730. {
  1731. struct mpls_route __rcu **platform_label;
  1732. size_t platform_labels;
  1733. struct ctl_table *table;
  1734. unsigned int index;
  1735. table = net->mpls.ctl->ctl_table_arg;
  1736. unregister_net_sysctl_table(net->mpls.ctl);
  1737. kfree(table);
  1738. /* An rcu grace period has passed since there was a device in
  1739. * the network namespace (and thus the last in flight packet)
  1740. * left this network namespace. This is because
  1741. * unregister_netdevice_many and netdev_run_todo has completed
  1742. * for each network device that was in this network namespace.
  1743. *
  1744. * As such no additional rcu synchronization is necessary when
  1745. * freeing the platform_label table.
  1746. */
  1747. rtnl_lock();
  1748. platform_label = rtnl_dereference(net->mpls.platform_label);
  1749. platform_labels = net->mpls.platform_labels;
  1750. for (index = 0; index < platform_labels; index++) {
  1751. struct mpls_route *rt = rtnl_dereference(platform_label[index]);
  1752. RCU_INIT_POINTER(platform_label[index], NULL);
  1753. mpls_rt_free(rt);
  1754. }
  1755. rtnl_unlock();
  1756. kvfree(platform_label);
  1757. }
/* Hooks invoked for every network namespace as it is created/destroyed. */
static struct pernet_operations mpls_net_ops = {
	.init = mpls_net_init,
	.exit = mpls_net_exit,
};

/* AF_MPLS address-family ops: contribute per-link MPLS stats to
 * RTM_GETSTATS dumps.
 */
static struct rtnl_af_ops mpls_af_ops __read_mostly = {
	.family		   = AF_MPLS,
	.fill_stats_af	   = mpls_fill_stats_af,
	.get_stats_af_size = mpls_get_stats_af_size,
};
  1767. static int __init mpls_init(void)
  1768. {
  1769. int err;
  1770. BUILD_BUG_ON(sizeof(struct mpls_shim_hdr) != 4);
  1771. err = register_pernet_subsys(&mpls_net_ops);
  1772. if (err)
  1773. goto out;
  1774. err = register_netdevice_notifier(&mpls_dev_notifier);
  1775. if (err)
  1776. goto out_unregister_pernet;
  1777. dev_add_pack(&mpls_packet_type);
  1778. rtnl_af_register(&mpls_af_ops);
  1779. rtnl_register(PF_MPLS, RTM_NEWROUTE, mpls_rtm_newroute, NULL, NULL);
  1780. rtnl_register(PF_MPLS, RTM_DELROUTE, mpls_rtm_delroute, NULL, NULL);
  1781. rtnl_register(PF_MPLS, RTM_GETROUTE, NULL, mpls_dump_routes, NULL);
  1782. rtnl_register(PF_MPLS, RTM_GETNETCONF, mpls_netconf_get_devconf,
  1783. mpls_netconf_dump_devconf, NULL);
  1784. err = 0;
  1785. out:
  1786. return err;
  1787. out_unregister_pernet:
  1788. unregister_pernet_subsys(&mpls_net_ops);
  1789. goto out;
  1790. }
  1791. module_init(mpls_init);
/* Module exit: tear everything down in strict reverse order of
 * mpls_init() so no handler can fire against already-freed state.
 */
static void __exit mpls_exit(void)
{
	rtnl_unregister_all(PF_MPLS);
	rtnl_af_unregister(&mpls_af_ops);
	dev_remove_pack(&mpls_packet_type);
	unregister_netdevice_notifier(&mpls_dev_notifier);
	unregister_pernet_subsys(&mpls_net_ops);
}
module_exit(mpls_exit);

MODULE_DESCRIPTION("MultiProtocol Label Switching");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_NETPROTO(PF_MPLS);