addr.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649
  1. /*
  2. * Copyright (c) 2005 Voltaire Inc. All rights reserved.
  3. * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
  4. * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
  5. * Copyright (c) 2005 Intel Corporation. All rights reserved.
  6. *
  7. * This software is available to you under a choice of one of two
  8. * licenses. You may choose to be licensed under the terms of the GNU
  9. * General Public License (GPL) Version 2, available from the file
  10. * COPYING in the main directory of this source tree, or the
  11. * OpenIB.org BSD license below:
  12. *
  13. * Redistribution and use in source and binary forms, with or
  14. * without modification, are permitted provided that the following
  15. * conditions are met:
  16. *
  17. * - Redistributions of source code must retain the above
  18. * copyright notice, this list of conditions and the following
  19. * disclaimer.
  20. *
  21. * - Redistributions in binary form must reproduce the above
  22. * copyright notice, this list of conditions and the following
  23. * disclaimer in the documentation and/or other materials
  24. * provided with the distribution.
  25. *
  26. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  27. * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  28. * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  29. * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  30. * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  31. * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  32. * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  33. * SOFTWARE.
  34. */
  35. #include <linux/mutex.h>
  36. #include <linux/inetdevice.h>
  37. #include <linux/slab.h>
  38. #include <linux/workqueue.h>
  39. #include <linux/module.h>
  40. #include <net/arp.h>
  41. #include <net/neighbour.h>
  42. #include <net/route.h>
  43. #include <net/netevent.h>
  44. #include <net/addrconf.h>
  45. #include <net/ip6_route.h>
  46. #include <rdma/ib_addr.h>
  47. #include <rdma/ib.h>
  48. struct addr_req {
  49. struct list_head list;
  50. struct sockaddr_storage src_addr;
  51. struct sockaddr_storage dst_addr;
  52. struct rdma_dev_addr *addr;
  53. struct rdma_addr_client *client;
  54. void *context;
  55. void (*callback)(int status, struct sockaddr *src_addr,
  56. struct rdma_dev_addr *addr, void *context);
  57. unsigned long timeout;
  58. int status;
  59. };
  60. static void process_req(struct work_struct *work);
  61. static DEFINE_MUTEX(lock);
  62. static LIST_HEAD(req_list);
  63. static DECLARE_DELAYED_WORK(work, process_req);
  64. static struct workqueue_struct *addr_wq;
  65. int rdma_addr_size(struct sockaddr *addr)
  66. {
  67. switch (addr->sa_family) {
  68. case AF_INET:
  69. return sizeof(struct sockaddr_in);
  70. case AF_INET6:
  71. return sizeof(struct sockaddr_in6);
  72. case AF_IB:
  73. return sizeof(struct sockaddr_ib);
  74. default:
  75. return 0;
  76. }
  77. }
  78. EXPORT_SYMBOL(rdma_addr_size);
  79. static struct rdma_addr_client self;
  80. void rdma_addr_register_client(struct rdma_addr_client *client)
  81. {
  82. atomic_set(&client->refcount, 1);
  83. init_completion(&client->comp);
  84. }
  85. EXPORT_SYMBOL(rdma_addr_register_client);
  86. static inline void put_client(struct rdma_addr_client *client)
  87. {
  88. if (atomic_dec_and_test(&client->refcount))
  89. complete(&client->comp);
  90. }
  91. void rdma_addr_unregister_client(struct rdma_addr_client *client)
  92. {
  93. put_client(client);
  94. wait_for_completion(&client->comp);
  95. }
  96. EXPORT_SYMBOL(rdma_addr_unregister_client);
  97. int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
  98. const unsigned char *dst_dev_addr)
  99. {
  100. dev_addr->dev_type = dev->type;
  101. memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
  102. memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN);
  103. if (dst_dev_addr)
  104. memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
  105. dev_addr->bound_dev_if = dev->ifindex;
  106. return 0;
  107. }
  108. EXPORT_SYMBOL(rdma_copy_addr);
  109. int rdma_translate_ip(const struct sockaddr *addr,
  110. struct rdma_dev_addr *dev_addr,
  111. u16 *vlan_id)
  112. {
  113. struct net_device *dev;
  114. int ret = -EADDRNOTAVAIL;
  115. if (dev_addr->bound_dev_if) {
  116. dev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
  117. if (!dev)
  118. return -ENODEV;
  119. ret = rdma_copy_addr(dev_addr, dev, NULL);
  120. dev_put(dev);
  121. return ret;
  122. }
  123. switch (addr->sa_family) {
  124. case AF_INET:
  125. dev = ip_dev_find(dev_addr->net,
  126. ((const struct sockaddr_in *)addr)->sin_addr.s_addr);
  127. if (!dev)
  128. return ret;
  129. ret = rdma_copy_addr(dev_addr, dev, NULL);
  130. if (vlan_id)
  131. *vlan_id = rdma_vlan_dev_vlan_id(dev);
  132. dev_put(dev);
  133. break;
  134. #if IS_ENABLED(CONFIG_IPV6)
  135. case AF_INET6:
  136. rcu_read_lock();
  137. for_each_netdev_rcu(dev_addr->net, dev) {
  138. if (ipv6_chk_addr(dev_addr->net,
  139. &((const struct sockaddr_in6 *)addr)->sin6_addr,
  140. dev, 1)) {
  141. ret = rdma_copy_addr(dev_addr, dev, NULL);
  142. if (vlan_id)
  143. *vlan_id = rdma_vlan_dev_vlan_id(dev);
  144. break;
  145. }
  146. }
  147. rcu_read_unlock();
  148. break;
  149. #endif
  150. }
  151. return ret;
  152. }
  153. EXPORT_SYMBOL(rdma_translate_ip);
  154. static void set_timeout(unsigned long time)
  155. {
  156. unsigned long delay;
  157. delay = time - jiffies;
  158. if ((long)delay < 0)
  159. delay = 0;
  160. mod_delayed_work(addr_wq, &work, delay);
  161. }
  162. static void queue_req(struct addr_req *req)
  163. {
  164. struct addr_req *temp_req;
  165. mutex_lock(&lock);
  166. list_for_each_entry_reverse(temp_req, &req_list, list) {
  167. if (time_after_eq(req->timeout, temp_req->timeout))
  168. break;
  169. }
  170. list_add(&req->list, &temp_req->list);
  171. if (req_list.next == &req->list)
  172. set_timeout(req->timeout);
  173. mutex_unlock(&lock);
  174. }
  175. static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
  176. const void *daddr)
  177. {
  178. struct neighbour *n;
  179. int ret;
  180. n = dst_neigh_lookup(dst, daddr);
  181. rcu_read_lock();
  182. if (!n || !(n->nud_state & NUD_VALID)) {
  183. if (n)
  184. neigh_event_send(n, NULL);
  185. ret = -ENODATA;
  186. } else {
  187. ret = rdma_copy_addr(dev_addr, dst->dev, n->ha);
  188. }
  189. rcu_read_unlock();
  190. if (n)
  191. neigh_release(n);
  192. return ret;
  193. }
  194. static int addr4_resolve(struct sockaddr_in *src_in,
  195. const struct sockaddr_in *dst_in,
  196. struct rdma_dev_addr *addr,
  197. struct rtable **prt)
  198. {
  199. __be32 src_ip = src_in->sin_addr.s_addr;
  200. __be32 dst_ip = dst_in->sin_addr.s_addr;
  201. struct rtable *rt;
  202. struct flowi4 fl4;
  203. int ret;
  204. memset(&fl4, 0, sizeof(fl4));
  205. fl4.daddr = dst_ip;
  206. fl4.saddr = src_ip;
  207. fl4.flowi4_oif = addr->bound_dev_if;
  208. rt = ip_route_output_key(addr->net, &fl4);
  209. if (IS_ERR(rt)) {
  210. ret = PTR_ERR(rt);
  211. goto out;
  212. }
  213. src_in->sin_family = AF_INET;
  214. src_in->sin_addr.s_addr = fl4.saddr;
  215. /* If there's a gateway, we're definitely in RoCE v2 (as RoCE v1 isn't
  216. * routable) and we could set the network type accordingly.
  217. */
  218. if (rt->rt_uses_gateway)
  219. addr->network = RDMA_NETWORK_IPV4;
  220. addr->hoplimit = ip4_dst_hoplimit(&rt->dst);
  221. *prt = rt;
  222. return 0;
  223. out:
  224. return ret;
  225. }
  226. #if IS_ENABLED(CONFIG_IPV6)
  227. static int addr6_resolve(struct sockaddr_in6 *src_in,
  228. const struct sockaddr_in6 *dst_in,
  229. struct rdma_dev_addr *addr,
  230. struct dst_entry **pdst)
  231. {
  232. struct flowi6 fl6;
  233. struct dst_entry *dst;
  234. struct rt6_info *rt;
  235. int ret;
  236. memset(&fl6, 0, sizeof fl6);
  237. fl6.daddr = dst_in->sin6_addr;
  238. fl6.saddr = src_in->sin6_addr;
  239. fl6.flowi6_oif = addr->bound_dev_if;
  240. dst = ip6_route_output(addr->net, NULL, &fl6);
  241. if ((ret = dst->error))
  242. goto put;
  243. rt = (struct rt6_info *)dst;
  244. if (ipv6_addr_any(&fl6.saddr)) {
  245. ret = ipv6_dev_get_saddr(addr->net, ip6_dst_idev(dst)->dev,
  246. &fl6.daddr, 0, &fl6.saddr);
  247. if (ret)
  248. goto put;
  249. src_in->sin6_family = AF_INET6;
  250. src_in->sin6_addr = fl6.saddr;
  251. }
  252. /* If there's a gateway, we're definitely in RoCE v2 (as RoCE v1 isn't
  253. * routable) and we could set the network type accordingly.
  254. */
  255. if (rt->rt6i_flags & RTF_GATEWAY)
  256. addr->network = RDMA_NETWORK_IPV6;
  257. addr->hoplimit = ip6_dst_hoplimit(dst);
  258. *pdst = dst;
  259. return 0;
  260. put:
  261. dst_release(dst);
  262. return ret;
  263. }
  264. #else
  265. static int addr6_resolve(struct sockaddr_in6 *src_in,
  266. const struct sockaddr_in6 *dst_in,
  267. struct rdma_dev_addr *addr,
  268. struct dst_entry **pdst)
  269. {
  270. return -EADDRNOTAVAIL;
  271. }
  272. #endif
  273. static int addr_resolve_neigh(struct dst_entry *dst,
  274. const struct sockaddr *dst_in,
  275. struct rdma_dev_addr *addr)
  276. {
  277. if (dst->dev->flags & IFF_LOOPBACK) {
  278. int ret;
  279. ret = rdma_translate_ip(dst_in, addr, NULL);
  280. if (!ret)
  281. memcpy(addr->dst_dev_addr, addr->src_dev_addr,
  282. MAX_ADDR_LEN);
  283. return ret;
  284. }
  285. /* If the device doesn't do ARP internally */
  286. if (!(dst->dev->flags & IFF_NOARP)) {
  287. const struct sockaddr_in *dst_in4 =
  288. (const struct sockaddr_in *)dst_in;
  289. const struct sockaddr_in6 *dst_in6 =
  290. (const struct sockaddr_in6 *)dst_in;
  291. return dst_fetch_ha(dst, addr,
  292. dst_in->sa_family == AF_INET ?
  293. (const void *)&dst_in4->sin_addr.s_addr :
  294. (const void *)&dst_in6->sin6_addr);
  295. }
  296. return rdma_copy_addr(addr, dst->dev, NULL);
  297. }
  298. static int addr_resolve(struct sockaddr *src_in,
  299. const struct sockaddr *dst_in,
  300. struct rdma_dev_addr *addr,
  301. bool resolve_neigh)
  302. {
  303. struct net_device *ndev;
  304. struct dst_entry *dst;
  305. int ret;
  306. if (src_in->sa_family == AF_INET) {
  307. struct rtable *rt = NULL;
  308. const struct sockaddr_in *dst_in4 =
  309. (const struct sockaddr_in *)dst_in;
  310. ret = addr4_resolve((struct sockaddr_in *)src_in,
  311. dst_in4, addr, &rt);
  312. if (ret)
  313. return ret;
  314. if (resolve_neigh)
  315. ret = addr_resolve_neigh(&rt->dst, dst_in, addr);
  316. ndev = rt->dst.dev;
  317. dev_hold(ndev);
  318. ip_rt_put(rt);
  319. } else {
  320. const struct sockaddr_in6 *dst_in6 =
  321. (const struct sockaddr_in6 *)dst_in;
  322. ret = addr6_resolve((struct sockaddr_in6 *)src_in,
  323. dst_in6, addr,
  324. &dst);
  325. if (ret)
  326. return ret;
  327. if (resolve_neigh)
  328. ret = addr_resolve_neigh(dst, dst_in, addr);
  329. ndev = dst->dev;
  330. dev_hold(ndev);
  331. dst_release(dst);
  332. }
  333. addr->bound_dev_if = ndev->ifindex;
  334. addr->net = dev_net(ndev);
  335. dev_put(ndev);
  336. return ret;
  337. }
  338. static void process_req(struct work_struct *work)
  339. {
  340. struct addr_req *req, *temp_req;
  341. struct sockaddr *src_in, *dst_in;
  342. struct list_head done_list;
  343. INIT_LIST_HEAD(&done_list);
  344. mutex_lock(&lock);
  345. list_for_each_entry_safe(req, temp_req, &req_list, list) {
  346. if (req->status == -ENODATA) {
  347. src_in = (struct sockaddr *) &req->src_addr;
  348. dst_in = (struct sockaddr *) &req->dst_addr;
  349. req->status = addr_resolve(src_in, dst_in, req->addr,
  350. true);
  351. if (req->status && time_after_eq(jiffies, req->timeout))
  352. req->status = -ETIMEDOUT;
  353. else if (req->status == -ENODATA)
  354. continue;
  355. }
  356. list_move_tail(&req->list, &done_list);
  357. }
  358. if (!list_empty(&req_list)) {
  359. req = list_entry(req_list.next, struct addr_req, list);
  360. set_timeout(req->timeout);
  361. }
  362. mutex_unlock(&lock);
  363. list_for_each_entry_safe(req, temp_req, &done_list, list) {
  364. list_del(&req->list);
  365. req->callback(req->status, (struct sockaddr *) &req->src_addr,
  366. req->addr, req->context);
  367. put_client(req->client);
  368. kfree(req);
  369. }
  370. }
  371. int rdma_resolve_ip(struct rdma_addr_client *client,
  372. struct sockaddr *src_addr, struct sockaddr *dst_addr,
  373. struct rdma_dev_addr *addr, int timeout_ms,
  374. void (*callback)(int status, struct sockaddr *src_addr,
  375. struct rdma_dev_addr *addr, void *context),
  376. void *context)
  377. {
  378. struct sockaddr *src_in, *dst_in;
  379. struct addr_req *req;
  380. int ret = 0;
  381. req = kzalloc(sizeof *req, GFP_KERNEL);
  382. if (!req)
  383. return -ENOMEM;
  384. src_in = (struct sockaddr *) &req->src_addr;
  385. dst_in = (struct sockaddr *) &req->dst_addr;
  386. if (src_addr) {
  387. if (src_addr->sa_family != dst_addr->sa_family) {
  388. ret = -EINVAL;
  389. goto err;
  390. }
  391. memcpy(src_in, src_addr, rdma_addr_size(src_addr));
  392. } else {
  393. src_in->sa_family = dst_addr->sa_family;
  394. }
  395. memcpy(dst_in, dst_addr, rdma_addr_size(dst_addr));
  396. req->addr = addr;
  397. req->callback = callback;
  398. req->context = context;
  399. req->client = client;
  400. atomic_inc(&client->refcount);
  401. req->status = addr_resolve(src_in, dst_in, addr, true);
  402. switch (req->status) {
  403. case 0:
  404. req->timeout = jiffies;
  405. queue_req(req);
  406. break;
  407. case -ENODATA:
  408. req->timeout = msecs_to_jiffies(timeout_ms) + jiffies;
  409. queue_req(req);
  410. break;
  411. default:
  412. ret = req->status;
  413. atomic_dec(&client->refcount);
  414. goto err;
  415. }
  416. return ret;
  417. err:
  418. kfree(req);
  419. return ret;
  420. }
  421. EXPORT_SYMBOL(rdma_resolve_ip);
  422. int rdma_resolve_ip_route(struct sockaddr *src_addr,
  423. const struct sockaddr *dst_addr,
  424. struct rdma_dev_addr *addr)
  425. {
  426. struct sockaddr_storage ssrc_addr = {};
  427. struct sockaddr *src_in = (struct sockaddr *)&ssrc_addr;
  428. if (src_addr) {
  429. if (src_addr->sa_family != dst_addr->sa_family)
  430. return -EINVAL;
  431. memcpy(src_in, src_addr, rdma_addr_size(src_addr));
  432. } else {
  433. src_in->sa_family = dst_addr->sa_family;
  434. }
  435. return addr_resolve(src_in, dst_addr, addr, false);
  436. }
  437. EXPORT_SYMBOL(rdma_resolve_ip_route);
  438. void rdma_addr_cancel(struct rdma_dev_addr *addr)
  439. {
  440. struct addr_req *req, *temp_req;
  441. mutex_lock(&lock);
  442. list_for_each_entry_safe(req, temp_req, &req_list, list) {
  443. if (req->addr == addr) {
  444. req->status = -ECANCELED;
  445. req->timeout = jiffies;
  446. list_move(&req->list, &req_list);
  447. set_timeout(req->timeout);
  448. break;
  449. }
  450. }
  451. mutex_unlock(&lock);
  452. }
  453. EXPORT_SYMBOL(rdma_addr_cancel);
  454. struct resolve_cb_context {
  455. struct rdma_dev_addr *addr;
  456. struct completion comp;
  457. };
  458. static void resolve_cb(int status, struct sockaddr *src_addr,
  459. struct rdma_dev_addr *addr, void *context)
  460. {
  461. memcpy(((struct resolve_cb_context *)context)->addr, addr, sizeof(struct
  462. rdma_dev_addr));
  463. complete(&((struct resolve_cb_context *)context)->comp);
  464. }
  465. int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
  466. const union ib_gid *dgid,
  467. u8 *dmac, u16 *vlan_id, int *if_index,
  468. int *hoplimit)
  469. {
  470. int ret = 0;
  471. struct rdma_dev_addr dev_addr;
  472. struct resolve_cb_context ctx;
  473. struct net_device *dev;
  474. union {
  475. struct sockaddr _sockaddr;
  476. struct sockaddr_in _sockaddr_in;
  477. struct sockaddr_in6 _sockaddr_in6;
  478. } sgid_addr, dgid_addr;
  479. rdma_gid2ip(&sgid_addr._sockaddr, sgid);
  480. rdma_gid2ip(&dgid_addr._sockaddr, dgid);
  481. memset(&dev_addr, 0, sizeof(dev_addr));
  482. if (if_index)
  483. dev_addr.bound_dev_if = *if_index;
  484. dev_addr.net = &init_net;
  485. ctx.addr = &dev_addr;
  486. init_completion(&ctx.comp);
  487. ret = rdma_resolve_ip(&self, &sgid_addr._sockaddr, &dgid_addr._sockaddr,
  488. &dev_addr, 1000, resolve_cb, &ctx);
  489. if (ret)
  490. return ret;
  491. wait_for_completion(&ctx.comp);
  492. memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN);
  493. dev = dev_get_by_index(&init_net, dev_addr.bound_dev_if);
  494. if (!dev)
  495. return -ENODEV;
  496. if (if_index)
  497. *if_index = dev_addr.bound_dev_if;
  498. if (vlan_id)
  499. *vlan_id = rdma_vlan_dev_vlan_id(dev);
  500. if (hoplimit)
  501. *hoplimit = dev_addr.hoplimit;
  502. dev_put(dev);
  503. return ret;
  504. }
  505. EXPORT_SYMBOL(rdma_addr_find_l2_eth_by_grh);
  506. int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id)
  507. {
  508. int ret = 0;
  509. struct rdma_dev_addr dev_addr;
  510. union {
  511. struct sockaddr _sockaddr;
  512. struct sockaddr_in _sockaddr_in;
  513. struct sockaddr_in6 _sockaddr_in6;
  514. } gid_addr;
  515. rdma_gid2ip(&gid_addr._sockaddr, sgid);
  516. memset(&dev_addr, 0, sizeof(dev_addr));
  517. dev_addr.net = &init_net;
  518. ret = rdma_translate_ip(&gid_addr._sockaddr, &dev_addr, vlan_id);
  519. if (ret)
  520. return ret;
  521. memcpy(smac, dev_addr.src_dev_addr, ETH_ALEN);
  522. return ret;
  523. }
  524. EXPORT_SYMBOL(rdma_addr_find_smac_by_sgid);
  525. static int netevent_callback(struct notifier_block *self, unsigned long event,
  526. void *ctx)
  527. {
  528. if (event == NETEVENT_NEIGH_UPDATE) {
  529. struct neighbour *neigh = ctx;
  530. if (neigh->nud_state & NUD_VALID) {
  531. set_timeout(jiffies);
  532. }
  533. }
  534. return 0;
  535. }
  536. static struct notifier_block nb = {
  537. .notifier_call = netevent_callback
  538. };
  539. int addr_init(void)
  540. {
  541. addr_wq = create_singlethread_workqueue("ib_addr");
  542. if (!addr_wq)
  543. return -ENOMEM;
  544. register_netevent_notifier(&nb);
  545. rdma_addr_register_client(&self);
  546. return 0;
  547. }
  548. void addr_cleanup(void)
  549. {
  550. rdma_addr_unregister_client(&self);
  551. unregister_netevent_notifier(&nb);
  552. destroy_workqueue(addr_wq);
  553. }