addr.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647
  1. /*
  2. * Copyright (c) 2005 Voltaire Inc. All rights reserved.
  3. * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
  4. * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
  5. * Copyright (c) 2005 Intel Corporation. All rights reserved.
  6. *
  7. * This software is available to you under a choice of one of two
  8. * licenses. You may choose to be licensed under the terms of the GNU
  9. * General Public License (GPL) Version 2, available from the file
  10. * COPYING in the main directory of this source tree, or the
  11. * OpenIB.org BSD license below:
  12. *
  13. * Redistribution and use in source and binary forms, with or
  14. * without modification, are permitted provided that the following
  15. * conditions are met:
  16. *
  17. * - Redistributions of source code must retain the above
  18. * copyright notice, this list of conditions and the following
  19. * disclaimer.
  20. *
  21. * - Redistributions in binary form must reproduce the above
  22. * copyright notice, this list of conditions and the following
  23. * disclaimer in the documentation and/or other materials
  24. * provided with the distribution.
  25. *
  26. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  27. * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  28. * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  29. * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  30. * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  31. * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  32. * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  33. * SOFTWARE.
  34. */
  35. #include <linux/mutex.h>
  36. #include <linux/inetdevice.h>
  37. #include <linux/slab.h>
  38. #include <linux/workqueue.h>
  39. #include <linux/module.h>
  40. #include <net/arp.h>
  41. #include <net/neighbour.h>
  42. #include <net/route.h>
  43. #include <net/netevent.h>
  44. #include <net/addrconf.h>
  45. #include <net/ip6_route.h>
  46. #include <rdma/ib_addr.h>
  47. #include <rdma/ib.h>
  48. MODULE_AUTHOR("Sean Hefty");
  49. MODULE_DESCRIPTION("IB Address Translation");
  50. MODULE_LICENSE("Dual BSD/GPL");
  51. struct addr_req {
  52. struct list_head list;
  53. struct sockaddr_storage src_addr;
  54. struct sockaddr_storage dst_addr;
  55. struct rdma_dev_addr *addr;
  56. struct rdma_addr_client *client;
  57. void *context;
  58. void (*callback)(int status, struct sockaddr *src_addr,
  59. struct rdma_dev_addr *addr, void *context);
  60. unsigned long timeout;
  61. int status;
  62. };
  63. static void process_req(struct work_struct *work);
  64. static DEFINE_MUTEX(lock);
  65. static LIST_HEAD(req_list);
  66. static DECLARE_DELAYED_WORK(work, process_req);
  67. static struct workqueue_struct *addr_wq;
  68. int rdma_addr_size(struct sockaddr *addr)
  69. {
  70. switch (addr->sa_family) {
  71. case AF_INET:
  72. return sizeof(struct sockaddr_in);
  73. case AF_INET6:
  74. return sizeof(struct sockaddr_in6);
  75. case AF_IB:
  76. return sizeof(struct sockaddr_ib);
  77. default:
  78. return 0;
  79. }
  80. }
  81. EXPORT_SYMBOL(rdma_addr_size);
  82. static struct rdma_addr_client self;
  83. void rdma_addr_register_client(struct rdma_addr_client *client)
  84. {
  85. atomic_set(&client->refcount, 1);
  86. init_completion(&client->comp);
  87. }
  88. EXPORT_SYMBOL(rdma_addr_register_client);
  89. static inline void put_client(struct rdma_addr_client *client)
  90. {
  91. if (atomic_dec_and_test(&client->refcount))
  92. complete(&client->comp);
  93. }
  94. void rdma_addr_unregister_client(struct rdma_addr_client *client)
  95. {
  96. put_client(client);
  97. wait_for_completion(&client->comp);
  98. }
  99. EXPORT_SYMBOL(rdma_addr_unregister_client);
  100. int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
  101. const unsigned char *dst_dev_addr)
  102. {
  103. dev_addr->dev_type = dev->type;
  104. memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
  105. memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN);
  106. if (dst_dev_addr)
  107. memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
  108. dev_addr->bound_dev_if = dev->ifindex;
  109. return 0;
  110. }
  111. EXPORT_SYMBOL(rdma_copy_addr);
  112. int rdma_translate_ip(const struct sockaddr *addr,
  113. struct rdma_dev_addr *dev_addr,
  114. u16 *vlan_id)
  115. {
  116. struct net_device *dev;
  117. int ret = -EADDRNOTAVAIL;
  118. if (dev_addr->bound_dev_if) {
  119. dev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
  120. if (!dev)
  121. return -ENODEV;
  122. ret = rdma_copy_addr(dev_addr, dev, NULL);
  123. dev_put(dev);
  124. return ret;
  125. }
  126. switch (addr->sa_family) {
  127. case AF_INET:
  128. dev = ip_dev_find(dev_addr->net,
  129. ((const struct sockaddr_in *)addr)->sin_addr.s_addr);
  130. if (!dev)
  131. return ret;
  132. ret = rdma_copy_addr(dev_addr, dev, NULL);
  133. if (vlan_id)
  134. *vlan_id = rdma_vlan_dev_vlan_id(dev);
  135. dev_put(dev);
  136. break;
  137. #if IS_ENABLED(CONFIG_IPV6)
  138. case AF_INET6:
  139. rcu_read_lock();
  140. for_each_netdev_rcu(dev_addr->net, dev) {
  141. if (ipv6_chk_addr(dev_addr->net,
  142. &((const struct sockaddr_in6 *)addr)->sin6_addr,
  143. dev, 1)) {
  144. ret = rdma_copy_addr(dev_addr, dev, NULL);
  145. if (vlan_id)
  146. *vlan_id = rdma_vlan_dev_vlan_id(dev);
  147. break;
  148. }
  149. }
  150. rcu_read_unlock();
  151. break;
  152. #endif
  153. }
  154. return ret;
  155. }
  156. EXPORT_SYMBOL(rdma_translate_ip);
  157. static void set_timeout(unsigned long time)
  158. {
  159. unsigned long delay;
  160. delay = time - jiffies;
  161. if ((long)delay < 0)
  162. delay = 0;
  163. mod_delayed_work(addr_wq, &work, delay);
  164. }
  165. static void queue_req(struct addr_req *req)
  166. {
  167. struct addr_req *temp_req;
  168. mutex_lock(&lock);
  169. list_for_each_entry_reverse(temp_req, &req_list, list) {
  170. if (time_after_eq(req->timeout, temp_req->timeout))
  171. break;
  172. }
  173. list_add(&req->list, &temp_req->list);
  174. if (req_list.next == &req->list)
  175. set_timeout(req->timeout);
  176. mutex_unlock(&lock);
  177. }
  178. static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
  179. const void *daddr)
  180. {
  181. struct neighbour *n;
  182. int ret;
  183. n = dst_neigh_lookup(dst, daddr);
  184. rcu_read_lock();
  185. if (!n || !(n->nud_state & NUD_VALID)) {
  186. if (n)
  187. neigh_event_send(n, NULL);
  188. ret = -ENODATA;
  189. } else {
  190. ret = rdma_copy_addr(dev_addr, dst->dev, n->ha);
  191. }
  192. rcu_read_unlock();
  193. if (n)
  194. neigh_release(n);
  195. return ret;
  196. }
  197. static int addr4_resolve(struct sockaddr_in *src_in,
  198. const struct sockaddr_in *dst_in,
  199. struct rdma_dev_addr *addr,
  200. struct rtable **prt)
  201. {
  202. __be32 src_ip = src_in->sin_addr.s_addr;
  203. __be32 dst_ip = dst_in->sin_addr.s_addr;
  204. struct rtable *rt;
  205. struct flowi4 fl4;
  206. int ret;
  207. memset(&fl4, 0, sizeof(fl4));
  208. fl4.daddr = dst_ip;
  209. fl4.saddr = src_ip;
  210. fl4.flowi4_oif = addr->bound_dev_if;
  211. rt = ip_route_output_key(addr->net, &fl4);
  212. if (IS_ERR(rt)) {
  213. ret = PTR_ERR(rt);
  214. goto out;
  215. }
  216. src_in->sin_family = AF_INET;
  217. src_in->sin_addr.s_addr = fl4.saddr;
  218. /* If there's a gateway, we're definitely in RoCE v2 (as RoCE v1 isn't
  219. * routable) and we could set the network type accordingly.
  220. */
  221. if (rt->rt_uses_gateway)
  222. addr->network = RDMA_NETWORK_IPV4;
  223. *prt = rt;
  224. return 0;
  225. out:
  226. return ret;
  227. }
  228. #if IS_ENABLED(CONFIG_IPV6)
  229. static int addr6_resolve(struct sockaddr_in6 *src_in,
  230. const struct sockaddr_in6 *dst_in,
  231. struct rdma_dev_addr *addr,
  232. struct dst_entry **pdst)
  233. {
  234. struct flowi6 fl6;
  235. struct dst_entry *dst;
  236. struct rt6_info *rt;
  237. int ret;
  238. memset(&fl6, 0, sizeof fl6);
  239. fl6.daddr = dst_in->sin6_addr;
  240. fl6.saddr = src_in->sin6_addr;
  241. fl6.flowi6_oif = addr->bound_dev_if;
  242. dst = ip6_route_output(addr->net, NULL, &fl6);
  243. if ((ret = dst->error))
  244. goto put;
  245. rt = (struct rt6_info *)dst;
  246. if (ipv6_addr_any(&fl6.saddr)) {
  247. ret = ipv6_dev_get_saddr(addr->net, ip6_dst_idev(dst)->dev,
  248. &fl6.daddr, 0, &fl6.saddr);
  249. if (ret)
  250. goto put;
  251. src_in->sin6_family = AF_INET6;
  252. src_in->sin6_addr = fl6.saddr;
  253. }
  254. /* If there's a gateway, we're definitely in RoCE v2 (as RoCE v1 isn't
  255. * routable) and we could set the network type accordingly.
  256. */
  257. if (rt->rt6i_flags & RTF_GATEWAY)
  258. addr->network = RDMA_NETWORK_IPV6;
  259. *pdst = dst;
  260. return 0;
  261. put:
  262. dst_release(dst);
  263. return ret;
  264. }
  265. #else
  266. static int addr6_resolve(struct sockaddr_in6 *src_in,
  267. const struct sockaddr_in6 *dst_in,
  268. struct rdma_dev_addr *addr,
  269. struct dst_entry **pdst)
  270. {
  271. return -EADDRNOTAVAIL;
  272. }
  273. #endif
  274. static int addr_resolve_neigh(struct dst_entry *dst,
  275. const struct sockaddr *dst_in,
  276. struct rdma_dev_addr *addr)
  277. {
  278. if (dst->dev->flags & IFF_LOOPBACK) {
  279. int ret;
  280. ret = rdma_translate_ip(dst_in, addr, NULL);
  281. if (!ret)
  282. memcpy(addr->dst_dev_addr, addr->src_dev_addr,
  283. MAX_ADDR_LEN);
  284. return ret;
  285. }
  286. /* If the device doesn't do ARP internally */
  287. if (!(dst->dev->flags & IFF_NOARP)) {
  288. const struct sockaddr_in *dst_in4 =
  289. (const struct sockaddr_in *)dst_in;
  290. const struct sockaddr_in6 *dst_in6 =
  291. (const struct sockaddr_in6 *)dst_in;
  292. return dst_fetch_ha(dst, addr,
  293. dst_in->sa_family == AF_INET ?
  294. (const void *)&dst_in4->sin_addr.s_addr :
  295. (const void *)&dst_in6->sin6_addr);
  296. }
  297. return rdma_copy_addr(addr, dst->dev, NULL);
  298. }
  299. static int addr_resolve(struct sockaddr *src_in,
  300. const struct sockaddr *dst_in,
  301. struct rdma_dev_addr *addr,
  302. bool resolve_neigh)
  303. {
  304. struct net_device *ndev;
  305. struct dst_entry *dst;
  306. int ret;
  307. if (src_in->sa_family == AF_INET) {
  308. struct rtable *rt = NULL;
  309. const struct sockaddr_in *dst_in4 =
  310. (const struct sockaddr_in *)dst_in;
  311. ret = addr4_resolve((struct sockaddr_in *)src_in,
  312. dst_in4, addr, &rt);
  313. if (ret)
  314. return ret;
  315. if (resolve_neigh)
  316. ret = addr_resolve_neigh(&rt->dst, dst_in, addr);
  317. ndev = rt->dst.dev;
  318. dev_hold(ndev);
  319. ip_rt_put(rt);
  320. } else {
  321. const struct sockaddr_in6 *dst_in6 =
  322. (const struct sockaddr_in6 *)dst_in;
  323. ret = addr6_resolve((struct sockaddr_in6 *)src_in,
  324. dst_in6, addr,
  325. &dst);
  326. if (ret)
  327. return ret;
  328. if (resolve_neigh)
  329. ret = addr_resolve_neigh(dst, dst_in, addr);
  330. ndev = dst->dev;
  331. dev_hold(ndev);
  332. dst_release(dst);
  333. }
  334. addr->bound_dev_if = ndev->ifindex;
  335. addr->net = dev_net(ndev);
  336. dev_put(ndev);
  337. return ret;
  338. }
  339. static void process_req(struct work_struct *work)
  340. {
  341. struct addr_req *req, *temp_req;
  342. struct sockaddr *src_in, *dst_in;
  343. struct list_head done_list;
  344. INIT_LIST_HEAD(&done_list);
  345. mutex_lock(&lock);
  346. list_for_each_entry_safe(req, temp_req, &req_list, list) {
  347. if (req->status == -ENODATA) {
  348. src_in = (struct sockaddr *) &req->src_addr;
  349. dst_in = (struct sockaddr *) &req->dst_addr;
  350. req->status = addr_resolve(src_in, dst_in, req->addr,
  351. true);
  352. if (req->status && time_after_eq(jiffies, req->timeout))
  353. req->status = -ETIMEDOUT;
  354. else if (req->status == -ENODATA)
  355. continue;
  356. }
  357. list_move_tail(&req->list, &done_list);
  358. }
  359. if (!list_empty(&req_list)) {
  360. req = list_entry(req_list.next, struct addr_req, list);
  361. set_timeout(req->timeout);
  362. }
  363. mutex_unlock(&lock);
  364. list_for_each_entry_safe(req, temp_req, &done_list, list) {
  365. list_del(&req->list);
  366. req->callback(req->status, (struct sockaddr *) &req->src_addr,
  367. req->addr, req->context);
  368. put_client(req->client);
  369. kfree(req);
  370. }
  371. }
  372. int rdma_resolve_ip(struct rdma_addr_client *client,
  373. struct sockaddr *src_addr, struct sockaddr *dst_addr,
  374. struct rdma_dev_addr *addr, int timeout_ms,
  375. void (*callback)(int status, struct sockaddr *src_addr,
  376. struct rdma_dev_addr *addr, void *context),
  377. void *context)
  378. {
  379. struct sockaddr *src_in, *dst_in;
  380. struct addr_req *req;
  381. int ret = 0;
  382. req = kzalloc(sizeof *req, GFP_KERNEL);
  383. if (!req)
  384. return -ENOMEM;
  385. src_in = (struct sockaddr *) &req->src_addr;
  386. dst_in = (struct sockaddr *) &req->dst_addr;
  387. if (src_addr) {
  388. if (src_addr->sa_family != dst_addr->sa_family) {
  389. ret = -EINVAL;
  390. goto err;
  391. }
  392. memcpy(src_in, src_addr, rdma_addr_size(src_addr));
  393. } else {
  394. src_in->sa_family = dst_addr->sa_family;
  395. }
  396. memcpy(dst_in, dst_addr, rdma_addr_size(dst_addr));
  397. req->addr = addr;
  398. req->callback = callback;
  399. req->context = context;
  400. req->client = client;
  401. atomic_inc(&client->refcount);
  402. req->status = addr_resolve(src_in, dst_in, addr, true);
  403. switch (req->status) {
  404. case 0:
  405. req->timeout = jiffies;
  406. queue_req(req);
  407. break;
  408. case -ENODATA:
  409. req->timeout = msecs_to_jiffies(timeout_ms) + jiffies;
  410. queue_req(req);
  411. break;
  412. default:
  413. ret = req->status;
  414. atomic_dec(&client->refcount);
  415. goto err;
  416. }
  417. return ret;
  418. err:
  419. kfree(req);
  420. return ret;
  421. }
  422. EXPORT_SYMBOL(rdma_resolve_ip);
  423. int rdma_resolve_ip_route(struct sockaddr *src_addr,
  424. const struct sockaddr *dst_addr,
  425. struct rdma_dev_addr *addr)
  426. {
  427. struct sockaddr_storage ssrc_addr = {};
  428. struct sockaddr *src_in = (struct sockaddr *)&ssrc_addr;
  429. if (src_addr->sa_family != dst_addr->sa_family)
  430. return -EINVAL;
  431. if (src_addr)
  432. memcpy(src_in, src_addr, rdma_addr_size(src_addr));
  433. else
  434. src_in->sa_family = dst_addr->sa_family;
  435. return addr_resolve(src_in, dst_addr, addr, false);
  436. }
  437. EXPORT_SYMBOL(rdma_resolve_ip_route);
  438. void rdma_addr_cancel(struct rdma_dev_addr *addr)
  439. {
  440. struct addr_req *req, *temp_req;
  441. mutex_lock(&lock);
  442. list_for_each_entry_safe(req, temp_req, &req_list, list) {
  443. if (req->addr == addr) {
  444. req->status = -ECANCELED;
  445. req->timeout = jiffies;
  446. list_move(&req->list, &req_list);
  447. set_timeout(req->timeout);
  448. break;
  449. }
  450. }
  451. mutex_unlock(&lock);
  452. }
  453. EXPORT_SYMBOL(rdma_addr_cancel);
  454. struct resolve_cb_context {
  455. struct rdma_dev_addr *addr;
  456. struct completion comp;
  457. };
  458. static void resolve_cb(int status, struct sockaddr *src_addr,
  459. struct rdma_dev_addr *addr, void *context)
  460. {
  461. memcpy(((struct resolve_cb_context *)context)->addr, addr, sizeof(struct
  462. rdma_dev_addr));
  463. complete(&((struct resolve_cb_context *)context)->comp);
  464. }
  465. int rdma_addr_find_dmac_by_grh(const union ib_gid *sgid, const union ib_gid *dgid,
  466. u8 *dmac, u16 *vlan_id, int *if_index)
  467. {
  468. int ret = 0;
  469. struct rdma_dev_addr dev_addr;
  470. struct resolve_cb_context ctx;
  471. struct net_device *dev;
  472. union {
  473. struct sockaddr _sockaddr;
  474. struct sockaddr_in _sockaddr_in;
  475. struct sockaddr_in6 _sockaddr_in6;
  476. } sgid_addr, dgid_addr;
  477. rdma_gid2ip(&sgid_addr._sockaddr, sgid);
  478. rdma_gid2ip(&dgid_addr._sockaddr, dgid);
  479. memset(&dev_addr, 0, sizeof(dev_addr));
  480. if (if_index)
  481. dev_addr.bound_dev_if = *if_index;
  482. dev_addr.net = &init_net;
  483. ctx.addr = &dev_addr;
  484. init_completion(&ctx.comp);
  485. ret = rdma_resolve_ip(&self, &sgid_addr._sockaddr, &dgid_addr._sockaddr,
  486. &dev_addr, 1000, resolve_cb, &ctx);
  487. if (ret)
  488. return ret;
  489. wait_for_completion(&ctx.comp);
  490. memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN);
  491. dev = dev_get_by_index(&init_net, dev_addr.bound_dev_if);
  492. if (!dev)
  493. return -ENODEV;
  494. if (if_index)
  495. *if_index = dev_addr.bound_dev_if;
  496. if (vlan_id)
  497. *vlan_id = rdma_vlan_dev_vlan_id(dev);
  498. dev_put(dev);
  499. return ret;
  500. }
  501. EXPORT_SYMBOL(rdma_addr_find_dmac_by_grh);
  502. int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id)
  503. {
  504. int ret = 0;
  505. struct rdma_dev_addr dev_addr;
  506. union {
  507. struct sockaddr _sockaddr;
  508. struct sockaddr_in _sockaddr_in;
  509. struct sockaddr_in6 _sockaddr_in6;
  510. } gid_addr;
  511. rdma_gid2ip(&gid_addr._sockaddr, sgid);
  512. memset(&dev_addr, 0, sizeof(dev_addr));
  513. dev_addr.net = &init_net;
  514. ret = rdma_translate_ip(&gid_addr._sockaddr, &dev_addr, vlan_id);
  515. if (ret)
  516. return ret;
  517. memcpy(smac, dev_addr.src_dev_addr, ETH_ALEN);
  518. return ret;
  519. }
  520. EXPORT_SYMBOL(rdma_addr_find_smac_by_sgid);
  521. static int netevent_callback(struct notifier_block *self, unsigned long event,
  522. void *ctx)
  523. {
  524. if (event == NETEVENT_NEIGH_UPDATE) {
  525. struct neighbour *neigh = ctx;
  526. if (neigh->nud_state & NUD_VALID) {
  527. set_timeout(jiffies);
  528. }
  529. }
  530. return 0;
  531. }
  532. static struct notifier_block nb = {
  533. .notifier_call = netevent_callback
  534. };
  535. static int __init addr_init(void)
  536. {
  537. addr_wq = create_singlethread_workqueue("ib_addr");
  538. if (!addr_wq)
  539. return -ENOMEM;
  540. register_netevent_notifier(&nb);
  541. rdma_addr_register_client(&self);
  542. return 0;
  543. }
  544. static void __exit addr_cleanup(void)
  545. {
  546. rdma_addr_unregister_client(&self);
  547. unregister_netevent_notifier(&nb);
  548. destroy_workqueue(addr_wq);
  549. }
  550. module_init(addr_init);
  551. module_exit(addr_cleanup);