addr.c
/*
 * Copyright (c) 2005 Voltaire Inc. All rights reserved.
 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
 * Copyright (c) 2005 Intel Corporation. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer in the documentation and/or other materials
 *   provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/mutex.h>
#include <linux/inetdevice.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <linux/module.h>
#include <net/arp.h>
#include <net/neighbour.h>
#include <net/route.h>
#include <net/netevent.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <rdma/ib_addr.h>
#include <rdma/ib.h>
#include <rdma/rdma_netlink.h>
#include <net/netlink.h>

#include "core_priv.h"
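
/*
 * An outstanding address resolution request. Requests live on the global
 * req_list, sorted by timeout, until they resolve, time out, or are
 * canceled.
 */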
struct addr_req {
	struct list_head list;
	struct sockaddr_storage src_addr;
	struct sockaddr_storage dst_addr;
	struct rdma_dev_addr *addr;
	struct rdma_addr_client *client;
	void *context;
	void (*callback)(int status, struct sockaddr *src_addr,
			 struct rdma_dev_addr *addr, void *context);
	unsigned long timeout;
	struct delayed_work work;
	int status;
	u32 seq;
};

static atomic_t ib_nl_addr_request_seq = ATOMIC_INIT(0);

static void process_req(struct work_struct *work);

static DEFINE_MUTEX(lock);
static LIST_HEAD(req_list);
static DECLARE_DELAYED_WORK(work, process_req);
static struct workqueue_struct *addr_wq;

static const struct nla_policy ib_nl_addr_policy[LS_NLA_TYPE_MAX] = {
	[LS_NLA_TYPE_DGID] = {.type = NLA_BINARY,
		.len = sizeof(struct rdma_nla_ls_gid)},
};

static inline bool ib_nl_is_good_ip_resp(const struct nlmsghdr *nlh)
{
	struct nlattr *tb[LS_NLA_TYPE_MAX] = {};
	int ret;

	if (nlh->nlmsg_flags & RDMA_NL_LS_F_ERR)
		return false;

	ret = nla_parse(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh),
			nlmsg_len(nlh), ib_nl_addr_policy, NULL);
	if (ret)
		return false;

	return true;
}
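
/*
 * Copy the DGID attribute out of a resolution response and complete the
 * request on req_list whose sequence number matches the reply.
 */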
static void ib_nl_process_good_ip_resp(const struct nlmsghdr *nlh)
{
	const struct nlattr *head, *curr;
	union ib_gid gid;
	struct addr_req *req;
	int len, rem;
	int found = 0;

	head = (const struct nlattr *)nlmsg_data(nlh);
	len = nlmsg_len(nlh);

	nla_for_each_attr(curr, head, len, rem) {
		if (curr->nla_type == LS_NLA_TYPE_DGID)
			memcpy(&gid, nla_data(curr), nla_len(curr));
	}

	mutex_lock(&lock);
	list_for_each_entry(req, &req_list, list) {
		if (nlh->nlmsg_seq != req->seq)
			continue;
		/* We set the DGID part, the rest was set earlier */
		rdma_addr_set_dgid(req->addr, &gid);
		req->status = 0;
		found = 1;
		break;
	}
	mutex_unlock(&lock);

	if (!found)
		pr_info("Couldn't find request waiting for DGID: %pI6\n",
			&gid);
}
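
/* Netlink handler for RDMA_NL_LS_OP_IP_RESOLVE responses from userspace. */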
int ib_nl_handle_ip_res_resp(struct sk_buff *skb,
			     struct netlink_callback *cb)
{
	const struct nlmsghdr *nlh = (struct nlmsghdr *)cb->nlh;

	if ((nlh->nlmsg_flags & NLM_F_REQUEST) ||
	    !(NETLINK_CB(skb).sk) ||
	    !netlink_capable(skb, CAP_NET_ADMIN))
		return -EPERM;

	if (ib_nl_is_good_ip_resp(nlh))
		ib_nl_process_good_ip_resp(nlh);

	return skb->len;
}
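
/*
 * Multicast an IP resolution request to userspace listeners on the
 * RDMA_NL_GROUP_LS group. The reply arrives asynchronously, so the
 * caller always sees -ENODATA and retries until the response lands.
 */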
static int ib_nl_ip_send_msg(struct rdma_dev_addr *dev_addr,
			     const void *daddr,
			     u32 seq, u16 family)
{
	struct sk_buff *skb = NULL;
	struct nlmsghdr *nlh;
	struct rdma_ls_ip_resolve_header *header;
	void *data;
	size_t size;
	int attrtype;
	int len;

	if (family == AF_INET) {
		size = sizeof(struct in_addr);
		attrtype = RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_IPV4;
	} else {
		size = sizeof(struct in6_addr);
		attrtype = RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_IPV6;
	}

	len = nla_total_size(sizeof(size));
	len += NLMSG_ALIGN(sizeof(*header));

	skb = nlmsg_new(len, GFP_KERNEL);
	if (!skb)
		return -ENOMEM;

	data = ibnl_put_msg(skb, &nlh, seq, 0, RDMA_NL_LS,
			    RDMA_NL_LS_OP_IP_RESOLVE, NLM_F_REQUEST);
	if (!data) {
		nlmsg_free(skb);
		return -ENODATA;
	}

	/* Construct the family header first */
	header = skb_put(skb, NLMSG_ALIGN(sizeof(*header)));
	header->ifindex = dev_addr->bound_dev_if;
	nla_put(skb, attrtype, size, daddr);

	/* Repair the nlmsg header length */
	nlmsg_end(skb, nlh);
	ibnl_multicast(skb, nlh, RDMA_NL_GROUP_LS, GFP_KERNEL);

	/* Make the request retry, so when we get the response from userspace
	 * we will have something.
	 */
	return -ENODATA;
}

int rdma_addr_size(struct sockaddr *addr)
{
	switch (addr->sa_family) {
	case AF_INET:
		return sizeof(struct sockaddr_in);
	case AF_INET6:
		return sizeof(struct sockaddr_in6);
	case AF_IB:
		return sizeof(struct sockaddr_ib);
	default:
		return 0;
	}
}
EXPORT_SYMBOL(rdma_addr_size);

static struct rdma_addr_client self;

void rdma_addr_register_client(struct rdma_addr_client *client)
{
	atomic_set(&client->refcount, 1);
	init_completion(&client->comp);
}
EXPORT_SYMBOL(rdma_addr_register_client);

static inline void put_client(struct rdma_addr_client *client)
{
	if (atomic_dec_and_test(&client->refcount))
		complete(&client->comp);
}

void rdma_addr_unregister_client(struct rdma_addr_client *client)
{
	put_client(client);
	wait_for_completion(&client->comp);
}
EXPORT_SYMBOL(rdma_addr_unregister_client);

int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
		   const unsigned char *dst_dev_addr)
{
	dev_addr->dev_type = dev->type;
	memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
	memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN);
	if (dst_dev_addr)
		memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
	dev_addr->bound_dev_if = dev->ifindex;
	return 0;
}
EXPORT_SYMBOL(rdma_copy_addr);
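
/*
 * Resolve a local IP address (or the bound interface) to the net_device
 * that owns it and copy its link-layer attributes into dev_addr.
 */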
int rdma_translate_ip(const struct sockaddr *addr,
		      struct rdma_dev_addr *dev_addr,
		      u16 *vlan_id)
{
	struct net_device *dev;
	int ret = -EADDRNOTAVAIL;

	if (dev_addr->bound_dev_if) {
		dev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
		if (!dev)
			return -ENODEV;
		ret = rdma_copy_addr(dev_addr, dev, NULL);
		dev_put(dev);
		return ret;
	}

	switch (addr->sa_family) {
	case AF_INET:
		dev = ip_dev_find(dev_addr->net,
			((const struct sockaddr_in *)addr)->sin_addr.s_addr);
		if (!dev)
			return ret;

		ret = rdma_copy_addr(dev_addr, dev, NULL);
		dev_addr->bound_dev_if = dev->ifindex;
		if (vlan_id)
			*vlan_id = rdma_vlan_dev_vlan_id(dev);
		dev_put(dev);
		break;
#if IS_ENABLED(CONFIG_IPV6)
	case AF_INET6:
		rcu_read_lock();
		for_each_netdev_rcu(dev_addr->net, dev) {
			if (ipv6_chk_addr(dev_addr->net,
					  &((const struct sockaddr_in6 *)addr)->sin6_addr,
					  dev, 1)) {
				ret = rdma_copy_addr(dev_addr, dev, NULL);
				dev_addr->bound_dev_if = dev->ifindex;
				if (vlan_id)
					*vlan_id = rdma_vlan_dev_vlan_id(dev);
				break;
			}
		}
		rcu_read_unlock();
		break;
#endif
	}
	return ret;
}
EXPORT_SYMBOL(rdma_translate_ip);
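
/* Arm (or re-arm) a delayed work item to fire at the given time in jiffies. */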
static void set_timeout(struct delayed_work *delayed_work, unsigned long time)
{
	unsigned long delay;

	delay = time - jiffies;
	if ((long)delay < 0)
		delay = 0;

	mod_delayed_work(addr_wq, delayed_work, delay);
}

static void queue_req(struct addr_req *req)
{
	struct addr_req *temp_req;

	mutex_lock(&lock);
	list_for_each_entry_reverse(temp_req, &req_list, list) {
		if (time_after_eq(req->timeout, temp_req->timeout))
			break;
	}

	list_add(&req->list, &temp_req->list);
	set_timeout(&req->work, req->timeout);
	mutex_unlock(&lock);
}

static int ib_nl_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
			  const void *daddr, u32 seq, u16 family)
{
	if (ibnl_chk_listeners(RDMA_NL_GROUP_LS))
		return -EADDRNOTAVAIL;

	/* We fill in what we can, the response will fill the rest */
	rdma_copy_addr(dev_addr, dst->dev, NULL);
	return ib_nl_ip_send_msg(dev_addr, daddr, seq, family);
}
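
/*
 * Look up the neighbour entry for the route's next hop. If it isn't
 * valid yet, kick neighbour discovery and return -ENODATA so the
 * request is retried.
 */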
static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
			const void *daddr)
{
	struct neighbour *n;
	int ret;

	n = dst_neigh_lookup(dst, daddr);

	rcu_read_lock();
	if (!n || !(n->nud_state & NUD_VALID)) {
		if (n)
			neigh_event_send(n, NULL);
		ret = -ENODATA;
	} else {
		ret = rdma_copy_addr(dev_addr, dst->dev, n->ha);
	}
	rcu_read_unlock();

	if (n)
		neigh_release(n);

	return ret;
}

static bool has_gateway(struct dst_entry *dst, sa_family_t family)
{
	struct rtable *rt;
	struct rt6_info *rt6;

	if (family == AF_INET) {
		rt = container_of(dst, struct rtable, dst);
		return rt->rt_uses_gateway;
	}

	rt6 = container_of(dst, struct rt6_info, dst);
	return rt6->rt6i_flags & RTF_GATEWAY;
}
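
/*
 * Resolve the destination hardware address: via the userspace IB-router
 * path for gatewayed InfiniBand routes, via the kernel neighbour table
 * otherwise.
 */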
static int fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
		    const struct sockaddr *dst_in, u32 seq)
{
	const struct sockaddr_in *dst_in4 =
		(const struct sockaddr_in *)dst_in;
	const struct sockaddr_in6 *dst_in6 =
		(const struct sockaddr_in6 *)dst_in;
	const void *daddr = (dst_in->sa_family == AF_INET) ?
		(const void *)&dst_in4->sin_addr.s_addr :
		(const void *)&dst_in6->sin6_addr;
	sa_family_t family = dst_in->sa_family;

	/* Gateway + ARPHRD_INFINIBAND -> IB router */
	if (has_gateway(dst, family) && dst->dev->type == ARPHRD_INFINIBAND)
		return ib_nl_fetch_ha(dst, dev_addr, daddr, seq, family);
	else
		return dst_fetch_ha(dst, dev_addr, daddr);
}

static int addr4_resolve(struct sockaddr_in *src_in,
			 const struct sockaddr_in *dst_in,
			 struct rdma_dev_addr *addr,
			 struct rtable **prt)
{
	__be32 src_ip = src_in->sin_addr.s_addr;
	__be32 dst_ip = dst_in->sin_addr.s_addr;
	struct rtable *rt;
	struct flowi4 fl4;
	int ret;

	memset(&fl4, 0, sizeof(fl4));
	fl4.daddr = dst_ip;
	fl4.saddr = src_ip;
	fl4.flowi4_oif = addr->bound_dev_if;
	rt = ip_route_output_key(addr->net, &fl4);
	ret = PTR_ERR_OR_ZERO(rt);
	if (ret)
		return ret;

	src_in->sin_family = AF_INET;
	src_in->sin_addr.s_addr = fl4.saddr;

	/* If there's a gateway and the device type is not ARPHRD_INFINIBAND,
	 * we're definitely in RoCE v2 (as RoCE v1 isn't routable); set the
	 * network type accordingly.
	 */
	if (rt->rt_uses_gateway && rt->dst.dev->type != ARPHRD_INFINIBAND)
		addr->network = RDMA_NETWORK_IPV4;

	addr->hoplimit = ip4_dst_hoplimit(&rt->dst);

	*prt = rt;
	return 0;
}

#if IS_ENABLED(CONFIG_IPV6)
static int addr6_resolve(struct sockaddr_in6 *src_in,
			 const struct sockaddr_in6 *dst_in,
			 struct rdma_dev_addr *addr,
			 struct dst_entry **pdst)
{
	struct flowi6 fl6;
	struct dst_entry *dst;
	struct rt6_info *rt;
	int ret;

	memset(&fl6, 0, sizeof fl6);
	fl6.daddr = dst_in->sin6_addr;
	fl6.saddr = src_in->sin6_addr;
	fl6.flowi6_oif = addr->bound_dev_if;

	ret = ipv6_stub->ipv6_dst_lookup(addr->net, NULL, &dst, &fl6);
	if (ret < 0)
		return ret;

	rt = (struct rt6_info *)dst;
	if (ipv6_addr_any(&src_in->sin6_addr)) {
		src_in->sin6_family = AF_INET6;
		src_in->sin6_addr = fl6.saddr;
	}

	/* If there's a gateway and the device type is not ARPHRD_INFINIBAND,
	 * we're definitely in RoCE v2 (as RoCE v1 isn't routable); set the
	 * network type accordingly.
	 */
	if (rt->rt6i_flags & RTF_GATEWAY &&
	    ip6_dst_idev(dst)->dev->type != ARPHRD_INFINIBAND)
		addr->network = RDMA_NETWORK_IPV6;

	addr->hoplimit = ip6_dst_hoplimit(dst);

	*pdst = dst;
	return 0;
}
#else
static int addr6_resolve(struct sockaddr_in6 *src_in,
			 const struct sockaddr_in6 *dst_in,
			 struct rdma_dev_addr *addr,
			 struct dst_entry **pdst)
{
	return -EADDRNOTAVAIL;
}
#endif

static int addr_resolve_neigh(struct dst_entry *dst,
			      const struct sockaddr *dst_in,
			      struct rdma_dev_addr *addr,
			      u32 seq)
{
	if (dst->dev->flags & IFF_LOOPBACK) {
		int ret;

		ret = rdma_translate_ip(dst_in, addr, NULL);
		if (!ret)
			memcpy(addr->dst_dev_addr, addr->src_dev_addr,
			       MAX_ADDR_LEN);

		return ret;
	}

	/* If the device does ARP, resolve the hardware address */
	if (!(dst->dev->flags & IFF_NOARP))
		return fetch_ha(dst, addr, dst_in, seq);

	return rdma_copy_addr(addr, dst->dev, NULL);
}
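
/*
 * Core resolution step: route the destination, optionally resolve the
 * next-hop hardware address, and bind dev_addr to the egress device.
 */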
static int addr_resolve(struct sockaddr *src_in,
			const struct sockaddr *dst_in,
			struct rdma_dev_addr *addr,
			bool resolve_neigh,
			u32 seq)
{
	struct net_device *ndev;
	struct dst_entry *dst;
	int ret;

	if (!addr->net) {
		pr_warn_ratelimited("%s: missing namespace\n", __func__);
		return -EINVAL;
	}

	if (src_in->sa_family == AF_INET) {
		struct rtable *rt = NULL;
		const struct sockaddr_in *dst_in4 =
			(const struct sockaddr_in *)dst_in;

		ret = addr4_resolve((struct sockaddr_in *)src_in,
				    dst_in4, addr, &rt);
		if (ret)
			return ret;

		if (resolve_neigh)
			ret = addr_resolve_neigh(&rt->dst, dst_in, addr, seq);

		if (addr->bound_dev_if) {
			ndev = dev_get_by_index(addr->net, addr->bound_dev_if);
		} else {
			ndev = rt->dst.dev;
			dev_hold(ndev);
		}

		ip_rt_put(rt);
	} else {
		const struct sockaddr_in6 *dst_in6 =
			(const struct sockaddr_in6 *)dst_in;

		ret = addr6_resolve((struct sockaddr_in6 *)src_in,
				    dst_in6, addr,
				    &dst);
		if (ret)
			return ret;

		if (resolve_neigh)
			ret = addr_resolve_neigh(dst, dst_in, addr, seq);

		if (addr->bound_dev_if) {
			ndev = dev_get_by_index(addr->net, addr->bound_dev_if);
		} else {
			ndev = dst->dev;
			dev_hold(ndev);
		}

		dst_release(dst);
	}

	if (ndev->flags & IFF_LOOPBACK) {
		ret = rdma_translate_ip(dst_in, addr, NULL);
		/*
		 * Put the loopback device and get the translated
		 * device instead.
		 */
		dev_put(ndev);
		ndev = dev_get_by_index(addr->net, addr->bound_dev_if);
	} else {
		addr->bound_dev_if = ndev->ifindex;
	}
	dev_put(ndev);

	return ret;
}
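
/* Work handler that retries a single request until it resolves or times out. */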
static void process_one_req(struct work_struct *_work)
{
	struct addr_req *req;
	struct sockaddr *src_in, *dst_in;

	mutex_lock(&lock);
	req = container_of(_work, struct addr_req, work.work);

	if (req->status == -ENODATA) {
		src_in = (struct sockaddr *)&req->src_addr;
		dst_in = (struct sockaddr *)&req->dst_addr;
		req->status = addr_resolve(src_in, dst_in, req->addr,
					   true, req->seq);
		if (req->status && time_after_eq(jiffies, req->timeout)) {
			req->status = -ETIMEDOUT;
		} else if (req->status == -ENODATA) {
			/* requeue the work to retry the resolution later */
			set_timeout(&req->work, req->timeout);
			mutex_unlock(&lock);
			return;
		}
	}
	list_del(&req->list);
	mutex_unlock(&lock);

	req->callback(req->status, (struct sockaddr *)&req->src_addr,
		      req->addr, req->context);
	put_client(req->client);
	kfree(req);
}

static void process_req(struct work_struct *work)
{
	struct addr_req *req, *temp_req;
	struct sockaddr *src_in, *dst_in;
	struct list_head done_list;

	INIT_LIST_HEAD(&done_list);

	mutex_lock(&lock);
	list_for_each_entry_safe(req, temp_req, &req_list, list) {
		if (req->status == -ENODATA) {
			src_in = (struct sockaddr *) &req->src_addr;
			dst_in = (struct sockaddr *) &req->dst_addr;
			req->status = addr_resolve(src_in, dst_in, req->addr,
						   true, req->seq);
			if (req->status && time_after_eq(jiffies, req->timeout))
				req->status = -ETIMEDOUT;
			else if (req->status == -ENODATA) {
				set_timeout(&req->work, req->timeout);
				continue;
			}
		}
		list_move_tail(&req->list, &done_list);
	}
	mutex_unlock(&lock);

	list_for_each_entry_safe(req, temp_req, &done_list, list) {
		list_del(&req->list);
		/* It is safe to cancel other work items from this work item
		 * because only one work item can run at a time on this
		 * single-threaded workqueue.
		 */
		cancel_delayed_work(&req->work);
		req->callback(req->status, (struct sockaddr *) &req->src_addr,
			      req->addr, req->context);
		put_client(req->client);
		kfree(req);
	}
}
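
/*
 * Kick off asynchronous resolution of dst_addr. The callback runs from
 * the workqueue once the request resolves, times out, or is canceled.
 */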
int rdma_resolve_ip(struct rdma_addr_client *client,
		    struct sockaddr *src_addr, struct sockaddr *dst_addr,
		    struct rdma_dev_addr *addr, int timeout_ms,
		    void (*callback)(int status, struct sockaddr *src_addr,
				     struct rdma_dev_addr *addr, void *context),
		    void *context)
{
	struct sockaddr *src_in, *dst_in;
	struct addr_req *req;
	int ret = 0;

	req = kzalloc(sizeof *req, GFP_KERNEL);
	if (!req)
		return -ENOMEM;

	src_in = (struct sockaddr *) &req->src_addr;
	dst_in = (struct sockaddr *) &req->dst_addr;

	if (src_addr) {
		if (src_addr->sa_family != dst_addr->sa_family) {
			ret = -EINVAL;
			goto err;
		}

		memcpy(src_in, src_addr, rdma_addr_size(src_addr));
	} else {
		src_in->sa_family = dst_addr->sa_family;
	}

	memcpy(dst_in, dst_addr, rdma_addr_size(dst_addr));
	req->addr = addr;
	req->callback = callback;
	req->context = context;
	req->client = client;
	atomic_inc(&client->refcount);
	INIT_DELAYED_WORK(&req->work, process_one_req);
	req->seq = (u32)atomic_inc_return(&ib_nl_addr_request_seq);

	req->status = addr_resolve(src_in, dst_in, addr, true, req->seq);
	switch (req->status) {
	case 0:
		req->timeout = jiffies;
		queue_req(req);
		break;
	case -ENODATA:
		req->timeout = msecs_to_jiffies(timeout_ms) + jiffies;
		queue_req(req);
		break;
	default:
		ret = req->status;
		atomic_dec(&client->refcount);
		goto err;
	}
	return ret;
err:
	kfree(req);
	return ret;
}
EXPORT_SYMBOL(rdma_resolve_ip);

int rdma_resolve_ip_route(struct sockaddr *src_addr,
			  const struct sockaddr *dst_addr,
			  struct rdma_dev_addr *addr)
{
	struct sockaddr_storage ssrc_addr = {};
	struct sockaddr *src_in = (struct sockaddr *)&ssrc_addr;

	if (src_addr) {
		if (src_addr->sa_family != dst_addr->sa_family)
			return -EINVAL;

		memcpy(src_in, src_addr, rdma_addr_size(src_addr));
	} else {
		src_in->sa_family = dst_addr->sa_family;
	}

	return addr_resolve(src_in, dst_addr, addr, false, 0);
}
EXPORT_SYMBOL(rdma_resolve_ip_route);
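
/* Cancel a pending request: mark it -ECANCELED and expire it immediately. */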
void rdma_addr_cancel(struct rdma_dev_addr *addr)
{
	struct addr_req *req, *temp_req;

	mutex_lock(&lock);
	list_for_each_entry_safe(req, temp_req, &req_list, list) {
		if (req->addr == addr) {
			req->status = -ECANCELED;
			req->timeout = jiffies;
			list_move(&req->list, &req_list);
			set_timeout(&req->work, req->timeout);
			break;
		}
	}
	mutex_unlock(&lock);
}
EXPORT_SYMBOL(rdma_addr_cancel);

struct resolve_cb_context {
	struct rdma_dev_addr *addr;
	struct completion comp;
	int status;
};

static void resolve_cb(int status, struct sockaddr *src_addr,
		       struct rdma_dev_addr *addr, void *context)
{
	if (!status)
		memcpy(((struct resolve_cb_context *)context)->addr,
		       addr, sizeof(struct rdma_dev_addr));
	((struct resolve_cb_context *)context)->status = status;
	complete(&((struct resolve_cb_context *)context)->comp);
}

int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
				 const union ib_gid *dgid,
				 u8 *dmac, u16 *vlan_id, int *if_index,
				 int *hoplimit)
{
	int ret = 0;
	struct rdma_dev_addr dev_addr;
	struct resolve_cb_context ctx;
	struct net_device *dev;

	union {
		struct sockaddr     _sockaddr;
		struct sockaddr_in  _sockaddr_in;
		struct sockaddr_in6 _sockaddr_in6;
	} sgid_addr, dgid_addr;

	rdma_gid2ip(&sgid_addr._sockaddr, sgid);
	rdma_gid2ip(&dgid_addr._sockaddr, dgid);

	memset(&dev_addr, 0, sizeof(dev_addr));
	if (if_index)
		dev_addr.bound_dev_if = *if_index;
	dev_addr.net = &init_net;

	ctx.addr = &dev_addr;
	init_completion(&ctx.comp);
	ret = rdma_resolve_ip(&self, &sgid_addr._sockaddr, &dgid_addr._sockaddr,
			      &dev_addr, 1000, resolve_cb, &ctx);
	if (ret)
		return ret;

	wait_for_completion(&ctx.comp);

	ret = ctx.status;
	if (ret)
		return ret;

	memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN);
	dev = dev_get_by_index(&init_net, dev_addr.bound_dev_if);
	if (!dev)
		return -ENODEV;
	if (if_index)
		*if_index = dev_addr.bound_dev_if;
	if (vlan_id)
		*vlan_id = rdma_vlan_dev_vlan_id(dev);
	if (hoplimit)
		*hoplimit = dev_addr.hoplimit;
	dev_put(dev);
	return ret;
}
EXPORT_SYMBOL(rdma_addr_find_l2_eth_by_grh);

int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id)
{
	int ret = 0;
	struct rdma_dev_addr dev_addr;
	union {
		struct sockaddr     _sockaddr;
		struct sockaddr_in  _sockaddr_in;
		struct sockaddr_in6 _sockaddr_in6;
	} gid_addr;

	rdma_gid2ip(&gid_addr._sockaddr, sgid);

	memset(&dev_addr, 0, sizeof(dev_addr));
	dev_addr.net = &init_net;
	ret = rdma_translate_ip(&gid_addr._sockaddr, &dev_addr, vlan_id);
	if (ret)
		return ret;

	memcpy(smac, dev_addr.src_dev_addr, ETH_ALEN);
	return ret;
}
EXPORT_SYMBOL(rdma_addr_find_smac_by_sgid);
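
/*
 * A neighbour entry turning valid may unblock pending requests, so
 * reschedule the request-list scan right away.
 */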
static int netevent_callback(struct notifier_block *self, unsigned long event,
	void *ctx)
{
	if (event == NETEVENT_NEIGH_UPDATE) {
		struct neighbour *neigh = ctx;

		if (neigh->nud_state & NUD_VALID)
			set_timeout(&work, jiffies);
	}
	return 0;
}

static struct notifier_block nb = {
	.notifier_call = netevent_callback
};

int addr_init(void)
{
	addr_wq = alloc_ordered_workqueue("ib_addr", WQ_MEM_RECLAIM);
	if (!addr_wq)
		return -ENOMEM;

	register_netevent_notifier(&nb);
	rdma_addr_register_client(&self);

	return 0;
}

void addr_cleanup(void)
{
	rdma_addr_unregister_client(&self);
	unregister_netevent_notifier(&nb);
	destroy_workqueue(addr_wq);
}