roce_gid_mgmt.c

/*
 * Copyright (c) 2015, Mellanox Technologies inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 *  - Redistributions of source code must retain the above
 *    copyright notice, this list of conditions and the following
 *    disclaimer.
 *
 *  - Redistributions in binary form must reproduce the above
 *    copyright notice, this list of conditions and the following
 *    disclaimer in the documentation and/or other materials
 *    provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "core_priv.h"

#include <linux/in.h>
#include <linux/in6.h>

/* For in6_dev_get/in6_dev_put */
#include <net/addrconf.h>
#include <net/bonding.h>

#include <rdma/ib_cache.h>
#include <rdma/ib_addr.h>

static struct workqueue_struct *gid_cache_wq;

enum gid_op_type {
	GID_DEL = 0,
	GID_ADD
};

struct update_gid_event_work {
	struct work_struct work;
	union ib_gid gid;
	struct ib_gid_attr gid_attr;
	enum gid_op_type gid_op;
};

#define ROCE_NETDEV_CALLBACK_SZ		3
struct netdev_event_work_cmd {
	roce_netdev_callback	cb;
	roce_netdev_filter	filter;
	struct net_device	*ndev;
	struct net_device	*filter_ndev;
};

struct netdev_event_work {
	struct work_struct work;
	struct netdev_event_work_cmd	cmds[ROCE_NETDEV_CALLBACK_SZ];
};

static const struct {
	bool (*is_supported)(const struct ib_device *device, u8 port_num);
	enum ib_gid_type gid_type;
} PORT_CAP_TO_GID_TYPE[] = {
	{rdma_protocol_roce_eth_encap, IB_GID_TYPE_ROCE},
	{rdma_protocol_roce_udp_encap, IB_GID_TYPE_ROCE_UDP_ENCAP},
};

#define CAP_TO_GID_TABLE_SIZE	ARRAY_SIZE(PORT_CAP_TO_GID_TYPE)
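
/*
 * Return a bitmask of the GID types supported by the given port: RoCE v1
 * (IB_GID_TYPE_ROCE) and/or RoCE v2 (IB_GID_TYPE_ROCE_UDP_ENCAP) for RoCE
 * ports, and only IB_GID_TYPE_IB for non-RoCE ports.
 */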
unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port)
{
	int i;
	unsigned int ret_flags = 0;

	if (!rdma_protocol_roce(ib_dev, port))
		return 1UL << IB_GID_TYPE_IB;

	for (i = 0; i < CAP_TO_GID_TABLE_SIZE; i++)
		if (PORT_CAP_TO_GID_TYPE[i].is_supported(ib_dev, port))
			ret_flags |= 1UL << PORT_CAP_TO_GID_TYPE[i].gid_type;

	return ret_flags;
}
EXPORT_SYMBOL(roce_gid_type_mask_support);
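
/*
 * Add or delete the given GID from the GID cache once per GID type that the
 * port supports, according to roce_gid_type_mask_support().
 */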
static void update_gid(enum gid_op_type gid_op, struct ib_device *ib_dev,
		       u8 port, union ib_gid *gid,
		       struct ib_gid_attr *gid_attr)
{
	int i;
	unsigned long gid_type_mask = roce_gid_type_mask_support(ib_dev, port);

	for (i = 0; i < IB_GID_TYPE_SIZE; i++) {
		if ((1UL << i) & gid_type_mask) {
			gid_attr->gid_type = i;
			switch (gid_op) {
			case GID_ADD:
				ib_cache_gid_add(ib_dev, port,
						 gid, gid_attr);
				break;
			case GID_DEL:
				ib_cache_gid_del(ib_dev, port,
						 gid, gid_attr);
				break;
			}
		}
	}
}

enum bonding_slave_state {
	BONDING_SLAVE_STATE_ACTIVE	= 1UL << 0,
	BONDING_SLAVE_STATE_INACTIVE	= 1UL << 1,
	/* No primary slave or the device isn't a slave in bonding */
	BONDING_SLAVE_STATE_NA		= 1UL << 2,
};
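
/*
 * Classify @dev relative to @upper: ACTIVE if it is the bond's active slave,
 * INACTIVE if the bond has a different active slave, and NA if @upper is not
 * a bond master or no active slave is configured. Called under RCU.
 */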
static enum bonding_slave_state is_eth_active_slave_of_bonding_rcu(struct net_device *dev,
								   struct net_device *upper)
{
	if (upper && netif_is_bond_master(upper)) {
		struct net_device *pdev =
			bond_option_active_slave_get_rcu(netdev_priv(upper));

		if (pdev)
			return dev == pdev ? BONDING_SLAVE_STATE_ACTIVE :
				BONDING_SLAVE_STATE_INACTIVE;
	}

	return BONDING_SLAVE_STATE_NA;
}

#define REQUIRED_BOND_STATES	(BONDING_SLAVE_STATE_ACTIVE |	\
				 BONDING_SLAVE_STATE_NA)
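
/*
 * Match the event netdevice (cookie) against the RDMA netdevice of the port:
 * true when the event netdevice (or the real device behind a VLAN event
 * netdevice) is the RDMA netdevice itself, or when it is an upper device of
 * the RDMA netdevice and the RDMA netdevice is not an inactive bond slave.
 */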
static bool
is_eth_port_of_netdev_filter(struct ib_device *ib_dev, u8 port,
			     struct net_device *rdma_ndev, void *cookie)
{
	struct net_device *real_dev;
	bool res;

	if (!rdma_ndev)
		return false;

	rcu_read_lock();
	real_dev = rdma_vlan_dev_real_dev(cookie);
	if (!real_dev)
		real_dev = cookie;

	res = ((rdma_is_upper_dev_rcu(rdma_ndev, cookie) &&
	       (is_eth_active_slave_of_bonding_rcu(rdma_ndev, real_dev) &
		REQUIRED_BOND_STATES)) ||
	       real_dev == rdma_ndev);

	rcu_read_unlock();
	return res;
}
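
/*
 * Return true when the RDMA netdevice of the port is currently an inactive
 * slave of its bond master.
 */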
static bool
is_eth_port_inactive_slave_filter(struct ib_device *ib_dev, u8 port,
				  struct net_device *rdma_ndev, void *cookie)
{
	struct net_device *master_dev;
	bool res;

	if (!rdma_ndev)
		return false;

	rcu_read_lock();
	master_dev = netdev_master_upper_dev_get_rcu(rdma_ndev);
	res = is_eth_active_slave_of_bonding_rcu(rdma_ndev, master_dev) ==
	      BONDING_SLAVE_STATE_INACTIVE;
	rcu_read_unlock();

	return res;
}

/**
 * is_ndev_for_default_gid_filter - Check if a given netdevice
 * can be considered for default GIDs or not.
 * @ib_dev: IB device to check
 * @port: Port to consider for adding default GID
 * @rdma_ndev: rdma netdevice pointer
 * @cookie: Netdevice to consider to form a default GID
 *
 * is_ndev_for_default_gid_filter() returns true if a given netdevice can be
 * considered for deriving default RoCE GID, returns false otherwise.
 */
static bool
is_ndev_for_default_gid_filter(struct ib_device *ib_dev, u8 port,
			       struct net_device *rdma_ndev, void *cookie)
{
	struct net_device *cookie_ndev = cookie;
	bool res;

	if (!rdma_ndev)
		return false;

	rcu_read_lock();
	/*
	 * When rdma netdevice is used in bonding, bonding master netdevice
	 * should be considered for default GIDs. Therefore, ignore slave rdma
	 * netdevices when bonding is considered.
	 * Additionally when event(cookie) netdevice is bond master device,
	 * make sure that it is the upper netdevice of rdma netdevice.
	 */
	res = ((cookie_ndev == rdma_ndev && !netif_is_bond_slave(rdma_ndev)) ||
	       (netif_is_bond_master(cookie_ndev) &&
		rdma_is_upper_dev_rcu(rdma_ndev, cookie_ndev)));
	rcu_read_unlock();

	return res;
}

static bool pass_all_filter(struct ib_device *ib_dev, u8 port,
			    struct net_device *rdma_ndev, void *cookie)
{
	return true;
}
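
/*
 * Return true when the event netdevice (cookie) is the RDMA netdevice of the
 * port or one of its upper devices.
 */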
static bool upper_device_filter(struct ib_device *ib_dev, u8 port,
				struct net_device *rdma_ndev, void *cookie)
{
	bool res;

	if (!rdma_ndev)
		return false;

	if (rdma_ndev == cookie)
		return true;

	rcu_read_lock();
	res = rdma_is_upper_dev_rcu(rdma_ndev, cookie);
	rcu_read_unlock();

	return res;
}

/**
 * is_upper_ndev_bond_master_filter - Check if a given netdevice
 * is bond master device of netdevice of the RDMA device of port.
 * @ib_dev: IB device to check
 * @port: Port to consider for adding default GID
 * @rdma_ndev: Pointer to rdma netdevice
 * @cookie: Netdevice to consider to form a default GID
 *
 * is_upper_ndev_bond_master_filter() returns true if a cookie_netdev
 * is bond master device and rdma_ndev is its lower netdevice. It might
 * not have been established as slave device yet.
 */
static bool
is_upper_ndev_bond_master_filter(struct ib_device *ib_dev, u8 port,
				 struct net_device *rdma_ndev,
				 void *cookie)
{
	struct net_device *cookie_ndev = cookie;
	bool match = false;

	rcu_read_lock();
	if (netif_is_bond_master(cookie_ndev) &&
	    rdma_is_upper_dev_rcu(rdma_ndev, cookie_ndev))
		match = true;
	rcu_read_unlock();

	return match;
}

static void update_gid_ip(enum gid_op_type gid_op,
			  struct ib_device *ib_dev,
			  u8 port, struct net_device *ndev,
			  struct sockaddr *addr)
{
	union ib_gid gid;
	struct ib_gid_attr gid_attr;

	rdma_ip2gid(addr, &gid);
	memset(&gid_attr, 0, sizeof(gid_attr));
	gid_attr.ndev = ndev;

	update_gid(gid_op, ib_dev, port, &gid, &gid_attr);
}
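
/*
 * Delete the default GIDs of the RDMA netdevice, unless the event netdevice
 * is unrelated to it (neither the device itself nor an upper device) or the
 * RDMA netdevice is an inactive bond slave.
 */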
static void bond_delete_netdev_default_gids(struct ib_device *ib_dev,
					     u8 port,
					     struct net_device *rdma_ndev,
					     struct net_device *event_ndev)
{
	struct net_device *real_dev = rdma_vlan_dev_real_dev(event_ndev);
	unsigned long gid_type_mask;

	if (!rdma_ndev)
		return;

	if (!real_dev)
		real_dev = event_ndev;

	rcu_read_lock();

	if (((rdma_ndev != event_ndev &&
	      !rdma_is_upper_dev_rcu(rdma_ndev, event_ndev)) ||
	     is_eth_active_slave_of_bonding_rcu(rdma_ndev, real_dev) ==
	     BONDING_SLAVE_STATE_INACTIVE)) {
		rcu_read_unlock();
		return;
	}

	rcu_read_unlock();

	gid_type_mask = roce_gid_type_mask_support(ib_dev, port);

	ib_cache_gid_set_default_gid(ib_dev, port, rdma_ndev,
				     gid_type_mask,
				     IB_CACHE_GID_DEFAULT_MODE_DELETE);
}
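
/*
 * Walk the IPv4 addresses of @ndev under RCU, copy them to a temporary list,
 * then add a GID for each address outside the RCU section.
 */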
static void enum_netdev_ipv4_ips(struct ib_device *ib_dev,
				 u8 port, struct net_device *ndev)
{
	struct in_device *in_dev;
	struct sin_list {
		struct list_head	list;
		struct sockaddr_in	ip;
	};
	struct sin_list *sin_iter;
	struct sin_list *sin_temp;

	LIST_HEAD(sin_list);
	if (ndev->reg_state >= NETREG_UNREGISTERING)
		return;

	rcu_read_lock();
	in_dev = __in_dev_get_rcu(ndev);
	if (!in_dev) {
		rcu_read_unlock();
		return;
	}

	for_ifa(in_dev) {
		struct sin_list *entry = kzalloc(sizeof(*entry), GFP_ATOMIC);

		if (!entry)
			continue;

		entry->ip.sin_family = AF_INET;
		entry->ip.sin_addr.s_addr = ifa->ifa_address;
		list_add_tail(&entry->list, &sin_list);
	}
	endfor_ifa(in_dev);
	rcu_read_unlock();

	list_for_each_entry_safe(sin_iter, sin_temp, &sin_list, list) {
		update_gid_ip(GID_ADD, ib_dev, port, ndev,
			      (struct sockaddr *)&sin_iter->ip);
		list_del(&sin_iter->list);
		kfree(sin_iter);
	}
}
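
/*
 * Walk the IPv6 addresses of @ndev under the inet6_dev lock, copy them to a
 * temporary list, then add a GID for each address after dropping the lock.
 */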
static void enum_netdev_ipv6_ips(struct ib_device *ib_dev,
				 u8 port, struct net_device *ndev)
{
	struct inet6_ifaddr *ifp;
	struct inet6_dev *in6_dev;
	struct sin6_list {
		struct list_head	list;
		struct sockaddr_in6	sin6;
	};
	struct sin6_list *sin6_iter;
	struct sin6_list *sin6_temp;
	struct ib_gid_attr gid_attr = {.ndev = ndev};
	LIST_HEAD(sin6_list);

	if (ndev->reg_state >= NETREG_UNREGISTERING)
		return;

	in6_dev = in6_dev_get(ndev);
	if (!in6_dev)
		return;

	read_lock_bh(&in6_dev->lock);
	list_for_each_entry(ifp, &in6_dev->addr_list, if_list) {
		struct sin6_list *entry = kzalloc(sizeof(*entry), GFP_ATOMIC);

		if (!entry)
			continue;

		entry->sin6.sin6_family = AF_INET6;
		entry->sin6.sin6_addr = ifp->addr;
		list_add_tail(&entry->list, &sin6_list);
	}
	read_unlock_bh(&in6_dev->lock);

	in6_dev_put(in6_dev);

	list_for_each_entry_safe(sin6_iter, sin6_temp, &sin6_list, list) {
		union ib_gid	gid;

		rdma_ip2gid((struct sockaddr *)&sin6_iter->sin6, &gid);
		update_gid(GID_ADD, ib_dev, port, &gid, &gid_attr);
		list_del(&sin6_iter->list);
		kfree(sin6_iter);
	}
}

static void _add_netdev_ips(struct ib_device *ib_dev, u8 port,
			    struct net_device *ndev)
{
	enum_netdev_ipv4_ips(ib_dev, port, ndev);
	if (IS_ENABLED(CONFIG_IPV6))
		enum_netdev_ipv6_ips(ib_dev, port, ndev);
}

static void add_netdev_ips(struct ib_device *ib_dev, u8 port,
			   struct net_device *rdma_ndev, void *cookie)
{
	_add_netdev_ips(ib_dev, port, cookie);
}

static void del_netdev_ips(struct ib_device *ib_dev, u8 port,
			   struct net_device *rdma_ndev, void *cookie)
{
	ib_cache_gid_del_all_netdev_gids(ib_dev, port, cookie);
}

/**
 * del_default_gids - Delete default GIDs of the event/cookie netdevice
 * @ib_dev:	RDMA device pointer
 * @port:	Port of the RDMA device whose GID table to consider
 * @rdma_ndev:	Unused rdma netdevice
 * @cookie:	Pointer to event netdevice
 *
 * del_default_gids() deletes the default GIDs of the event/cookie netdevice.
 */
static void del_default_gids(struct ib_device *ib_dev, u8 port,
			     struct net_device *rdma_ndev, void *cookie)
{
	struct net_device *cookie_ndev = cookie;
	unsigned long gid_type_mask;

	gid_type_mask = roce_gid_type_mask_support(ib_dev, port);

	ib_cache_gid_set_default_gid(ib_dev, port, cookie_ndev, gid_type_mask,
				     IB_CACHE_GID_DEFAULT_MODE_DELETE);
}

static void add_default_gids(struct ib_device *ib_dev, u8 port,
			     struct net_device *rdma_ndev, void *cookie)
{
	struct net_device *event_ndev = cookie;
	unsigned long gid_type_mask;

	gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
	ib_cache_gid_set_default_gid(ib_dev, port, event_ndev, gid_type_mask,
				     IB_CACHE_GID_DEFAULT_MODE_SET);
}
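
/*
 * Enumerate every netdevice in every network namespace and, for each one that
 * passes the relevant filter against this port, add its default and IP-based
 * GIDs. Invoked per RoCE port by rdma_roce_rescan_device().
 */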
static void enum_all_gids_of_dev_cb(struct ib_device *ib_dev,
				    u8 port,
				    struct net_device *rdma_ndev,
				    void *cookie)
{
	struct net *net;
	struct net_device *ndev;

	/* Lock the rtnl to make sure the netdevs do not move under
	 * our feet
	 */
	rtnl_lock();
	down_read(&net_rwsem);
	for_each_net(net)
		for_each_netdev(net, ndev) {
			/*
			 * Filter and add default GIDs of the primary netdevice
			 * when not in bonding mode, or add default GIDs
			 * of bond master device, when in bonding mode.
			 */
			if (is_ndev_for_default_gid_filter(ib_dev, port,
							   rdma_ndev, ndev))
				add_default_gids(ib_dev, port, rdma_ndev, ndev);

			if (is_eth_port_of_netdev_filter(ib_dev, port,
							 rdma_ndev, ndev))
				_add_netdev_ips(ib_dev, port, ndev);
		}
	up_read(&net_rwsem);
	rtnl_unlock();
}

/**
 * rdma_roce_rescan_device - Rescan all of the network devices in the system
 * and add their gids, as needed, to the relevant RoCE devices.
 *
 * @ib_dev: the rdma device
 */
void rdma_roce_rescan_device(struct ib_device *ib_dev)
{
	ib_enum_roce_netdev(ib_dev, pass_all_filter, NULL,
			    enum_all_gids_of_dev_cb, NULL);
}
EXPORT_SYMBOL(rdma_roce_rescan_device);

static void callback_for_addr_gid_device_scan(struct ib_device *device,
					      u8 port,
					      struct net_device *rdma_ndev,
					      void *cookie)
{
	struct update_gid_event_work *parsed = cookie;

	return update_gid(parsed->gid_op, device,
			  port, &parsed->gid,
			  &parsed->gid_attr);
}

struct upper_list {
	struct list_head list;
	struct net_device *upper;
};

static int netdev_upper_walk(struct net_device *upper, void *data)
{
	struct upper_list *entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
	struct list_head *upper_list = data;

	if (!entry)
		return 0;

	list_add_tail(&entry->list, upper_list);
	dev_hold(upper);
	entry->upper = upper;

	return 0;
}
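
/*
 * Collect the upper devices of the cookie netdevice under RCU, then invoke
 * @handle_netdev on the netdevice itself and on each collected upper device,
 * dropping the references taken during the walk.
 */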
static void handle_netdev_upper(struct ib_device *ib_dev, u8 port,
				void *cookie,
				void (*handle_netdev)(struct ib_device *ib_dev,
						      u8 port,
						      struct net_device *ndev))
{
	struct net_device *ndev = cookie;
	struct upper_list *upper_iter;
	struct upper_list *upper_temp;
	LIST_HEAD(upper_list);

	rcu_read_lock();
	netdev_walk_all_upper_dev_rcu(ndev, netdev_upper_walk, &upper_list);
	rcu_read_unlock();

	handle_netdev(ib_dev, port, ndev);
	list_for_each_entry_safe(upper_iter, upper_temp, &upper_list,
				 list) {
		handle_netdev(ib_dev, port, upper_iter->upper);
		dev_put(upper_iter->upper);
		list_del(&upper_iter->list);
		kfree(upper_iter);
	}
}

static void _roce_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
				      struct net_device *event_ndev)
{
	ib_cache_gid_del_all_netdev_gids(ib_dev, port, event_ndev);
}

static void del_netdev_upper_ips(struct ib_device *ib_dev, u8 port,
				 struct net_device *rdma_ndev, void *cookie)
{
	handle_netdev_upper(ib_dev, port, cookie, _roce_del_all_netdev_gids);
}

static void add_netdev_upper_ips(struct ib_device *ib_dev, u8 port,
				 struct net_device *rdma_ndev, void *cookie)
{
	handle_netdev_upper(ib_dev, port, cookie, _add_netdev_ips);
}
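
/*
 * If the RDMA netdevice has a bond master, delete the RDMA netdevice's
 * default GIDs (the bond master provides the default GIDs instead).
 */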
static void del_netdev_default_ips_join(struct ib_device *ib_dev, u8 port,
					struct net_device *rdma_ndev,
					void *cookie)
{
	struct net_device *master_ndev;

	rcu_read_lock();
	master_ndev = netdev_master_upper_dev_get_rcu(rdma_ndev);
	if (master_ndev)
		dev_hold(master_ndev);
	rcu_read_unlock();

	if (master_ndev) {
		bond_delete_netdev_default_gids(ib_dev, port, rdma_ndev,
						master_ndev);
		dev_put(master_ndev);
	}
}

/* The following functions operate on all IB devices. netdevice_event and
 * addr_event execute ib_enum_all_roce_netdevs through a work.
 * ib_enum_all_roce_netdevs iterates through all IB devices.
 */

static void netdevice_event_work_handler(struct work_struct *_work)
{
	struct netdev_event_work *work =
		container_of(_work, struct netdev_event_work, work);
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(work->cmds) && work->cmds[i].cb; i++) {
		ib_enum_all_roce_netdevs(work->cmds[i].filter,
					 work->cmds[i].filter_ndev,
					 work->cmds[i].cb,
					 work->cmds[i].ndev);
		dev_put(work->cmds[i].ndev);
		dev_put(work->cmds[i].filter_ndev);
	}

	kfree(work);
}
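
/*
 * Copy the command array into a freshly allocated work item, take references
 * on the netdevices involved, and queue the work on gid_cache_wq. The
 * references are released by netdevice_event_work_handler().
 */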
static int netdevice_queue_work(struct netdev_event_work_cmd *cmds,
				struct net_device *ndev)
{
	unsigned int i;
	struct netdev_event_work *ndev_work =
		kmalloc(sizeof(*ndev_work), GFP_KERNEL);

	if (!ndev_work)
		return NOTIFY_DONE;

	memcpy(ndev_work->cmds, cmds, sizeof(ndev_work->cmds));
	for (i = 0; i < ARRAY_SIZE(ndev_work->cmds) && ndev_work->cmds[i].cb; i++) {
		if (!ndev_work->cmds[i].ndev)
			ndev_work->cmds[i].ndev = ndev;
		if (!ndev_work->cmds[i].filter_ndev)
			ndev_work->cmds[i].filter_ndev = ndev;
		dev_hold(ndev_work->cmds[i].ndev);
		dev_hold(ndev_work->cmds[i].filter_ndev);
	}
	INIT_WORK(&ndev_work->work, netdevice_event_work_handler);

	queue_work(gid_cache_wq, &ndev_work->work);

	return NOTIFY_DONE;
}

static const struct netdev_event_work_cmd add_cmd = {
	.cb	= add_netdev_ips,
	.filter	= is_eth_port_of_netdev_filter
};

static const struct netdev_event_work_cmd add_cmd_upper_ips = {
	.cb	= add_netdev_upper_ips,
	.filter	= is_eth_port_of_netdev_filter
};

static void
ndev_event_unlink(struct netdev_notifier_changeupper_info *changeupper_info,
		  struct netdev_event_work_cmd *cmds)
{
	static const struct netdev_event_work_cmd upper_ips_del_cmd = {
		.cb	= del_netdev_upper_ips,
		.filter	= upper_device_filter
	};

	cmds[0] = upper_ips_del_cmd;
	cmds[0].ndev = changeupper_info->upper_dev;
	cmds[1] = add_cmd;
}

static const struct netdev_event_work_cmd bonding_default_add_cmd = {
	.cb	= add_default_gids,
	.filter	= is_upper_ndev_bond_master_filter
};

static void
ndev_event_link(struct net_device *event_ndev,
		struct netdev_notifier_changeupper_info *changeupper_info,
		struct netdev_event_work_cmd *cmds)
{
	static const struct netdev_event_work_cmd bonding_default_del_cmd = {
		.cb	= del_default_gids,
		.filter	= is_upper_ndev_bond_master_filter
	};
	/*
	 * When a lower netdev is linked to its upper bonding
	 * netdev, delete lower slave netdev's default GIDs.
	 */
	cmds[0] = bonding_default_del_cmd;
	cmds[0].ndev = event_ndev;
	cmds[0].filter_ndev = changeupper_info->upper_dev;

	/* Now add bonding upper device default GIDs */
	cmds[1] = bonding_default_add_cmd;
	cmds[1].ndev = changeupper_info->upper_dev;
	cmds[1].filter_ndev = changeupper_info->upper_dev;

	/* Now add bonding upper device IP based GIDs */
	cmds[2] = add_cmd_upper_ips;
	cmds[2].ndev = changeupper_info->upper_dev;
	cmds[2].filter_ndev = changeupper_info->upper_dev;
}

static void netdevice_event_changeupper(struct net_device *event_ndev,
		struct netdev_notifier_changeupper_info *changeupper_info,
		struct netdev_event_work_cmd *cmds)
{
	if (changeupper_info->linking)
		ndev_event_link(event_ndev, changeupper_info, cmds);
	else
		ndev_event_unlink(changeupper_info, cmds);
}

static const struct netdev_event_work_cmd add_default_gid_cmd = {
	.cb	= add_default_gids,
	.filter	= is_ndev_for_default_gid_filter,
};
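
/*
 * Netdevice notifier: translate netdev events (register/up, unregister,
 * address change, upper-device change, bonding failover) into a small array
 * of filter+callback commands and queue them for execution against all RoCE
 * ports.
 */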
static int netdevice_event(struct notifier_block *this, unsigned long event,
			   void *ptr)
{
	static const struct netdev_event_work_cmd del_cmd = {
		.cb = del_netdev_ips, .filter = pass_all_filter};
	static const struct netdev_event_work_cmd
			bonding_default_del_cmd_join = {
		.cb	= del_netdev_default_ips_join,
		.filter	= is_eth_port_inactive_slave_filter
	};
	static const struct netdev_event_work_cmd
			netdev_del_cmd = {
		.cb	= del_netdev_ips,
		.filter = is_eth_port_of_netdev_filter
	};
	static const struct netdev_event_work_cmd bonding_event_ips_del_cmd = {
		.cb = del_netdev_upper_ips, .filter = upper_device_filter};
	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
	struct netdev_event_work_cmd cmds[ROCE_NETDEV_CALLBACK_SZ] = { {NULL} };

	if (ndev->type != ARPHRD_ETHER)
		return NOTIFY_DONE;

	switch (event) {
	case NETDEV_REGISTER:
	case NETDEV_UP:
		cmds[0] = bonding_default_del_cmd_join;
		cmds[1] = add_default_gid_cmd;
		cmds[2] = add_cmd;
		break;

	case NETDEV_UNREGISTER:
		if (ndev->reg_state < NETREG_UNREGISTERED)
			cmds[0] = del_cmd;
		else
			return NOTIFY_DONE;
		break;

	case NETDEV_CHANGEADDR:
		cmds[0] = netdev_del_cmd;
		cmds[1] = add_default_gid_cmd;
		cmds[2] = add_cmd;
		break;

	case NETDEV_CHANGEUPPER:
		netdevice_event_changeupper(ndev,
			container_of(ptr, struct netdev_notifier_changeupper_info, info),
			cmds);
		break;

	case NETDEV_BONDING_FAILOVER:
		cmds[0] = bonding_event_ips_del_cmd;
		/* Add default GIDs of the bond device */
		cmds[1] = bonding_default_add_cmd;
		/* Add IP based GIDs of the bond device */
		cmds[2] = add_cmd_upper_ips;
		break;

	default:
		return NOTIFY_DONE;
	}

	return netdevice_queue_work(cmds, ndev);
}

static void update_gid_event_work_handler(struct work_struct *_work)
{
	struct update_gid_event_work *work =
		container_of(_work, struct update_gid_event_work, work);

	ib_enum_all_roce_netdevs(is_eth_port_of_netdev_filter,
				 work->gid_attr.ndev,
				 callback_for_addr_gid_device_scan, work);

	dev_put(work->gid_attr.ndev);
	kfree(work);
}
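
/*
 * Common handler for the IPv4/IPv6 address notifiers: map NETDEV_UP/DOWN to
 * a GID add/delete, convert the address to a GID, and queue the update on
 * gid_cache_wq. The work holds a reference on the netdevice until it runs.
 */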
static int addr_event(struct notifier_block *this, unsigned long event,
		      struct sockaddr *sa, struct net_device *ndev)
{
	struct update_gid_event_work *work;
	enum gid_op_type gid_op;

	if (ndev->type != ARPHRD_ETHER)
		return NOTIFY_DONE;

	switch (event) {
	case NETDEV_UP:
		gid_op = GID_ADD;
		break;

	case NETDEV_DOWN:
		gid_op = GID_DEL;
		break;

	default:
		return NOTIFY_DONE;
	}

	work = kmalloc(sizeof(*work), GFP_ATOMIC);
	if (!work)
		return NOTIFY_DONE;

	INIT_WORK(&work->work, update_gid_event_work_handler);

	rdma_ip2gid(sa, &work->gid);
	work->gid_op = gid_op;

	memset(&work->gid_attr, 0, sizeof(work->gid_attr));
	dev_hold(ndev);
	work->gid_attr.ndev = ndev;

	queue_work(gid_cache_wq, &work->work);

	return NOTIFY_DONE;
}

static int inetaddr_event(struct notifier_block *this, unsigned long event,
			  void *ptr)
{
	struct sockaddr_in	in;
	struct net_device	*ndev;
	struct in_ifaddr	*ifa = ptr;

	in.sin_family = AF_INET;
	in.sin_addr.s_addr = ifa->ifa_address;
	ndev = ifa->ifa_dev->dev;

	return addr_event(this, event, (struct sockaddr *)&in, ndev);
}

static int inet6addr_event(struct notifier_block *this, unsigned long event,
			   void *ptr)
{
	struct sockaddr_in6	in6;
	struct net_device	*ndev;
	struct inet6_ifaddr	*ifa6 = ptr;

	in6.sin6_family = AF_INET6;
	in6.sin6_addr = ifa6->addr;
	ndev = ifa6->idev->dev;

	return addr_event(this, event, (struct sockaddr *)&in6, ndev);
}

static struct notifier_block nb_netdevice = {
	.notifier_call = netdevice_event
};

static struct notifier_block nb_inetaddr = {
	.notifier_call = inetaddr_event
};

static struct notifier_block nb_inet6addr = {
	.notifier_call = inet6addr_event
};

int __init roce_gid_mgmt_init(void)
{
	gid_cache_wq = alloc_ordered_workqueue("gid-cache-wq", 0);
	if (!gid_cache_wq)
		return -ENOMEM;

	register_inetaddr_notifier(&nb_inetaddr);
	if (IS_ENABLED(CONFIG_IPV6))
		register_inet6addr_notifier(&nb_inet6addr);
	/* We rely on the netdevice notifier to enumerate all
	 * existing devices in the system. Register to this notifier
	 * last to make sure we will not miss any IP add/del
	 * callbacks.
	 */
	register_netdevice_notifier(&nb_netdevice);

	return 0;
}

void __exit roce_gid_mgmt_cleanup(void)
{
	if (IS_ENABLED(CONFIG_IPV6))
		unregister_inet6addr_notifier(&nb_inet6addr);
	unregister_inetaddr_notifier(&nb_inetaddr);
	unregister_netdevice_notifier(&nb_netdevice);

	/*
	 * Ensure all gid deletion tasks complete before we go down,
	 * to avoid any reference to free'd memory. By the time
	 * ib-core is removed, all physical devices have been removed,
	 * so no issue with remaining hardware contexts.
	 */
	destroy_workqueue(gid_cache_wq);
}