vnic_main.c

/*
 * Copyright(c) 2017 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license. When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

/*
 * This file contains HFI1 support for VNIC functionality
 */

#include <linux/io.h>
#include <linux/if_vlan.h>

#include "vnic.h"

#define HFI_TX_TIMEOUT_MS 1000

#define HFI1_VNIC_RCV_Q_SIZE 1024

#define HFI1_VNIC_UP 0

static DEFINE_SPINLOCK(vport_cntr_lock);

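/* setup_vnic_ctxt - allocate the receive resources (RcvHdr queue and eager
 * buffers) for a VNIC receive context and enable it.
 */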
static int setup_vnic_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt)
{
	unsigned int rcvctrl_ops = 0;
	int ret;

	uctxt->do_interrupt = &handle_receive_interrupt;

	/* Now allocate the RcvHdr queue and eager buffers. */
	ret = hfi1_create_rcvhdrq(dd, uctxt);
	if (ret)
		goto done;

	ret = hfi1_setup_eagerbufs(uctxt);
	if (ret)
		goto done;

	if (uctxt->rcvhdrtail_kvaddr)
		clear_rcvhdrtail(uctxt);

	rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB;
	rcvctrl_ops |= HFI1_RCVCTRL_INTRAVAIL_ENB;

	if (!HFI1_CAP_KGET_MASK(uctxt->flags, MULTI_PKT_EGR))
		rcvctrl_ops |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB;
	if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_EGR_FULL))
		rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB;
	if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_RHQ_FULL))
		rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB;
	if (HFI1_CAP_KGET_MASK(uctxt->flags, DMA_RTAIL))
		rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB;

	hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt);
done:
	return ret;
}

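/* allocate_vnic_ctxt - create a kernel receive context dedicated to VNIC
 * traffic and set up its capability flags and MSI-X information.
 */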
static int allocate_vnic_ctxt(struct hfi1_devdata *dd,
			      struct hfi1_ctxtdata **vnic_ctxt)
{
	struct hfi1_ctxtdata *uctxt;
	int ret;

	if (dd->flags & HFI1_FROZEN)
		return -EIO;

	ret = hfi1_create_ctxtdata(dd->pport, dd->node, &uctxt);
	if (ret < 0) {
		dd_dev_err(dd, "Unable to create ctxtdata, failing open\n");
		return -ENOMEM;
	}

	uctxt->flags = HFI1_CAP_KGET(MULTI_PKT_EGR) |
			HFI1_CAP_KGET(NODROP_RHQ_FULL) |
			HFI1_CAP_KGET(NODROP_EGR_FULL) |
			HFI1_CAP_KGET(DMA_RTAIL);
	uctxt->seq_cnt = 1;
	uctxt->is_vnic = true;

	if (dd->num_msix_entries)
		hfi1_set_vnic_msix_info(uctxt);

	hfi1_stats.sps_ctxts++;
	dd_dev_dbg(dd, "created vnic context %d\n", uctxt->ctxt);
	*vnic_ctxt = uctxt;

	return 0;
}

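/* deallocate_vnic_ctxt - disable a VNIC receive context, reset its
 * RcvCtxtCtrl state, and release it.
 */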
static void deallocate_vnic_ctxt(struct hfi1_devdata *dd,
				 struct hfi1_ctxtdata *uctxt)
{
	dd_dev_dbg(dd, "closing vnic context %d\n", uctxt->ctxt);
	flush_wc();

	if (dd->num_msix_entries)
		hfi1_reset_vnic_msix_info(uctxt);

	/*
	 * Disable receive context and interrupt available, reset all
	 * RcvCtxtCtrl bits to default values.
	 */
	hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS |
		     HFI1_RCVCTRL_TIDFLOW_DIS |
		     HFI1_RCVCTRL_INTRAVAIL_DIS |
		     HFI1_RCVCTRL_ONE_PKT_EGR_DIS |
		     HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
		     HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt);

	uctxt->event_flags = 0;

	hfi1_clear_tids(uctxt);
	hfi1_clear_ctxt_pkey(dd, uctxt);

	hfi1_stats.sps_ctxts--;

	hfi1_free_ctxt(uctxt);
}

void hfi1_vnic_setup(struct hfi1_devdata *dd)
{
	idr_init(&dd->vnic.vesw_idr);
}

void hfi1_vnic_cleanup(struct hfi1_devdata *dd)
{
	idr_destroy(&dd->vnic.vesw_idr);
}

#define SUM_GRP_COUNTERS(stats, qstats, x_grp) do {            \
		u64 *src64, *dst64;                            \
		for (src64 = &qstats->x_grp.unicast,           \
			dst64 = &stats->x_grp.unicast;         \
			dst64 <= &stats->x_grp.s_1519_max;) {  \
			*dst64++ += *src64++;                  \
		}                                              \
	} while (0)

/* hfi1_vnic_update_stats - update statistics */
static void hfi1_vnic_update_stats(struct hfi1_vnic_vport_info *vinfo,
				   struct opa_vnic_stats *stats)
{
	struct net_device *netdev = vinfo->netdev;
	u8 i;

	/* add tx counters on different queues */
	for (i = 0; i < vinfo->num_tx_q; i++) {
		struct opa_vnic_stats *qstats = &vinfo->stats[i];
		struct rtnl_link_stats64 *qnstats = &vinfo->stats[i].netstats;

		stats->netstats.tx_fifo_errors += qnstats->tx_fifo_errors;
		stats->netstats.tx_carrier_errors += qnstats->tx_carrier_errors;
		stats->tx_drop_state += qstats->tx_drop_state;
		stats->tx_dlid_zero += qstats->tx_dlid_zero;

		SUM_GRP_COUNTERS(stats, qstats, tx_grp);
		stats->netstats.tx_packets += qnstats->tx_packets;
		stats->netstats.tx_bytes += qnstats->tx_bytes;
	}

	/* add rx counters on different queues */
	for (i = 0; i < vinfo->num_rx_q; i++) {
		struct opa_vnic_stats *qstats = &vinfo->stats[i];
		struct rtnl_link_stats64 *qnstats = &vinfo->stats[i].netstats;

		stats->netstats.rx_fifo_errors += qnstats->rx_fifo_errors;
		stats->netstats.rx_nohandler += qnstats->rx_nohandler;
		stats->rx_drop_state += qstats->rx_drop_state;
		stats->rx_oversize += qstats->rx_oversize;
		stats->rx_runt += qstats->rx_runt;

		SUM_GRP_COUNTERS(stats, qstats, rx_grp);
		stats->netstats.rx_packets += qnstats->rx_packets;
		stats->netstats.rx_bytes += qnstats->rx_bytes;
	}

	stats->netstats.tx_errors = stats->netstats.tx_fifo_errors +
				    stats->netstats.tx_carrier_errors +
				    stats->tx_drop_state + stats->tx_dlid_zero;
	stats->netstats.tx_dropped = stats->netstats.tx_errors;

	stats->netstats.rx_errors = stats->netstats.rx_fifo_errors +
				    stats->netstats.rx_nohandler +
				    stats->rx_drop_state + stats->rx_oversize +
				    stats->rx_runt;
	stats->netstats.rx_dropped = stats->netstats.rx_errors;

	netdev->stats.tx_packets = stats->netstats.tx_packets;
	netdev->stats.tx_bytes = stats->netstats.tx_bytes;
	netdev->stats.tx_fifo_errors = stats->netstats.tx_fifo_errors;
	netdev->stats.tx_carrier_errors = stats->netstats.tx_carrier_errors;
	netdev->stats.tx_errors = stats->netstats.tx_errors;
	netdev->stats.tx_dropped = stats->netstats.tx_dropped;

	netdev->stats.rx_packets = stats->netstats.rx_packets;
	netdev->stats.rx_bytes = stats->netstats.rx_bytes;
	netdev->stats.rx_fifo_errors = stats->netstats.rx_fifo_errors;
	netdev->stats.multicast = stats->rx_grp.mcastbcast;
	netdev->stats.rx_length_errors = stats->rx_oversize + stats->rx_runt;
	netdev->stats.rx_errors = stats->netstats.rx_errors;
	netdev->stats.rx_dropped = stats->netstats.rx_dropped;
}

/* update_len_counters - update pkt's len histogram counters */
static inline void update_len_counters(struct opa_vnic_grp_stats *grp,
				       int len)
{
	/* account for 4 byte FCS */
	if (len >= 1515)
		grp->s_1519_max++;
	else if (len >= 1020)
		grp->s_1024_1518++;
	else if (len >= 508)
		grp->s_512_1023++;
	else if (len >= 252)
		grp->s_256_511++;
	else if (len >= 124)
		grp->s_128_255++;
	else if (len >= 61)
		grp->s_65_127++;
	else
		grp->s_64++;
}

/* hfi1_vnic_update_tx_counters - update transmit counters */
static void hfi1_vnic_update_tx_counters(struct hfi1_vnic_vport_info *vinfo,
					 u8 q_idx, struct sk_buff *skb, int err)
{
	struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
	struct opa_vnic_stats *stats = &vinfo->stats[q_idx];
	struct opa_vnic_grp_stats *tx_grp = &stats->tx_grp;
	u16 vlan_tci;

	stats->netstats.tx_packets++;
	stats->netstats.tx_bytes += skb->len + ETH_FCS_LEN;

	update_len_counters(tx_grp, skb->len);

	/* rest of the counts are for good packets only */
	if (unlikely(err))
		return;

	if (is_multicast_ether_addr(mac_hdr->h_dest))
		tx_grp->mcastbcast++;
	else
		tx_grp->unicast++;

	if (!__vlan_get_tag(skb, &vlan_tci))
		tx_grp->vlan++;
	else
		tx_grp->untagged++;
}

/* hfi1_vnic_update_rx_counters - update receive counters */
static void hfi1_vnic_update_rx_counters(struct hfi1_vnic_vport_info *vinfo,
					 u8 q_idx, struct sk_buff *skb, int err)
{
	struct ethhdr *mac_hdr = (struct ethhdr *)skb->data;
	struct opa_vnic_stats *stats = &vinfo->stats[q_idx];
	struct opa_vnic_grp_stats *rx_grp = &stats->rx_grp;
	u16 vlan_tci;

	stats->netstats.rx_packets++;
	stats->netstats.rx_bytes += skb->len + ETH_FCS_LEN;

	update_len_counters(rx_grp, skb->len);

	/* rest of the counts are for good packets only */
	if (unlikely(err))
		return;

	if (is_multicast_ether_addr(mac_hdr->h_dest))
		rx_grp->mcastbcast++;
	else
		rx_grp->unicast++;

	if (!__vlan_get_tag(skb, &vlan_tci))
		rx_grp->vlan++;
	else
		rx_grp->untagged++;
}

/* This function is overloaded for opa_vnic specific implementation */
static void hfi1_vnic_get_stats64(struct net_device *netdev,
				  struct rtnl_link_stats64 *stats)
{
	struct opa_vnic_stats *vstats = (struct opa_vnic_stats *)stats;
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);

	hfi1_vnic_update_stats(vinfo, vstats);
}

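/* create_bypass_pbc - build the PBC word for a bypass packet: no HCRC
 * insertion, bypass ICRC, credit return, VL, and length in dwords.
 */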
static u64 create_bypass_pbc(u32 vl, u32 dw_len)
{
	u64 pbc;

	pbc = ((u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT)
		| PBC_INSERT_BYPASS_ICRC | PBC_CREDIT_RETURN
		| PBC_PACKET_BYPASS
		| ((vl & PBC_VL_MASK) << PBC_VL_SHIFT)
		| (dw_len & PBC_LENGTH_DWS_MASK) << PBC_LENGTH_DWS_SHIFT;

	return pbc;
}

/* hfi1_vnic_maybe_stop_tx - stop tx queue if required */
static void hfi1_vnic_maybe_stop_tx(struct hfi1_vnic_vport_info *vinfo,
				    u8 q_idx)
{
	netif_stop_subqueue(vinfo->netdev, q_idx);
	if (!hfi1_vnic_sdma_write_avail(vinfo, q_idx))
		return;

	netif_start_subqueue(vinfo->netdev, q_idx);
}

static netdev_tx_t hfi1_netdev_start_xmit(struct sk_buff *skb,
					  struct net_device *netdev)
{
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
	u8 pad_len, q_idx = skb->queue_mapping;
	struct hfi1_devdata *dd = vinfo->dd;
	struct opa_vnic_skb_mdata *mdata;
	u32 pkt_len, total_len;
	int err = -EINVAL;
	u64 pbc;

	v_dbg("xmit: queue %d skb len %d\n", q_idx, skb->len);
	if (unlikely(!netif_oper_up(netdev))) {
		vinfo->stats[q_idx].tx_drop_state++;
		goto tx_finish;
	}

	/* take out meta data */
	mdata = (struct opa_vnic_skb_mdata *)skb->data;
	skb_pull(skb, sizeof(*mdata));
	if (unlikely(mdata->flags & OPA_VNIC_SKB_MDATA_ENCAP_ERR)) {
		vinfo->stats[q_idx].tx_dlid_zero++;
		goto tx_finish;
	}

	/* add tail padding (for 8 bytes size alignment) and icrc */
	pad_len = -(skb->len + OPA_VNIC_ICRC_TAIL_LEN) & 0x7;
	pad_len += OPA_VNIC_ICRC_TAIL_LEN;

	/*
	 * pkt_len is how much data we have to write, including header and
	 * data. total_len is the length of the packet in dwords plus the
	 * PBC; it should not include the CRC.
	 */
	pkt_len = (skb->len + pad_len) >> 2;
	total_len = pkt_len + 2; /* PBC + packet */

	pbc = create_bypass_pbc(mdata->vl, total_len);

	skb_get(skb);
	v_dbg("pbc 0x%016llX len %d pad_len %d\n", pbc, skb->len, pad_len);
	err = dd->process_vnic_dma_send(dd, q_idx, vinfo, skb, pbc, pad_len);
	if (unlikely(err)) {
		if (err == -ENOMEM)
			vinfo->stats[q_idx].netstats.tx_fifo_errors++;
		else if (err != -EBUSY)
			vinfo->stats[q_idx].netstats.tx_carrier_errors++;
	}

	/* remove the header before updating tx counters */
	skb_pull(skb, OPA_VNIC_HDR_LEN);

	if (unlikely(err == -EBUSY)) {
		hfi1_vnic_maybe_stop_tx(vinfo, q_idx);
		dev_kfree_skb_any(skb);
		return NETDEV_TX_BUSY;
	}

tx_finish:
	/* update tx counters */
	hfi1_vnic_update_tx_counters(vinfo, q_idx, skb, err);
	dev_kfree_skb_any(skb);
	return NETDEV_TX_OK;
}

static u16 hfi1_vnic_select_queue(struct net_device *netdev,
				  struct sk_buff *skb,
				  void *accel_priv,
				  select_queue_fallback_t fallback)
{
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
	struct opa_vnic_skb_mdata *mdata;
	struct sdma_engine *sde;

	mdata = (struct opa_vnic_skb_mdata *)skb->data;
	sde = sdma_select_engine_vl(vinfo->dd, mdata->entropy, mdata->vl);
	return sde->this_idx;
}

/* hfi1_vnic_decap_skb - strip OPA header from the skb (ethernet) packet */
static inline int hfi1_vnic_decap_skb(struct hfi1_vnic_rx_queue *rxq,
				      struct sk_buff *skb)
{
	struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
	int max_len = vinfo->netdev->mtu + VLAN_ETH_HLEN;
	int rc = -EFAULT;

	skb_pull(skb, OPA_VNIC_HDR_LEN);

	/* Validate Packet length */
	if (unlikely(skb->len > max_len))
		vinfo->stats[rxq->idx].rx_oversize++;
	else if (unlikely(skb->len < ETH_ZLEN))
		vinfo->stats[rxq->idx].rx_runt++;
	else
		rc = 0;
	return rc;
}

static inline struct sk_buff *hfi1_vnic_get_skb(struct hfi1_vnic_rx_queue *rxq)
{
	unsigned char *pad_info;
	struct sk_buff *skb;

	skb = skb_dequeue(&rxq->skbq);
	if (unlikely(!skb))
		return NULL;

	/* remove tail padding and icrc */
	pad_info = skb->data + skb->len - 1;
	skb_trim(skb, (skb->len - OPA_VNIC_ICRC_TAIL_LEN -
		       ((*pad_info) & 0x7)));

	return skb;
}

/* hfi1_vnic_handle_rx - handle skb receive */
static void hfi1_vnic_handle_rx(struct hfi1_vnic_rx_queue *rxq,
				int *work_done, int work_to_do)
{
	struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
	struct sk_buff *skb;
	int rc;

	while (1) {
		if (*work_done >= work_to_do)
			break;

		skb = hfi1_vnic_get_skb(rxq);
		if (unlikely(!skb))
			break;

		rc = hfi1_vnic_decap_skb(rxq, skb);
		/* update rx counters */
		hfi1_vnic_update_rx_counters(vinfo, rxq->idx, skb, rc);
		if (unlikely(rc)) {
			dev_kfree_skb_any(skb);
			continue;
		}

		skb_checksum_none_assert(skb);
		skb->protocol = eth_type_trans(skb, rxq->netdev);

		napi_gro_receive(&rxq->napi, skb);
		(*work_done)++;
	}
}

/* hfi1_vnic_napi - napi receive polling callback function */
static int hfi1_vnic_napi(struct napi_struct *napi, int budget)
{
	struct hfi1_vnic_rx_queue *rxq = container_of(napi,
					      struct hfi1_vnic_rx_queue, napi);
	struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
	int work_done = 0;

	v_dbg("napi %d budget %d\n", rxq->idx, budget);
	hfi1_vnic_handle_rx(rxq, &work_done, budget);

	v_dbg("napi %d work_done %d\n", rxq->idx, work_done);
	if (work_done < budget)
		napi_complete(napi);

	return work_done;
}

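/* hfi1_vnic_bypass_rcv - look up the destination vport by virtual ethernet
 * switch id, copy the received packet into an skb on that vport's receive
 * queue, and schedule NAPI to process it.
 */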
void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet)
{
	struct hfi1_devdata *dd = packet->rcd->dd;
	struct hfi1_vnic_vport_info *vinfo = NULL;
	struct hfi1_vnic_rx_queue *rxq;
	struct sk_buff *skb;
	int l4_type, vesw_id = -1;
	u8 q_idx;

	l4_type = hfi1_16B_get_l4(packet->ebuf);
	if (likely(l4_type == OPA_16B_L4_ETHR)) {
		vesw_id = HFI1_VNIC_GET_VESWID(packet->ebuf);
		vinfo = idr_find(&dd->vnic.vesw_idr, vesw_id);

		/*
		 * In case of invalid vesw id, count the error on
		 * the first available vport.
		 */
		if (unlikely(!vinfo)) {
			struct hfi1_vnic_vport_info *vinfo_tmp;
			int id_tmp = 0;

			vinfo_tmp = idr_get_next(&dd->vnic.vesw_idr, &id_tmp);
			if (vinfo_tmp) {
				spin_lock(&vport_cntr_lock);
				vinfo_tmp->stats[0].netstats.rx_nohandler++;
				spin_unlock(&vport_cntr_lock);
			}
		}
	}

	if (unlikely(!vinfo)) {
		dd_dev_warn(dd, "vnic rcv err: l4 %d vesw id %d ctx %d\n",
			    l4_type, vesw_id, packet->rcd->ctxt);
		return;
	}

	q_idx = packet->rcd->vnic_q_idx;
	rxq = &vinfo->rxq[q_idx];
	if (unlikely(!netif_oper_up(vinfo->netdev))) {
		vinfo->stats[q_idx].rx_drop_state++;
		skb_queue_purge(&rxq->skbq);
		return;
	}

	if (unlikely(skb_queue_len(&rxq->skbq) > HFI1_VNIC_RCV_Q_SIZE)) {
		vinfo->stats[q_idx].netstats.rx_fifo_errors++;
		return;
	}

	skb = netdev_alloc_skb(vinfo->netdev, packet->tlen);
	if (unlikely(!skb)) {
		vinfo->stats[q_idx].netstats.rx_fifo_errors++;
		return;
	}

	memcpy(skb->data, packet->ebuf, packet->tlen);
	skb_put(skb, packet->tlen);

	skb_queue_tail(&rxq->skbq, skb);

	if (napi_schedule_prep(&rxq->napi)) {
		v_dbg("napi %d scheduling\n", q_idx);
		__napi_schedule(&rxq->napi);
	}
}

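/* hfi1_vnic_up - register the vport's vesw_id, enable NAPI on all receive
 * queues, and start the netdev transmit queues.
 */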
static int hfi1_vnic_up(struct hfi1_vnic_vport_info *vinfo)
{
	struct hfi1_devdata *dd = vinfo->dd;
	struct net_device *netdev = vinfo->netdev;
	int i, rc;

	/* ensure virtual eth switch id is valid */
	if (!vinfo->vesw_id)
		return -EINVAL;

	rc = idr_alloc(&dd->vnic.vesw_idr, vinfo, vinfo->vesw_id,
		       vinfo->vesw_id + 1, GFP_NOWAIT);
	if (rc < 0)
		return rc;

	for (i = 0; i < vinfo->num_rx_q; i++) {
		struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];

		skb_queue_head_init(&rxq->skbq);
		napi_enable(&rxq->napi);
	}

	netif_carrier_on(netdev);
	netif_tx_start_all_queues(netdev);
	set_bit(HFI1_VNIC_UP, &vinfo->flags);

	return 0;
}

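/* hfi1_vnic_down - stop traffic on the vport: mark it down, unregister its
 * vesw_id, quiesce interrupts, and drain any queued receive skbs.
 */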
static void hfi1_vnic_down(struct hfi1_vnic_vport_info *vinfo)
{
	struct hfi1_devdata *dd = vinfo->dd;
	u8 i;

	clear_bit(HFI1_VNIC_UP, &vinfo->flags);
	netif_carrier_off(vinfo->netdev);
	netif_tx_disable(vinfo->netdev);
	idr_remove(&dd->vnic.vesw_idr, vinfo->vesw_id);

	/* ensure irqs see the change */
	hfi1_vnic_synchronize_irq(dd);

	/* remove unread skbs */
	for (i = 0; i < vinfo->num_rx_q; i++) {
		struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];

		napi_disable(&rxq->napi);
		skb_queue_purge(&rxq->skbq);
	}
}

static int hfi1_netdev_open(struct net_device *netdev)
{
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
	int rc;

	mutex_lock(&vinfo->lock);
	rc = hfi1_vnic_up(vinfo);
	mutex_unlock(&vinfo->lock);
	return rc;
}

static int hfi1_netdev_close(struct net_device *netdev)
{
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);

	mutex_lock(&vinfo->lock);
	if (test_bit(HFI1_VNIC_UP, &vinfo->flags))
		hfi1_vnic_down(vinfo);
	mutex_unlock(&vinfo->lock);
	return 0;
}

static int hfi1_vnic_allot_ctxt(struct hfi1_devdata *dd,
				struct hfi1_ctxtdata **vnic_ctxt)
{
	int rc;

	rc = allocate_vnic_ctxt(dd, vnic_ctxt);
	if (rc) {
		dd_dev_err(dd, "vnic ctxt alloc failed %d\n", rc);
		return rc;
	}

	rc = setup_vnic_ctxt(dd, *vnic_ctxt);
	if (rc) {
		dd_dev_err(dd, "vnic ctxt setup failed %d\n", rc);
		deallocate_vnic_ctxt(dd, *vnic_ctxt);
		*vnic_ctxt = NULL;
	}

	return rc;
}

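/* hfi1_vnic_init - per-vport initialization: set up transmit request
 * resources on first use, allocate any additional VNIC receive contexts
 * needed for this vport's receive queues, and initialize its SDMA state.
 */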
static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo)
{
	struct hfi1_devdata *dd = vinfo->dd;
	int i, rc = 0;

	mutex_lock(&hfi1_mutex);
	if (!dd->vnic.num_vports) {
		rc = hfi1_vnic_txreq_init(dd);
		if (rc)
			goto txreq_fail;

		dd->vnic.msix_idx = dd->first_dyn_msix_idx;
	}

	for (i = dd->vnic.num_ctxt; i < vinfo->num_rx_q; i++) {
		rc = hfi1_vnic_allot_ctxt(dd, &dd->vnic.ctxt[i]);
		if (rc)
			break;
		hfi1_rcd_get(dd->vnic.ctxt[i]);
		dd->vnic.ctxt[i]->vnic_q_idx = i;
	}

	if (i < vinfo->num_rx_q) {
		/*
		 * If we could not allocate the required number of contexts,
		 * release the ones that were allocated here.
		 */
		while (i-- > dd->vnic.num_ctxt) {
			deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]);
			hfi1_rcd_put(dd->vnic.ctxt[i]);
			dd->vnic.ctxt[i] = NULL;
		}
		goto alloc_fail;
	}

	if (dd->vnic.num_ctxt != i) {
		dd->vnic.num_ctxt = i;
		hfi1_init_vnic_rsm(dd);
	}

	dd->vnic.num_vports++;
	hfi1_vnic_sdma_init(vinfo);

alloc_fail:
	if (!dd->vnic.num_vports)
		hfi1_vnic_txreq_deinit(dd);

txreq_fail:
	mutex_unlock(&hfi1_mutex);
	return rc;
}

static void hfi1_vnic_deinit(struct hfi1_vnic_vport_info *vinfo)
{
	struct hfi1_devdata *dd = vinfo->dd;
	int i;

	mutex_lock(&hfi1_mutex);
	if (--dd->vnic.num_vports == 0) {
		for (i = 0; i < dd->vnic.num_ctxt; i++) {
			deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]);
			hfi1_rcd_put(dd->vnic.ctxt[i]);
			dd->vnic.ctxt[i] = NULL;
		}
		hfi1_deinit_vnic_rsm(dd);
		dd->vnic.num_ctxt = 0;
		hfi1_vnic_txreq_deinit(dd);
	}
	mutex_unlock(&hfi1_mutex);
}

static void hfi1_vnic_set_vesw_id(struct net_device *netdev, int id)
{
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
	bool reopen = false;

	/*
	 * If the vesw_id is being changed and the vnic port is up,
	 * reset the port so that the new vesw_id gets picked up.
	 */
	if (id != vinfo->vesw_id) {
		mutex_lock(&vinfo->lock);
		if (test_bit(HFI1_VNIC_UP, &vinfo->flags)) {
			hfi1_vnic_down(vinfo);
			reopen = true;
		}

		vinfo->vesw_id = id;
		if (reopen)
			hfi1_vnic_up(vinfo);

		mutex_unlock(&vinfo->lock);
	}
}

/* netdev ops */
static const struct net_device_ops hfi1_netdev_ops = {
	.ndo_open = hfi1_netdev_open,
	.ndo_stop = hfi1_netdev_close,
	.ndo_start_xmit = hfi1_netdev_start_xmit,
	.ndo_select_queue = hfi1_vnic_select_queue,
	.ndo_get_stats64 = hfi1_vnic_get_stats64,
};

static void hfi1_vnic_free_rn(struct net_device *netdev)
{
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);

	hfi1_vnic_deinit(vinfo);
	mutex_destroy(&vinfo->lock);
	free_netdev(netdev);
}

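/* hfi1_vnic_alloc_rn - allocate an OPA VNIC rdma netdev: one tx queue per
 * SDMA engine and one rx queue per VNIC context, with a NAPI instance per
 * rx queue, then run per-vport initialization.
 */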
struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device,
				      u8 port_num,
				      enum rdma_netdev_t type,
				      const char *name,
				      unsigned char name_assign_type,
				      void (*setup)(struct net_device *))
{
	struct hfi1_devdata *dd = dd_from_ibdev(device);
	struct hfi1_vnic_vport_info *vinfo;
	struct net_device *netdev;
	struct rdma_netdev *rn;
	int i, size, rc;

	if (!dd->num_vnic_contexts)
		return ERR_PTR(-ENOMEM);

	if (!port_num || (port_num > dd->num_pports))
		return ERR_PTR(-EINVAL);

	if (type != RDMA_NETDEV_OPA_VNIC)
		return ERR_PTR(-EOPNOTSUPP);

	size = sizeof(struct opa_vnic_rdma_netdev) + sizeof(*vinfo);
	netdev = alloc_netdev_mqs(size, name, name_assign_type, setup,
				  dd->chip_sdma_engines, dd->num_vnic_contexts);
	if (!netdev)
		return ERR_PTR(-ENOMEM);

	rn = netdev_priv(netdev);
	vinfo = opa_vnic_dev_priv(netdev);
	vinfo->dd = dd;
	vinfo->num_tx_q = dd->chip_sdma_engines;
	vinfo->num_rx_q = dd->num_vnic_contexts;
	vinfo->netdev = netdev;
	rn->free_rdma_netdev = hfi1_vnic_free_rn;
	rn->set_id = hfi1_vnic_set_vesw_id;

	netdev->features = NETIF_F_HIGHDMA | NETIF_F_SG;
	netdev->hw_features = netdev->features;
	netdev->vlan_features = netdev->features;
	netdev->watchdog_timeo = msecs_to_jiffies(HFI_TX_TIMEOUT_MS);
	netdev->netdev_ops = &hfi1_netdev_ops;
	mutex_init(&vinfo->lock);

	for (i = 0; i < vinfo->num_rx_q; i++) {
		struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];

		rxq->idx = i;
		rxq->vinfo = vinfo;
		rxq->netdev = netdev;
		netif_napi_add(netdev, &rxq->napi, hfi1_vnic_napi, 64);
	}

	rc = hfi1_vnic_init(vinfo);
	if (rc)
		goto init_fail;

	return netdev;

init_fail:
	mutex_destroy(&vinfo->lock);
	free_netdev(netdev);
	return ERR_PTR(rc);
}