/*
 * Copyright(c) 2017 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license. When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

/*
 * This file contains HFI1 support for VNIC functionality
 */

#include <linux/io.h>
#include <linux/if_vlan.h>

#include "vnic.h"

#define HFI_TX_TIMEOUT_MS 1000

#define HFI1_VNIC_RCV_Q_SIZE 1024

#define HFI1_VNIC_UP 0

static DEFINE_SPINLOCK(vport_cntr_lock);
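
/*
 * setup_vnic_ctxt - initialize a receive context for VNIC use: create the
 * RcvHdr queue and eager buffers, program the receive control options
 * advertised in uctxt->flags and mark the context as a VNIC context.
 */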
static int setup_vnic_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt)
{
	unsigned int rcvctrl_ops = 0;
	int ret;

	hfi1_init_ctxt(uctxt->sc);

	uctxt->do_interrupt = &handle_receive_interrupt;

	/* Now allocate the RcvHdr queue and eager buffers. */
	ret = hfi1_create_rcvhdrq(dd, uctxt);
	if (ret)
		goto done;

	ret = hfi1_setup_eagerbufs(uctxt);
	if (ret)
		goto done;

	if (uctxt->rcvhdrtail_kvaddr)
		clear_rcvhdrtail(uctxt);

	rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB;
	rcvctrl_ops |= HFI1_RCVCTRL_INTRAVAIL_ENB;

	if (!HFI1_CAP_KGET_MASK(uctxt->flags, MULTI_PKT_EGR))
		rcvctrl_ops |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB;
	if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_EGR_FULL))
		rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB;
	if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_RHQ_FULL))
		rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB;
	if (HFI1_CAP_KGET_MASK(uctxt->flags, DMA_RTAIL))
		rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB;

	hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt);

	uctxt->is_vnic = true;
done:
	return ret;
}
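
/*
 * allocate_vnic_ctxt - allocate a receive context plus a PIO send context
 * for VNIC traffic. On success the new context is returned through
 * vnic_ctxt; on failure the partially constructed context is freed.
 */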
static int allocate_vnic_ctxt(struct hfi1_devdata *dd,
			      struct hfi1_ctxtdata **vnic_ctxt)
{
	struct hfi1_ctxtdata *uctxt;
	int ret;

	if (dd->flags & HFI1_FROZEN)
		return -EIO;

	ret = hfi1_create_ctxtdata(dd->pport, dd->node, &uctxt);
	if (ret < 0) {
		dd_dev_err(dd, "Unable to create ctxtdata, failing open\n");
		return -ENOMEM;
	}

	uctxt->flags = HFI1_CAP_KGET(MULTI_PKT_EGR) |
			HFI1_CAP_KGET(NODROP_RHQ_FULL) |
			HFI1_CAP_KGET(NODROP_EGR_FULL) |
			HFI1_CAP_KGET(DMA_RTAIL);
	uctxt->seq_cnt = 1;

	/* Allocate and enable a PIO send context */
	uctxt->sc = sc_alloc(dd, SC_VNIC, uctxt->rcvhdrqentsize,
			     uctxt->numa_id);
	ret = uctxt->sc ? 0 : -ENOMEM;
	if (ret)
		goto bail;

	dd_dev_dbg(dd, "allocated vnic send context %u(%u)\n",
		   uctxt->sc->sw_index, uctxt->sc->hw_context);
	ret = sc_enable(uctxt->sc);
	if (ret)
		goto bail;

	if (dd->num_msix_entries)
		hfi1_set_vnic_msix_info(uctxt);

	hfi1_stats.sps_ctxts++;
	dd_dev_dbg(dd, "created vnic context %d\n", uctxt->ctxt);

	*vnic_ctxt = uctxt;
	return ret;
bail:
	hfi1_free_ctxt(uctxt);
	dd_dev_dbg(dd, "vnic allocation failed. rc %d\n", ret);
	return ret;
}
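
/*
 * deallocate_vnic_ctxt - undo allocate/setup_vnic_ctxt: disable the receive
 * context, restore the send context integrity checks and type to SC_USER,
 * clear TIDs and pkeys, and free the context.
 */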
static void deallocate_vnic_ctxt(struct hfi1_devdata *dd,
				 struct hfi1_ctxtdata *uctxt)
{
	dd_dev_dbg(dd, "closing vnic context %d\n", uctxt->ctxt);
	flush_wc();

	if (dd->num_msix_entries)
		hfi1_reset_vnic_msix_info(uctxt);

	/*
	 * Disable receive context and interrupt available, reset all
	 * RcvCtxtCtrl bits to default values.
	 */
	hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS |
		     HFI1_RCVCTRL_TIDFLOW_DIS |
		     HFI1_RCVCTRL_INTRAVAIL_DIS |
		     HFI1_RCVCTRL_ONE_PKT_EGR_DIS |
		     HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
		     HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt);

	/*
	 * VNIC contexts are allocated from user context pool.
	 * Release them back to user context pool.
	 *
	 * Reset context integrity checks to default.
	 * (writes to CSRs probably belong in chip.c)
	 */
	write_kctxt_csr(dd, uctxt->sc->hw_context, SEND_CTXT_CHECK_ENABLE,
			hfi1_pkt_default_send_ctxt_mask(dd, SC_USER));
	sc_disable(uctxt->sc);

	dd->send_contexts[uctxt->sc->sw_index].type = SC_USER;

	uctxt->event_flags = 0;

	hfi1_clear_tids(uctxt);
	hfi1_clear_ctxt_pkey(dd, uctxt);

	hfi1_stats.sps_ctxts--;

	hfi1_free_ctxt(uctxt);
}
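
/*
 * hfi1_vnic_setup/hfi1_vnic_cleanup - create and destroy the per-device IDR
 * that maps virtual ethernet switch (vesw) ids to their vport info.
 */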
void hfi1_vnic_setup(struct hfi1_devdata *dd)
{
	idr_init(&dd->vnic.vesw_idr);
}

void hfi1_vnic_cleanup(struct hfi1_devdata *dd)
{
	idr_destroy(&dd->vnic.vesw_idr);
}

#define SUM_GRP_COUNTERS(stats, qstats, x_grp) do {		\
		u64 *src64, *dst64;				\
		for (src64 = &qstats->x_grp.unicast,		\
			dst64 = &stats->x_grp.unicast;		\
			dst64 <= &stats->x_grp.s_1519_max;) {	\
			*dst64++ += *src64++;			\
		}						\
	} while (0)

/* hfi1_vnic_update_stats - update statistics */
static void hfi1_vnic_update_stats(struct hfi1_vnic_vport_info *vinfo,
				   struct opa_vnic_stats *stats)
{
	struct net_device *netdev = vinfo->netdev;
	u8 i;

	/* add tx counters on different queues */
	for (i = 0; i < vinfo->num_tx_q; i++) {
		struct opa_vnic_stats *qstats = &vinfo->stats[i];
		struct rtnl_link_stats64 *qnstats = &vinfo->stats[i].netstats;

		stats->netstats.tx_fifo_errors += qnstats->tx_fifo_errors;
		stats->netstats.tx_carrier_errors += qnstats->tx_carrier_errors;
		stats->tx_drop_state += qstats->tx_drop_state;
		stats->tx_dlid_zero += qstats->tx_dlid_zero;

		SUM_GRP_COUNTERS(stats, qstats, tx_grp);
		stats->netstats.tx_packets += qnstats->tx_packets;
		stats->netstats.tx_bytes += qnstats->tx_bytes;
	}

	/* add rx counters on different queues */
	for (i = 0; i < vinfo->num_rx_q; i++) {
		struct opa_vnic_stats *qstats = &vinfo->stats[i];
		struct rtnl_link_stats64 *qnstats = &vinfo->stats[i].netstats;

		stats->netstats.rx_fifo_errors += qnstats->rx_fifo_errors;
		stats->netstats.rx_nohandler += qnstats->rx_nohandler;
		stats->rx_drop_state += qstats->rx_drop_state;
		stats->rx_oversize += qstats->rx_oversize;
		stats->rx_runt += qstats->rx_runt;

		SUM_GRP_COUNTERS(stats, qstats, rx_grp);
		stats->netstats.rx_packets += qnstats->rx_packets;
		stats->netstats.rx_bytes += qnstats->rx_bytes;
	}

	stats->netstats.tx_errors = stats->netstats.tx_fifo_errors +
				    stats->netstats.tx_carrier_errors +
				    stats->tx_drop_state + stats->tx_dlid_zero;
	stats->netstats.tx_dropped = stats->netstats.tx_errors;

	stats->netstats.rx_errors = stats->netstats.rx_fifo_errors +
				    stats->netstats.rx_nohandler +
				    stats->rx_drop_state + stats->rx_oversize +
				    stats->rx_runt;
	stats->netstats.rx_dropped = stats->netstats.rx_errors;

	netdev->stats.tx_packets = stats->netstats.tx_packets;
	netdev->stats.tx_bytes = stats->netstats.tx_bytes;
	netdev->stats.tx_fifo_errors = stats->netstats.tx_fifo_errors;
	netdev->stats.tx_carrier_errors = stats->netstats.tx_carrier_errors;
	netdev->stats.tx_errors = stats->netstats.tx_errors;
	netdev->stats.tx_dropped = stats->netstats.tx_dropped;

	netdev->stats.rx_packets = stats->netstats.rx_packets;
	netdev->stats.rx_bytes = stats->netstats.rx_bytes;
	netdev->stats.rx_fifo_errors = stats->netstats.rx_fifo_errors;
	netdev->stats.multicast = stats->rx_grp.mcastbcast;
	netdev->stats.rx_length_errors = stats->rx_oversize + stats->rx_runt;
	netdev->stats.rx_errors = stats->netstats.rx_errors;
	netdev->stats.rx_dropped = stats->netstats.rx_dropped;
}

/* update_len_counters - update pkt's len histogram counters */
static inline void update_len_counters(struct opa_vnic_grp_stats *grp,
				       int len)
{
	/* account for 4 byte FCS */
	if (len >= 1515)
		grp->s_1519_max++;
	else if (len >= 1020)
		grp->s_1024_1518++;
	else if (len >= 508)
		grp->s_512_1023++;
	else if (len >= 252)
		grp->s_256_511++;
	else if (len >= 124)
		grp->s_128_255++;
	else if (len >= 61)
		grp->s_65_127++;
	else
		grp->s_64++;
}

/* hfi1_vnic_update_tx_counters - update transmit counters */
static void hfi1_vnic_update_tx_counters(struct hfi1_vnic_vport_info *vinfo,
					 u8 q_idx, struct sk_buff *skb, int err)
{
	struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
	struct opa_vnic_stats *stats = &vinfo->stats[q_idx];
	struct opa_vnic_grp_stats *tx_grp = &stats->tx_grp;
	u16 vlan_tci;

	stats->netstats.tx_packets++;
	stats->netstats.tx_bytes += skb->len + ETH_FCS_LEN;

	update_len_counters(tx_grp, skb->len);

	/* rest of the counts are for good packets only */
	if (unlikely(err))
		return;

	if (is_multicast_ether_addr(mac_hdr->h_dest))
		tx_grp->mcastbcast++;
	else
		tx_grp->unicast++;

	if (!__vlan_get_tag(skb, &vlan_tci))
		tx_grp->vlan++;
	else
		tx_grp->untagged++;
}

/* hfi1_vnic_update_rx_counters - update receive counters */
static void hfi1_vnic_update_rx_counters(struct hfi1_vnic_vport_info *vinfo,
					 u8 q_idx, struct sk_buff *skb, int err)
{
	struct ethhdr *mac_hdr = (struct ethhdr *)skb->data;
	struct opa_vnic_stats *stats = &vinfo->stats[q_idx];
	struct opa_vnic_grp_stats *rx_grp = &stats->rx_grp;
	u16 vlan_tci;

	stats->netstats.rx_packets++;
	stats->netstats.rx_bytes += skb->len + ETH_FCS_LEN;

	update_len_counters(rx_grp, skb->len);

	/* rest of the counts are for good packets only */
	if (unlikely(err))
		return;

	if (is_multicast_ether_addr(mac_hdr->h_dest))
		rx_grp->mcastbcast++;
	else
		rx_grp->unicast++;

	if (!__vlan_get_tag(skb, &vlan_tci))
		rx_grp->vlan++;
	else
		rx_grp->untagged++;
}

/* This function is overloaded for opa_vnic specific implementation */
static void hfi1_vnic_get_stats64(struct net_device *netdev,
				  struct rtnl_link_stats64 *stats)
{
	struct opa_vnic_stats *vstats = (struct opa_vnic_stats *)stats;
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);

	hfi1_vnic_update_stats(vinfo, vstats);
}
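
/*
 * create_bypass_pbc - build the PBC (per buffer control) word for a bypass
 * packet: no HCRC insertion, bypass ICRC insertion and credit return
 * requested, with the given VL and packet length in dwords encoded.
 */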
static u64 create_bypass_pbc(u32 vl, u32 dw_len)
{
	u64 pbc;

	pbc = ((u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT)
		| PBC_INSERT_BYPASS_ICRC | PBC_CREDIT_RETURN
		| PBC_PACKET_BYPASS
		| ((vl & PBC_VL_MASK) << PBC_VL_SHIFT)
		| (dw_len & PBC_LENGTH_DWS_MASK) << PBC_LENGTH_DWS_SHIFT;

	return pbc;
}

/* hfi1_vnic_maybe_stop_tx - stop tx queue if required */
static void hfi1_vnic_maybe_stop_tx(struct hfi1_vnic_vport_info *vinfo,
				    u8 q_idx)
{
	netif_stop_subqueue(vinfo->netdev, q_idx);
	if (!hfi1_vnic_sdma_write_avail(vinfo, q_idx))
		return;

	netif_start_subqueue(vinfo->netdev, q_idx);
}
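
/*
 * hfi1_netdev_start_xmit - VNIC transmit handler. Strips the skb metadata,
 * computes the tail padding (including room for the ICRC tail), builds the
 * PBC and hands the packet to the device's VNIC DMA send handler. Returns
 * NETDEV_TX_BUSY on -EBUSY so the stack retries, NETDEV_TX_OK otherwise.
 */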
static netdev_tx_t hfi1_netdev_start_xmit(struct sk_buff *skb,
					  struct net_device *netdev)
{
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
	u8 pad_len, q_idx = skb->queue_mapping;
	struct hfi1_devdata *dd = vinfo->dd;
	struct opa_vnic_skb_mdata *mdata;
	u32 pkt_len, total_len;
	int err = -EINVAL;
	u64 pbc;

	v_dbg("xmit: queue %d skb len %d\n", q_idx, skb->len);
	if (unlikely(!netif_oper_up(netdev))) {
		vinfo->stats[q_idx].tx_drop_state++;
		goto tx_finish;
	}

	/* take out meta data */
	mdata = (struct opa_vnic_skb_mdata *)skb->data;
	skb_pull(skb, sizeof(*mdata));
	if (unlikely(mdata->flags & OPA_VNIC_SKB_MDATA_ENCAP_ERR)) {
		vinfo->stats[q_idx].tx_dlid_zero++;
		goto tx_finish;
	}

	/* add tail padding (for 8 bytes size alignment) and icrc */
	pad_len = -(skb->len + OPA_VNIC_ICRC_TAIL_LEN) & 0x7;
	pad_len += OPA_VNIC_ICRC_TAIL_LEN;

	/*
	 * pkt_len is how much data we have to write, includes header and data.
	 * total_len is length of the packet in Dwords plus the PBC should not
	 * include the CRC.
	 */
	pkt_len = (skb->len + pad_len) >> 2;
	total_len = pkt_len + 2; /* PBC + packet */

	pbc = create_bypass_pbc(mdata->vl, total_len);

	skb_get(skb);
	v_dbg("pbc 0x%016llX len %d pad_len %d\n", pbc, skb->len, pad_len);
	err = dd->process_vnic_dma_send(dd, q_idx, vinfo, skb, pbc, pad_len);
	if (unlikely(err)) {
		if (err == -ENOMEM)
			vinfo->stats[q_idx].netstats.tx_fifo_errors++;
		else if (err != -EBUSY)
			vinfo->stats[q_idx].netstats.tx_carrier_errors++;
	}

	/* remove the header before updating tx counters */
	skb_pull(skb, OPA_VNIC_HDR_LEN);

	if (unlikely(err == -EBUSY)) {
		hfi1_vnic_maybe_stop_tx(vinfo, q_idx);
		dev_kfree_skb_any(skb);
		return NETDEV_TX_BUSY;
	}

tx_finish:
	/* update tx counters */
	hfi1_vnic_update_tx_counters(vinfo, q_idx, skb, err);
	dev_kfree_skb_any(skb);
	return NETDEV_TX_OK;
}
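
/*
 * hfi1_vnic_select_queue - map an skb to a tx queue by selecting an SDMA
 * engine from the entropy and VL carried in the skb metadata.
 */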
static u16 hfi1_vnic_select_queue(struct net_device *netdev,
				  struct sk_buff *skb,
				  void *accel_priv,
				  select_queue_fallback_t fallback)
{
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
	struct opa_vnic_skb_mdata *mdata;
	struct sdma_engine *sde;

	mdata = (struct opa_vnic_skb_mdata *)skb->data;
	sde = sdma_select_engine_vl(vinfo->dd, mdata->entropy, mdata->vl);
	return sde->this_idx;
}

/* hfi1_vnic_decap_skb - strip OPA header from the skb (ethernet) packet */
static inline int hfi1_vnic_decap_skb(struct hfi1_vnic_rx_queue *rxq,
				      struct sk_buff *skb)
{
	struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
	int max_len = vinfo->netdev->mtu + VLAN_ETH_HLEN;
	int rc = -EFAULT;

	skb_pull(skb, OPA_VNIC_HDR_LEN);

	/* Validate Packet length */
	if (unlikely(skb->len > max_len))
		vinfo->stats[rxq->idx].rx_oversize++;
	else if (unlikely(skb->len < ETH_ZLEN))
		vinfo->stats[rxq->idx].rx_runt++;
	else
		rc = 0;
	return rc;
}
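
/*
 * hfi1_vnic_get_skb - dequeue the next skb from the rx queue and trim off
 * the ICRC and tail padding (the low 3 bits of the last byte give the pad
 * length).
 */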
static inline struct sk_buff *hfi1_vnic_get_skb(struct hfi1_vnic_rx_queue *rxq)
{
	unsigned char *pad_info;
	struct sk_buff *skb;

	skb = skb_dequeue(&rxq->skbq);
	if (unlikely(!skb))
		return NULL;

	/* remove tail padding and icrc */
	pad_info = skb->data + skb->len - 1;
	skb_trim(skb, (skb->len - OPA_VNIC_ICRC_TAIL_LEN -
		       ((*pad_info) & 0x7)));

	return skb;
}

/* hfi1_vnic_handle_rx - handle skb receive */
static void hfi1_vnic_handle_rx(struct hfi1_vnic_rx_queue *rxq,
				int *work_done, int work_to_do)
{
	struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
	struct sk_buff *skb;
	int rc;

	while (1) {
		if (*work_done >= work_to_do)
			break;

		skb = hfi1_vnic_get_skb(rxq);
		if (unlikely(!skb))
			break;

		rc = hfi1_vnic_decap_skb(rxq, skb);
		/* update rx counters */
		hfi1_vnic_update_rx_counters(vinfo, rxq->idx, skb, rc);
		if (unlikely(rc)) {
			dev_kfree_skb_any(skb);
			continue;
		}

		skb_checksum_none_assert(skb);
		skb->protocol = eth_type_trans(skb, rxq->netdev);

		napi_gro_receive(&rxq->napi, skb);
		(*work_done)++;
	}
}

/* hfi1_vnic_napi - napi receive polling callback function */
static int hfi1_vnic_napi(struct napi_struct *napi, int budget)
{
	struct hfi1_vnic_rx_queue *rxq = container_of(napi,
					      struct hfi1_vnic_rx_queue, napi);
	struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
	int work_done = 0;

	v_dbg("napi %d budget %d\n", rxq->idx, budget);
	hfi1_vnic_handle_rx(rxq, &work_done, budget);

	v_dbg("napi %d work_done %d\n", rxq->idx, work_done);
	if (work_done < budget)
		napi_complete(napi);

	return work_done;
}
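
/*
 * hfi1_vnic_bypass_rcv - receive handler for VNIC bypass (16B) packets.
 * Looks up the vport by the vesw id carried in the packet, copies the
 * packet into an skb, queues it on the context's rx queue and schedules
 * NAPI. Packets for an unknown vesw id or a down interface are counted
 * and dropped.
 */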
void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet)
{
	struct hfi1_devdata *dd = packet->rcd->dd;
	struct hfi1_vnic_vport_info *vinfo = NULL;
	struct hfi1_vnic_rx_queue *rxq;
	struct sk_buff *skb;
	int l4_type, vesw_id = -1;
	u8 q_idx;

	l4_type = hfi1_16B_get_l4(packet->ebuf);
	if (likely(l4_type == OPA_16B_L4_ETHR)) {
		vesw_id = HFI1_VNIC_GET_VESWID(packet->ebuf);
		vinfo = idr_find(&dd->vnic.vesw_idr, vesw_id);

		/*
		 * In case of invalid vesw id, count the error on
		 * the first available vport.
		 */
		if (unlikely(!vinfo)) {
			struct hfi1_vnic_vport_info *vinfo_tmp;
			int id_tmp = 0;

			vinfo_tmp = idr_get_next(&dd->vnic.vesw_idr, &id_tmp);
			if (vinfo_tmp) {
				spin_lock(&vport_cntr_lock);
				vinfo_tmp->stats[0].netstats.rx_nohandler++;
				spin_unlock(&vport_cntr_lock);
			}
		}
	}

	if (unlikely(!vinfo)) {
		dd_dev_warn(dd, "vnic rcv err: l4 %d vesw id %d ctx %d\n",
			    l4_type, vesw_id, packet->rcd->ctxt);
		return;
	}

	q_idx = packet->rcd->vnic_q_idx;
	rxq = &vinfo->rxq[q_idx];
	if (unlikely(!netif_oper_up(vinfo->netdev))) {
		vinfo->stats[q_idx].rx_drop_state++;
		skb_queue_purge(&rxq->skbq);
		return;
	}

	if (unlikely(skb_queue_len(&rxq->skbq) > HFI1_VNIC_RCV_Q_SIZE)) {
		vinfo->stats[q_idx].netstats.rx_fifo_errors++;
		return;
	}

	skb = netdev_alloc_skb(vinfo->netdev, packet->tlen);
	if (unlikely(!skb)) {
		vinfo->stats[q_idx].netstats.rx_fifo_errors++;
		return;
	}

	memcpy(skb->data, packet->ebuf, packet->tlen);
	skb_put(skb, packet->tlen);
	skb_queue_tail(&rxq->skbq, skb);

	if (napi_schedule_prep(&rxq->napi)) {
		v_dbg("napi %d scheduling\n", q_idx);
		__napi_schedule(&rxq->napi);
	}
}
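
/*
 * hfi1_vnic_up - bring the vport up: publish it in the vesw IDR, enable
 * NAPI on all rx queues and start the tx queues.
 */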
static int hfi1_vnic_up(struct hfi1_vnic_vport_info *vinfo)
{
	struct hfi1_devdata *dd = vinfo->dd;
	struct net_device *netdev = vinfo->netdev;
	int i, rc;

	/* ensure virtual eth switch id is valid */
	if (!vinfo->vesw_id)
		return -EINVAL;

	rc = idr_alloc(&dd->vnic.vesw_idr, vinfo, vinfo->vesw_id,
		       vinfo->vesw_id + 1, GFP_NOWAIT);
	if (rc < 0)
		return rc;

	for (i = 0; i < vinfo->num_rx_q; i++) {
		struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];

		skb_queue_head_init(&rxq->skbq);
		napi_enable(&rxq->napi);
	}

	netif_carrier_on(netdev);
	netif_tx_start_all_queues(netdev);
	set_bit(HFI1_VNIC_UP, &vinfo->flags);

	return 0;
}
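
/*
 * hfi1_vnic_down - bring the vport down: stop tx, unpublish the vesw id,
 * synchronize with in-flight interrupts, then disable NAPI and purge any
 * queued rx skbs.
 */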
static void hfi1_vnic_down(struct hfi1_vnic_vport_info *vinfo)
{
	struct hfi1_devdata *dd = vinfo->dd;
	u8 i;

	clear_bit(HFI1_VNIC_UP, &vinfo->flags);
	netif_carrier_off(vinfo->netdev);
	netif_tx_disable(vinfo->netdev);
	idr_remove(&dd->vnic.vesw_idr, vinfo->vesw_id);

	/* ensure irqs see the change */
	hfi1_vnic_synchronize_irq(dd);

	/* remove unread skbs */
	for (i = 0; i < vinfo->num_rx_q; i++) {
		struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];

		napi_disable(&rxq->napi);
		skb_queue_purge(&rxq->skbq);
	}
}

static int hfi1_netdev_open(struct net_device *netdev)
{
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
	int rc;

	mutex_lock(&vinfo->lock);
	rc = hfi1_vnic_up(vinfo);
	mutex_unlock(&vinfo->lock);
	return rc;
}

static int hfi1_netdev_close(struct net_device *netdev)
{
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);

	mutex_lock(&vinfo->lock);
	if (test_bit(HFI1_VNIC_UP, &vinfo->flags))
		hfi1_vnic_down(vinfo);
	mutex_unlock(&vinfo->lock);
	return 0;
}
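
/* hfi1_vnic_allot_ctxt - allocate and set up one VNIC receive context */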
static int hfi1_vnic_allot_ctxt(struct hfi1_devdata *dd,
				struct hfi1_ctxtdata **vnic_ctxt)
{
	int rc;

	rc = allocate_vnic_ctxt(dd, vnic_ctxt);
	if (rc) {
		dd_dev_err(dd, "vnic ctxt alloc failed %d\n", rc);
		return rc;
	}

	rc = setup_vnic_ctxt(dd, *vnic_ctxt);
	if (rc) {
		dd_dev_err(dd, "vnic ctxt setup failed %d\n", rc);
		deallocate_vnic_ctxt(dd, *vnic_ctxt);
		*vnic_ctxt = NULL;
	}

	return rc;
}
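
/*
 * hfi1_vnic_init - per-vport init. For the first vport, initialize the tx
 * request cache and the MSI-X index. Allocate any additional receive
 * contexts needed to cover this vport's rx queues and, if new contexts
 * were added, program the RSM rule that steers VNIC packets to them.
 */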
static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo)
{
	struct hfi1_devdata *dd = vinfo->dd;
	int i, rc = 0;

	mutex_lock(&hfi1_mutex);
	if (!dd->vnic.num_vports) {
		rc = hfi1_vnic_txreq_init(dd);
		if (rc)
			goto txreq_fail;

		dd->vnic.msix_idx = dd->first_dyn_msix_idx;
	}

	for (i = dd->vnic.num_ctxt; i < vinfo->num_rx_q; i++) {
		rc = hfi1_vnic_allot_ctxt(dd, &dd->vnic.ctxt[i]);
		if (rc)
			break;
		hfi1_rcd_get(dd->vnic.ctxt[i]);
		dd->vnic.ctxt[i]->vnic_q_idx = i;
	}

	if (i < vinfo->num_rx_q) {
		/*
		 * If the required number of contexts could not all be
		 * allocated, release the ones that were allocated here.
		 */
		while (i-- > dd->vnic.num_ctxt) {
			deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]);
			hfi1_rcd_put(dd->vnic.ctxt[i]);
			dd->vnic.ctxt[i] = NULL;
		}
		goto alloc_fail;
	}

	if (dd->vnic.num_ctxt != i) {
		dd->vnic.num_ctxt = i;
		hfi1_init_vnic_rsm(dd);
	}

	dd->vnic.num_vports++;
	hfi1_vnic_sdma_init(vinfo);

alloc_fail:
	if (!dd->vnic.num_vports)
		hfi1_vnic_txreq_deinit(dd);
txreq_fail:
	mutex_unlock(&hfi1_mutex);
	return rc;
}
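
/*
 * hfi1_vnic_deinit - per-vport teardown. When the last vport goes away,
 * free all VNIC receive contexts, remove the RSM rule and release the tx
 * request cache.
 */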
static void hfi1_vnic_deinit(struct hfi1_vnic_vport_info *vinfo)
{
	struct hfi1_devdata *dd = vinfo->dd;
	int i;

	mutex_lock(&hfi1_mutex);
	if (--dd->vnic.num_vports == 0) {
		for (i = 0; i < dd->vnic.num_ctxt; i++) {
			deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]);
			hfi1_rcd_put(dd->vnic.ctxt[i]);
			dd->vnic.ctxt[i] = NULL;
		}
		hfi1_deinit_vnic_rsm(dd);
		dd->vnic.num_ctxt = 0;
		hfi1_vnic_txreq_deinit(dd);
	}
	mutex_unlock(&hfi1_mutex);
}
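
/* hfi1_vnic_set_vesw_id - assign a new virtual ethernet switch id */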
static void hfi1_vnic_set_vesw_id(struct net_device *netdev, int id)
{
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
	bool reopen = false;

	/*
	 * If vesw_id is being changed, and if the vnic port is up,
	 * reset the vnic port to ensure the new vesw_id gets picked up
	 */
	if (id != vinfo->vesw_id) {
		mutex_lock(&vinfo->lock);
		if (test_bit(HFI1_VNIC_UP, &vinfo->flags)) {
			hfi1_vnic_down(vinfo);
			reopen = true;
		}

		vinfo->vesw_id = id;
		if (reopen)
			hfi1_vnic_up(vinfo);

		mutex_unlock(&vinfo->lock);
	}
}

/* netdev ops */
static const struct net_device_ops hfi1_netdev_ops = {
	.ndo_open = hfi1_netdev_open,
	.ndo_stop = hfi1_netdev_close,
	.ndo_start_xmit = hfi1_netdev_start_xmit,
	.ndo_select_queue = hfi1_vnic_select_queue,
	.ndo_get_stats64 = hfi1_vnic_get_stats64,
};

static void hfi1_vnic_free_rn(struct net_device *netdev)
{
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);

	hfi1_vnic_deinit(vinfo);
	mutex_destroy(&vinfo->lock);
	free_netdev(netdev);
}
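
/*
 * hfi1_vnic_alloc_rn - rdma_netdev allocation callback. Allocates a
 * multi-queue net_device (one tx queue per SDMA engine, one rx queue per
 * VNIC context), initializes the rx queues and NAPI, sets the netdev ops
 * and runs the per-vport init.
 */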
struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device,
				      u8 port_num,
				      enum rdma_netdev_t type,
				      const char *name,
				      unsigned char name_assign_type,
				      void (*setup)(struct net_device *))
{
	struct hfi1_devdata *dd = dd_from_ibdev(device);
	struct hfi1_vnic_vport_info *vinfo;
	struct net_device *netdev;
	struct rdma_netdev *rn;
	int i, size, rc;

	if (!port_num || (port_num > dd->num_pports))
		return ERR_PTR(-EINVAL);

	if (type != RDMA_NETDEV_OPA_VNIC)
		return ERR_PTR(-EOPNOTSUPP);

	size = sizeof(struct opa_vnic_rdma_netdev) + sizeof(*vinfo);
	netdev = alloc_netdev_mqs(size, name, name_assign_type, setup,
				  dd->chip_sdma_engines, HFI1_NUM_VNIC_CTXT);
	if (!netdev)
		return ERR_PTR(-ENOMEM);

	rn = netdev_priv(netdev);
	vinfo = opa_vnic_dev_priv(netdev);
	vinfo->dd = dd;
	vinfo->num_tx_q = dd->chip_sdma_engines;
	vinfo->num_rx_q = HFI1_NUM_VNIC_CTXT;
	vinfo->netdev = netdev;
	rn->free_rdma_netdev = hfi1_vnic_free_rn;
	rn->set_id = hfi1_vnic_set_vesw_id;

	netdev->features = NETIF_F_HIGHDMA | NETIF_F_SG;
	netdev->hw_features = netdev->features;
	netdev->vlan_features = netdev->features;
	netdev->watchdog_timeo = msecs_to_jiffies(HFI_TX_TIMEOUT_MS);
	netdev->netdev_ops = &hfi1_netdev_ops;
	mutex_init(&vinfo->lock);

	for (i = 0; i < vinfo->num_rx_q; i++) {
		struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];

		rxq->idx = i;
		rxq->vinfo = vinfo;
		rxq->netdev = netdev;
		netif_napi_add(netdev, &rxq->napi, hfi1_vnic_napi, 64);
	}

	rc = hfi1_vnic_init(vinfo);
	if (rc)
		goto init_fail;

	return netdev;

init_fail:
	mutex_destroy(&vinfo->lock);
	free_netdev(netdev);
	return ERR_PTR(rc);
}