vnic_main.c

/*
 * Copyright(c) 2017 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license. When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

/*
 * This file contains HFI1 support for VNIC functionality
 */

#include <linux/io.h>
#include <linux/if_vlan.h>

#include "vnic.h"

#define HFI_TX_TIMEOUT_MS 1000

#define HFI1_VNIC_RCV_Q_SIZE 1024

#define HFI1_VNIC_UP 0

static DEFINE_SPINLOCK(vport_cntr_lock);

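/* setup_vnic_ctxt - allocate rcvhdr queue and eager buffers, enable the context for VNIC rx */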
static int setup_vnic_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt)
{
        unsigned int rcvctrl_ops = 0;
        int ret;

        hfi1_init_ctxt(uctxt->sc);

        uctxt->do_interrupt = &handle_receive_interrupt;

        /* Now allocate the RcvHdr queue and eager buffers. */
        ret = hfi1_create_rcvhdrq(dd, uctxt);
        if (ret)
                goto done;

        ret = hfi1_setup_eagerbufs(uctxt);
        if (ret)
                goto done;

        if (uctxt->rcvhdrtail_kvaddr)
                clear_rcvhdrtail(uctxt);

        rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB;
        rcvctrl_ops |= HFI1_RCVCTRL_INTRAVAIL_ENB;

        if (!HFI1_CAP_KGET_MASK(uctxt->flags, MULTI_PKT_EGR))
                rcvctrl_ops |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB;
        if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_EGR_FULL))
                rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB;
        if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_RHQ_FULL))
                rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB;
        if (HFI1_CAP_KGET_MASK(uctxt->flags, DMA_RTAIL))
                rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB;

        hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt->ctxt);

        uctxt->is_vnic = true;
done:
        return ret;
}

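/* allocate_vnic_ctxt - reserve a free dynamic receive context and allocate its PIO send context */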
static int allocate_vnic_ctxt(struct hfi1_devdata *dd,
                              struct hfi1_ctxtdata **vnic_ctxt)
{
        struct hfi1_ctxtdata *uctxt;
        unsigned int ctxt;
        int ret;

        if (dd->flags & HFI1_FROZEN)
                return -EIO;

        for (ctxt = dd->first_dyn_alloc_ctxt;
             ctxt < dd->num_rcv_contexts; ctxt++)
                if (!dd->rcd[ctxt])
                        break;

        if (ctxt == dd->num_rcv_contexts)
                return -EBUSY;

        uctxt = hfi1_create_ctxtdata(dd->pport, ctxt, dd->node);
        if (!uctxt) {
                dd_dev_err(dd, "Unable to create ctxtdata, failing open\n");
                return -ENOMEM;
        }

        uctxt->flags = HFI1_CAP_KGET(MULTI_PKT_EGR) |
                        HFI1_CAP_KGET(NODROP_RHQ_FULL) |
                        HFI1_CAP_KGET(NODROP_EGR_FULL) |
                        HFI1_CAP_KGET(DMA_RTAIL);
        uctxt->seq_cnt = 1;

        /* Allocate and enable a PIO send context */
        uctxt->sc = sc_alloc(dd, SC_VNIC, uctxt->rcvhdrqentsize,
                             uctxt->numa_id);

        ret = uctxt->sc ? 0 : -ENOMEM;
        if (ret)
                goto bail;

        dd_dev_dbg(dd, "allocated vnic send context %u(%u)\n",
                   uctxt->sc->sw_index, uctxt->sc->hw_context);
        ret = sc_enable(uctxt->sc);
        if (ret)
                goto bail;

        if (dd->num_msix_entries)
                hfi1_set_vnic_msix_info(uctxt);

        hfi1_stats.sps_ctxts++;
        dd_dev_dbg(dd, "created vnic context %d\n", uctxt->ctxt);
        *vnic_ctxt = uctxt;

        return ret;
bail:
        /*
         * hfi1_free_ctxtdata() also releases send_context
         * structure if uctxt->sc is not null
         */
        dd->rcd[uctxt->ctxt] = NULL;
        hfi1_free_ctxtdata(dd, uctxt);
        dd_dev_dbg(dd, "vnic allocation failed. rc %d\n", ret);
        return ret;
}

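/* deallocate_vnic_ctxt - disable a VNIC context and release it back to the user context pool */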
static void deallocate_vnic_ctxt(struct hfi1_devdata *dd,
                                 struct hfi1_ctxtdata *uctxt)
{
        unsigned long flags;

        dd_dev_dbg(dd, "closing vnic context %d\n", uctxt->ctxt);
        flush_wc();

        if (dd->num_msix_entries)
                hfi1_reset_vnic_msix_info(uctxt);

        spin_lock_irqsave(&dd->uctxt_lock, flags);
        /*
         * Disable receive context and interrupt available, reset all
         * RcvCtxtCtrl bits to default values.
         */
        hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS |
                     HFI1_RCVCTRL_TIDFLOW_DIS |
                     HFI1_RCVCTRL_INTRAVAIL_DIS |
                     HFI1_RCVCTRL_ONE_PKT_EGR_DIS |
                     HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
                     HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt->ctxt);
        /*
         * VNIC contexts are allocated from user context pool.
         * Release them back to user context pool.
         *
         * Reset context integrity checks to default.
         * (writes to CSRs probably belong in chip.c)
         */
        write_kctxt_csr(dd, uctxt->sc->hw_context, SEND_CTXT_CHECK_ENABLE,
                        hfi1_pkt_default_send_ctxt_mask(dd, SC_USER));
        sc_disable(uctxt->sc);

        dd->send_contexts[uctxt->sc->sw_index].type = SC_USER;
        spin_unlock_irqrestore(&dd->uctxt_lock, flags);

        dd->rcd[uctxt->ctxt] = NULL;
        uctxt->event_flags = 0;

        hfi1_clear_tids(uctxt);
        hfi1_clear_ctxt_pkey(dd, uctxt);

        hfi1_stats.sps_ctxts--;
        hfi1_free_ctxtdata(dd, uctxt);
}

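/* hfi1_vnic_setup - initialize the per-device virtual eth switch (vesw) id lookup table */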
void hfi1_vnic_setup(struct hfi1_devdata *dd)
{
        idr_init(&dd->vnic.vesw_idr);
}

void hfi1_vnic_cleanup(struct hfi1_devdata *dd)
{
        idr_destroy(&dd->vnic.vesw_idr);
}

#define SUM_GRP_COUNTERS(stats, qstats, x_grp) do {            \
                u64 *src64, *dst64;                            \
                for (src64 = &qstats->x_grp.unicast,           \
                        dst64 = &stats->x_grp.unicast;         \
                        dst64 <= &stats->x_grp.s_1519_max;) {  \
                        *dst64++ += *src64++;                  \
                }                                              \
        } while (0)

/* hfi1_vnic_update_stats - update statistics */
static void hfi1_vnic_update_stats(struct hfi1_vnic_vport_info *vinfo,
                                   struct opa_vnic_stats *stats)
{
        struct net_device *netdev = vinfo->netdev;
        u8 i;

        /* add tx counters on different queues */
        for (i = 0; i < vinfo->num_tx_q; i++) {
                struct opa_vnic_stats *qstats = &vinfo->stats[i];
                struct rtnl_link_stats64 *qnstats = &vinfo->stats[i].netstats;

                stats->netstats.tx_fifo_errors += qnstats->tx_fifo_errors;
                stats->netstats.tx_carrier_errors += qnstats->tx_carrier_errors;
                stats->tx_drop_state += qstats->tx_drop_state;
                stats->tx_dlid_zero += qstats->tx_dlid_zero;

                SUM_GRP_COUNTERS(stats, qstats, tx_grp);
                stats->netstats.tx_packets += qnstats->tx_packets;
                stats->netstats.tx_bytes += qnstats->tx_bytes;
        }

        /* add rx counters on different queues */
        for (i = 0; i < vinfo->num_rx_q; i++) {
                struct opa_vnic_stats *qstats = &vinfo->stats[i];
                struct rtnl_link_stats64 *qnstats = &vinfo->stats[i].netstats;

                stats->netstats.rx_fifo_errors += qnstats->rx_fifo_errors;
                stats->netstats.rx_nohandler += qnstats->rx_nohandler;
                stats->rx_drop_state += qstats->rx_drop_state;
                stats->rx_oversize += qstats->rx_oversize;
                stats->rx_runt += qstats->rx_runt;

                SUM_GRP_COUNTERS(stats, qstats, rx_grp);
                stats->netstats.rx_packets += qnstats->rx_packets;
                stats->netstats.rx_bytes += qnstats->rx_bytes;
        }

        stats->netstats.tx_errors = stats->netstats.tx_fifo_errors +
                                    stats->netstats.tx_carrier_errors +
                                    stats->tx_drop_state + stats->tx_dlid_zero;
        stats->netstats.tx_dropped = stats->netstats.tx_errors;

        stats->netstats.rx_errors = stats->netstats.rx_fifo_errors +
                                    stats->netstats.rx_nohandler +
                                    stats->rx_drop_state + stats->rx_oversize +
                                    stats->rx_runt;
        stats->netstats.rx_dropped = stats->netstats.rx_errors;

        netdev->stats.tx_packets = stats->netstats.tx_packets;
        netdev->stats.tx_bytes = stats->netstats.tx_bytes;
        netdev->stats.tx_fifo_errors = stats->netstats.tx_fifo_errors;
        netdev->stats.tx_carrier_errors = stats->netstats.tx_carrier_errors;
        netdev->stats.tx_errors = stats->netstats.tx_errors;
        netdev->stats.tx_dropped = stats->netstats.tx_dropped;

        netdev->stats.rx_packets = stats->netstats.rx_packets;
        netdev->stats.rx_bytes = stats->netstats.rx_bytes;
        netdev->stats.rx_fifo_errors = stats->netstats.rx_fifo_errors;
        netdev->stats.multicast = stats->rx_grp.mcastbcast;
        netdev->stats.rx_length_errors = stats->rx_oversize + stats->rx_runt;
        netdev->stats.rx_errors = stats->netstats.rx_errors;
        netdev->stats.rx_dropped = stats->netstats.rx_dropped;
}

/* update_len_counters - update pkt's len histogram counters */
static inline void update_len_counters(struct opa_vnic_grp_stats *grp,
                                       int len)
{
        /* account for 4 byte FCS */
        if (len >= 1515)
                grp->s_1519_max++;
        else if (len >= 1020)
                grp->s_1024_1518++;
        else if (len >= 508)
                grp->s_512_1023++;
        else if (len >= 252)
                grp->s_256_511++;
        else if (len >= 124)
                grp->s_128_255++;
        else if (len >= 61)
                grp->s_65_127++;
        else
                grp->s_64++;
}

/* hfi1_vnic_update_tx_counters - update transmit counters */
static void hfi1_vnic_update_tx_counters(struct hfi1_vnic_vport_info *vinfo,
                                         u8 q_idx, struct sk_buff *skb, int err)
{
        struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
        struct opa_vnic_stats *stats = &vinfo->stats[q_idx];
        struct opa_vnic_grp_stats *tx_grp = &stats->tx_grp;
        u16 vlan_tci;

        stats->netstats.tx_packets++;
        stats->netstats.tx_bytes += skb->len + ETH_FCS_LEN;

        update_len_counters(tx_grp, skb->len);

        /* rest of the counts are for good packets only */
        if (unlikely(err))
                return;

        if (is_multicast_ether_addr(mac_hdr->h_dest))
                tx_grp->mcastbcast++;
        else
                tx_grp->unicast++;

        if (!__vlan_get_tag(skb, &vlan_tci))
                tx_grp->vlan++;
        else
                tx_grp->untagged++;
}

/* hfi1_vnic_update_rx_counters - update receive counters */
static void hfi1_vnic_update_rx_counters(struct hfi1_vnic_vport_info *vinfo,
                                         u8 q_idx, struct sk_buff *skb, int err)
{
        struct ethhdr *mac_hdr = (struct ethhdr *)skb->data;
        struct opa_vnic_stats *stats = &vinfo->stats[q_idx];
        struct opa_vnic_grp_stats *rx_grp = &stats->rx_grp;
        u16 vlan_tci;

        stats->netstats.rx_packets++;
        stats->netstats.rx_bytes += skb->len + ETH_FCS_LEN;

        update_len_counters(rx_grp, skb->len);

        /* rest of the counts are for good packets only */
        if (unlikely(err))
                return;

        if (is_multicast_ether_addr(mac_hdr->h_dest))
                rx_grp->mcastbcast++;
        else
                rx_grp->unicast++;

        if (!__vlan_get_tag(skb, &vlan_tci))
                rx_grp->vlan++;
        else
                rx_grp->untagged++;
}

/* This function is overloaded for opa_vnic specific implementation */
static void hfi1_vnic_get_stats64(struct net_device *netdev,
                                  struct rtnl_link_stats64 *stats)
{
        struct opa_vnic_stats *vstats = (struct opa_vnic_stats *)stats;
        struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);

        hfi1_vnic_update_stats(vinfo, vstats);
}

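/* create_bypass_pbc - build the PBC for a bypass packet of dw_len dwords on virtual lane vl */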
static u64 create_bypass_pbc(u32 vl, u32 dw_len)
{
        u64 pbc;

        pbc = ((u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT)
                | PBC_INSERT_BYPASS_ICRC | PBC_CREDIT_RETURN
                | PBC_PACKET_BYPASS
                | ((vl & PBC_VL_MASK) << PBC_VL_SHIFT)
                | (dw_len & PBC_LENGTH_DWS_MASK) << PBC_LENGTH_DWS_SHIFT;

        return pbc;
}

/* hfi1_vnic_maybe_stop_tx - stop tx queue if required */
static void hfi1_vnic_maybe_stop_tx(struct hfi1_vnic_vport_info *vinfo,
                                    u8 q_idx)
{
        netif_stop_subqueue(vinfo->netdev, q_idx);
        if (!hfi1_vnic_sdma_write_avail(vinfo, q_idx))
                return;

        netif_start_subqueue(vinfo->netdev, q_idx);
}

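/* hfi1_netdev_start_xmit - strip the skb metadata, build the PBC and queue the packet for DMA send */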
static netdev_tx_t hfi1_netdev_start_xmit(struct sk_buff *skb,
                                          struct net_device *netdev)
{
        struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
        u8 pad_len, q_idx = skb->queue_mapping;
        struct hfi1_devdata *dd = vinfo->dd;
        struct opa_vnic_skb_mdata *mdata;
        u32 pkt_len, total_len;
        int err = -EINVAL;
        u64 pbc;

        v_dbg("xmit: queue %d skb len %d\n", q_idx, skb->len);
        if (unlikely(!netif_oper_up(netdev))) {
                vinfo->stats[q_idx].tx_drop_state++;
                goto tx_finish;
        }

        /* take out meta data */
        mdata = (struct opa_vnic_skb_mdata *)skb->data;
        skb_pull(skb, sizeof(*mdata));
        if (unlikely(mdata->flags & OPA_VNIC_SKB_MDATA_ENCAP_ERR)) {
                vinfo->stats[q_idx].tx_dlid_zero++;
                goto tx_finish;
        }

        /* add tail padding (for 8 byte size alignment) and icrc */
        pad_len = -(skb->len + OPA_VNIC_ICRC_TAIL_LEN) & 0x7;
        pad_len += OPA_VNIC_ICRC_TAIL_LEN;

        /*
         * pkt_len is how much data we have to write, including header and
         * data. total_len is the length of the packet in Dwords plus the
         * PBC; it should not include the CRC.
         */
        pkt_len = (skb->len + pad_len) >> 2;
        total_len = pkt_len + 2; /* PBC + packet */

        pbc = create_bypass_pbc(mdata->vl, total_len);

        skb_get(skb);
        v_dbg("pbc 0x%016llX len %d pad_len %d\n", pbc, skb->len, pad_len);
        err = dd->process_vnic_dma_send(dd, q_idx, vinfo, skb, pbc, pad_len);
        if (unlikely(err)) {
                if (err == -ENOMEM)
                        vinfo->stats[q_idx].netstats.tx_fifo_errors++;
                else if (err != -EBUSY)
                        vinfo->stats[q_idx].netstats.tx_carrier_errors++;
        }

        /* remove the header before updating tx counters */
        skb_pull(skb, OPA_VNIC_HDR_LEN);

        if (unlikely(err == -EBUSY)) {
                hfi1_vnic_maybe_stop_tx(vinfo, q_idx);
                dev_kfree_skb_any(skb);
                return NETDEV_TX_BUSY;
        }

tx_finish:
        /* update tx counters */
        hfi1_vnic_update_tx_counters(vinfo, q_idx, skb, err);
        dev_kfree_skb_any(skb);
        return NETDEV_TX_OK;
}

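/* hfi1_vnic_select_queue - map the skb to the tx queue of the SDMA engine chosen for its VL and entropy */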
static u16 hfi1_vnic_select_queue(struct net_device *netdev,
                                  struct sk_buff *skb,
                                  void *accel_priv,
                                  select_queue_fallback_t fallback)
{
        struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
        struct opa_vnic_skb_mdata *mdata;
        struct sdma_engine *sde;

        mdata = (struct opa_vnic_skb_mdata *)skb->data;
        sde = sdma_select_engine_vl(vinfo->dd, mdata->entropy, mdata->vl);
        return sde->this_idx;
}

/* hfi1_vnic_decap_skb - strip OPA header from the skb (ethernet) packet */
static inline int hfi1_vnic_decap_skb(struct hfi1_vnic_rx_queue *rxq,
                                      struct sk_buff *skb)
{
        struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
        int max_len = vinfo->netdev->mtu + VLAN_ETH_HLEN;
        int rc = -EFAULT;

        skb_pull(skb, OPA_VNIC_HDR_LEN);

        /* Validate Packet length */
        if (unlikely(skb->len > max_len))
                vinfo->stats[rxq->idx].rx_oversize++;
        else if (unlikely(skb->len < ETH_ZLEN))
                vinfo->stats[rxq->idx].rx_runt++;
        else
                rc = 0;
        return rc;
}

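/* hfi1_vnic_get_skb - dequeue an skb from the rx queue and trim its tail padding and icrc */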
static inline struct sk_buff *hfi1_vnic_get_skb(struct hfi1_vnic_rx_queue *rxq)
{
        unsigned char *pad_info;
        struct sk_buff *skb;

        skb = skb_dequeue(&rxq->skbq);
        if (unlikely(!skb))
                return NULL;

        /* remove tail padding and icrc */
        pad_info = skb->data + skb->len - 1;
        skb_trim(skb, (skb->len - OPA_VNIC_ICRC_TAIL_LEN -
                       ((*pad_info) & 0x7)));

        return skb;
}

/* hfi1_vnic_handle_rx - handle skb receive */
static void hfi1_vnic_handle_rx(struct hfi1_vnic_rx_queue *rxq,
                                int *work_done, int work_to_do)
{
        struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
        struct sk_buff *skb;
        int rc;

        while (1) {
                if (*work_done >= work_to_do)
                        break;

                skb = hfi1_vnic_get_skb(rxq);
                if (unlikely(!skb))
                        break;

                rc = hfi1_vnic_decap_skb(rxq, skb);
                /* update rx counters */
                hfi1_vnic_update_rx_counters(vinfo, rxq->idx, skb, rc);
                if (unlikely(rc)) {
                        dev_kfree_skb_any(skb);
                        continue;
                }

                skb_checksum_none_assert(skb);
                skb->protocol = eth_type_trans(skb, rxq->netdev);

                napi_gro_receive(&rxq->napi, skb);
                (*work_done)++;
        }
}

/* hfi1_vnic_napi - napi receive polling callback function */
static int hfi1_vnic_napi(struct napi_struct *napi, int budget)
{
        struct hfi1_vnic_rx_queue *rxq = container_of(napi,
                                        struct hfi1_vnic_rx_queue, napi);
        struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
        int work_done = 0;

        v_dbg("napi %d budget %d\n", rxq->idx, budget);
        hfi1_vnic_handle_rx(rxq, &work_done, budget);

        v_dbg("napi %d work_done %d\n", rxq->idx, work_done);
        if (work_done < budget)
                napi_complete(napi);

        return work_done;
}

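/* hfi1_vnic_bypass_rcv - look up the vport by vesw id, queue the packet and schedule NAPI */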
void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet)
{
        struct hfi1_devdata *dd = packet->rcd->dd;
        struct hfi1_vnic_vport_info *vinfo = NULL;
        struct hfi1_vnic_rx_queue *rxq;
        struct sk_buff *skb;
        int l4_type, vesw_id = -1;
        u8 q_idx;

        l4_type = HFI1_GET_L4_TYPE(packet->ebuf);
        if (likely(l4_type == OPA_VNIC_L4_ETHR)) {
                vesw_id = HFI1_VNIC_GET_VESWID(packet->ebuf);
                vinfo = idr_find(&dd->vnic.vesw_idr, vesw_id);

                /*
                 * In case of invalid vesw id, count the error on
                 * the first available vport.
                 */
                if (unlikely(!vinfo)) {
                        struct hfi1_vnic_vport_info *vinfo_tmp;
                        int id_tmp = 0;

                        vinfo_tmp = idr_get_next(&dd->vnic.vesw_idr, &id_tmp);
                        if (vinfo_tmp) {
                                spin_lock(&vport_cntr_lock);
                                vinfo_tmp->stats[0].netstats.rx_nohandler++;
                                spin_unlock(&vport_cntr_lock);
                        }
                }
        }

        if (unlikely(!vinfo)) {
                dd_dev_warn(dd, "vnic rcv err: l4 %d vesw id %d ctx %d\n",
                            l4_type, vesw_id, packet->rcd->ctxt);
                return;
        }

        q_idx = packet->rcd->vnic_q_idx;
        rxq = &vinfo->rxq[q_idx];
        if (unlikely(!netif_oper_up(vinfo->netdev))) {
                vinfo->stats[q_idx].rx_drop_state++;
                skb_queue_purge(&rxq->skbq);
                return;
        }

        if (unlikely(skb_queue_len(&rxq->skbq) > HFI1_VNIC_RCV_Q_SIZE)) {
                vinfo->stats[q_idx].netstats.rx_fifo_errors++;
                return;
        }

        skb = netdev_alloc_skb(vinfo->netdev, packet->tlen);
        if (unlikely(!skb)) {
                vinfo->stats[q_idx].netstats.rx_fifo_errors++;
                return;
        }

        memcpy(skb->data, packet->ebuf, packet->tlen);
        skb_put(skb, packet->tlen);
        skb_queue_tail(&rxq->skbq, skb);

        if (napi_schedule_prep(&rxq->napi)) {
                v_dbg("napi %d scheduling\n", q_idx);
                __napi_schedule(&rxq->napi);
        }
}

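/* hfi1_vnic_up - register the vesw id, enable NAPI and start the tx queues */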
static int hfi1_vnic_up(struct hfi1_vnic_vport_info *vinfo)
{
        struct hfi1_devdata *dd = vinfo->dd;
        struct net_device *netdev = vinfo->netdev;
        int i, rc;

        /* ensure virtual eth switch id is valid */
        if (!vinfo->vesw_id)
                return -EINVAL;

        rc = idr_alloc(&dd->vnic.vesw_idr, vinfo, vinfo->vesw_id,
                       vinfo->vesw_id + 1, GFP_NOWAIT);
        if (rc < 0)
                return rc;

        for (i = 0; i < vinfo->num_rx_q; i++) {
                struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];

                skb_queue_head_init(&rxq->skbq);
                napi_enable(&rxq->napi);
        }

        netif_carrier_on(netdev);
        netif_tx_start_all_queues(netdev);
        set_bit(HFI1_VNIC_UP, &vinfo->flags);

        return 0;
}

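/* hfi1_vnic_down - stop tx, unregister the vesw id and drop any queued rx skbs */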
static void hfi1_vnic_down(struct hfi1_vnic_vport_info *vinfo)
{
        struct hfi1_devdata *dd = vinfo->dd;
        u8 i;

        clear_bit(HFI1_VNIC_UP, &vinfo->flags);
        netif_carrier_off(vinfo->netdev);
        netif_tx_disable(vinfo->netdev);
        idr_remove(&dd->vnic.vesw_idr, vinfo->vesw_id);

        /* ensure irqs see the change */
        hfi1_vnic_synchronize_irq(dd);

        /* remove unread skbs */
        for (i = 0; i < vinfo->num_rx_q; i++) {
                struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];

                napi_disable(&rxq->napi);
                skb_queue_purge(&rxq->skbq);
        }
}

static int hfi1_netdev_open(struct net_device *netdev)
{
        struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
        int rc;

        mutex_lock(&vinfo->lock);
        rc = hfi1_vnic_up(vinfo);
        mutex_unlock(&vinfo->lock);
        return rc;
}

static int hfi1_netdev_close(struct net_device *netdev)
{
        struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);

        mutex_lock(&vinfo->lock);
        if (test_bit(HFI1_VNIC_UP, &vinfo->flags))
                hfi1_vnic_down(vinfo);
        mutex_unlock(&vinfo->lock);
        return 0;
}

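/* hfi1_vnic_allot_ctxt - allocate and set up one VNIC receive context */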
static int hfi1_vnic_allot_ctxt(struct hfi1_devdata *dd,
                                struct hfi1_ctxtdata **vnic_ctxt)
{
        int rc;

        rc = allocate_vnic_ctxt(dd, vnic_ctxt);
        if (rc) {
                dd_dev_err(dd, "vnic ctxt alloc failed %d\n", rc);
                return rc;
        }

        rc = setup_vnic_ctxt(dd, *vnic_ctxt);
        if (rc) {
                dd_dev_err(dd, "vnic ctxt setup failed %d\n", rc);
                deallocate_vnic_ctxt(dd, *vnic_ctxt);
                *vnic_ctxt = NULL;
        }

        return rc;
}

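/* hfi1_vnic_init - allocate the shared VNIC resources (txreqs, receive contexts, RSM) needed by a new vport */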
static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo)
{
        struct hfi1_devdata *dd = vinfo->dd;
        int i, rc = 0;

        mutex_lock(&hfi1_mutex);
        if (!dd->vnic.num_vports) {
                rc = hfi1_vnic_txreq_init(dd);
                if (rc)
                        goto txreq_fail;

                dd->vnic.msix_idx = dd->first_dyn_msix_idx;
        }

        for (i = dd->vnic.num_ctxt; i < vinfo->num_rx_q; i++) {
                rc = hfi1_vnic_allot_ctxt(dd, &dd->vnic.ctxt[i]);
                if (rc)
                        break;
                dd->vnic.ctxt[i]->vnic_q_idx = i;
        }

        if (i < vinfo->num_rx_q) {
                /*
                 * If the required number of contexts could not be
                 * allocated, release the contexts allocated above.
                 */
                while (i-- > dd->vnic.num_ctxt) {
                        deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]);
                        dd->vnic.ctxt[i] = NULL;
                }
                goto alloc_fail;
        }

        if (dd->vnic.num_ctxt != i) {
                dd->vnic.num_ctxt = i;
                hfi1_init_vnic_rsm(dd);
        }

        dd->vnic.num_vports++;
        hfi1_vnic_sdma_init(vinfo);

alloc_fail:
        if (!dd->vnic.num_vports)
                hfi1_vnic_txreq_deinit(dd);
txreq_fail:
        mutex_unlock(&hfi1_mutex);
        return rc;
}

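/* hfi1_vnic_deinit - release the shared VNIC resources when the last vport goes away */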
static void hfi1_vnic_deinit(struct hfi1_vnic_vport_info *vinfo)
{
        struct hfi1_devdata *dd = vinfo->dd;
        int i;

        mutex_lock(&hfi1_mutex);
        if (--dd->vnic.num_vports == 0) {
                for (i = 0; i < dd->vnic.num_ctxt; i++) {
                        deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]);
                        dd->vnic.ctxt[i] = NULL;
                }

                hfi1_deinit_vnic_rsm(dd);
                dd->vnic.num_ctxt = 0;
                hfi1_vnic_txreq_deinit(dd);
        }
        mutex_unlock(&hfi1_mutex);
}

static void hfi1_vnic_set_vesw_id(struct net_device *netdev, int id)
{
        struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
        bool reopen = false;

        /*
         * If vesw_id is being changed, and if the vnic port is up,
         * reset the vnic port to ensure new vesw_id gets picked up
         */
        if (id != vinfo->vesw_id) {
                mutex_lock(&vinfo->lock);
                if (test_bit(HFI1_VNIC_UP, &vinfo->flags)) {
                        hfi1_vnic_down(vinfo);
                        reopen = true;
                }

                vinfo->vesw_id = id;
                if (reopen)
                        hfi1_vnic_up(vinfo);

                mutex_unlock(&vinfo->lock);
        }
}

/* netdev ops */
static const struct net_device_ops hfi1_netdev_ops = {
        .ndo_open = hfi1_netdev_open,
        .ndo_stop = hfi1_netdev_close,
        .ndo_start_xmit = hfi1_netdev_start_xmit,
        .ndo_select_queue = hfi1_vnic_select_queue,
        .ndo_get_stats64 = hfi1_vnic_get_stats64,
};

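/* hfi1_vnic_alloc_rn - allocate an OPA VNIC rdma netdev with one tx queue per SDMA engine and one rx queue per VNIC context */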
struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device,
                                      u8 port_num,
                                      enum rdma_netdev_t type,
                                      const char *name,
                                      unsigned char name_assign_type,
                                      void (*setup)(struct net_device *))
{
        struct hfi1_devdata *dd = dd_from_ibdev(device);
        struct hfi1_vnic_vport_info *vinfo;
        struct net_device *netdev;
        struct rdma_netdev *rn;
        int i, size, rc;

        if (!port_num || (port_num > dd->num_pports))
                return ERR_PTR(-EINVAL);

        if (type != RDMA_NETDEV_OPA_VNIC)
                return ERR_PTR(-EOPNOTSUPP);

        size = sizeof(struct opa_vnic_rdma_netdev) + sizeof(*vinfo);
        netdev = alloc_netdev_mqs(size, name, name_assign_type, setup,
                                  dd->chip_sdma_engines, HFI1_NUM_VNIC_CTXT);
        if (!netdev)
                return ERR_PTR(-ENOMEM);

        rn = netdev_priv(netdev);
        vinfo = opa_vnic_dev_priv(netdev);
        vinfo->dd = dd;
        vinfo->num_tx_q = dd->chip_sdma_engines;
        vinfo->num_rx_q = HFI1_NUM_VNIC_CTXT;
        vinfo->netdev = netdev;
        rn->set_id = hfi1_vnic_set_vesw_id;

        netdev->features = NETIF_F_HIGHDMA | NETIF_F_SG;
        netdev->hw_features = netdev->features;
        netdev->vlan_features = netdev->features;
        netdev->watchdog_timeo = msecs_to_jiffies(HFI_TX_TIMEOUT_MS);
        netdev->netdev_ops = &hfi1_netdev_ops;
        mutex_init(&vinfo->lock);

        for (i = 0; i < vinfo->num_rx_q; i++) {
                struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];

                rxq->idx = i;
                rxq->vinfo = vinfo;
                rxq->netdev = netdev;
                netif_napi_add(netdev, &rxq->napi, hfi1_vnic_napi, 64);
        }

        rc = hfi1_vnic_init(vinfo);
        if (rc)
                goto init_fail;

        return netdev;
init_fail:
        mutex_destroy(&vinfo->lock);
        free_netdev(netdev);
        return ERR_PTR(rc);
}

void hfi1_vnic_free_rn(struct net_device *netdev)
{
        struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);

        hfi1_vnic_deinit(vinfo);
        mutex_destroy(&vinfo->lock);
        free_netdev(netdev);
}