ice_txrx.c
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2018, Intel Corporation. */

/* The driver transmit and receive code */

#include <linux/prefetch.h>
#include <linux/mm.h>
#include "ice.h"

#define ICE_RX_HDR_SIZE		256

/**
 * ice_unmap_and_free_tx_buf - Release a Tx buffer
 * @ring: the ring that owns the buffer
 * @tx_buf: the buffer to free
 */
static void
ice_unmap_and_free_tx_buf(struct ice_ring *ring, struct ice_tx_buf *tx_buf)
{
	if (tx_buf->skb) {
		dev_kfree_skb_any(tx_buf->skb);
		if (dma_unmap_len(tx_buf, len))
			dma_unmap_single(ring->dev,
					 dma_unmap_addr(tx_buf, dma),
					 dma_unmap_len(tx_buf, len),
					 DMA_TO_DEVICE);
	} else if (dma_unmap_len(tx_buf, len)) {
		dma_unmap_page(ring->dev,
			       dma_unmap_addr(tx_buf, dma),
			       dma_unmap_len(tx_buf, len),
			       DMA_TO_DEVICE);
	}

	tx_buf->next_to_watch = NULL;
	tx_buf->skb = NULL;
	dma_unmap_len_set(tx_buf, len, 0);
	/* tx_buf must be completely set up in the transmit path */
}

static struct netdev_queue *txring_txq(const struct ice_ring *ring)
{
	return netdev_get_tx_queue(ring->netdev, ring->q_index);
}

/**
 * ice_clean_tx_ring - Free any empty Tx buffers
 * @tx_ring: ring to be cleaned
 */
void ice_clean_tx_ring(struct ice_ring *tx_ring)
{
	unsigned long size;
	u16 i;

	/* ring already cleared, nothing to do */
	if (!tx_ring->tx_buf)
		return;

	/* Free all the Tx ring sk_buffs */
	for (i = 0; i < tx_ring->count; i++)
		ice_unmap_and_free_tx_buf(tx_ring, &tx_ring->tx_buf[i]);

	size = sizeof(struct ice_tx_buf) * tx_ring->count;
	memset(tx_ring->tx_buf, 0, size);

	/* Zero out the descriptor ring */
	memset(tx_ring->desc, 0, tx_ring->size);

	tx_ring->next_to_use = 0;
	tx_ring->next_to_clean = 0;

	if (!tx_ring->netdev)
		return;

	/* cleanup Tx queue statistics */
	netdev_tx_reset_queue(txring_txq(tx_ring));
}

/**
 * ice_free_tx_ring - Free Tx resources per queue
 * @tx_ring: Tx descriptor ring for a specific queue
 *
 * Free all transmit software resources
 */
void ice_free_tx_ring(struct ice_ring *tx_ring)
{
	ice_clean_tx_ring(tx_ring);
	devm_kfree(tx_ring->dev, tx_ring->tx_buf);
	tx_ring->tx_buf = NULL;

	if (tx_ring->desc) {
		dmam_free_coherent(tx_ring->dev, tx_ring->size,
				   tx_ring->desc, tx_ring->dma);
		tx_ring->desc = NULL;
	}
}

/**
 * ice_clean_tx_irq - Reclaim resources after transmit completes
 * @vsi: the VSI we care about
 * @tx_ring: Tx ring to clean
 * @napi_budget: Used to determine if we are in netpoll
 *
 * Returns true if there's any budget left (e.g. the clean is finished)
 */
static bool ice_clean_tx_irq(struct ice_vsi *vsi, struct ice_ring *tx_ring,
			     int napi_budget)
{
	unsigned int total_bytes = 0, total_pkts = 0;
	unsigned int budget = vsi->work_lmt;
	s16 i = tx_ring->next_to_clean;
	struct ice_tx_desc *tx_desc;
	struct ice_tx_buf *tx_buf;

	tx_buf = &tx_ring->tx_buf[i];
	tx_desc = ICE_TX_DESC(tx_ring, i);
	i -= tx_ring->count;

	do {
		struct ice_tx_desc *eop_desc = tx_buf->next_to_watch;

		/* if next_to_watch is not set then there is no work pending */
		if (!eop_desc)
			break;

		smp_rmb();	/* prevent any other reads prior to eop_desc */

		/* if the descriptor isn't done, no work yet to do */
		if (!(eop_desc->cmd_type_offset_bsz &
		      cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE)))
			break;

		/* clear next_to_watch to prevent false hangs */
		tx_buf->next_to_watch = NULL;

		/* update the statistics for this packet */
		total_bytes += tx_buf->bytecount;
		total_pkts += tx_buf->gso_segs;

		/* free the skb */
		napi_consume_skb(tx_buf->skb, napi_budget);

		/* unmap skb header data */
		dma_unmap_single(tx_ring->dev,
				 dma_unmap_addr(tx_buf, dma),
				 dma_unmap_len(tx_buf, len),
				 DMA_TO_DEVICE);

		/* clear tx_buf data */
		tx_buf->skb = NULL;
		dma_unmap_len_set(tx_buf, len, 0);

		/* unmap remaining buffers */
		while (tx_desc != eop_desc) {
			tx_buf++;
			tx_desc++;
			i++;
			if (unlikely(!i)) {
				i -= tx_ring->count;
				tx_buf = tx_ring->tx_buf;
				tx_desc = ICE_TX_DESC(tx_ring, 0);
			}

			/* unmap any remaining paged data */
			if (dma_unmap_len(tx_buf, len)) {
				dma_unmap_page(tx_ring->dev,
					       dma_unmap_addr(tx_buf, dma),
					       dma_unmap_len(tx_buf, len),
					       DMA_TO_DEVICE);
				dma_unmap_len_set(tx_buf, len, 0);
			}
		}

		/* move us one more past the eop_desc for start of next pkt */
		tx_buf++;
		tx_desc++;
		i++;
		if (unlikely(!i)) {
			i -= tx_ring->count;
			tx_buf = tx_ring->tx_buf;
			tx_desc = ICE_TX_DESC(tx_ring, 0);
		}

		prefetch(tx_desc);

		/* update budget accounting */
		budget--;
	} while (likely(budget));

	i += tx_ring->count;
	tx_ring->next_to_clean = i;
	u64_stats_update_begin(&tx_ring->syncp);
	tx_ring->stats.bytes += total_bytes;
	tx_ring->stats.pkts += total_pkts;
	u64_stats_update_end(&tx_ring->syncp);
	tx_ring->q_vector->tx.total_bytes += total_bytes;
	tx_ring->q_vector->tx.total_pkts += total_pkts;

	netdev_tx_completed_queue(txring_txq(tx_ring), total_pkts,
				  total_bytes);

#define TX_WAKE_THRESHOLD ((s16)(DESC_NEEDED * 2))
	if (unlikely(total_pkts && netif_carrier_ok(tx_ring->netdev) &&
		     (ICE_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) {
		/* Make sure that anybody stopping the queue after this
		 * sees the new next_to_clean.
		 */
		smp_mb();
		if (__netif_subqueue_stopped(tx_ring->netdev,
					     tx_ring->q_index) &&
		    !test_bit(__ICE_DOWN, vsi->state)) {
			netif_wake_subqueue(tx_ring->netdev,
					    tx_ring->q_index);
			++tx_ring->tx_stats.restart_q;
		}
	}

	return !!budget;
}

/**
 * ice_setup_tx_ring - Allocate the Tx descriptors
 * @tx_ring: the tx ring to set up
 *
 * Return 0 on success, negative on error
 */
int ice_setup_tx_ring(struct ice_ring *tx_ring)
{
	struct device *dev = tx_ring->dev;
	int bi_size;

	if (!dev)
		return -ENOMEM;

	/* warn if we are about to overwrite the pointer */
	WARN_ON(tx_ring->tx_buf);
	bi_size = sizeof(struct ice_tx_buf) * tx_ring->count;
	tx_ring->tx_buf = devm_kzalloc(dev, bi_size, GFP_KERNEL);
	if (!tx_ring->tx_buf)
		return -ENOMEM;

	/* round up to nearest 4K */
	tx_ring->size = tx_ring->count * sizeof(struct ice_tx_desc);
	tx_ring->size = ALIGN(tx_ring->size, 4096);
	tx_ring->desc = dmam_alloc_coherent(dev, tx_ring->size, &tx_ring->dma,
					    GFP_KERNEL);
	if (!tx_ring->desc) {
		dev_err(dev, "Unable to allocate memory for the Tx descriptor ring, size=%d\n",
			tx_ring->size);
		goto err;
	}

	tx_ring->next_to_use = 0;
	tx_ring->next_to_clean = 0;
	return 0;

err:
	devm_kfree(dev, tx_ring->tx_buf);
	tx_ring->tx_buf = NULL;
	return -ENOMEM;
}

/**
 * ice_clean_rx_ring - Free Rx buffers
 * @rx_ring: ring to be cleaned
 */
void ice_clean_rx_ring(struct ice_ring *rx_ring)
{
	struct device *dev = rx_ring->dev;
	unsigned long size;
	u16 i;

	/* ring already cleared, nothing to do */
	if (!rx_ring->rx_buf)
		return;

	/* Free all the Rx ring sk_buffs */
	for (i = 0; i < rx_ring->count; i++) {
		struct ice_rx_buf *rx_buf = &rx_ring->rx_buf[i];

		if (rx_buf->skb) {
			dev_kfree_skb(rx_buf->skb);
			rx_buf->skb = NULL;
		}
		if (!rx_buf->page)
			continue;

		dma_unmap_page(dev, rx_buf->dma, PAGE_SIZE, DMA_FROM_DEVICE);
		__free_pages(rx_buf->page, 0);

		rx_buf->page = NULL;
		rx_buf->page_offset = 0;
	}

	size = sizeof(struct ice_rx_buf) * rx_ring->count;
	memset(rx_ring->rx_buf, 0, size);

	/* Zero out the descriptor ring */
	memset(rx_ring->desc, 0, rx_ring->size);

	rx_ring->next_to_alloc = 0;
	rx_ring->next_to_clean = 0;
	rx_ring->next_to_use = 0;
}

/**
 * ice_free_rx_ring - Free Rx resources
 * @rx_ring: ring to clean the resources from
 *
 * Free all receive software resources
 */
void ice_free_rx_ring(struct ice_ring *rx_ring)
{
	ice_clean_rx_ring(rx_ring);
	devm_kfree(rx_ring->dev, rx_ring->rx_buf);
	rx_ring->rx_buf = NULL;

	if (rx_ring->desc) {
		dmam_free_coherent(rx_ring->dev, rx_ring->size,
				   rx_ring->desc, rx_ring->dma);
		rx_ring->desc = NULL;
	}
}

/**
 * ice_setup_rx_ring - Allocate the Rx descriptors
 * @rx_ring: the rx ring to set up
 *
 * Return 0 on success, negative on error
 */
int ice_setup_rx_ring(struct ice_ring *rx_ring)
{
	struct device *dev = rx_ring->dev;
	int bi_size;

	if (!dev)
		return -ENOMEM;

	/* warn if we are about to overwrite the pointer */
	WARN_ON(rx_ring->rx_buf);
	bi_size = sizeof(struct ice_rx_buf) * rx_ring->count;
	rx_ring->rx_buf = devm_kzalloc(dev, bi_size, GFP_KERNEL);
	if (!rx_ring->rx_buf)
		return -ENOMEM;

	/* round up to nearest 4K */
	rx_ring->size = rx_ring->count * sizeof(union ice_32byte_rx_desc);
	rx_ring->size = ALIGN(rx_ring->size, 4096);
	rx_ring->desc = dmam_alloc_coherent(dev, rx_ring->size, &rx_ring->dma,
					    GFP_KERNEL);
	if (!rx_ring->desc) {
		dev_err(dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n",
			rx_ring->size);
		goto err;
	}

	rx_ring->next_to_use = 0;
	rx_ring->next_to_clean = 0;
	return 0;

err:
	devm_kfree(dev, rx_ring->rx_buf);
	rx_ring->rx_buf = NULL;
	return -ENOMEM;
}

/**
 * ice_release_rx_desc - Store the new tail and head values
 * @rx_ring: ring to bump
 * @val: new head index
 */
static void ice_release_rx_desc(struct ice_ring *rx_ring, u32 val)
{
	rx_ring->next_to_use = val;

	/* update next to alloc since we have filled the ring */
	rx_ring->next_to_alloc = val;

	/* Force memory writes to complete before letting h/w
	 * know there are new descriptors to fetch. (Only
	 * applicable for weak-ordered memory model archs,
	 * such as IA-64).
	 */
	wmb();
	writel(val, rx_ring->tail);
}

/**
 * ice_alloc_mapped_page - recycle or make a new page
 * @rx_ring: ring to use
 * @bi: rx_buf struct to modify
 *
 * Returns true if the page was successfully allocated or
 * reused.
 */
static bool ice_alloc_mapped_page(struct ice_ring *rx_ring,
				  struct ice_rx_buf *bi)
{
	struct page *page = bi->page;
	dma_addr_t dma;

	/* since we are recycling buffers we should seldom need to alloc */
	if (likely(page)) {
		rx_ring->rx_stats.page_reuse_count++;
		return true;
	}

	/* alloc new page for storage */
	page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
	if (unlikely(!page)) {
		rx_ring->rx_stats.alloc_page_failed++;
		return false;
	}

	/* map page for use */
	dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE);

	/* if mapping failed free memory back to system since
	 * there isn't much point in holding memory we can't use
	 */
	if (dma_mapping_error(rx_ring->dev, dma)) {
		__free_pages(page, 0);
		rx_ring->rx_stats.alloc_page_failed++;
		return false;
	}

	bi->dma = dma;
	bi->page = page;
	bi->page_offset = 0;

	return true;
}

/**
 * ice_alloc_rx_bufs - Replace used receive buffers
 * @rx_ring: ring to place buffers on
 * @cleaned_count: number of buffers to replace
 *
 * Returns false if all allocations were successful, true if any fail
 */
bool ice_alloc_rx_bufs(struct ice_ring *rx_ring, u16 cleaned_count)
{
	union ice_32b_rx_flex_desc *rx_desc;
	u16 ntu = rx_ring->next_to_use;
	struct ice_rx_buf *bi;

	/* do nothing if no valid netdev defined */
	if (!rx_ring->netdev || !cleaned_count)
		return false;

	/* get the RX descriptor and buffer based on next_to_use */
	rx_desc = ICE_RX_DESC(rx_ring, ntu);
	bi = &rx_ring->rx_buf[ntu];

	do {
		if (!ice_alloc_mapped_page(rx_ring, bi))
			goto no_bufs;

		/* Refresh the desc even if buffer_addrs didn't change
		 * because each write-back erases this info.
		 */
		rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset);

		rx_desc++;
		bi++;
		ntu++;
		if (unlikely(ntu == rx_ring->count)) {
			rx_desc = ICE_RX_DESC(rx_ring, 0);
			bi = rx_ring->rx_buf;
			ntu = 0;
		}

		/* clear the status bits for the next_to_use descriptor */
		rx_desc->wb.status_error0 = 0;

		cleaned_count--;
	} while (cleaned_count);

	if (rx_ring->next_to_use != ntu)
		ice_release_rx_desc(rx_ring, ntu);

	return false;

no_bufs:
	if (rx_ring->next_to_use != ntu)
		ice_release_rx_desc(rx_ring, ntu);

	/* make sure to come back via polling to try again after
	 * allocation failure
	 */
	return true;
}

/**
 * ice_page_is_reserved - check if reuse is possible
 * @page: page struct to check
 */
static bool ice_page_is_reserved(struct page *page)
{
	return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page);
}

/**
 * ice_add_rx_frag - Add contents of Rx buffer to sk_buff
 * @rx_buf: buffer containing page to add
 * @rx_desc: descriptor containing length of buffer written by hardware
 * @skb: sk_buf to place the data into
 *
 * This function will add the data contained in rx_buf->page to the skb.
 * This is done either through a direct copy if the data in the buffer is
 * less than the skb header size, otherwise it will just attach the page as
 * a frag to the skb.
 *
 * The function will then update the page offset if necessary and return
 * true if the buffer can be reused by the adapter.
 */
static bool ice_add_rx_frag(struct ice_rx_buf *rx_buf,
			    union ice_32b_rx_flex_desc *rx_desc,
			    struct sk_buff *skb)
{
#if (PAGE_SIZE < 8192)
	unsigned int truesize = ICE_RXBUF_2048;
#else
	unsigned int last_offset = PAGE_SIZE - ICE_RXBUF_2048;
	unsigned int truesize;
#endif /* PAGE_SIZE < 8192) */
	struct page *page;
	unsigned int size;

	size = le16_to_cpu(rx_desc->wb.pkt_len) &
		ICE_RX_FLX_DESC_PKT_LEN_M;

	page = rx_buf->page;

#if (PAGE_SIZE >= 8192)
	truesize = ALIGN(size, L1_CACHE_BYTES);
#endif /* PAGE_SIZE >= 8192) */

	/* will the data fit in the skb we allocated? if so, just
	 * copy it as it is pretty small anyway
	 */
	if (size <= ICE_RX_HDR_SIZE && !skb_is_nonlinear(skb)) {
		unsigned char *va = page_address(page) + rx_buf->page_offset;

		memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long)));

		/* page is not reserved, we can reuse buffer as-is */
		if (likely(!ice_page_is_reserved(page)))
			return true;

		/* this page cannot be reused so discard it */
		__free_pages(page, 0);
		return false;
	}

	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
			rx_buf->page_offset, size, truesize);

	/* avoid re-using remote pages */
	if (unlikely(ice_page_is_reserved(page)))
		return false;

#if (PAGE_SIZE < 8192)
	/* if we are only owner of page we can reuse it */
	if (unlikely(page_count(page) != 1))
		return false;

	/* flip page offset to other buffer */
	rx_buf->page_offset ^= truesize;
#else
	/* move offset up to the next cache line */
	rx_buf->page_offset += truesize;

	if (rx_buf->page_offset > last_offset)
		return false;
#endif /* PAGE_SIZE < 8192) */

	/* Even if we own the page, we are not allowed to use atomic_set()
	 * This would break get_page_unless_zero() users.
	 */
	get_page(rx_buf->page);

	return true;
}
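
/* Note (illustrative, not part of the original source): on 4K-page systems
 * each receive page is split into two ICE_RXBUF_2048 halves, so the XOR above
 * simply flips page_offset between 0 and 2048. One half is handed back to the
 * hardware while the stack consumes the other, and the page keeps being
 * recycled as long as the driver remains its sole owner (page_count() == 1).
 */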

/**
 * ice_reuse_rx_page - page flip buffer and store it back on the ring
 * @rx_ring: rx descriptor ring to store buffers on
 * @old_buf: donor buffer to have page reused
 *
 * Synchronizes page for reuse by the adapter
 */
static void ice_reuse_rx_page(struct ice_ring *rx_ring,
			      struct ice_rx_buf *old_buf)
{
	u16 nta = rx_ring->next_to_alloc;
	struct ice_rx_buf *new_buf;

	new_buf = &rx_ring->rx_buf[nta];

	/* update, and store next to alloc */
	nta++;
	rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;

	/* transfer page from old buffer to new buffer */
	*new_buf = *old_buf;
}

/**
 * ice_fetch_rx_buf - Allocate skb and populate it
 * @rx_ring: rx descriptor ring to transact packets on
 * @rx_desc: descriptor containing info written by hardware
 *
 * This function allocates an skb on the fly, and populates it with the page
 * data from the current receive descriptor, taking care to set up the skb
 * correctly, as well as handling calling the page recycle function if
 * necessary.
 */
static struct sk_buff *ice_fetch_rx_buf(struct ice_ring *rx_ring,
					union ice_32b_rx_flex_desc *rx_desc)
{
	struct ice_rx_buf *rx_buf;
	struct sk_buff *skb;
	struct page *page;

	rx_buf = &rx_ring->rx_buf[rx_ring->next_to_clean];
	page = rx_buf->page;
	prefetchw(page);

	skb = rx_buf->skb;

	if (likely(!skb)) {
		u8 *page_addr = page_address(page) + rx_buf->page_offset;

		/* prefetch first cache line of first page */
		prefetch(page_addr);
#if L1_CACHE_BYTES < 128
		prefetch((void *)(page_addr + L1_CACHE_BYTES));
#endif /* L1_CACHE_BYTES */

		/* allocate a skb to store the frags */
		skb = __napi_alloc_skb(&rx_ring->q_vector->napi,
				       ICE_RX_HDR_SIZE,
				       GFP_ATOMIC | __GFP_NOWARN);
		if (unlikely(!skb)) {
			rx_ring->rx_stats.alloc_buf_failed++;
			return NULL;
		}

		/* we will be copying header into skb->data in
		 * pskb_may_pull so it is in our interest to prefetch
		 * it now to avoid a possible cache miss
		 */
		prefetchw(skb->data);

		skb_record_rx_queue(skb, rx_ring->q_index);
	} else {
		/* we are reusing so sync this buffer for CPU use */
		dma_sync_single_range_for_cpu(rx_ring->dev, rx_buf->dma,
					      rx_buf->page_offset,
					      ICE_RXBUF_2048,
					      DMA_FROM_DEVICE);

		rx_buf->skb = NULL;
	}

	/* pull page into skb */
	if (ice_add_rx_frag(rx_buf, rx_desc, skb)) {
		/* hand second half of page back to the ring */
		ice_reuse_rx_page(rx_ring, rx_buf);
		rx_ring->rx_stats.page_reuse_count++;
	} else {
		/* we are not reusing the buffer so unmap it */
		dma_unmap_page(rx_ring->dev, rx_buf->dma, PAGE_SIZE,
			       DMA_FROM_DEVICE);
	}

	/* clear contents of buffer_info */
	rx_buf->page = NULL;

	return skb;
}

/**
 * ice_pull_tail - ice specific version of skb_pull_tail
 * @skb: pointer to current skb being adjusted
 *
 * This function is an ice specific version of __pskb_pull_tail. The
 * main difference between this version and the original function is that
 * this function can make several assumptions about the state of things
 * that allow for significant optimizations versus the standard function.
 * As a result we can do things like drop a frag and maintain an accurate
 * truesize for the skb.
 */
static void ice_pull_tail(struct sk_buff *skb)
{
	struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
	unsigned int pull_len;
	unsigned char *va;

	/* it is valid to use page_address instead of kmap since we are
	 * working with pages allocated out of the lowmem pool per
	 * alloc_page(GFP_ATOMIC)
	 */
	va = skb_frag_address(frag);

	/* we need the header to contain the greater of either ETH_HLEN or
	 * 60 bytes if the skb->len is less than 60 for skb_pad.
	 */
	pull_len = eth_get_headlen(va, ICE_RX_HDR_SIZE);

	/* align pull length to size of long to optimize memcpy performance */
	skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long)));

	/* update all of the pointers */
	skb_frag_size_sub(frag, pull_len);
	frag->page_offset += pull_len;
	skb->data_len -= pull_len;
	skb->tail += pull_len;
}

/**
 * ice_cleanup_headers - Correct empty headers
 * @skb: pointer to current skb being fixed
 *
 * Also address the case where we are pulling data in on pages only
 * and as such no data is present in the skb header.
 *
 * In addition if skb is not at least 60 bytes we need to pad it so that
 * it is large enough to qualify as a valid Ethernet frame.
 *
 * Returns true if an error was encountered and skb was freed.
 */
static bool ice_cleanup_headers(struct sk_buff *skb)
{
	/* place header in linear portion of buffer */
	if (skb_is_nonlinear(skb))
		ice_pull_tail(skb);

	/* if eth_skb_pad returns an error the skb was freed */
	if (eth_skb_pad(skb))
		return true;

	return false;
}

/**
 * ice_test_staterr - tests bits in Rx descriptor status and error fields
 * @rx_desc: pointer to receive descriptor (in le64 format)
 * @stat_err_bits: value to mask
 *
 * This function does some fast chicanery in order to return the
 * value of the mask which is really only used for boolean tests.
 * The status_error_len doesn't need to be shifted because it begins
 * at offset zero.
 */
static bool ice_test_staterr(union ice_32b_rx_flex_desc *rx_desc,
			     const u16 stat_err_bits)
{
	return !!(rx_desc->wb.status_error0 &
		  cpu_to_le16(stat_err_bits));
}

/**
 * ice_is_non_eop - process handling of non-EOP buffers
 * @rx_ring: Rx ring being processed
 * @rx_desc: Rx descriptor for current buffer
 * @skb: Current socket buffer containing buffer in progress
 *
 * This function updates next to clean. If the buffer is an EOP buffer
 * this function exits returning false, otherwise it will place the
 * sk_buff in the next buffer to be chained and return true indicating
 * that this is in fact a non-EOP buffer.
 */
static bool ice_is_non_eop(struct ice_ring *rx_ring,
			   union ice_32b_rx_flex_desc *rx_desc,
			   struct sk_buff *skb)
{
	u32 ntc = rx_ring->next_to_clean + 1;

	/* fetch, update, and store next to clean */
	ntc = (ntc < rx_ring->count) ? ntc : 0;
	rx_ring->next_to_clean = ntc;

	prefetch(ICE_RX_DESC(rx_ring, ntc));

	/* if we are the last buffer then there is nothing else to do */
#define ICE_RXD_EOF BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S)
	if (likely(ice_test_staterr(rx_desc, ICE_RXD_EOF)))
		return false;

	/* place skb in next buffer to be received */
	rx_ring->rx_buf[ntc].skb = skb;
	rx_ring->rx_stats.non_eop_descs++;

	return true;
}

/**
 * ice_ptype_to_htype - get a hash type
 * @ptype: the ptype value from the descriptor
 *
 * Returns a hash type to be used by skb_set_hash
 */
static enum pkt_hash_types ice_ptype_to_htype(u8 __always_unused ptype)
{
	return PKT_HASH_TYPE_NONE;
}

/**
 * ice_rx_hash - set the hash value in the skb
 * @rx_ring: descriptor ring
 * @rx_desc: specific descriptor
 * @skb: pointer to current skb
 * @rx_ptype: the ptype value from the descriptor
 */
static void
ice_rx_hash(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc,
	    struct sk_buff *skb, u8 rx_ptype)
{
	struct ice_32b_rx_flex_desc_nic *nic_mdid;
	u32 hash;

	if (!(rx_ring->netdev->features & NETIF_F_RXHASH))
		return;

	if (rx_desc->wb.rxdid != ICE_RXDID_FLEX_NIC)
		return;

	nic_mdid = (struct ice_32b_rx_flex_desc_nic *)rx_desc;
	hash = le32_to_cpu(nic_mdid->rss_hash);
	skb_set_hash(skb, hash, ice_ptype_to_htype(rx_ptype));
}

/**
 * ice_rx_csum - Indicate in skb if checksum is good
 * @vsi: the VSI we care about
 * @skb: skb currently being received and modified
 * @rx_desc: the receive descriptor
 * @ptype: the packet type decoded by hardware
 *
 * skb->protocol must be set before this function is called
 */
static void ice_rx_csum(struct ice_vsi *vsi, struct sk_buff *skb,
			union ice_32b_rx_flex_desc *rx_desc, u8 ptype)
{
	struct ice_rx_ptype_decoded decoded;
	u32 rx_error, rx_status;
	bool ipv4, ipv6;

	rx_status = le16_to_cpu(rx_desc->wb.status_error0);
	rx_error = rx_status;

	decoded = ice_decode_rx_desc_ptype(ptype);

	/* Start with CHECKSUM_NONE and by default csum_level = 0 */
	skb->ip_summed = CHECKSUM_NONE;
	skb_checksum_none_assert(skb);

	/* check if Rx checksum is enabled */
	if (!(vsi->netdev->features & NETIF_F_RXCSUM))
		return;

	/* check if HW has decoded the packet and checksum */
	if (!(rx_status & BIT(ICE_RX_FLEX_DESC_STATUS0_L3L4P_S)))
		return;

	if (!(decoded.known && decoded.outer_ip))
		return;

	ipv4 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) &&
	       (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV4);
	ipv6 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) &&
	       (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV6);

	if (ipv4 && (rx_error & (BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) |
				 BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S))))
		goto checksum_fail;
	else if (ipv6 && (rx_status &
		 (BIT(ICE_RX_FLEX_DESC_STATUS0_IPV6EXADD_S))))
		goto checksum_fail;

	/* check for L4 errors and handle packets that were not able to be
	 * checksummed due to arrival speed
	 */
	if (rx_error & BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_L4E_S))
		goto checksum_fail;

	/* Only report checksum unnecessary for TCP, UDP, or SCTP */
	switch (decoded.inner_prot) {
	case ICE_RX_PTYPE_INNER_PROT_TCP:
	case ICE_RX_PTYPE_INNER_PROT_UDP:
	case ICE_RX_PTYPE_INNER_PROT_SCTP:
		skb->ip_summed = CHECKSUM_UNNECESSARY;
	default:
		break;
	}

	return;

checksum_fail:
	vsi->back->hw_csum_rx_error++;
}

/**
 * ice_process_skb_fields - Populate skb header fields from Rx descriptor
 * @rx_ring: rx descriptor ring packet is being transacted on
 * @rx_desc: pointer to the EOP Rx descriptor
 * @skb: pointer to current skb being populated
 * @ptype: the packet type decoded by hardware
 *
 * This function checks the ring, descriptor, and packet information in
 * order to populate the hash, checksum, VLAN, protocol, and
 * other fields within the skb.
 */
static void ice_process_skb_fields(struct ice_ring *rx_ring,
				   union ice_32b_rx_flex_desc *rx_desc,
				   struct sk_buff *skb, u8 ptype)
{
	ice_rx_hash(rx_ring, rx_desc, skb, ptype);

	/* modifies the skb - consumes the enet header */
	skb->protocol = eth_type_trans(skb, rx_ring->netdev);

	ice_rx_csum(rx_ring->vsi, skb, rx_desc, ptype);
}

/**
 * ice_receive_skb - Send a completed packet up the stack
 * @rx_ring: rx ring in play
 * @skb: packet to send up
 * @vlan_tag: vlan tag for packet
 *
 * This function sends the completed packet (via skb) up the stack using
 * gro receive functions (with/without vlan tag)
 */
static void ice_receive_skb(struct ice_ring *rx_ring, struct sk_buff *skb,
			    u16 vlan_tag)
{
	if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
	    (vlan_tag & VLAN_VID_MASK)) {
		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
	}
	napi_gro_receive(&rx_ring->q_vector->napi, skb);
}

/**
 * ice_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
 * @rx_ring: rx descriptor ring to transact packets on
 * @budget: Total limit on number of packets to process
 *
 * This function provides a "bounce buffer" approach to Rx interrupt
 * processing. The advantage to this is that on systems that have
 * expensive overhead for IOMMU access this provides a means of avoiding
 * it by maintaining the mapping of the page to the system.
 *
 * Returns amount of work completed
 */
static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
{
	unsigned int total_rx_bytes = 0, total_rx_pkts = 0;
	u16 cleaned_count = ICE_DESC_UNUSED(rx_ring);
	bool failure = false;

	/* start the loop to process RX packets bounded by 'budget' */
	while (likely(total_rx_pkts < (unsigned int)budget)) {
		union ice_32b_rx_flex_desc *rx_desc;
		struct sk_buff *skb;
		u16 stat_err_bits;
		u16 vlan_tag = 0;
		u8 rx_ptype;

		/* return some buffers to hardware, one at a time is too slow */
		if (cleaned_count >= ICE_RX_BUF_WRITE) {
			failure = failure ||
				  ice_alloc_rx_bufs(rx_ring, cleaned_count);
			cleaned_count = 0;
		}

		/* get the RX desc from RX ring based on 'next_to_clean' */
		rx_desc = ICE_RX_DESC(rx_ring, rx_ring->next_to_clean);

		/* status_error_len will always be zero for unused descriptors
		 * because it's cleared in cleanup, and overlaps with hdr_addr
		 * which is always zero because packet split isn't used, if the
		 * hardware wrote DD then it will be non-zero
		 */
		stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_DD_S);
		if (!ice_test_staterr(rx_desc, stat_err_bits))
			break;

		/* This memory barrier is needed to keep us from reading
		 * any other fields out of the rx_desc until we know the
		 * DD bit is set.
		 */
		dma_rmb();

		/* allocate (if needed) and populate skb */
		skb = ice_fetch_rx_buf(rx_ring, rx_desc);
		if (!skb)
			break;

		cleaned_count++;

		/* skip if it is NOP desc */
		if (ice_is_non_eop(rx_ring, rx_desc, skb))
			continue;

		stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_RXE_S);
		if (unlikely(ice_test_staterr(rx_desc, stat_err_bits))) {
			dev_kfree_skb_any(skb);
			continue;
		}

		rx_ptype = le16_to_cpu(rx_desc->wb.ptype_flex_flags0) &
			   ICE_RX_FLEX_DESC_PTYPE_M;

		stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_L2TAG1P_S);
		if (ice_test_staterr(rx_desc, stat_err_bits))
			vlan_tag = le16_to_cpu(rx_desc->wb.l2tag1);

		/* correct empty headers and pad skb if needed (to make valid
		 * ethernet frame)
		 */
		if (ice_cleanup_headers(skb)) {
			skb = NULL;
			continue;
		}

		/* probably a little skewed due to removing CRC */
		total_rx_bytes += skb->len;

		/* populate checksum, VLAN, and protocol */
		ice_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype);

		/* send completed skb up the stack */
		ice_receive_skb(rx_ring, skb, vlan_tag);

		/* update budget accounting */
		total_rx_pkts++;
	}

	/* update queue and vector specific stats */
	u64_stats_update_begin(&rx_ring->syncp);
	rx_ring->stats.pkts += total_rx_pkts;
	rx_ring->stats.bytes += total_rx_bytes;
	u64_stats_update_end(&rx_ring->syncp);
	rx_ring->q_vector->rx.total_pkts += total_rx_pkts;
	rx_ring->q_vector->rx.total_bytes += total_rx_bytes;

	/* guarantee a trip back through this routine if there was a failure */
	return failure ? budget : (int)total_rx_pkts;
}

/**
 * ice_napi_poll - NAPI polling Rx/Tx cleanup routine
 * @napi: napi struct with our devices info in it
 * @budget: amount of work driver is allowed to do this pass, in packets
 *
 * This function will clean all queues associated with a q_vector.
 *
 * Returns the amount of work done
 */
int ice_napi_poll(struct napi_struct *napi, int budget)
{
	struct ice_q_vector *q_vector =
		container_of(napi, struct ice_q_vector, napi);
	struct ice_vsi *vsi = q_vector->vsi;
	struct ice_pf *pf = vsi->back;
	bool clean_complete = true;
	int budget_per_ring = 0;
	struct ice_ring *ring;
	int work_done = 0;

	/* Since the actual Tx work is minimal, we can give the Tx a larger
	 * budget and be more aggressive about cleaning up the Tx descriptors.
	 */
	ice_for_each_ring(ring, q_vector->tx)
		if (!ice_clean_tx_irq(vsi, ring, budget))
			clean_complete = false;

	/* Handle case where we are called by netpoll with a budget of 0 */
	if (budget <= 0)
		return budget;

	/* We attempt to distribute budget to each Rx queue fairly, but don't
	 * allow the budget to go below 1 because that would exit polling early.
	 */
	if (q_vector->num_ring_rx)
		budget_per_ring = max(budget / q_vector->num_ring_rx, 1);

	ice_for_each_ring(ring, q_vector->rx) {
		int cleaned;

		cleaned = ice_clean_rx_irq(ring, budget_per_ring);
		work_done += cleaned;
		/* if we clean as many as budgeted, we must not be done */
		if (cleaned >= budget_per_ring)
			clean_complete = false;
	}

	/* If work not completed, return budget and polling will return */
	if (!clean_complete)
		return budget;

	/* Work is done so exit the polling mode and re-enable the interrupt */
	napi_complete_done(napi, work_done);
	if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags))
		ice_irq_dynamic_ena(&vsi->back->hw, vsi, q_vector);
	return 0;
}

/* helper function for building cmd/type/offset */
static __le64
build_ctob(u64 td_cmd, u64 td_offset, unsigned int size, u64 td_tag)
{
	return cpu_to_le64(ICE_TX_DESC_DTYPE_DATA |
			   (td_cmd << ICE_TXD_QW1_CMD_S) |
			   (td_offset << ICE_TXD_QW1_OFFSET_S) |
			   ((u64)size << ICE_TXD_QW1_TX_BUF_SZ_S) |
			   (td_tag << ICE_TXD_QW1_L2TAG1_S));
}
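
/* Illustrative note (not part of the original source): the last descriptor of
 * a packet is built by ice_tx_map() roughly as
 *	build_ctob(td_cmd | ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS,
 *		   td_offset, size, td_tag);
 * packing the command bits, header-length offsets, buffer size, and L2 tag
 * into the single cmd_type_offset_bsz quadword consumed by the hardware.
 */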

/**
 * __ice_maybe_stop_tx - 2nd level check for tx stop conditions
 * @tx_ring: the ring to be checked
 * @size: the size buffer we want to assure is available
 *
 * Returns -EBUSY if a stop is needed, else 0
 */
static int __ice_maybe_stop_tx(struct ice_ring *tx_ring, unsigned int size)
{
	netif_stop_subqueue(tx_ring->netdev, tx_ring->q_index);
	/* Memory barrier before checking head and tail */
	smp_mb();

	/* Check again in a case another CPU has just made room available. */
	if (likely(ICE_DESC_UNUSED(tx_ring) < size))
		return -EBUSY;

	/* A reprieve! - use start_subqueue because it doesn't call schedule */
	netif_start_subqueue(tx_ring->netdev, tx_ring->q_index);
	++tx_ring->tx_stats.restart_q;
	return 0;
}
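
/* Note (illustrative, not from the original source): the smp_mb() above pairs
 * with the smp_mb() in ice_clean_tx_irq(), so either the transmit path sees
 * the room freed by the completion path after stopping the queue, or the
 * completion path sees the stopped queue and wakes it. This avoids a
 * lost-wakeup race between the two paths.
 */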

/**
 * ice_maybe_stop_tx - 1st level check for tx stop conditions
 * @tx_ring: the ring to be checked
 * @size: the size buffer we want to assure is available
 *
 * Returns 0 if stop is not needed
 */
static int ice_maybe_stop_tx(struct ice_ring *tx_ring, unsigned int size)
{
	if (likely(ICE_DESC_UNUSED(tx_ring) >= size))
		return 0;
	return __ice_maybe_stop_tx(tx_ring, size);
}

/**
 * ice_tx_map - Build the Tx descriptor
 * @tx_ring: ring to send buffer on
 * @first: first buffer info buffer to use
 * @off: pointer to struct that holds offload parameters
 *
 * This function loops over the skb data pointed to by *first
 * and gets a physical address for each memory location and programs
 * it and the length into the transmit descriptor.
 */
static void
ice_tx_map(struct ice_ring *tx_ring, struct ice_tx_buf *first,
	   struct ice_tx_offload_params *off)
{
	u64 td_offset, td_tag, td_cmd;
	u16 i = tx_ring->next_to_use;
	struct skb_frag_struct *frag;
	unsigned int data_len, size;
	struct ice_tx_desc *tx_desc;
	struct ice_tx_buf *tx_buf;
	struct sk_buff *skb;
	dma_addr_t dma;

	td_tag = off->td_l2tag1;
	td_cmd = off->td_cmd;
	td_offset = off->td_offset;
	skb = first->skb;

	data_len = skb->data_len;
	size = skb_headlen(skb);

	tx_desc = ICE_TX_DESC(tx_ring, i);

	if (first->tx_flags & ICE_TX_FLAGS_HW_VLAN) {
		td_cmd |= (u64)ICE_TX_DESC_CMD_IL2TAG1;
		td_tag = (first->tx_flags & ICE_TX_FLAGS_VLAN_M) >>
			 ICE_TX_FLAGS_VLAN_S;
	}

	dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);

	tx_buf = first;

	for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
		unsigned int max_data = ICE_MAX_DATA_PER_TXD_ALIGNED;

		if (dma_mapping_error(tx_ring->dev, dma))
			goto dma_error;

		/* record length, and DMA address */
		dma_unmap_len_set(tx_buf, len, size);
		dma_unmap_addr_set(tx_buf, dma, dma);

		/* align size to end of page */
		max_data += -dma & (ICE_MAX_READ_REQ_SIZE - 1);

		tx_desc->buf_addr = cpu_to_le64(dma);

		/* account for data chunks larger than the hardware
		 * can handle
		 */
		while (unlikely(size > ICE_MAX_DATA_PER_TXD)) {
			tx_desc->cmd_type_offset_bsz =
				build_ctob(td_cmd, td_offset, max_data, td_tag);

			tx_desc++;
			i++;

			if (i == tx_ring->count) {
				tx_desc = ICE_TX_DESC(tx_ring, 0);
				i = 0;
			}

			dma += max_data;
			size -= max_data;

			max_data = ICE_MAX_DATA_PER_TXD_ALIGNED;
			tx_desc->buf_addr = cpu_to_le64(dma);
		}

		if (likely(!data_len))
			break;

		tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset,
							  size, td_tag);

		tx_desc++;
		i++;

		if (i == tx_ring->count) {
			tx_desc = ICE_TX_DESC(tx_ring, 0);
			i = 0;
		}

		size = skb_frag_size(frag);
		data_len -= size;

		dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size,
				       DMA_TO_DEVICE);

		tx_buf = &tx_ring->tx_buf[i];
	}

	/* record bytecount for BQL */
	netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);

	/* record SW timestamp if HW timestamp is not available */
	skb_tx_timestamp(first->skb);

	i++;
	if (i == tx_ring->count)
		i = 0;

	/* write last descriptor with RS and EOP bits */
	td_cmd |= (u64)(ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS);
	tx_desc->cmd_type_offset_bsz =
		build_ctob(td_cmd, td_offset, size, td_tag);

	/* Force memory writes to complete before letting h/w know there
	 * are new descriptors to fetch.
	 *
	 * We also use this memory barrier to make certain all of the
	 * status bits have been updated before next_to_watch is written.
	 */
	wmb();

	/* set next_to_watch value indicating a packet is present */
	first->next_to_watch = tx_desc;

	tx_ring->next_to_use = i;

	ice_maybe_stop_tx(tx_ring, DESC_NEEDED);

	/* notify HW of packet */
	if (netif_xmit_stopped(txring_txq(tx_ring)) || !skb->xmit_more) {
		writel(i, tx_ring->tail);

		/* we need this if more than one processor can write to our tail
		 * at a time, it synchronizes IO on IA64/Altix systems
		 */
		mmiowb();
	}

	return;

dma_error:
	/* clear dma mappings for failed tx_buf map */
	for (;;) {
		tx_buf = &tx_ring->tx_buf[i];
		ice_unmap_and_free_tx_buf(tx_ring, tx_buf);
		if (tx_buf == first)
			break;
		if (i == 0)
			i = tx_ring->count;
		i--;
	}

	tx_ring->next_to_use = i;
}

/**
 * ice_tx_csum - Enable Tx checksum offloads
 * @first: pointer to the first descriptor
 * @off: pointer to struct that holds offload parameters
 *
 * Returns 0 or error (negative) if checksum offload can't happen, 1 otherwise.
 */
static
int ice_tx_csum(struct ice_tx_buf *first, struct ice_tx_offload_params *off)
{
	u32 l4_len = 0, l3_len = 0, l2_len = 0;
	struct sk_buff *skb = first->skb;
	union {
		struct iphdr *v4;
		struct ipv6hdr *v6;
		unsigned char *hdr;
	} ip;
	union {
		struct tcphdr *tcp;
		unsigned char *hdr;
	} l4;
	__be16 frag_off, protocol;
	unsigned char *exthdr;
	u32 offset, cmd = 0;
	u8 l4_proto = 0;

	if (skb->ip_summed != CHECKSUM_PARTIAL)
		return 0;

	ip.hdr = skb_network_header(skb);
	l4.hdr = skb_transport_header(skb);

	/* compute outer L2 header size */
	l2_len = ip.hdr - skb->data;
	offset = (l2_len / 2) << ICE_TX_DESC_LEN_MACLEN_S;

	if (skb->encapsulation)
		return -1;

	/* Enable IP checksum offloads */
	protocol = vlan_get_protocol(skb);
	if (protocol == htons(ETH_P_IP)) {
		l4_proto = ip.v4->protocol;
		/* the stack computes the IP header already, the only time we
		 * need the hardware to recompute it is in the case of TSO.
		 */
		if (first->tx_flags & ICE_TX_FLAGS_TSO)
			cmd |= ICE_TX_DESC_CMD_IIPT_IPV4_CSUM;
		else
			cmd |= ICE_TX_DESC_CMD_IIPT_IPV4;

	} else if (protocol == htons(ETH_P_IPV6)) {
		cmd |= ICE_TX_DESC_CMD_IIPT_IPV6;
		exthdr = ip.hdr + sizeof(*ip.v6);
		l4_proto = ip.v6->nexthdr;
		if (l4.hdr != exthdr)
			ipv6_skip_exthdr(skb, exthdr - skb->data, &l4_proto,
					 &frag_off);
	} else {
		return -1;
	}

	/* compute inner L3 header size */
	l3_len = l4.hdr - ip.hdr;
	offset |= (l3_len / 4) << ICE_TX_DESC_LEN_IPLEN_S;

	/* Enable L4 checksum offloads */
	switch (l4_proto) {
	case IPPROTO_TCP:
		/* enable checksum offloads */
		cmd |= ICE_TX_DESC_CMD_L4T_EOFT_TCP;
		l4_len = l4.tcp->doff;
		offset |= l4_len << ICE_TX_DESC_LEN_L4_LEN_S;
		break;
	case IPPROTO_UDP:
		/* enable UDP checksum offload */
		cmd |= ICE_TX_DESC_CMD_L4T_EOFT_UDP;
		l4_len = (sizeof(struct udphdr) >> 2);
		offset |= l4_len << ICE_TX_DESC_LEN_L4_LEN_S;
		break;
	case IPPROTO_SCTP:
	default:
		if (first->tx_flags & ICE_TX_FLAGS_TSO)
			return -1;
		skb_checksum_help(skb);
		return 0;
	}

	off->td_cmd |= cmd;
	off->td_offset |= offset;
	return 1;
}

/**
 * ice_tx_prepare_vlan_flags - prepare generic TX VLAN tagging flags for HW
 * @tx_ring: ring to send buffer on
 * @first: pointer to struct ice_tx_buf
 *
 * Checks the skb and sets up the corresponding generic transmit flags
 * related to VLAN tagging for the HW, such as VLAN, DCB, etc.
 *
 * Returns an error code if the frame should be dropped, otherwise returns 0
 * to indicate the flags have been set properly.
 */
static int
ice_tx_prepare_vlan_flags(struct ice_ring *tx_ring, struct ice_tx_buf *first)
{
	struct sk_buff *skb = first->skb;
	__be16 protocol = skb->protocol;

	if (protocol == htons(ETH_P_8021Q) &&
	    !(tx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_TX)) {
		/* when HW VLAN acceleration is turned off by the user the
		 * stack sets the protocol to 8021q so that the driver
		 * can take any steps required to support the SW only
		 * VLAN handling. In our case the driver doesn't need
		 * to take any further steps so just set the protocol
		 * to the encapsulated ethertype.
		 */
		skb->protocol = vlan_get_protocol(skb);
		goto out;
	}

	/* if we have a HW VLAN tag being added, default to the HW one */
	if (skb_vlan_tag_present(skb)) {
		first->tx_flags |= skb_vlan_tag_get(skb) << ICE_TX_FLAGS_VLAN_S;
		first->tx_flags |= ICE_TX_FLAGS_HW_VLAN;
	} else if (protocol == htons(ETH_P_8021Q)) {
		struct vlan_hdr *vhdr, _vhdr;

		/* for SW VLAN, check the next protocol and store the tag */
		vhdr = (struct vlan_hdr *)skb_header_pointer(skb, ETH_HLEN,
							     sizeof(_vhdr),
							     &_vhdr);
		if (!vhdr)
			return -EINVAL;

		first->tx_flags |= ntohs(vhdr->h_vlan_TCI) <<
				   ICE_TX_FLAGS_VLAN_S;
		first->tx_flags |= ICE_TX_FLAGS_SW_VLAN;
	}

out:
	return 0;
}

/**
 * ice_tso - computes mss and TSO length to prepare for TSO
 * @first: pointer to struct ice_tx_buf
 * @off: pointer to struct that holds offload parameters
 *
 * Returns 0 or error (negative) if TSO can't happen, 1 otherwise.
 */
static
int ice_tso(struct ice_tx_buf *first, struct ice_tx_offload_params *off)
{
	struct sk_buff *skb = first->skb;
	union {
		struct iphdr *v4;
		struct ipv6hdr *v6;
		unsigned char *hdr;
	} ip;
	union {
		struct tcphdr *tcp;
		unsigned char *hdr;
	} l4;
	u64 cd_mss, cd_tso_len;
	u32 paylen, l4_start;
	int err;

	if (skb->ip_summed != CHECKSUM_PARTIAL)
		return 0;

	if (!skb_is_gso(skb))
		return 0;

	err = skb_cow_head(skb, 0);
	if (err < 0)
		return err;

	ip.hdr = skb_network_header(skb);
	l4.hdr = skb_transport_header(skb);

	/* initialize outer IP header fields */
	if (ip.v4->version == 4) {
		ip.v4->tot_len = 0;
		ip.v4->check = 0;
	} else {
		ip.v6->payload_len = 0;
	}

	/* determine offset of transport header */
	l4_start = l4.hdr - skb->data;

	/* remove payload length from checksum */
	paylen = skb->len - l4_start;
	csum_replace_by_diff(&l4.tcp->check, (__force __wsum)htonl(paylen));

	/* compute length of segmentation header */
	off->header_len = (l4.tcp->doff * 4) + l4_start;

	/* update gso_segs and bytecount; bytecount was initialized to skb->len
	 * in ice_xmit_frame_ring(), so add the headers replicated for each
	 * additional segment
	 */
	first->gso_segs = skb_shinfo(skb)->gso_segs;
	first->bytecount += (first->gso_segs - 1) * off->header_len;

	cd_tso_len = skb->len - off->header_len;
	cd_mss = skb_shinfo(skb)->gso_size;

	/* record cdesc_qw1 with TSO parameters */
	off->cd_qw1 |= ICE_TX_DESC_DTYPE_CTX |
		       (ICE_TX_CTX_DESC_TSO << ICE_TXD_CTX_QW1_CMD_S) |
		       (cd_tso_len << ICE_TXD_CTX_QW1_TSO_LEN_S) |
		       (cd_mss << ICE_TXD_CTX_QW1_MSS_S);
	first->tx_flags |= ICE_TX_FLAGS_TSO;
	return 1;
}

/**
 * ice_txd_use_count - estimate the number of descriptors needed for Tx
 * @size: transmit request size in bytes
 *
 * Due to hardware alignment restrictions (4K alignment), we need to
 * assume that we can have no more than 12K of data per descriptor, even
 * though each descriptor can take up to 16K - 1 bytes of aligned memory.
 * Thus, we need to divide by 12K. But division is slow! Instead,
 * we decompose the operation into shifts and one relatively cheap
 * multiply operation.
 *
 * To divide by 12K, we first divide by 4K, then divide by 3:
 *     To divide by 4K, shift right by 12 bits
 *     To divide by 3, multiply by 85, then divide by 256
 *     (Divide by 256 is done by shifting right by 8 bits)
 * Finally, we add one to round up. Because 256 isn't an exact multiple of
 * 3, we'll underestimate near each multiple of 12K. This is actually more
 * accurate as we have 4K - 1 of wiggle room that we can fit into the last
 * segment. For our purposes this is accurate out to 1M which is orders of
 * magnitude greater than our largest possible GSO size.
 *
 * This would then be implemented as:
 *     return (((size >> 12) * 85) >> 8) + 1;
 *
 * Since multiplication and division are commutative, we can reorder
 * operations into:
 *     return ((size * 85) >> 20) + 1;
 */
static unsigned int ice_txd_use_count(unsigned int size)
{
	return ((size * 85) >> 20) + 1;
}
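
/* Worked example (illustrative, not part of the original source): for a 64KB
 * TSO payload, ice_txd_use_count(65536) = ((65536 * 85) >> 20) + 1
 * = (5570560 >> 20) + 1 = 5 + 1 = 6, which matches the exact result of
 * dividing 65536 by 12K and rounding up.
 */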

/**
 * ice_xmit_desc_count - calculate number of tx descriptors needed
 * @skb: send buffer
 *
 * Returns number of data descriptors needed for this skb.
 */
static unsigned int ice_xmit_desc_count(struct sk_buff *skb)
{
	const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
	unsigned int nr_frags = skb_shinfo(skb)->nr_frags;
	unsigned int count = 0, size = skb_headlen(skb);

	for (;;) {
		count += ice_txd_use_count(size);

		if (!nr_frags--)
			break;

		size = skb_frag_size(frag++);
	}

	return count;
}
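
/* Example (illustrative, not part of the original source): an skb with a
 * 1500 byte linear area and two 4KB page fragments needs
 * ice_txd_use_count(1500) + 2 * ice_txd_use_count(4096) = 1 + 1 + 1 = 3
 * data descriptors.
 */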

/**
 * __ice_chk_linearize - Check if there are more than 8 buffers per packet
 * @skb: send buffer
 *
 * Note: This HW can't DMA more than 8 buffers to build a packet on the wire
 * and so we need to figure out the cases where we need to linearize the skb.
 *
 * For TSO we need to count the TSO header and segment payload separately.
 * As such we need to check cases where we have 7 fragments or more as we
 * can potentially require 9 DMA transactions, 1 for the TSO header, 1 for
 * the segment payload in the first descriptor, and another 7 for the
 * fragments.
 */
static bool __ice_chk_linearize(struct sk_buff *skb)
{
	const struct skb_frag_struct *frag, *stale;
	int nr_frags, sum;

	/* no need to check if number of frags is less than 7 */
	nr_frags = skb_shinfo(skb)->nr_frags;
	if (nr_frags < (ICE_MAX_BUF_TXD - 1))
		return false;

	/* We need to walk through the list and validate that each group
	 * of 6 fragments totals at least gso_size.
	 */
	nr_frags -= ICE_MAX_BUF_TXD - 2;
	frag = &skb_shinfo(skb)->frags[0];

	/* Initialize size to the negative value of gso_size minus 1. We
	 * use this as the worst case scenario in which the frag ahead
	 * of us only provides one byte which is why we are limited to 6
	 * descriptors for a single transmit as the header and previous
	 * fragment are already consuming 2 descriptors.
	 */
	sum = 1 - skb_shinfo(skb)->gso_size;

	/* Add size of frags 0 through 4 to create our initial sum */
	sum += skb_frag_size(frag++);
	sum += skb_frag_size(frag++);
	sum += skb_frag_size(frag++);
	sum += skb_frag_size(frag++);
	sum += skb_frag_size(frag++);

	/* Walk through fragments adding latest fragment, testing it, and
	 * then removing stale fragments from the sum.
	 */
	stale = &skb_shinfo(skb)->frags[0];
	for (;;) {
		sum += skb_frag_size(frag++);

		/* if sum is negative we failed to make sufficient progress */
		if (sum < 0)
			return true;

		if (!nr_frags--)
			break;

		sum -= skb_frag_size(stale++);
	}

	return false;
}
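
/* Example (illustrative, not part of the original source): a TSO skb with
 * gso_size 4096 spread across eight 256-byte fragments fails this check;
 * the first sliding window evaluates 1 - 4096 + 6 * 256 < 0, so the skb is
 * linearized before mapping rather than risking more than 8 DMA buffers for
 * a single segment.
 */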

/**
 * ice_chk_linearize - Check if there are more than 8 fragments per packet
 * @skb: send buffer
 * @count: number of buffers used
 *
 * Note: Our HW can't scatter-gather more than 8 fragments to build
 * a packet on the wire and so we need to figure out the cases where we
 * need to linearize the skb.
 */
static bool ice_chk_linearize(struct sk_buff *skb, unsigned int count)
{
	/* Both TSO and single send will work if count is less than 8 */
	if (likely(count < ICE_MAX_BUF_TXD))
		return false;

	if (skb_is_gso(skb))
		return __ice_chk_linearize(skb);

	/* we can support up to 8 data buffers for a single send */
	return count != ICE_MAX_BUF_TXD;
}

/**
 * ice_xmit_frame_ring - Sends buffer on Tx ring
 * @skb: send buffer
 * @tx_ring: ring to send buffer on
 *
 * Returns NETDEV_TX_OK if sent, else an error code
 */
static netdev_tx_t
ice_xmit_frame_ring(struct sk_buff *skb, struct ice_ring *tx_ring)
{
	struct ice_tx_offload_params offload = { 0 };
	struct ice_tx_buf *first;
	unsigned int count;
	int tso, csum;

	count = ice_xmit_desc_count(skb);
	if (ice_chk_linearize(skb, count)) {
		if (__skb_linearize(skb))
			goto out_drop;
		count = ice_txd_use_count(skb->len);
		tx_ring->tx_stats.tx_linearize++;
	}

	/* need: 1 descriptor per page * PAGE_SIZE/ICE_MAX_DATA_PER_TXD,
	 *       + 1 desc for skb_head_len/ICE_MAX_DATA_PER_TXD,
	 *       + 4 desc gap to avoid the cache line where head is,
	 *       + 1 desc for context descriptor,
	 * otherwise try next time
	 */
	if (ice_maybe_stop_tx(tx_ring, count + 4 + 1)) {
		tx_ring->tx_stats.tx_busy++;
		return NETDEV_TX_BUSY;
	}

	offload.tx_ring = tx_ring;

	/* record the location of the first descriptor for this packet */
	first = &tx_ring->tx_buf[tx_ring->next_to_use];
	first->skb = skb;
	first->bytecount = max_t(unsigned int, skb->len, ETH_ZLEN);
	first->gso_segs = 1;
	first->tx_flags = 0;

	/* prepare the VLAN tagging flags for Tx */
	if (ice_tx_prepare_vlan_flags(tx_ring, first))
		goto out_drop;

	/* set up TSO offload */
	tso = ice_tso(first, &offload);
	if (tso < 0)
		goto out_drop;

	/* always set up Tx checksum offload */
	csum = ice_tx_csum(first, &offload);
	if (csum < 0)
		goto out_drop;

	if (tso || offload.cd_tunnel_params) {
		struct ice_tx_ctx_desc *cdesc;
		int i = tx_ring->next_to_use;

		/* grab the next descriptor */
		cdesc = ICE_TX_CTX_DESC(tx_ring, i);
		i++;
		tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;

		/* setup context descriptor */
		cdesc->tunneling_params = cpu_to_le32(offload.cd_tunnel_params);
		cdesc->l2tag2 = cpu_to_le16(offload.cd_l2tag2);
		cdesc->rsvd = cpu_to_le16(0);
		cdesc->qw1 = cpu_to_le64(offload.cd_qw1);
	}

	ice_tx_map(tx_ring, first, &offload);
	return NETDEV_TX_OK;

out_drop:
	dev_kfree_skb_any(skb);
	return NETDEV_TX_OK;
}

/**
 * ice_start_xmit - Selects the correct VSI and Tx queue to send buffer
 * @skb: send buffer
 * @netdev: network interface device structure
 *
 * Returns NETDEV_TX_OK if sent, else an error code
 */
netdev_tx_t ice_start_xmit(struct sk_buff *skb, struct net_device *netdev)
{
	struct ice_netdev_priv *np = netdev_priv(netdev);
	struct ice_vsi *vsi = np->vsi;
	struct ice_ring *tx_ring;

	tx_ring = vsi->tx_rings[skb->queue_mapping];

	/* hardware can't handle really short frames, hardware padding works
	 * beyond this point
	 */
	if (skb_put_padto(skb, ICE_MIN_TX_LEN))
		return NETDEV_TX_OK;

	return ice_xmit_frame_ring(skb, tx_ring);
}