pio_copy.c 23 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867
  1. /*
  2. * Copyright(c) 2015, 2016 Intel Corporation.
  3. *
  4. * This file is provided under a dual BSD/GPLv2 license. When using or
  5. * redistributing this file, you may do so under either license.
  6. *
  7. * GPL LICENSE SUMMARY
  8. *
  9. * This program is free software; you can redistribute it and/or modify
  10. * it under the terms of version 2 of the GNU General Public License as
  11. * published by the Free Software Foundation.
  12. *
  13. * This program is distributed in the hope that it will be useful, but
  14. * WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16. * General Public License for more details.
  17. *
  18. * BSD LICENSE
  19. *
  20. * Redistribution and use in source and binary forms, with or without
  21. * modification, are permitted provided that the following conditions
  22. * are met:
  23. *
  24. * - Redistributions of source code must retain the above copyright
  25. * notice, this list of conditions and the following disclaimer.
  26. * - Redistributions in binary form must reproduce the above copyright
  27. * notice, this list of conditions and the following disclaimer in
  28. * the documentation and/or other materials provided with the
  29. * distribution.
  30. * - Neither the name of Intel Corporation nor the names of its
  31. * contributors may be used to endorse or promote products derived
  32. * from this software without specific prior written permission.
  33. *
  34. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  35. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  36. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  37. * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  38. * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  39. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  40. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  41. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  42. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  43. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  44. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  45. *
  46. */
  47. #include "hfi.h"
  48. /* additive distance between non-SOP and SOP space */
  /* added to a buffer address to reach the SOP=1 alias of the same offset */
  49. #define SOP_DISTANCE (TXE_PIO_SIZE / 2)
  /*
   * Mask selecting the byte offset of an address within a PIO block; a zero
   * result means the address sits on a block boundary.
   * NOTE(review): assumes PIO_BLOCK_SIZE is a power of two - confirm.
   */
  50. #define PIO_BLOCK_MASK (PIO_BLOCK_SIZE - 1)
  51. /* number of QUADWORDs in a block */
  52. #define PIO_BLOCK_QWS (PIO_BLOCK_SIZE / sizeof(u64))
  53. /**
  54. * pio_copy - copy data block to MMIO space
  55. * @pbuf: a number of blocks allocated within a PIO send context
  56. * @pbc: PBC to send
  57. * @from: source, must be 8 byte aligned
  58. * @count: number of DWORD (32-bit) quantities to copy from source
  59. *
  60. * Copy data from source to PIO Send Buffer memory, 8 bytes at a time.
  61. * Must always write full BLOCK_SIZE bytes blocks. The first block must
  62. * be written to the corresponding SOP=1 address.
  63. *
  64. * Known:
  65. * o pbuf->start always starts on a block boundary
  66. * o pbuf can wrap only at a block boundary
  67. */
  68. void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc,
  69. const void *from, size_t count)
  70. {
  71. void __iomem *dest = pbuf->start + SOP_DISTANCE;
  72. void __iomem *send = dest + PIO_BLOCK_SIZE;
  73. void __iomem *dend; /* 8-byte data end */
  74. /* write the PBC */
  75. writeq(pbc, dest);
  76. dest += sizeof(u64);
  77. /* calculate where the QWORD data ends - in SOP=1 space */
  78. dend = dest + ((count >> 1) * sizeof(u64));
  79. if (dend < send) {
  80. /*
  81. * all QWORD data is within the SOP block, does *not*
  82. * reach the end of the SOP block
  83. */
  84. while (dest < dend) {
  85. writeq(*(u64 *)from, dest);
  86. from += sizeof(u64);
  87. dest += sizeof(u64);
  88. }
  89. /*
  90. * No boundary checks are needed here:
  91. * 0. We're not on the SOP block boundary
  92. * 1. The possible DWORD dangle will still be within
  93. * the SOP block
  94. * 2. We cannot wrap except on a block boundary.
  95. */
  96. } else {
  97. /* QWORD data extends _to_ or beyond the SOP block */
  98. /* write 8-byte SOP chunk data */
  99. while (dest < send) {
  100. writeq(*(u64 *)from, dest);
  101. from += sizeof(u64);
  102. dest += sizeof(u64);
  103. }
  104. /* drop out of the SOP range */
  105. dest -= SOP_DISTANCE;
  106. dend -= SOP_DISTANCE;
  107. /*
  108. * If the wrap comes before or matches the data end,
  109. * copy until until the wrap, then wrap.
  110. *
  111. * If the data ends at the end of the SOP above and
  112. * the buffer wraps, then pbuf->end == dend == dest
  113. * and nothing will get written, but we will wrap in
  114. * case there is a dangling DWORD.
  115. */
  116. if (pbuf->end <= dend) {
  117. while (dest < pbuf->end) {
  118. writeq(*(u64 *)from, dest);
  119. from += sizeof(u64);
  120. dest += sizeof(u64);
  121. }
  122. dest -= pbuf->size;
  123. dend -= pbuf->size;
  124. }
  125. /* write 8-byte non-SOP, non-wrap chunk data */
  126. while (dest < dend) {
  127. writeq(*(u64 *)from, dest);
  128. from += sizeof(u64);
  129. dest += sizeof(u64);
  130. }
  131. }
  132. /* at this point we have wrapped if we are going to wrap */
  133. /* write dangling u32, if any */
  134. if (count & 1) {
  135. union mix val;
  136. val.val64 = 0;
  137. val.val32[0] = *(u32 *)from;
  138. writeq(val.val64, dest);
  139. dest += sizeof(u64);
  140. }
  141. /*
  142. * fill in rest of block, no need to check pbuf->end
  143. * as we only wrap on a block boundary
  144. */
  145. while (((unsigned long)dest & PIO_BLOCK_MASK) != 0) {
  146. writeq(0, dest);
  147. dest += sizeof(u64);
  148. }
  149. /* finished with this buffer */
  150. this_cpu_dec(*pbuf->sc->buffers_allocated);
  151. preempt_enable();
  152. }
  153. /* USE_SHIFTS is faster in user-space tests on a Xeon X5570 @ 2.93GHz */
  /*
   * Selects the shift/mask carry helpers. The symbol is only tested with
   * #ifdef, so its value is irrelevant; remove the definition to build the
   * byte-copy helpers instead.
   */
  154. #define USE_SHIFTS 1
  155. #ifdef USE_SHIFTS
  156. /*
  157. * Handle carry bytes using shifts and masks.
  158. *
  159. * NOTE: the value of the unused portion of carry is expected to always be zero.
  160. */
  161. /*
  162. * "zero" shift - bit shift used to zero out upper bytes. Input is
  163. * the count of LSB bytes to preserve.
  164. */
  /*
   * zshift(0) evaluates to 64, which is not a valid shift count for a
   * 64-bit value, so users must handle a zero byte count separately.
   */
  165. #define zshift(x) (8 * (8 - (x)))
  166. /*
  167. * "merge" shift - bit shift used to merge with carry bytes. Input is
  168. * the LSB byte count to move beyond.
  169. */
  /* converts a byte count into the equivalent number of bits */
  170. #define mshift(x) (8 * (x))
  171. /*
  172. * Read nbytes bytes from "from" and return them in the LSB bytes
  173. * of pbuf->carry. Other bytes are zeroed. Any previous value
  174. * pbuf->carry is lost.
  175. *
  176. * NOTES:
  177. * o do not read from from if nbytes is zero
  178. * o from may _not_ be u64 aligned
  179. * o nbytes must not span a QW boundary
  180. */
  181. static inline void read_low_bytes(struct pio_buf *pbuf, const void *from,
  182. unsigned int nbytes)
  183. {
  184. unsigned long off;
  185. if (nbytes == 0) {
  186. pbuf->carry.val64 = 0;
  187. } else {
  188. /* align our pointer */
  189. off = (unsigned long)from & 0x7;
  190. from = (void *)((unsigned long)from & ~0x7l);
  191. pbuf->carry.val64 = ((*(u64 *)from)
  192. << zshift(nbytes + off))/* zero upper bytes */
  193. >> zshift(nbytes); /* place at bottom */
  194. }
  195. pbuf->carry_bytes = nbytes;
  196. }
  197. /*
  198. * Read nbytes bytes from "from" and put them at the next significant bytes
  199. * of pbuf->carry. Unused bytes are zeroed. It is expected that the extra
  200. * read does not overfill carry.
  201. *
  202. * NOTES:
  203. * o from may _not_ be u64 aligned
  204. * o nbytes may span a QW boundary
  205. */
  206. static inline void read_extra_bytes(struct pio_buf *pbuf,
  207. const void *from, unsigned int nbytes)
  208. {
  209. unsigned long off = (unsigned long)from & 0x7;
  210. unsigned int room, xbytes;
  211. /* align our pointer */
  212. from = (void *)((unsigned long)from & ~0x7l);
  213. /* check count first - don't read anything if count is zero */
  214. while (nbytes) {
  215. /* find the number of bytes in this u64 */
  216. room = 8 - off; /* this u64 has room for this many bytes */
  217. xbytes = min(room, nbytes);
  218. /*
  219. * shift down to zero lower bytes, shift up to zero upper
  220. * bytes, shift back down to move into place
  221. */
  222. pbuf->carry.val64 |= (((*(u64 *)from)
  223. >> mshift(off))
  224. << zshift(xbytes))
  225. >> zshift(xbytes + pbuf->carry_bytes);
  226. off = 0;
  227. pbuf->carry_bytes += xbytes;
  228. nbytes -= xbytes;
  229. from += sizeof(u64);
  230. }
  231. }
  232. /*
  233. * Zero extra bytes from the end of pbuf->carry.
  234. *
  235. * NOTES:
  236. * o zbytes <= old_bytes
  237. */
  238. static inline void zero_extra_bytes(struct pio_buf *pbuf, unsigned int zbytes)
  239. {
  240. unsigned int remaining;
  241. if (zbytes == 0) /* nothing to do */
  242. return;
  243. remaining = pbuf->carry_bytes - zbytes; /* remaining bytes */
  244. /* NOTE: zshift only guaranteed to work if remaining != 0 */
  245. if (remaining)
  246. pbuf->carry.val64 = (pbuf->carry.val64 << zshift(remaining))
  247. >> zshift(remaining);
  248. else
  249. pbuf->carry.val64 = 0;
  250. pbuf->carry_bytes = remaining;
  251. }
  252. /*
  253. * Write a quad word using parts of pbuf->carry and the next 8 bytes of src.
  254. * Put the unused part of the next 8 bytes of src into the LSB bytes of
  255. * pbuf->carry with the upper bytes zeroed..
  256. *
  257. * NOTES:
  258. * o result must keep unused bytes zeroed
  259. * o src must be u64 aligned
  260. */
  261. static inline void merge_write8(
  262. struct pio_buf *pbuf,
  263. void __iomem *dest,
  264. const void *src)
  265. {
  266. u64 new, temp;
  267. new = *(u64 *)src;
  268. temp = pbuf->carry.val64 | (new << mshift(pbuf->carry_bytes));
  269. writeq(temp, dest);
  270. pbuf->carry.val64 = new >> zshift(pbuf->carry_bytes);
  271. }
  272. /*
  273. * Write a quad word using all bytes of carry.
  274. */
  275. static inline void carry8_write8(union mix carry, void __iomem *dest)
  276. {
  277. writeq(carry.val64, dest);
  278. }
  279. /*
  280. * Write a quad word using all the valid bytes of carry. If carry
  281. * has zero valid bytes, nothing is written.
  282. * Returns 0 on nothing written, non-zero on quad word written.
  283. */
  284. static inline int carry_write8(struct pio_buf *pbuf, void __iomem *dest)
  285. {
  286. if (pbuf->carry_bytes) {
  287. /* unused bytes are always kept zeroed, so just write */
  288. writeq(pbuf->carry.val64, dest);
  289. return 1;
  290. }
  291. return 0;
  292. }
  293. #else /* USE_SHIFTS */
  294. /*
  295. * Handle carry bytes using byte copies.
  296. *
  297. * NOTE: the value of the unused portion of carry is left uninitialized.
  298. */
  299. /*
  300. * Jump copy - no-loop copy for < 8 bytes.
  301. */
  302. static inline void jcopy(u8 *dest, const u8 *src, u32 n)
  303. {
  304. switch (n) {
  305. case 7:
  306. *dest++ = *src++;
  307. case 6:
  308. *dest++ = *src++;
  309. case 5:
  310. *dest++ = *src++;
  311. case 4:
  312. *dest++ = *src++;
  313. case 3:
  314. *dest++ = *src++;
  315. case 2:
  316. *dest++ = *src++;
  317. case 1:
  318. *dest++ = *src++;
  319. }
  320. }
  321. /*
  322. * Read nbytes from "from" and and place them in the low bytes
  323. * of pbuf->carry. Other bytes are left as-is. Any previous
  324. * value in pbuf->carry is lost.
  325. *
  326. * NOTES:
  327. * o do not read from from if nbytes is zero
  328. * o from may _not_ be u64 aligned.
  329. */
  330. static inline void read_low_bytes(struct pio_buf *pbuf, const void *from,
  331. unsigned int nbytes)
  332. {
  333. jcopy(&pbuf->carry.val8[0], from, nbytes);
  334. pbuf->carry_bytes = nbytes;
  335. }
  336. /*
  337. * Read nbytes bytes from "from" and put them at the end of pbuf->carry.
  338. * It is expected that the extra read does not overfill carry.
  339. *
  340. * NOTES:
  341. * o from may _not_ be u64 aligned
  342. * o nbytes may span a QW boundary
  343. */
  344. static inline void read_extra_bytes(struct pio_buf *pbuf,
  345. const void *from, unsigned int nbytes)
  346. {
  347. jcopy(&pbuf->carry.val8[pbuf->carry_bytes], from, nbytes);
  348. pbuf->carry_bytes += nbytes;
  349. }
  350. /*
  351. * Zero extra bytes from the end of pbuf->carry.
  352. *
  353. * We do not care about the value of unused bytes in carry, so just
  354. * reduce the byte count.
  355. *
  356. * NOTES:
  357. * o zbytes <= old_bytes
  358. */
  359. static inline void zero_extra_bytes(struct pio_buf *pbuf, unsigned int zbytes)
  360. {
  361. pbuf->carry_bytes -= zbytes;
  362. }
  363. /*
  364. * Write a quad word using parts of pbuf->carry and the next 8 bytes of src.
  365. * Put the unused part of the next 8 bytes of src into the low bytes of
  366. * pbuf->carry.
  367. */
  368. static inline void merge_write8(
  369. struct pio_buf *pbuf,
  370. void *dest,
  371. const void *src)
  372. {
  373. u32 remainder = 8 - pbuf->carry_bytes;
  374. jcopy(&pbuf->carry.val8[pbuf->carry_bytes], src, remainder);
  375. writeq(pbuf->carry.val64, dest);
  376. jcopy(&pbuf->carry.val8[0], src + remainder, pbuf->carry_bytes);
  377. }
  378. /*
  379. * Write a quad word using all bytes of carry.
  380. */
  381. static inline void carry8_write8(union mix carry, void *dest)
  382. {
  383. writeq(carry.val64, dest);
  384. }
  385. /*
  386. * Write a quad word using all the valid bytes of carry. If carry
  387. * has zero valid bytes, nothing is written.
  388. * Returns 0 on nothing written, non-zero on quad word written.
  389. */
  390. static inline int carry_write8(struct pio_buf *pbuf, void *dest)
  391. {
  392. if (pbuf->carry_bytes) {
  393. u64 zero = 0;
  394. jcopy(&pbuf->carry.val8[pbuf->carry_bytes], (u8 *)&zero,
  395. 8 - pbuf->carry_bytes);
  396. writeq(pbuf->carry.val64, dest);
  397. return 1;
  398. }
  399. return 0;
  400. }
  401. #endif /* USE_SHIFTS */
  402. /*
  403. * Segmented PIO Copy - start
  404. *
  405. * Start a PIO copy.
  406. *
  407. * @pbuf: destination buffer
  408. * @pbc: the PBC for the PIO buffer
  409. * @from: data source, QWORD aligned
  410. * @nbytes: bytes to copy
  411. */
  412. void seg_pio_copy_start(struct pio_buf *pbuf, u64 pbc,
  413. const void *from, size_t nbytes)
  414. {
  415. void __iomem *dest = pbuf->start + SOP_DISTANCE;
  416. void __iomem *send = dest + PIO_BLOCK_SIZE;
  417. void __iomem *dend; /* 8-byte data end */
  418. writeq(pbc, dest);
  419. dest += sizeof(u64);
  420. /* calculate where the QWORD data ends - in SOP=1 space */
  421. dend = dest + ((nbytes >> 3) * sizeof(u64));
  422. if (dend < send) {
  423. /*
  424. * all QWORD data is within the SOP block, does *not*
  425. * reach the end of the SOP block
  426. */
  427. while (dest < dend) {
  428. writeq(*(u64 *)from, dest);
  429. from += sizeof(u64);
  430. dest += sizeof(u64);
  431. }
  432. /*
  433. * No boundary checks are needed here:
  434. * 0. We're not on the SOP block boundary
  435. * 1. The possible DWORD dangle will still be within
  436. * the SOP block
  437. * 2. We cannot wrap except on a block boundary.
  438. */
  439. } else {
  440. /* QWORD data extends _to_ or beyond the SOP block */
  441. /* write 8-byte SOP chunk data */
  442. while (dest < send) {
  443. writeq(*(u64 *)from, dest);
  444. from += sizeof(u64);
  445. dest += sizeof(u64);
  446. }
  447. /* drop out of the SOP range */
  448. dest -= SOP_DISTANCE;
  449. dend -= SOP_DISTANCE;
  450. /*
  451. * If the wrap comes before or matches the data end,
  452. * copy until until the wrap, then wrap.
  453. *
  454. * If the data ends at the end of the SOP above and
  455. * the buffer wraps, then pbuf->end == dend == dest
  456. * and nothing will get written, but we will wrap in
  457. * case there is a dangling DWORD.
  458. */
  459. if (pbuf->end <= dend) {
  460. while (dest < pbuf->end) {
  461. writeq(*(u64 *)from, dest);
  462. from += sizeof(u64);
  463. dest += sizeof(u64);
  464. }
  465. dest -= pbuf->size;
  466. dend -= pbuf->size;
  467. }
  468. /* write 8-byte non-SOP, non-wrap chunk data */
  469. while (dest < dend) {
  470. writeq(*(u64 *)from, dest);
  471. from += sizeof(u64);
  472. dest += sizeof(u64);
  473. }
  474. }
  475. /* at this point we have wrapped if we are going to wrap */
  476. /* ...but it doesn't matter as we're done writing */
  477. /* save dangling bytes, if any */
  478. read_low_bytes(pbuf, from, nbytes & 0x7);
  479. pbuf->qw_written = 1 /*PBC*/ + (nbytes >> 3);
  480. }
  481. /*
  482. * Mid copy helper, "mixed case" - source is 64-bit aligned but carry
  483. * bytes are non-zero.
  484. *
  485. * Whole u64s must be written to the chip, so bytes must be manually merged.
  486. *
  487. * @pbuf: destination buffer
  488. * @from: data source, is QWORD aligned.
  489. * @nbytes: bytes to copy
  490. *
  491. * Must handle nbytes < 8.
  492. */
  493. static void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes)
  494. {
  495. void __iomem *dest = pbuf->start + (pbuf->qw_written * sizeof(u64));
  496. void __iomem *dend; /* 8-byte data end */
  497. unsigned long qw_to_write = (pbuf->carry_bytes + nbytes) >> 3;
  498. unsigned long bytes_left = (pbuf->carry_bytes + nbytes) & 0x7;
  499. /* calculate 8-byte data end */
  500. dend = dest + (qw_to_write * sizeof(u64));
  501. if (pbuf->qw_written < PIO_BLOCK_QWS) {
  502. /*
  503. * Still within SOP block. We don't need to check for
  504. * wrap because we are still in the first block and
  505. * can only wrap on block boundaries.
  506. */
  507. void __iomem *send; /* SOP end */
  508. void __iomem *xend;
  509. /*
  510. * calculate the end of data or end of block, whichever
  511. * comes first
  512. */
  513. send = pbuf->start + PIO_BLOCK_SIZE;
  514. xend = min(send, dend);
  515. /* shift up to SOP=1 space */
  516. dest += SOP_DISTANCE;
  517. xend += SOP_DISTANCE;
  518. /* write 8-byte chunk data */
  519. while (dest < xend) {
  520. merge_write8(pbuf, dest, from);
  521. from += sizeof(u64);
  522. dest += sizeof(u64);
  523. }
  524. /* shift down to SOP=0 space */
  525. dest -= SOP_DISTANCE;
  526. }
  527. /*
  528. * At this point dest could be (either, both, or neither):
  529. * - at dend
  530. * - at the wrap
  531. */
  532. /*
  533. * If the wrap comes before or matches the data end,
  534. * copy until until the wrap, then wrap.
  535. *
  536. * If dest is at the wrap, we will fall into the if,
  537. * not do the loop, when wrap.
  538. *
  539. * If the data ends at the end of the SOP above and
  540. * the buffer wraps, then pbuf->end == dend == dest
  541. * and nothing will get written.
  542. */
  543. if (pbuf->end <= dend) {
  544. while (dest < pbuf->end) {
  545. merge_write8(pbuf, dest, from);
  546. from += sizeof(u64);
  547. dest += sizeof(u64);
  548. }
  549. dest -= pbuf->size;
  550. dend -= pbuf->size;
  551. }
  552. /* write 8-byte non-SOP, non-wrap chunk data */
  553. while (dest < dend) {
  554. merge_write8(pbuf, dest, from);
  555. from += sizeof(u64);
  556. dest += sizeof(u64);
  557. }
  558. /* adjust carry */
  559. if (pbuf->carry_bytes < bytes_left) {
  560. /* need to read more */
  561. read_extra_bytes(pbuf, from, bytes_left - pbuf->carry_bytes);
  562. } else {
  563. /* remove invalid bytes */
  564. zero_extra_bytes(pbuf, pbuf->carry_bytes - bytes_left);
  565. }
  566. pbuf->qw_written += qw_to_write;
  567. }
  568. /*
  569. * Mid copy helper, "straight case" - source pointer is 64-bit aligned
  570. * with no carry bytes.
  571. *
  572. * @pbuf: destination buffer
  573. * @from: data source, is QWORD aligned
  574. * @nbytes: bytes to copy
  575. *
  576. * Must handle nbytes < 8.
  577. */
  578. static void mid_copy_straight(struct pio_buf *pbuf,
  579. const void *from, size_t nbytes)
  580. {
  581. void __iomem *dest = pbuf->start + (pbuf->qw_written * sizeof(u64));
  582. void __iomem *dend; /* 8-byte data end */
  583. /* calculate 8-byte data end */
  584. dend = dest + ((nbytes >> 3) * sizeof(u64));
  585. if (pbuf->qw_written < PIO_BLOCK_QWS) {
  586. /*
  587. * Still within SOP block. We don't need to check for
  588. * wrap because we are still in the first block and
  589. * can only wrap on block boundaries.
  590. */
  591. void __iomem *send; /* SOP end */
  592. void __iomem *xend;
  593. /*
  594. * calculate the end of data or end of block, whichever
  595. * comes first
  596. */
  597. send = pbuf->start + PIO_BLOCK_SIZE;
  598. xend = min(send, dend);
  599. /* shift up to SOP=1 space */
  600. dest += SOP_DISTANCE;
  601. xend += SOP_DISTANCE;
  602. /* write 8-byte chunk data */
  603. while (dest < xend) {
  604. writeq(*(u64 *)from, dest);
  605. from += sizeof(u64);
  606. dest += sizeof(u64);
  607. }
  608. /* shift down to SOP=0 space */
  609. dest -= SOP_DISTANCE;
  610. }
  611. /*
  612. * At this point dest could be (either, both, or neither):
  613. * - at dend
  614. * - at the wrap
  615. */
  616. /*
  617. * If the wrap comes before or matches the data end,
  618. * copy until until the wrap, then wrap.
  619. *
  620. * If dest is at the wrap, we will fall into the if,
  621. * not do the loop, when wrap.
  622. *
  623. * If the data ends at the end of the SOP above and
  624. * the buffer wraps, then pbuf->end == dend == dest
  625. * and nothing will get written.
  626. */
  627. if (pbuf->end <= dend) {
  628. while (dest < pbuf->end) {
  629. writeq(*(u64 *)from, dest);
  630. from += sizeof(u64);
  631. dest += sizeof(u64);
  632. }
  633. dest -= pbuf->size;
  634. dend -= pbuf->size;
  635. }
  636. /* write 8-byte non-SOP, non-wrap chunk data */
  637. while (dest < dend) {
  638. writeq(*(u64 *)from, dest);
  639. from += sizeof(u64);
  640. dest += sizeof(u64);
  641. }
  642. /* we know carry_bytes was zero on entry to this routine */
  643. read_low_bytes(pbuf, from, nbytes & 0x7);
  644. pbuf->qw_written += nbytes >> 3;
  645. }
  646. /*
  647. * Segmented PIO Copy - middle
  648. *
  649. * Must handle any aligned tail and any aligned source with any byte count.
  650. *
  651. * @pbuf: a number of blocks allocated within a PIO send context
  652. * @from: data source
  653. * @nbytes: number of bytes to copy
  654. */
  655. void seg_pio_copy_mid(struct pio_buf *pbuf, const void *from, size_t nbytes)
  656. {
  657. unsigned long from_align = (unsigned long)from & 0x7;
  658. if (pbuf->carry_bytes + nbytes < 8) {
  659. /* not enough bytes to fill a QW */
  660. read_extra_bytes(pbuf, from, nbytes);
  661. return;
  662. }
  663. if (from_align) {
  664. /* misaligned source pointer - align it */
  665. unsigned long to_align;
  666. /* bytes to read to align "from" */
  667. to_align = 8 - from_align;
  668. /*
  669. * In the advance-to-alignment logic below, we do not need
  670. * to check if we are using more than nbytes. This is because
  671. * if we are here, we already know that carry+nbytes will
  672. * fill at least one QW.
  673. */
  674. if (pbuf->carry_bytes + to_align < 8) {
  675. /* not enough align bytes to fill a QW */
  676. read_extra_bytes(pbuf, from, to_align);
  677. from += to_align;
  678. nbytes -= to_align;
  679. } else {
  680. /* bytes to fill carry */
  681. unsigned long to_fill = 8 - pbuf->carry_bytes;
  682. /* bytes left over to be read */
  683. unsigned long extra = to_align - to_fill;
  684. void __iomem *dest;
  685. /* fill carry... */
  686. read_extra_bytes(pbuf, from, to_fill);
  687. from += to_fill;
  688. nbytes -= to_fill;
  689. /* ...now write carry */
  690. dest = pbuf->start + (pbuf->qw_written * sizeof(u64));
  691. /*
  692. * The two checks immediately below cannot both be
  693. * true, hence the else. If we have wrapped, we
  694. * cannot still be within the first block.
  695. * Conversely, if we are still in the first block, we
  696. * cannot have wrapped. We do the wrap check first
  697. * as that is more likely.
  698. */
  699. /* adjust if we've wrapped */
  700. if (dest >= pbuf->end)
  701. dest -= pbuf->size;
  702. /* jump to SOP range if within the first block */
  703. else if (pbuf->qw_written < PIO_BLOCK_QWS)
  704. dest += SOP_DISTANCE;
  705. carry8_write8(pbuf->carry, dest);
  706. pbuf->qw_written++;
  707. /* read any extra bytes to do final alignment */
  708. /* this will overwrite anything in pbuf->carry */
  709. read_low_bytes(pbuf, from, extra);
  710. from += extra;
  711. nbytes -= extra;
  712. }
  713. /* at this point, from is QW aligned */
  714. }
  715. if (pbuf->carry_bytes)
  716. mid_copy_mix(pbuf, from, nbytes);
  717. else
  718. mid_copy_straight(pbuf, from, nbytes);
  719. }
  720. /*
  721. * Segmented PIO Copy - end
  722. *
  723. * Write any remainder (in pbuf->carry) and finish writing the whole block.
  724. *
  725. * @pbuf: a number of blocks allocated within a PIO send context
  726. */
  727. void seg_pio_copy_end(struct pio_buf *pbuf)
  728. {
  729. void __iomem *dest = pbuf->start + (pbuf->qw_written * sizeof(u64));
  730. /*
  731. * The two checks immediately below cannot both be true, hence the
  732. * else. If we have wrapped, we cannot still be within the first
  733. * block. Conversely, if we are still in the first block, we
  734. * cannot have wrapped. We do the wrap check first as that is
  735. * more likely.
  736. */
  737. /* adjust if we have wrapped */
  738. if (dest >= pbuf->end)
  739. dest -= pbuf->size;
  740. /* jump to the SOP range if within the first block */
  741. else if (pbuf->qw_written < PIO_BLOCK_QWS)
  742. dest += SOP_DISTANCE;
  743. /* write final bytes, if any */
  744. if (carry_write8(pbuf, dest)) {
  745. dest += sizeof(u64);
  746. /*
  747. * NOTE: We do not need to recalculate whether dest needs
  748. * SOP_DISTANCE or not.
  749. *
  750. * If we are in the first block and the dangle write
  751. * keeps us in the same block, dest will need
  752. * to retain SOP_DISTANCE in the loop below.
  753. *
  754. * If we are in the first block and the dangle write pushes
  755. * us to the next block, then loop below will not run
  756. * and dest is not used. Hence we do not need to update
  757. * it.
  758. *
  759. * If we are past the first block, then SOP_DISTANCE
  760. * was never added, so there is nothing to do.
  761. */
  762. }
  763. /* fill in rest of block */
  764. while (((unsigned long)dest & PIO_BLOCK_MASK) != 0) {
  765. writeq(0, dest);
  766. dest += sizeof(u64);
  767. }
  768. /* finished with this buffer */
  769. this_cpu_dec(*pbuf->sc->buffers_allocated);
  770. preempt_enable();
  771. }