/*
 * Intel I/OAT DMA Linux driver
 * Copyright(c) 2004 - 2015 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * The full GNU General Public License is included in this distribution in
 * the file called "COPYING".
 *
 */
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/gfp.h>
#include <linux/dmaengine.h>
#include <linux/dma-mapping.h>
#include <linux/prefetch.h>
#include "../dmaengine.h"
#include "registers.h"
#include "hw.h"
#include "dma.h"

#define MAX_SCF	1024

/* provide a lookup table for setting the source address in the base or
 * extended descriptor of an xor or pq descriptor
 */
static const u8 xor_idx_to_desc = 0xe0;
static const u8 xor_idx_to_field[] = { 1, 4, 5, 6, 7, 0, 1, 2 };
static const u8 pq_idx_to_desc = 0xf8;
static const u8 pq16_idx_to_desc[] = { 0, 0, 1, 1, 1, 1, 1, 1, 1,
				       2, 2, 2, 2, 2, 2, 2 };
static const u8 pq_idx_to_field[] = { 1, 4, 5, 0, 1, 2, 4, 5 };
static const u8 pq16_idx_to_field[] = { 1, 4, 1, 2, 3, 4, 5, 6, 7,
					0, 1, 2, 3, 4, 5, 6 };
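
/*
 * How the tables above decode (inferred from the set_src helpers below,
 * not from the hardware documentation): bit idx of xor_idx_to_desc and
 * pq_idx_to_desc selects the base (0) or extended (1) descriptor for
 * source idx, and the *_idx_to_field tables give the 64-bit raw field
 * slot inside that descriptor. For example, xor_idx_to_desc = 0xe0 =
 * 0b11100000, so:
 *
 *	idx 0..4: (0xe0 >> idx) & 1 == 0 -> base xor descriptor, fields 1,4,5,6,7
 *	idx 5..7: (0xe0 >> idx) & 1 == 1 -> extended descriptor, fields 0,1,2
 *
 * which matches the "greater than 5 sources" check in __ioat_prep_xor_lock().
 * Likewise pq_idx_to_desc = 0xf8 leaves three source slots in the base pq
 * descriptor, matching the "greater than 3 sources" check in
 * __ioat_prep_pq_lock().
 */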

static void xor_set_src(struct ioat_raw_descriptor *descs[2],
			dma_addr_t addr, u32 offset, int idx)
{
	struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1];

	raw->field[xor_idx_to_field[idx]] = addr + offset;
}

static dma_addr_t pq_get_src(struct ioat_raw_descriptor *descs[2], int idx)
{
	struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1];

	return raw->field[pq_idx_to_field[idx]];
}

static dma_addr_t pq16_get_src(struct ioat_raw_descriptor *desc[3], int idx)
{
	struct ioat_raw_descriptor *raw = desc[pq16_idx_to_desc[idx]];

	return raw->field[pq16_idx_to_field[idx]];
}

static void pq_set_src(struct ioat_raw_descriptor *descs[2],
		       dma_addr_t addr, u32 offset, u8 coef, int idx)
{
	struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *) descs[0];
	struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1];

	raw->field[pq_idx_to_field[idx]] = addr + offset;
	pq->coef[idx] = coef;
}

static void pq16_set_src(struct ioat_raw_descriptor *desc[3],
			 dma_addr_t addr, u32 offset, u8 coef, unsigned idx)
{
	struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *)desc[0];
	struct ioat_pq16a_descriptor *pq16 =
		(struct ioat_pq16a_descriptor *)desc[1];
	struct ioat_raw_descriptor *raw = desc[pq16_idx_to_desc[idx]];

	raw->field[pq16_idx_to_field[idx]] = addr + offset;

	if (idx < 8)
		pq->coef[idx] = coef;
	else
		pq16->coef[idx - 8] = coef;
}

static struct ioat_sed_ent *
ioat3_alloc_sed(struct ioatdma_device *ioat_dma, unsigned int hw_pool)
{
	struct ioat_sed_ent *sed;
	gfp_t flags = __GFP_ZERO | GFP_ATOMIC;

	sed = kmem_cache_alloc(ioat_sed_cache, flags);
	if (!sed)
		return NULL;

	sed->hw_pool = hw_pool;
	sed->hw = dma_pool_alloc(ioat_dma->sed_hw_pool[hw_pool],
				 flags, &sed->dma);
	if (!sed->hw) {
		kmem_cache_free(ioat_sed_cache, sed);
		return NULL;
	}

	return sed;
}
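
/*
 * A "sed" (super extended descriptor) entry supplies the extra source
 * address slots needed by the 16-source pq format: the pq16 path below
 * points descs[1] at sed->hw and descs[2] at sed->hw + 64, i.e. two more
 * 64-byte raw descriptors' worth of fields on top of the base pq
 * descriptor. Which sed_hw_pool backs the allocation is chosen by the
 * caller; the pools themselves are set up elsewhere in the driver.
 */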

struct dma_async_tx_descriptor *
ioat_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest,
			  dma_addr_t dma_src, size_t len, unsigned long flags)
{
	struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
	struct ioat_dma_descriptor *hw;
	struct ioat_ring_ent *desc;
	dma_addr_t dst = dma_dest;
	dma_addr_t src = dma_src;
	size_t total_len = len;
	int num_descs, idx, i;

	num_descs = ioat_xferlen_to_descs(ioat_chan, len);
	if (likely(num_descs) &&
	    ioat_check_space_lock(ioat_chan, num_descs) == 0)
		idx = ioat_chan->head;
	else
		return NULL;
	i = 0;
	do {
		size_t copy = min_t(size_t, len, 1 << ioat_chan->xfercap_log);

		desc = ioat_get_ring_ent(ioat_chan, idx + i);
		hw = desc->hw;

		hw->size = copy;
		hw->ctl = 0;
		hw->src_addr = src;
		hw->dst_addr = dst;

		len -= copy;
		dst += copy;
		src += copy;
		dump_desc_dbg(ioat_chan, desc);
	} while (++i < num_descs);

	desc->txd.flags = flags;
	desc->len = total_len;
	hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
	hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
	hw->ctl_f.compl_write = 1;
	dump_desc_dbg(ioat_chan, desc);
	/* we leave the channel locked to ensure in order submission */

	return &desc->txd;
}
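
/*
 * A sketch of how a dmaengine client reaches the prep routine above,
 * assuming the driver installs it as the channel's device_prep_dma_memcpy
 * callback (only core dmaengine calls are used; chan/dst/src/len are the
 * caller's own values):
 *
 *	struct dma_async_tx_descriptor *tx;
 *	dma_cookie_t cookie;
 *
 *	tx = dmaengine_prep_dma_memcpy(chan, dst, src, len,
 *				       DMA_PREP_INTERRUPT);
 *	if (!tx)
 *		goto err;	(prep returned NULL, e.g. the ring is full)
 *	cookie = dmaengine_submit(tx);
 *	dma_async_issue_pending(chan);
 *
 * On success the channel is left locked (see the comment above) and is
 * expected to be released on submission, which keeps descriptors issued
 * in order.
 */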

static struct dma_async_tx_descriptor *
__ioat_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result,
		     dma_addr_t dest, dma_addr_t *src, unsigned int src_cnt,
		     size_t len, unsigned long flags)
{
	struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
	struct ioat_ring_ent *compl_desc;
	struct ioat_ring_ent *desc;
	struct ioat_ring_ent *ext;
	size_t total_len = len;
	struct ioat_xor_descriptor *xor;
	struct ioat_xor_ext_descriptor *xor_ex = NULL;
	struct ioat_dma_descriptor *hw;
	int num_descs, with_ext, idx, i;
	u32 offset = 0;
	u8 op = result ? IOAT_OP_XOR_VAL : IOAT_OP_XOR;

	BUG_ON(src_cnt < 2);

	num_descs = ioat_xferlen_to_descs(ioat_chan, len);
	/* we need 2x the number of descriptors to cover greater than 5
	 * sources
	 */
	if (src_cnt > 5) {
		with_ext = 1;
		num_descs *= 2;
	} else
		with_ext = 0;

	/* completion writes from the raid engine may pass completion
	 * writes from the legacy engine, so we need one extra null
	 * (legacy) descriptor to ensure all completion writes arrive in
	 * order.
	 */
	if (likely(num_descs) &&
	    ioat_check_space_lock(ioat_chan, num_descs+1) == 0)
		idx = ioat_chan->head;
	else
		return NULL;
	i = 0;
	do {
		struct ioat_raw_descriptor *descs[2];
		size_t xfer_size = min_t(size_t,
					 len, 1 << ioat_chan->xfercap_log);
		int s;

		desc = ioat_get_ring_ent(ioat_chan, idx + i);
		xor = desc->xor;

		/* save a branch by unconditionally retrieving the
		 * extended descriptor xor_set_src() knows to not write
		 * to it in the single descriptor case
		 */
		ext = ioat_get_ring_ent(ioat_chan, idx + i + 1);
		xor_ex = ext->xor_ex;

		descs[0] = (struct ioat_raw_descriptor *) xor;
		descs[1] = (struct ioat_raw_descriptor *) xor_ex;
		for (s = 0; s < src_cnt; s++)
			xor_set_src(descs, src[s], offset, s);
		xor->size = xfer_size;
		xor->dst_addr = dest + offset;
		xor->ctl = 0;
		xor->ctl_f.op = op;
		xor->ctl_f.src_cnt = src_cnt_to_hw(src_cnt);

		len -= xfer_size;
		offset += xfer_size;
		dump_desc_dbg(ioat_chan, desc);
	} while ((i += 1 + with_ext) < num_descs);

	/* last xor descriptor carries the unmap parameters and fence bit */
	desc->txd.flags = flags;
	desc->len = total_len;
	if (result)
		desc->result = result;
	xor->ctl_f.fence = !!(flags & DMA_PREP_FENCE);

	/* completion descriptor carries interrupt bit */
	compl_desc = ioat_get_ring_ent(ioat_chan, idx + i);
	compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT;
	hw = compl_desc->hw;
	hw->ctl = 0;
	hw->ctl_f.null = 1;
	hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
	hw->ctl_f.compl_write = 1;
	hw->size = NULL_DESC_BUFFER_SIZE;
	dump_desc_dbg(ioat_chan, compl_desc);

	/* we leave the channel locked to ensure in order submission */
	return &compl_desc->txd;
}
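
/*
 * Ring accounting for the xor path above, as implied by the loop bounds:
 * each xfercap-sized chunk consumes one xor descriptor, plus one xor_ex
 * descriptor whenever src_cnt > 5, and a single trailing null (legacy)
 * descriptor carries the interrupt/completion write so that raid-engine
 * completions cannot overtake it.
 */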

struct dma_async_tx_descriptor *
ioat_prep_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
	      unsigned int src_cnt, size_t len, unsigned long flags)
{
	return __ioat_prep_xor_lock(chan, NULL, dest, src, src_cnt, len, flags);
}

struct dma_async_tx_descriptor *
ioat_prep_xor_val(struct dma_chan *chan, dma_addr_t *src,
		  unsigned int src_cnt, size_t len,
		  enum sum_check_flags *result, unsigned long flags)
{
	/* the cleanup routine only sets bits on validate failure, it
	 * does not clear bits on validate success... so clear it here
	 */
	*result = 0;

	return __ioat_prep_xor_lock(chan, result, src[0], &src[1],
				    src_cnt - 1, len, flags);
}

static void
dump_pq_desc_dbg(struct ioatdma_chan *ioat_chan, struct ioat_ring_ent *desc,
		 struct ioat_ring_ent *ext)
{
	struct device *dev = to_dev(ioat_chan);
	struct ioat_pq_descriptor *pq = desc->pq;
	struct ioat_pq_ext_descriptor *pq_ex = ext ? ext->pq_ex : NULL;
	struct ioat_raw_descriptor *descs[] = { (void *) pq, (void *) pq_ex };
	int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt);
	int i;

	dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x"
		" sz: %#10.8x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s'"
		" src_cnt: %d)\n",
		desc_id(desc), (unsigned long long) desc->txd.phys,
		(unsigned long long) (pq_ex ? pq_ex->next : pq->next),
		desc->txd.flags, pq->size, pq->ctl, pq->ctl_f.op,
		pq->ctl_f.int_en, pq->ctl_f.compl_write,
		pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q",
		pq->ctl_f.src_cnt);
	for (i = 0; i < src_cnt; i++)
		dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i,
			(unsigned long long) pq_get_src(descs, i), pq->coef[i]);
	dev_dbg(dev, "\tP: %#llx\n", pq->p_addr);
	dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr);
	dev_dbg(dev, "\tNEXT: %#llx\n", pq->next);
}

static void dump_pq16_desc_dbg(struct ioatdma_chan *ioat_chan,
			       struct ioat_ring_ent *desc)
{
	struct device *dev = to_dev(ioat_chan);
	struct ioat_pq_descriptor *pq = desc->pq;
	struct ioat_raw_descriptor *descs[] = { (void *)pq,
						(void *)pq,
						(void *)pq };
	int src_cnt = src16_cnt_to_sw(pq->ctl_f.src_cnt);
	int i;

	if (desc->sed) {
		descs[1] = (void *)desc->sed->hw;
		descs[2] = (void *)desc->sed->hw + 64;
	}

	dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x"
		" sz: %#x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s'"
		" src_cnt: %d)\n",
		desc_id(desc), (unsigned long long) desc->txd.phys,
		(unsigned long long) pq->next,
		desc->txd.flags, pq->size, pq->ctl,
		pq->ctl_f.op, pq->ctl_f.int_en,
		pq->ctl_f.compl_write,
		pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q",
		pq->ctl_f.src_cnt);
	for (i = 0; i < src_cnt; i++) {
		dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i,
			(unsigned long long) pq16_get_src(descs, i),
			pq->coef[i]);
	}
	dev_dbg(dev, "\tP: %#llx\n", pq->p_addr);
	dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr);
}

static struct dma_async_tx_descriptor *
__ioat_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result,
		    const dma_addr_t *dst, const dma_addr_t *src,
		    unsigned int src_cnt, const unsigned char *scf,
		    size_t len, unsigned long flags)
{
	struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
	struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma;
	struct ioat_ring_ent *compl_desc;
	struct ioat_ring_ent *desc;
	struct ioat_ring_ent *ext;
	size_t total_len = len;
	struct ioat_pq_descriptor *pq;
	struct ioat_pq_ext_descriptor *pq_ex = NULL;
	struct ioat_dma_descriptor *hw;
	u32 offset = 0;
	u8 op = result ? IOAT_OP_PQ_VAL : IOAT_OP_PQ;
	int i, s, idx, with_ext, num_descs;
	int cb32 = (ioat_dma->version < IOAT_VER_3_3) ? 1 : 0;

	dev_dbg(to_dev(ioat_chan), "%s\n", __func__);
	/* the engine requires at least two sources (we provide
	 * at least 1 implied source in the DMA_PREP_CONTINUE case)
	 */
	BUG_ON(src_cnt + dmaf_continue(flags) < 2);

	num_descs = ioat_xferlen_to_descs(ioat_chan, len);
	/* we need 2x the number of descriptors to cover greater than 3
	 * sources (we need 1 extra source in the q-only continuation
	 * case and 3 extra sources in the p+q continuation case.
	 */
	if (src_cnt + dmaf_p_disabled_continue(flags) > 3 ||
	    (dmaf_continue(flags) && !dmaf_p_disabled_continue(flags))) {
		with_ext = 1;
		num_descs *= 2;
	} else
		with_ext = 0;

	/* completion writes from the raid engine may pass completion
	 * writes from the legacy engine, so we need one extra null
	 * (legacy) descriptor to ensure all completion writes arrive in
	 * order.
	 */
	if (likely(num_descs) &&
	    ioat_check_space_lock(ioat_chan, num_descs + cb32) == 0)
		idx = ioat_chan->head;
	else
		return NULL;
	i = 0;
	do {
		struct ioat_raw_descriptor *descs[2];
		size_t xfer_size = min_t(size_t, len,
					 1 << ioat_chan->xfercap_log);

		desc = ioat_get_ring_ent(ioat_chan, idx + i);
		pq = desc->pq;

		/* save a branch by unconditionally retrieving the
		 * extended descriptor pq_set_src() knows to not write
		 * to it in the single descriptor case
		 */
		ext = ioat_get_ring_ent(ioat_chan, idx + i + with_ext);
		pq_ex = ext->pq_ex;

		descs[0] = (struct ioat_raw_descriptor *) pq;
		descs[1] = (struct ioat_raw_descriptor *) pq_ex;

		for (s = 0; s < src_cnt; s++)
			pq_set_src(descs, src[s], offset, scf[s], s);

		/* see the comment for dma_maxpq in include/linux/dmaengine.h */
		if (dmaf_p_disabled_continue(flags))
			pq_set_src(descs, dst[1], offset, 1, s++);
		else if (dmaf_continue(flags)) {
			pq_set_src(descs, dst[0], offset, 0, s++);
			pq_set_src(descs, dst[1], offset, 1, s++);
			pq_set_src(descs, dst[1], offset, 0, s++);
		}
		pq->size = xfer_size;
		pq->p_addr = dst[0] + offset;
		pq->q_addr = dst[1] + offset;
		pq->ctl = 0;
		pq->ctl_f.op = op;
		/* we turn on descriptor write back error status */
		if (ioat_dma->cap & IOAT_CAP_DWBES)
			pq->ctl_f.wb_en = result ? 1 : 0;
		pq->ctl_f.src_cnt = src_cnt_to_hw(s);
		pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P);
		pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q);

		len -= xfer_size;
		offset += xfer_size;
	} while ((i += 1 + with_ext) < num_descs);

	/* last pq descriptor carries the unmap parameters and fence bit */
	desc->txd.flags = flags;
	desc->len = total_len;
	if (result)
		desc->result = result;
	pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
	dump_pq_desc_dbg(ioat_chan, desc, ext);

	if (!cb32) {
		pq->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
		pq->ctl_f.compl_write = 1;
		compl_desc = desc;
	} else {
		/* completion descriptor carries interrupt bit */
		compl_desc = ioat_get_ring_ent(ioat_chan, idx + i);
		compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT;
		hw = compl_desc->hw;
		hw->ctl = 0;
		hw->ctl_f.null = 1;
		hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
		hw->ctl_f.compl_write = 1;
		hw->size = NULL_DESC_BUFFER_SIZE;
		dump_desc_dbg(ioat_chan, compl_desc);
	}

	/* we leave the channel locked to ensure in order submission */
	return &compl_desc->txd;
}
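
/*
 * cb32 above flags pre-3.3 hardware. Per the comments in this file, those
 * parts need the extra null (legacy) descriptor to carry the completion
 * write and interrupt, while 3.3+ engines can set int_en/compl_write on
 * the pq descriptor itself, which is also why the 16-source path below
 * (3.3-only) never allocates a trailing null descriptor.
 */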

static struct dma_async_tx_descriptor *
__ioat_prep_pq16_lock(struct dma_chan *c, enum sum_check_flags *result,
		      const dma_addr_t *dst, const dma_addr_t *src,
		      unsigned int src_cnt, const unsigned char *scf,
		      size_t len, unsigned long flags)
{
	struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
	struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma;
	struct ioat_ring_ent *desc;
	size_t total_len = len;
	struct ioat_pq_descriptor *pq;
	u32 offset = 0;
	u8 op;
	int i, s, idx, num_descs;

	/* this function is only called with 9-16 sources */
	op = result ? IOAT_OP_PQ_VAL_16S : IOAT_OP_PQ_16S;

	dev_dbg(to_dev(ioat_chan), "%s\n", __func__);

	num_descs = ioat_xferlen_to_descs(ioat_chan, len);

	/*
	 * 16 source pq is only available on cb3.3 and has no completion
	 * write hw bug.
	 */
	if (num_descs && ioat_check_space_lock(ioat_chan, num_descs) == 0)
		idx = ioat_chan->head;
	else
		return NULL;

	i = 0;
	do {
		struct ioat_raw_descriptor *descs[4];
		size_t xfer_size = min_t(size_t, len,
					 1 << ioat_chan->xfercap_log);

		desc = ioat_get_ring_ent(ioat_chan, idx + i);
		pq = desc->pq;

		descs[0] = (struct ioat_raw_descriptor *) pq;

		desc->sed = ioat3_alloc_sed(ioat_dma, (src_cnt-2) >> 3);
		if (!desc->sed) {
			dev_err(to_dev(ioat_chan),
				"%s: no free sed entries\n", __func__);
			return NULL;
		}

		pq->sed_addr = desc->sed->dma;
		desc->sed->parent = desc;

		descs[1] = (struct ioat_raw_descriptor *)desc->sed->hw;
		descs[2] = (void *)descs[1] + 64;

		for (s = 0; s < src_cnt; s++)
			pq16_set_src(descs, src[s], offset, scf[s], s);

		/* see the comment for dma_maxpq in include/linux/dmaengine.h */
		if (dmaf_p_disabled_continue(flags))
			pq16_set_src(descs, dst[1], offset, 1, s++);
		else if (dmaf_continue(flags)) {
			pq16_set_src(descs, dst[0], offset, 0, s++);
			pq16_set_src(descs, dst[1], offset, 1, s++);
			pq16_set_src(descs, dst[1], offset, 0, s++);
		}

		pq->size = xfer_size;
		pq->p_addr = dst[0] + offset;
		pq->q_addr = dst[1] + offset;
		pq->ctl = 0;
		pq->ctl_f.op = op;
		pq->ctl_f.src_cnt = src16_cnt_to_hw(s);
		/* we turn on descriptor write back error status */
		if (ioat_dma->cap & IOAT_CAP_DWBES)
			pq->ctl_f.wb_en = result ? 1 : 0;
		pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P);
		pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q);

		len -= xfer_size;
		offset += xfer_size;
	} while (++i < num_descs);

	/* last pq descriptor carries the unmap parameters and fence bit */
	desc->txd.flags = flags;
	desc->len = total_len;
	if (result)
		desc->result = result;
	pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE);

	/* with cb3.3 we should be able to do completion w/o a null desc */
	pq->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
	pq->ctl_f.compl_write = 1;

	dump_pq16_desc_dbg(ioat_chan, desc);

	/* we leave the channel locked to ensure in order submission */
	return &desc->txd;
}
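
/*
 * In the loop above, descs[0] is the on-ring pq descriptor (source
 * indexes 0-1 per pq16_idx_to_desc), descs[1] is the first 64-byte half
 * of the sed entry (indexes 2-8) and descs[2] the second half (indexes
 * 9-15); descs[3] is never written. The sed pool index (src_cnt - 2) >> 3
 * therefore picks the smaller pool when only one extension block is
 * needed (9 sources) and the next pool otherwise; the pool sizing itself
 * is set up outside this file.
 */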

static int src_cnt_flags(unsigned int src_cnt, unsigned long flags)
{
	if (dmaf_p_disabled_continue(flags))
		return src_cnt + 1;
	else if (dmaf_continue(flags))
		return src_cnt + 3;
	else
		return src_cnt;
}
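
/*
 * src_cnt_flags() mirrors the implied sources that the prep loops add for
 * RAID continuation operations: one extra (dst[1]) in the q-only
 * continuation case and three extra (dst[0], dst[1], dst[1]) in the p+q
 * case. The callers below compare the result against 8 to decide between
 * the standard pq format and the 16-source format.
 */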

struct dma_async_tx_descriptor *
ioat_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
	     unsigned int src_cnt, const unsigned char *scf, size_t len,
	     unsigned long flags)
{
	/* specify valid address for disabled result */
	if (flags & DMA_PREP_PQ_DISABLE_P)
		dst[0] = dst[1];
	if (flags & DMA_PREP_PQ_DISABLE_Q)
		dst[1] = dst[0];

	/* handle the single source multiply case from the raid6
	 * recovery path
	 */
	if ((flags & DMA_PREP_PQ_DISABLE_P) && src_cnt == 1) {
		dma_addr_t single_source[2];
		unsigned char single_source_coef[2];

		BUG_ON(flags & DMA_PREP_PQ_DISABLE_Q);
		single_source[0] = src[0];
		single_source[1] = src[0];
		single_source_coef[0] = scf[0];
		single_source_coef[1] = 0;

		return src_cnt_flags(src_cnt, flags) > 8 ?
			__ioat_prep_pq16_lock(chan, NULL, dst, single_source,
					      2, single_source_coef, len,
					      flags) :
			__ioat_prep_pq_lock(chan, NULL, dst, single_source, 2,
					    single_source_coef, len, flags);

	} else {
		return src_cnt_flags(src_cnt, flags) > 8 ?
			__ioat_prep_pq16_lock(chan, NULL, dst, src, src_cnt,
					      scf, len, flags) :
			__ioat_prep_pq_lock(chan, NULL, dst, src, src_cnt,
					    scf, len, flags);
	}
}
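
/*
 * The raid6 recovery path can request a q-only multiply of a single
 * source, but the engine wants at least two sources, so the case above
 * feeds the same buffer twice and gives the duplicate a zero coefficient;
 * multiplying by zero contributes nothing to Q, leaving the result
 * unchanged.
 */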

struct dma_async_tx_descriptor *
ioat_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
		 unsigned int src_cnt, const unsigned char *scf, size_t len,
		 enum sum_check_flags *pqres, unsigned long flags)
{
	/* specify valid address for disabled result */
	if (flags & DMA_PREP_PQ_DISABLE_P)
		pq[0] = pq[1];
	if (flags & DMA_PREP_PQ_DISABLE_Q)
		pq[1] = pq[0];

	/* the cleanup routine only sets bits on validate failure, it
	 * does not clear bits on validate success... so clear it here
	 */
	*pqres = 0;

	return src_cnt_flags(src_cnt, flags) > 8 ?
		__ioat_prep_pq16_lock(chan, pqres, pq, src, src_cnt, scf, len,
				      flags) :
		__ioat_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len,
				    flags);
}

struct dma_async_tx_descriptor *
ioat_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src,
		unsigned int src_cnt, size_t len, unsigned long flags)
{
	unsigned char scf[MAX_SCF];
	dma_addr_t pq[2];

	if (src_cnt > MAX_SCF)
		return NULL;

	memset(scf, 0, src_cnt);
	pq[0] = dst;
	flags |= DMA_PREP_PQ_DISABLE_Q;
	pq[1] = dst; /* specify valid address for disabled result */

	return src_cnt_flags(src_cnt, flags) > 8 ?
		__ioat_prep_pq16_lock(chan, NULL, pq, src, src_cnt, scf, len,
				      flags) :
		__ioat_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len,
				    flags);
}
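
/*
 * xor emulated on the pq engine: Q is disabled and all coefficients are
 * zeroed, so the operation reduces to the P (plain xor) result written to
 * dst. MAX_SCF only bounds the on-stack coefficient array used by these
 * two pqxor helpers.
 */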

struct dma_async_tx_descriptor *
ioat_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src,
		    unsigned int src_cnt, size_t len,
		    enum sum_check_flags *result, unsigned long flags)
{
	unsigned char scf[MAX_SCF];
	dma_addr_t pq[2];

	if (src_cnt > MAX_SCF)
		return NULL;

	/* the cleanup routine only sets bits on validate failure, it
	 * does not clear bits on validate success... so clear it here
	 */
	*result = 0;
	memset(scf, 0, src_cnt);
	pq[0] = src[0];
	flags |= DMA_PREP_PQ_DISABLE_Q;
	pq[1] = pq[0]; /* specify valid address for disabled result */

	return src_cnt_flags(src_cnt, flags) > 8 ?
		__ioat_prep_pq16_lock(chan, result, pq, &src[1], src_cnt - 1,
				      scf, len, flags) :
		__ioat_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1,
				    scf, len, flags);
}

struct dma_async_tx_descriptor *
ioat_prep_interrupt_lock(struct dma_chan *c, unsigned long flags)
{
	struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
	struct ioat_ring_ent *desc;
	struct ioat_dma_descriptor *hw;

	if (ioat_check_space_lock(ioat_chan, 1) == 0)
		desc = ioat_get_ring_ent(ioat_chan, ioat_chan->head);
	else
		return NULL;

	hw = desc->hw;
	hw->ctl = 0;
	hw->ctl_f.null = 1;
	hw->ctl_f.int_en = 1;
	hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
	hw->ctl_f.compl_write = 1;
	hw->size = NULL_DESC_BUFFER_SIZE;
	hw->src_addr = 0;
	hw->dst_addr = 0;

	desc->txd.flags = flags;
	desc->len = 1;

	dump_desc_dbg(ioat_chan, desc);

	/* we leave the channel locked to ensure in order submission */
	return &desc->txd;
}
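
/*
 * None of the prep routines in this file are called directly by clients;
 * the driver's init code is expected to install them as dmaengine
 * callbacks on the struct dma_device it registers, roughly (a sketch, the
 * exact wiring and capability checks live outside this file):
 *
 *	dma->device_prep_dma_memcpy = ioat_dma_prep_memcpy_lock;
 *	dma->device_prep_dma_xor = ioat_prep_xor;
 *	dma->device_prep_dma_xor_val = ioat_prep_xor_val;
 *	dma->device_prep_dma_pq = ioat_prep_pq;
 *	dma->device_prep_dma_pq_val = ioat_prep_pq_val;
 *	dma->device_prep_dma_interrupt = ioat_prep_interrupt_lock;
 */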