ccp-dev-v3.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547
  1. /*
  2. * AMD Cryptographic Coprocessor (CCP) driver
  3. *
  4. * Copyright (C) 2013,2016 Advanced Micro Devices, Inc.
  5. *
  6. * Author: Tom Lendacky <thomas.lendacky@amd.com>
  7. * Author: Gary R Hook <gary.hook@amd.com>
  8. *
  9. * This program is free software; you can redistribute it and/or modify
  10. * it under the terms of the GNU General Public License version 2 as
  11. * published by the Free Software Foundation.
  12. */
  13. #include <linux/module.h>
  14. #include <linux/kernel.h>
  15. #include <linux/pci.h>
  16. #include <linux/kthread.h>
  17. #include <linux/interrupt.h>
  18. #include <linux/ccp.h>
  19. #include "ccp-dev.h"
  20. static int ccp_do_cmd(struct ccp_op *op, u32 *cr, unsigned int cr_count)
  21. {
  22. struct ccp_cmd_queue *cmd_q = op->cmd_q;
  23. struct ccp_device *ccp = cmd_q->ccp;
  24. void __iomem *cr_addr;
  25. u32 cr0, cmd;
  26. unsigned int i;
  27. int ret = 0;
  28. /* We could read a status register to see how many free slots
  29. * are actually available, but reading that register resets it
  30. * and you could lose some error information.
  31. */
  32. cmd_q->free_slots--;
  33. cr0 = (cmd_q->id << REQ0_CMD_Q_SHIFT)
  34. | (op->jobid << REQ0_JOBID_SHIFT)
  35. | REQ0_WAIT_FOR_WRITE;
  36. if (op->soc)
  37. cr0 |= REQ0_STOP_ON_COMPLETE
  38. | REQ0_INT_ON_COMPLETE;
  39. if (op->ioc || !cmd_q->free_slots)
  40. cr0 |= REQ0_INT_ON_COMPLETE;
  41. /* Start at CMD_REQ1 */
  42. cr_addr = ccp->io_regs + CMD_REQ0 + CMD_REQ_INCR;
  43. mutex_lock(&ccp->req_mutex);
  44. /* Write CMD_REQ1 through CMD_REQx first */
  45. for (i = 0; i < cr_count; i++, cr_addr += CMD_REQ_INCR)
  46. iowrite32(*(cr + i), cr_addr);
  47. /* Tell the CCP to start */
  48. wmb();
  49. iowrite32(cr0, ccp->io_regs + CMD_REQ0);
  50. mutex_unlock(&ccp->req_mutex);
  51. if (cr0 & REQ0_INT_ON_COMPLETE) {
  52. /* Wait for the job to complete */
  53. ret = wait_event_interruptible(cmd_q->int_queue,
  54. cmd_q->int_rcvd);
  55. if (ret || cmd_q->cmd_error) {
  56. /* On error delete all related jobs from the queue */
  57. cmd = (cmd_q->id << DEL_Q_ID_SHIFT)
  58. | op->jobid;
  59. iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB);
  60. if (!ret)
  61. ret = -EIO;
  62. } else if (op->soc) {
  63. /* Delete just head job from the queue on SoC */
  64. cmd = DEL_Q_ACTIVE
  65. | (cmd_q->id << DEL_Q_ID_SHIFT)
  66. | op->jobid;
  67. iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB);
  68. }
  69. cmd_q->free_slots = CMD_Q_DEPTH(cmd_q->q_status);
  70. cmd_q->int_rcvd = 0;
  71. }
  72. return ret;
  73. }
  74. static int ccp_perform_aes(struct ccp_op *op)
  75. {
  76. u32 cr[6];
  77. /* Fill out the register contents for REQ1 through REQ6 */
  78. cr[0] = (CCP_ENGINE_AES << REQ1_ENGINE_SHIFT)
  79. | (op->u.aes.type << REQ1_AES_TYPE_SHIFT)
  80. | (op->u.aes.mode << REQ1_AES_MODE_SHIFT)
  81. | (op->u.aes.action << REQ1_AES_ACTION_SHIFT)
  82. | (op->ksb_key << REQ1_KEY_KSB_SHIFT);
  83. cr[1] = op->src.u.dma.length - 1;
  84. cr[2] = ccp_addr_lo(&op->src.u.dma);
  85. cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
  86. | (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
  87. | ccp_addr_hi(&op->src.u.dma);
  88. cr[4] = ccp_addr_lo(&op->dst.u.dma);
  89. cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
  90. | ccp_addr_hi(&op->dst.u.dma);
  91. if (op->u.aes.mode == CCP_AES_MODE_CFB)
  92. cr[0] |= ((0x7f) << REQ1_AES_CFB_SIZE_SHIFT);
  93. if (op->eom)
  94. cr[0] |= REQ1_EOM;
  95. if (op->init)
  96. cr[0] |= REQ1_INIT;
  97. return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
  98. }
  99. static int ccp_perform_xts_aes(struct ccp_op *op)
  100. {
  101. u32 cr[6];
  102. /* Fill out the register contents for REQ1 through REQ6 */
  103. cr[0] = (CCP_ENGINE_XTS_AES_128 << REQ1_ENGINE_SHIFT)
  104. | (op->u.xts.action << REQ1_AES_ACTION_SHIFT)
  105. | (op->u.xts.unit_size << REQ1_XTS_AES_SIZE_SHIFT)
  106. | (op->ksb_key << REQ1_KEY_KSB_SHIFT);
  107. cr[1] = op->src.u.dma.length - 1;
  108. cr[2] = ccp_addr_lo(&op->src.u.dma);
  109. cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
  110. | (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
  111. | ccp_addr_hi(&op->src.u.dma);
  112. cr[4] = ccp_addr_lo(&op->dst.u.dma);
  113. cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
  114. | ccp_addr_hi(&op->dst.u.dma);
  115. if (op->eom)
  116. cr[0] |= REQ1_EOM;
  117. if (op->init)
  118. cr[0] |= REQ1_INIT;
  119. return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
  120. }
  121. static int ccp_perform_sha(struct ccp_op *op)
  122. {
  123. u32 cr[6];
  124. /* Fill out the register contents for REQ1 through REQ6 */
  125. cr[0] = (CCP_ENGINE_SHA << REQ1_ENGINE_SHIFT)
  126. | (op->u.sha.type << REQ1_SHA_TYPE_SHIFT)
  127. | REQ1_INIT;
  128. cr[1] = op->src.u.dma.length - 1;
  129. cr[2] = ccp_addr_lo(&op->src.u.dma);
  130. cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
  131. | (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
  132. | ccp_addr_hi(&op->src.u.dma);
  133. if (op->eom) {
  134. cr[0] |= REQ1_EOM;
  135. cr[4] = lower_32_bits(op->u.sha.msg_bits);
  136. cr[5] = upper_32_bits(op->u.sha.msg_bits);
  137. } else {
  138. cr[4] = 0;
  139. cr[5] = 0;
  140. }
  141. return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
  142. }
  143. static int ccp_perform_rsa(struct ccp_op *op)
  144. {
  145. u32 cr[6];
  146. /* Fill out the register contents for REQ1 through REQ6 */
  147. cr[0] = (CCP_ENGINE_RSA << REQ1_ENGINE_SHIFT)
  148. | (op->u.rsa.mod_size << REQ1_RSA_MOD_SIZE_SHIFT)
  149. | (op->ksb_key << REQ1_KEY_KSB_SHIFT)
  150. | REQ1_EOM;
  151. cr[1] = op->u.rsa.input_len - 1;
  152. cr[2] = ccp_addr_lo(&op->src.u.dma);
  153. cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
  154. | (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
  155. | ccp_addr_hi(&op->src.u.dma);
  156. cr[4] = ccp_addr_lo(&op->dst.u.dma);
  157. cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
  158. | ccp_addr_hi(&op->dst.u.dma);
  159. return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
  160. }
  161. static int ccp_perform_passthru(struct ccp_op *op)
  162. {
  163. u32 cr[6];
  164. /* Fill out the register contents for REQ1 through REQ6 */
  165. cr[0] = (CCP_ENGINE_PASSTHRU << REQ1_ENGINE_SHIFT)
  166. | (op->u.passthru.bit_mod << REQ1_PT_BW_SHIFT)
  167. | (op->u.passthru.byte_swap << REQ1_PT_BS_SHIFT);
  168. if (op->src.type == CCP_MEMTYPE_SYSTEM)
  169. cr[1] = op->src.u.dma.length - 1;
  170. else
  171. cr[1] = op->dst.u.dma.length - 1;
  172. if (op->src.type == CCP_MEMTYPE_SYSTEM) {
  173. cr[2] = ccp_addr_lo(&op->src.u.dma);
  174. cr[3] = (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
  175. | ccp_addr_hi(&op->src.u.dma);
  176. if (op->u.passthru.bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
  177. cr[3] |= (op->ksb_key << REQ4_KSB_SHIFT);
  178. } else {
  179. cr[2] = op->src.u.ksb * CCP_KSB_BYTES;
  180. cr[3] = (CCP_MEMTYPE_KSB << REQ4_MEMTYPE_SHIFT);
  181. }
  182. if (op->dst.type == CCP_MEMTYPE_SYSTEM) {
  183. cr[4] = ccp_addr_lo(&op->dst.u.dma);
  184. cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
  185. | ccp_addr_hi(&op->dst.u.dma);
  186. } else {
  187. cr[4] = op->dst.u.ksb * CCP_KSB_BYTES;
  188. cr[5] = (CCP_MEMTYPE_KSB << REQ6_MEMTYPE_SHIFT);
  189. }
  190. if (op->eom)
  191. cr[0] |= REQ1_EOM;
  192. return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
  193. }
  194. static int ccp_perform_ecc(struct ccp_op *op)
  195. {
  196. u32 cr[6];
  197. /* Fill out the register contents for REQ1 through REQ6 */
  198. cr[0] = REQ1_ECC_AFFINE_CONVERT
  199. | (CCP_ENGINE_ECC << REQ1_ENGINE_SHIFT)
  200. | (op->u.ecc.function << REQ1_ECC_FUNCTION_SHIFT)
  201. | REQ1_EOM;
  202. cr[1] = op->src.u.dma.length - 1;
  203. cr[2] = ccp_addr_lo(&op->src.u.dma);
  204. cr[3] = (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
  205. | ccp_addr_hi(&op->src.u.dma);
  206. cr[4] = ccp_addr_lo(&op->dst.u.dma);
  207. cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
  208. | ccp_addr_hi(&op->dst.u.dma);
  209. return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
  210. }
  211. static int ccp_trng_read(struct hwrng *rng, void *data, size_t max, bool wait)
  212. {
  213. struct ccp_device *ccp = container_of(rng, struct ccp_device, hwrng);
  214. u32 trng_value;
  215. int len = min_t(int, sizeof(trng_value), max);
  216. /*
  217. * Locking is provided by the caller so we can update device
  218. * hwrng-related fields safely
  219. */
  220. trng_value = ioread32(ccp->io_regs + TRNG_OUT_REG);
  221. if (!trng_value) {
  222. /* Zero is returned if not data is available or if a
  223. * bad-entropy error is present. Assume an error if
  224. * we exceed TRNG_RETRIES reads of zero.
  225. */
  226. if (ccp->hwrng_retries++ > TRNG_RETRIES)
  227. return -EIO;
  228. return 0;
  229. }
  230. /* Reset the counter and save the rng value */
  231. ccp->hwrng_retries = 0;
  232. memcpy(data, &trng_value, len);
  233. return len;
  234. }
  235. static int ccp_init(struct ccp_device *ccp)
  236. {
  237. struct device *dev = ccp->dev;
  238. struct ccp_cmd_queue *cmd_q;
  239. struct dma_pool *dma_pool;
  240. char dma_pool_name[MAX_DMAPOOL_NAME_LEN];
  241. unsigned int qmr, qim, i;
  242. int ret;
  243. /* Find available queues */
  244. qim = 0;
  245. qmr = ioread32(ccp->io_regs + Q_MASK_REG);
  246. for (i = 0; i < MAX_HW_QUEUES; i++) {
  247. if (!(qmr & (1 << i)))
  248. continue;
  249. /* Allocate a dma pool for this queue */
  250. snprintf(dma_pool_name, sizeof(dma_pool_name), "%s_q%d",
  251. ccp->name, i);
  252. dma_pool = dma_pool_create(dma_pool_name, dev,
  253. CCP_DMAPOOL_MAX_SIZE,
  254. CCP_DMAPOOL_ALIGN, 0);
  255. if (!dma_pool) {
  256. dev_err(dev, "unable to allocate dma pool\n");
  257. ret = -ENOMEM;
  258. goto e_pool;
  259. }
  260. cmd_q = &ccp->cmd_q[ccp->cmd_q_count];
  261. ccp->cmd_q_count++;
  262. cmd_q->ccp = ccp;
  263. cmd_q->id = i;
  264. cmd_q->dma_pool = dma_pool;
  265. /* Reserve 2 KSB regions for the queue */
  266. cmd_q->ksb_key = KSB_START + ccp->ksb_start++;
  267. cmd_q->ksb_ctx = KSB_START + ccp->ksb_start++;
  268. ccp->ksb_count -= 2;
  269. /* Preset some register values and masks that are queue
  270. * number dependent
  271. */
  272. cmd_q->reg_status = ccp->io_regs + CMD_Q_STATUS_BASE +
  273. (CMD_Q_STATUS_INCR * i);
  274. cmd_q->reg_int_status = ccp->io_regs + CMD_Q_INT_STATUS_BASE +
  275. (CMD_Q_STATUS_INCR * i);
  276. cmd_q->int_ok = 1 << (i * 2);
  277. cmd_q->int_err = 1 << ((i * 2) + 1);
  278. cmd_q->free_slots = CMD_Q_DEPTH(ioread32(cmd_q->reg_status));
  279. init_waitqueue_head(&cmd_q->int_queue);
  280. /* Build queue interrupt mask (two interrupts per queue) */
  281. qim |= cmd_q->int_ok | cmd_q->int_err;
  282. #ifdef CONFIG_ARM64
  283. /* For arm64 set the recommended queue cache settings */
  284. iowrite32(ccp->axcache, ccp->io_regs + CMD_Q_CACHE_BASE +
  285. (CMD_Q_CACHE_INC * i));
  286. #endif
  287. dev_dbg(dev, "queue #%u available\n", i);
  288. }
  289. if (ccp->cmd_q_count == 0) {
  290. dev_notice(dev, "no command queues available\n");
  291. ret = -EIO;
  292. goto e_pool;
  293. }
  294. dev_notice(dev, "%u command queues available\n", ccp->cmd_q_count);
  295. /* Disable and clear interrupts until ready */
  296. iowrite32(0x00, ccp->io_regs + IRQ_MASK_REG);
  297. for (i = 0; i < ccp->cmd_q_count; i++) {
  298. cmd_q = &ccp->cmd_q[i];
  299. ioread32(cmd_q->reg_int_status);
  300. ioread32(cmd_q->reg_status);
  301. }
  302. iowrite32(qim, ccp->io_regs + IRQ_STATUS_REG);
  303. /* Request an irq */
  304. ret = ccp->get_irq(ccp);
  305. if (ret) {
  306. dev_err(dev, "unable to allocate an IRQ\n");
  307. goto e_pool;
  308. }
  309. /* Initialize the queues used to wait for KSB space and suspend */
  310. init_waitqueue_head(&ccp->ksb_queue);
  311. init_waitqueue_head(&ccp->suspend_queue);
  312. /* Create a kthread for each queue */
  313. for (i = 0; i < ccp->cmd_q_count; i++) {
  314. struct task_struct *kthread;
  315. cmd_q = &ccp->cmd_q[i];
  316. kthread = kthread_create(ccp_cmd_queue_thread, cmd_q,
  317. "%s-q%u", ccp->name, cmd_q->id);
  318. if (IS_ERR(kthread)) {
  319. dev_err(dev, "error creating queue thread (%ld)\n",
  320. PTR_ERR(kthread));
  321. ret = PTR_ERR(kthread);
  322. goto e_kthread;
  323. }
  324. cmd_q->kthread = kthread;
  325. wake_up_process(kthread);
  326. }
  327. /* Register the RNG */
  328. ccp->hwrng.name = ccp->rngname;
  329. ccp->hwrng.read = ccp_trng_read;
  330. ret = hwrng_register(&ccp->hwrng);
  331. if (ret) {
  332. dev_err(dev, "error registering hwrng (%d)\n", ret);
  333. goto e_kthread;
  334. }
  335. /* Register the DMA engine support */
  336. ret = ccp_dmaengine_register(ccp);
  337. if (ret)
  338. goto e_hwrng;
  339. ccp_add_device(ccp);
  340. /* Enable interrupts */
  341. iowrite32(qim, ccp->io_regs + IRQ_MASK_REG);
  342. return 0;
  343. e_hwrng:
  344. hwrng_unregister(&ccp->hwrng);
  345. e_kthread:
  346. for (i = 0; i < ccp->cmd_q_count; i++)
  347. if (ccp->cmd_q[i].kthread)
  348. kthread_stop(ccp->cmd_q[i].kthread);
  349. ccp->free_irq(ccp);
  350. e_pool:
  351. for (i = 0; i < ccp->cmd_q_count; i++)
  352. dma_pool_destroy(ccp->cmd_q[i].dma_pool);
  353. return ret;
  354. }
  355. static void ccp_destroy(struct ccp_device *ccp)
  356. {
  357. struct ccp_cmd_queue *cmd_q;
  358. struct ccp_cmd *cmd;
  359. unsigned int qim, i;
  360. /* Remove this device from the list of available units first */
  361. ccp_del_device(ccp);
  362. /* Unregister the DMA engine */
  363. ccp_dmaengine_unregister(ccp);
  364. /* Unregister the RNG */
  365. hwrng_unregister(&ccp->hwrng);
  366. /* Stop the queue kthreads */
  367. for (i = 0; i < ccp->cmd_q_count; i++)
  368. if (ccp->cmd_q[i].kthread)
  369. kthread_stop(ccp->cmd_q[i].kthread);
  370. /* Build queue interrupt mask (two interrupt masks per queue) */
  371. qim = 0;
  372. for (i = 0; i < ccp->cmd_q_count; i++) {
  373. cmd_q = &ccp->cmd_q[i];
  374. qim |= cmd_q->int_ok | cmd_q->int_err;
  375. }
  376. /* Disable and clear interrupts */
  377. iowrite32(0x00, ccp->io_regs + IRQ_MASK_REG);
  378. for (i = 0; i < ccp->cmd_q_count; i++) {
  379. cmd_q = &ccp->cmd_q[i];
  380. ioread32(cmd_q->reg_int_status);
  381. ioread32(cmd_q->reg_status);
  382. }
  383. iowrite32(qim, ccp->io_regs + IRQ_STATUS_REG);
  384. ccp->free_irq(ccp);
  385. for (i = 0; i < ccp->cmd_q_count; i++)
  386. dma_pool_destroy(ccp->cmd_q[i].dma_pool);
  387. /* Flush the cmd and backlog queue */
  388. while (!list_empty(&ccp->cmd)) {
  389. /* Invoke the callback directly with an error code */
  390. cmd = list_first_entry(&ccp->cmd, struct ccp_cmd, entry);
  391. list_del(&cmd->entry);
  392. cmd->callback(cmd->data, -ENODEV);
  393. }
  394. while (!list_empty(&ccp->backlog)) {
  395. /* Invoke the callback directly with an error code */
  396. cmd = list_first_entry(&ccp->backlog, struct ccp_cmd, entry);
  397. list_del(&cmd->entry);
  398. cmd->callback(cmd->data, -ENODEV);
  399. }
  400. }
  401. static irqreturn_t ccp_irq_handler(int irq, void *data)
  402. {
  403. struct device *dev = data;
  404. struct ccp_device *ccp = dev_get_drvdata(dev);
  405. struct ccp_cmd_queue *cmd_q;
  406. u32 q_int, status;
  407. unsigned int i;
  408. status = ioread32(ccp->io_regs + IRQ_STATUS_REG);
  409. for (i = 0; i < ccp->cmd_q_count; i++) {
  410. cmd_q = &ccp->cmd_q[i];
  411. q_int = status & (cmd_q->int_ok | cmd_q->int_err);
  412. if (q_int) {
  413. cmd_q->int_status = status;
  414. cmd_q->q_status = ioread32(cmd_q->reg_status);
  415. cmd_q->q_int_status = ioread32(cmd_q->reg_int_status);
  416. /* On error, only save the first error value */
  417. if ((q_int & cmd_q->int_err) && !cmd_q->cmd_error)
  418. cmd_q->cmd_error = CMD_Q_ERROR(cmd_q->q_status);
  419. cmd_q->int_rcvd = 1;
  420. /* Acknowledge the interrupt and wake the kthread */
  421. iowrite32(q_int, ccp->io_regs + IRQ_STATUS_REG);
  422. wake_up_interruptible(&cmd_q->int_queue);
  423. }
  424. }
  425. return IRQ_HANDLED;
  426. }
  427. static const struct ccp_actions ccp3_actions = {
  428. .aes = ccp_perform_aes,
  429. .xts_aes = ccp_perform_xts_aes,
  430. .sha = ccp_perform_sha,
  431. .rsa = ccp_perform_rsa,
  432. .passthru = ccp_perform_passthru,
  433. .ecc = ccp_perform_ecc,
  434. .init = ccp_init,
  435. .destroy = ccp_destroy,
  436. .irqhandler = ccp_irq_handler,
  437. };
  438. struct ccp_vdata ccpv3 = {
  439. .version = CCP_VERSION(3, 0),
  440. .perform = &ccp3_actions,
  441. .bar = 2,
  442. .offset = 0x20000,
  443. };