aes-spe-glue.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513
  1. /*
  2. * Glue code for AES implementation for SPE instructions (PPC)
  3. *
  4. * Based on generic implementation. The assembler module takes care
  5. * about the SPE registers so it can run from interrupt context.
  6. *
  7. * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
  8. *
  9. * This program is free software; you can redistribute it and/or modify it
  10. * under the terms of the GNU General Public License as published by the Free
  11. * Software Foundation; either version 2 of the License, or (at your option)
  12. * any later version.
  13. *
  14. */
  15. #include <crypto/aes.h>
  16. #include <linux/module.h>
  17. #include <linux/init.h>
  18. #include <linux/types.h>
  19. #include <linux/errno.h>
  20. #include <linux/crypto.h>
  21. #include <asm/byteorder.h>
  22. #include <asm/switch_to.h>
  23. #include <crypto/algapi.h>
  /*
   * MAX_BYTES is the upper bound on the number of bytes that may be processed
   * between preempt_disable() and preempt_enable().
   *
   * e500 cores can issue two instructions per clock cycle using one 32/64 bit
   * unit (SU1) and one 32 bit unit (SU2). One of these can be a memory access
   * that is executed via a single load and store unit (LSU). XTS-AES-256 takes
   * ~780 operations per 16 byte block, or 25 cycles per byte. Thus 768 bytes
   * of input data will need an estimated maximum of 20,000 cycles, headroom
   * for cache misses included. Even with the low end model clocked at 667 MHz
   * this equals a critical time window of less than 30us.
   *
   * The value has been chosen to process a 512 byte disk block in one run, or
   * a large 1400 byte IPsec network packet in two runs.
   */
  #define MAX_BYTES 768
  39. struct ppc_aes_ctx {
  40. u32 key_enc[AES_MAX_KEYLENGTH_U32];
  41. u32 key_dec[AES_MAX_KEYLENGTH_U32];
  42. u32 rounds;
  43. };
  44. struct ppc_xts_ctx {
  45. u32 key_enc[AES_MAX_KEYLENGTH_U32];
  46. u32 key_dec[AES_MAX_KEYLENGTH_U32];
  47. u32 key_twk[AES_MAX_KEYLENGTH_U32];
  48. u32 rounds;
  49. };
  50. extern void ppc_encrypt_aes(u8 *out, const u8 *in, u32 *key_enc, u32 rounds);
  51. extern void ppc_decrypt_aes(u8 *out, const u8 *in, u32 *key_dec, u32 rounds);
  52. extern void ppc_encrypt_ecb(u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
  53. u32 bytes);
  54. extern void ppc_decrypt_ecb(u8 *out, const u8 *in, u32 *key_dec, u32 rounds,
  55. u32 bytes);
  56. extern void ppc_encrypt_cbc(u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
  57. u32 bytes, u8 *iv);
  58. extern void ppc_decrypt_cbc(u8 *out, const u8 *in, u32 *key_dec, u32 rounds,
  59. u32 bytes, u8 *iv);
  60. extern void ppc_crypt_ctr (u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
  61. u32 bytes, u8 *iv);
  62. extern void ppc_encrypt_xts(u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
  63. u32 bytes, u8 *iv, u32 *key_twk);
  64. extern void ppc_decrypt_xts(u8 *out, const u8 *in, u32 *key_dec, u32 rounds,
  65. u32 bytes, u8 *iv, u32 *key_twk);
  66. extern void ppc_expand_key_128(u32 *key_enc, const u8 *key);
  67. extern void ppc_expand_key_192(u32 *key_enc, const u8 *key);
  68. extern void ppc_expand_key_256(u32 *key_enc, const u8 *key);
  69. extern void ppc_generate_decrypt_key(u32 *key_dec,u32 *key_enc,
  70. unsigned int key_len);
  /*
   * Open a section in which the SPE unit may be used by the kernel.
   * Must be paired with spe_end().
   */
  static void spe_begin(void)
  {
      /* no preemption while the kernel uses the SPE registers */
      preempt_disable();
      /* saves the user's SPE registers if required */
      enable_kernel_spe();
  }
  /*
   * Close a section opened by spe_begin(): undo the two steps in reverse
   * order.
   */
  static void spe_end(void)
  {
      disable_kernel_spe();
      /* allow preemption again */
      preempt_enable();
  }
  83. static int ppc_aes_setkey(struct crypto_tfm *tfm, const u8 *in_key,
  84. unsigned int key_len)
  85. {
  86. struct ppc_aes_ctx *ctx = crypto_tfm_ctx(tfm);
  87. if (key_len != AES_KEYSIZE_128 &&
  88. key_len != AES_KEYSIZE_192 &&
  89. key_len != AES_KEYSIZE_256) {
  90. tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
  91. return -EINVAL;
  92. }
  93. switch (key_len) {
  94. case AES_KEYSIZE_128:
  95. ctx->rounds = 4;
  96. ppc_expand_key_128(ctx->key_enc, in_key);
  97. break;
  98. case AES_KEYSIZE_192:
  99. ctx->rounds = 5;
  100. ppc_expand_key_192(ctx->key_enc, in_key);
  101. break;
  102. case AES_KEYSIZE_256:
  103. ctx->rounds = 6;
  104. ppc_expand_key_256(ctx->key_enc, in_key);
  105. break;
  106. }
  107. ppc_generate_decrypt_key(ctx->key_dec, ctx->key_enc, key_len);
  108. return 0;
  109. }
  110. static int ppc_xts_setkey(struct crypto_tfm *tfm, const u8 *in_key,
  111. unsigned int key_len)
  112. {
  113. struct ppc_xts_ctx *ctx = crypto_tfm_ctx(tfm);
  114. key_len >>= 1;
  115. if (key_len != AES_KEYSIZE_128 &&
  116. key_len != AES_KEYSIZE_192 &&
  117. key_len != AES_KEYSIZE_256) {
  118. tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
  119. return -EINVAL;
  120. }
  121. switch (key_len) {
  122. case AES_KEYSIZE_128:
  123. ctx->rounds = 4;
  124. ppc_expand_key_128(ctx->key_enc, in_key);
  125. ppc_expand_key_128(ctx->key_twk, in_key + AES_KEYSIZE_128);
  126. break;
  127. case AES_KEYSIZE_192:
  128. ctx->rounds = 5;
  129. ppc_expand_key_192(ctx->key_enc, in_key);
  130. ppc_expand_key_192(ctx->key_twk, in_key + AES_KEYSIZE_192);
  131. break;
  132. case AES_KEYSIZE_256:
  133. ctx->rounds = 6;
  134. ppc_expand_key_256(ctx->key_enc, in_key);
  135. ppc_expand_key_256(ctx->key_twk, in_key + AES_KEYSIZE_256);
  136. break;
  137. }
  138. ppc_generate_decrypt_key(ctx->key_dec, ctx->key_enc, key_len);
  139. return 0;
  140. }
  141. static void ppc_aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
  142. {
  143. struct ppc_aes_ctx *ctx = crypto_tfm_ctx(tfm);
  144. spe_begin();
  145. ppc_encrypt_aes(out, in, ctx->key_enc, ctx->rounds);
  146. spe_end();
  147. }
  148. static void ppc_aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
  149. {
  150. struct ppc_aes_ctx *ctx = crypto_tfm_ctx(tfm);
  151. spe_begin();
  152. ppc_decrypt_aes(out, in, ctx->key_dec, ctx->rounds);
  153. spe_end();
  154. }
  155. static int ppc_ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
  156. struct scatterlist *src, unsigned int nbytes)
  157. {
  158. struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
  159. struct blkcipher_walk walk;
  160. unsigned int ubytes;
  161. int err;
  162. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  163. blkcipher_walk_init(&walk, dst, src, nbytes);
  164. err = blkcipher_walk_virt(desc, &walk);
  165. while ((nbytes = walk.nbytes)) {
  166. ubytes = nbytes > MAX_BYTES ?
  167. nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
  168. nbytes -= ubytes;
  169. spe_begin();
  170. ppc_encrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr,
  171. ctx->key_enc, ctx->rounds, nbytes);
  172. spe_end();
  173. err = blkcipher_walk_done(desc, &walk, ubytes);
  174. }
  175. return err;
  176. }
  177. static int ppc_ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
  178. struct scatterlist *src, unsigned int nbytes)
  179. {
  180. struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
  181. struct blkcipher_walk walk;
  182. unsigned int ubytes;
  183. int err;
  184. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  185. blkcipher_walk_init(&walk, dst, src, nbytes);
  186. err = blkcipher_walk_virt(desc, &walk);
  187. while ((nbytes = walk.nbytes)) {
  188. ubytes = nbytes > MAX_BYTES ?
  189. nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
  190. nbytes -= ubytes;
  191. spe_begin();
  192. ppc_decrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr,
  193. ctx->key_dec, ctx->rounds, nbytes);
  194. spe_end();
  195. err = blkcipher_walk_done(desc, &walk, ubytes);
  196. }
  197. return err;
  198. }
  199. static int ppc_cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
  200. struct scatterlist *src, unsigned int nbytes)
  201. {
  202. struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
  203. struct blkcipher_walk walk;
  204. unsigned int ubytes;
  205. int err;
  206. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  207. blkcipher_walk_init(&walk, dst, src, nbytes);
  208. err = blkcipher_walk_virt(desc, &walk);
  209. while ((nbytes = walk.nbytes)) {
  210. ubytes = nbytes > MAX_BYTES ?
  211. nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
  212. nbytes -= ubytes;
  213. spe_begin();
  214. ppc_encrypt_cbc(walk.dst.virt.addr, walk.src.virt.addr,
  215. ctx->key_enc, ctx->rounds, nbytes, walk.iv);
  216. spe_end();
  217. err = blkcipher_walk_done(desc, &walk, ubytes);
  218. }
  219. return err;
  220. }
  221. static int ppc_cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
  222. struct scatterlist *src, unsigned int nbytes)
  223. {
  224. struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
  225. struct blkcipher_walk walk;
  226. unsigned int ubytes;
  227. int err;
  228. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  229. blkcipher_walk_init(&walk, dst, src, nbytes);
  230. err = blkcipher_walk_virt(desc, &walk);
  231. while ((nbytes = walk.nbytes)) {
  232. ubytes = nbytes > MAX_BYTES ?
  233. nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
  234. nbytes -= ubytes;
  235. spe_begin();
  236. ppc_decrypt_cbc(walk.dst.virt.addr, walk.src.virt.addr,
  237. ctx->key_dec, ctx->rounds, nbytes, walk.iv);
  238. spe_end();
  239. err = blkcipher_walk_done(desc, &walk, ubytes);
  240. }
  241. return err;
  242. }
  243. static int ppc_ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
  244. struct scatterlist *src, unsigned int nbytes)
  245. {
  246. struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
  247. struct blkcipher_walk walk;
  248. unsigned int pbytes, ubytes;
  249. int err;
  250. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  251. blkcipher_walk_init(&walk, dst, src, nbytes);
  252. err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
  253. while ((pbytes = walk.nbytes)) {
  254. pbytes = pbytes > MAX_BYTES ? MAX_BYTES : pbytes;
  255. pbytes = pbytes == nbytes ?
  256. nbytes : pbytes & ~(AES_BLOCK_SIZE - 1);
  257. ubytes = walk.nbytes - pbytes;
  258. spe_begin();
  259. ppc_crypt_ctr(walk.dst.virt.addr, walk.src.virt.addr,
  260. ctx->key_enc, ctx->rounds, pbytes , walk.iv);
  261. spe_end();
  262. nbytes -= pbytes;
  263. err = blkcipher_walk_done(desc, &walk, ubytes);
  264. }
  265. return err;
  266. }
  267. static int ppc_xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
  268. struct scatterlist *src, unsigned int nbytes)
  269. {
  270. struct ppc_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
  271. struct blkcipher_walk walk;
  272. unsigned int ubytes;
  273. int err;
  274. u32 *twk;
  275. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  276. blkcipher_walk_init(&walk, dst, src, nbytes);
  277. err = blkcipher_walk_virt(desc, &walk);
  278. twk = ctx->key_twk;
  279. while ((nbytes = walk.nbytes)) {
  280. ubytes = nbytes > MAX_BYTES ?
  281. nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
  282. nbytes -= ubytes;
  283. spe_begin();
  284. ppc_encrypt_xts(walk.dst.virt.addr, walk.src.virt.addr,
  285. ctx->key_enc, ctx->rounds, nbytes, walk.iv, twk);
  286. spe_end();
  287. twk = NULL;
  288. err = blkcipher_walk_done(desc, &walk, ubytes);
  289. }
  290. return err;
  291. }
  292. static int ppc_xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
  293. struct scatterlist *src, unsigned int nbytes)
  294. {
  295. struct ppc_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
  296. struct blkcipher_walk walk;
  297. unsigned int ubytes;
  298. int err;
  299. u32 *twk;
  300. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  301. blkcipher_walk_init(&walk, dst, src, nbytes);
  302. err = blkcipher_walk_virt(desc, &walk);
  303. twk = ctx->key_twk;
  304. while ((nbytes = walk.nbytes)) {
  305. ubytes = nbytes > MAX_BYTES ?
  306. nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
  307. nbytes -= ubytes;
  308. spe_begin();
  309. ppc_decrypt_xts(walk.dst.virt.addr, walk.src.virt.addr,
  310. ctx->key_dec, ctx->rounds, nbytes, walk.iv, twk);
  311. spe_end();
  312. twk = NULL;
  313. err = blkcipher_walk_done(desc, &walk, ubytes);
  314. }
  315. return err;
  316. }
  317. /*
  318. * Algorithm definitions. Disabling alignment (cra_alignmask=0) was chosen
  319. * because the e500 platform can handle unaligned reads/writes very efficently.
  320. * This improves IPsec thoughput by another few percent. Additionally we assume
  321. * that AES context is always aligned to at least 8 bytes because it is created
  322. * with kmalloc() in the crypto infrastructure
  323. *
  324. */
  325. static struct crypto_alg aes_algs[] = { {
  326. .cra_name = "aes",
  327. .cra_driver_name = "aes-ppc-spe",
  328. .cra_priority = 300,
  329. .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
  330. .cra_blocksize = AES_BLOCK_SIZE,
  331. .cra_ctxsize = sizeof(struct ppc_aes_ctx),
  332. .cra_alignmask = 0,
  333. .cra_module = THIS_MODULE,
  334. .cra_u = {
  335. .cipher = {
  336. .cia_min_keysize = AES_MIN_KEY_SIZE,
  337. .cia_max_keysize = AES_MAX_KEY_SIZE,
  338. .cia_setkey = ppc_aes_setkey,
  339. .cia_encrypt = ppc_aes_encrypt,
  340. .cia_decrypt = ppc_aes_decrypt
  341. }
  342. }
  343. }, {
  344. .cra_name = "ecb(aes)",
  345. .cra_driver_name = "ecb-ppc-spe",
  346. .cra_priority = 300,
  347. .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
  348. .cra_blocksize = AES_BLOCK_SIZE,
  349. .cra_ctxsize = sizeof(struct ppc_aes_ctx),
  350. .cra_alignmask = 0,
  351. .cra_type = &crypto_blkcipher_type,
  352. .cra_module = THIS_MODULE,
  353. .cra_u = {
  354. .blkcipher = {
  355. .min_keysize = AES_MIN_KEY_SIZE,
  356. .max_keysize = AES_MAX_KEY_SIZE,
  357. .ivsize = AES_BLOCK_SIZE,
  358. .setkey = ppc_aes_setkey,
  359. .encrypt = ppc_ecb_encrypt,
  360. .decrypt = ppc_ecb_decrypt,
  361. }
  362. }
  363. }, {
  364. .cra_name = "cbc(aes)",
  365. .cra_driver_name = "cbc-ppc-spe",
  366. .cra_priority = 300,
  367. .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
  368. .cra_blocksize = AES_BLOCK_SIZE,
  369. .cra_ctxsize = sizeof(struct ppc_aes_ctx),
  370. .cra_alignmask = 0,
  371. .cra_type = &crypto_blkcipher_type,
  372. .cra_module = THIS_MODULE,
  373. .cra_u = {
  374. .blkcipher = {
  375. .min_keysize = AES_MIN_KEY_SIZE,
  376. .max_keysize = AES_MAX_KEY_SIZE,
  377. .ivsize = AES_BLOCK_SIZE,
  378. .setkey = ppc_aes_setkey,
  379. .encrypt = ppc_cbc_encrypt,
  380. .decrypt = ppc_cbc_decrypt,
  381. }
  382. }
  383. }, {
  384. .cra_name = "ctr(aes)",
  385. .cra_driver_name = "ctr-ppc-spe",
  386. .cra_priority = 300,
  387. .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
  388. .cra_blocksize = 1,
  389. .cra_ctxsize = sizeof(struct ppc_aes_ctx),
  390. .cra_alignmask = 0,
  391. .cra_type = &crypto_blkcipher_type,
  392. .cra_module = THIS_MODULE,
  393. .cra_u = {
  394. .blkcipher = {
  395. .min_keysize = AES_MIN_KEY_SIZE,
  396. .max_keysize = AES_MAX_KEY_SIZE,
  397. .ivsize = AES_BLOCK_SIZE,
  398. .setkey = ppc_aes_setkey,
  399. .encrypt = ppc_ctr_crypt,
  400. .decrypt = ppc_ctr_crypt,
  401. }
  402. }
  403. }, {
  404. .cra_name = "xts(aes)",
  405. .cra_driver_name = "xts-ppc-spe",
  406. .cra_priority = 300,
  407. .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
  408. .cra_blocksize = AES_BLOCK_SIZE,
  409. .cra_ctxsize = sizeof(struct ppc_xts_ctx),
  410. .cra_alignmask = 0,
  411. .cra_type = &crypto_blkcipher_type,
  412. .cra_module = THIS_MODULE,
  413. .cra_u = {
  414. .blkcipher = {
  415. .min_keysize = AES_MIN_KEY_SIZE * 2,
  416. .max_keysize = AES_MAX_KEY_SIZE * 2,
  417. .ivsize = AES_BLOCK_SIZE,
  418. .setkey = ppc_xts_setkey,
  419. .encrypt = ppc_xts_encrypt,
  420. .decrypt = ppc_xts_decrypt,
  421. }
  422. }
  423. } };
  424. static int __init ppc_aes_mod_init(void)
  425. {
  426. return crypto_register_algs(aes_algs, ARRAY_SIZE(aes_algs));
  427. }
  428. static void __exit ppc_aes_mod_fini(void)
  429. {
  430. crypto_unregister_algs(aes_algs, ARRAY_SIZE(aes_algs));
  431. }
  432. module_init(ppc_aes_mod_init);
  433. module_exit(ppc_aes_mod_fini);
  434. MODULE_LICENSE("GPL");
  435. MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS, SPE optimized");
  436. MODULE_ALIAS_CRYPTO("aes");
  437. MODULE_ALIAS_CRYPTO("ecb(aes)");
  438. MODULE_ALIAS_CRYPTO("cbc(aes)");
  439. MODULE_ALIAS_CRYPTO("ctr(aes)");
  440. MODULE_ALIAS_CRYPTO("xts(aes)");
  441. MODULE_ALIAS_CRYPTO("aes-ppc-spe");