@ aes-ce-core.S
/*
 * aes-ce-core.S - AES in CBC/CTR/XTS mode using ARMv8 Crypto Extensions
 *
 * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/linkage.h>
#include <asm/assembler.h>
  12. .text
  13. .fpu crypto-neon-fp-armv8
  14. .align 3
  15. .macro enc_round, state, key
  16. aese.8 \state, \key
  17. aesmc.8 \state, \state
  18. .endm
  19. .macro dec_round, state, key
  20. aesd.8 \state, \key
  21. aesimc.8 \state, \state
  22. .endm
  23. .macro enc_dround, key1, key2
  24. enc_round q0, \key1
  25. enc_round q0, \key2
  26. .endm
  27. .macro dec_dround, key1, key2
  28. dec_round q0, \key1
  29. dec_round q0, \key2
  30. .endm
  31. .macro enc_fround, key1, key2, key3
  32. enc_round q0, \key1
  33. aese.8 q0, \key2
  34. veor q0, q0, \key3
  35. .endm
  36. .macro dec_fround, key1, key2, key3
  37. dec_round q0, \key1
  38. aesd.8 q0, \key2
  39. veor q0, q0, \key3
  40. .endm
  41. .macro enc_dround_3x, key1, key2
  42. enc_round q0, \key1
  43. enc_round q1, \key1
  44. enc_round q2, \key1
  45. enc_round q0, \key2
  46. enc_round q1, \key2
  47. enc_round q2, \key2
  48. .endm
  49. .macro dec_dround_3x, key1, key2
  50. dec_round q0, \key1
  51. dec_round q1, \key1
  52. dec_round q2, \key1
  53. dec_round q0, \key2
  54. dec_round q1, \key2
  55. dec_round q2, \key2
  56. .endm
  57. .macro enc_fround_3x, key1, key2, key3
  58. enc_round q0, \key1
  59. enc_round q1, \key1
  60. enc_round q2, \key1
  61. aese.8 q0, \key2
  62. aese.8 q1, \key2
  63. aese.8 q2, \key2
  64. veor q0, q0, \key3
  65. veor q1, q1, \key3
  66. veor q2, q2, \key3
  67. .endm
  68. .macro dec_fround_3x, key1, key2, key3
  69. dec_round q0, \key1
  70. dec_round q1, \key1
  71. dec_round q2, \key1
  72. aesd.8 q0, \key2
  73. aesd.8 q1, \key2
  74. aesd.8 q2, \key2
  75. veor q0, q0, \key3
  76. veor q1, q1, \key3
  77. veor q2, q2, \key3
  78. .endm
  79. .macro do_block, dround, fround
  80. cmp r3, #12 @ which key size?
  81. vld1.8 {q10-q11}, [ip]!
  82. \dround q8, q9
  83. vld1.8 {q12-q13}, [ip]!
  84. \dround q10, q11
  85. vld1.8 {q10-q11}, [ip]!
  86. \dround q12, q13
  87. vld1.8 {q12-q13}, [ip]!
  88. \dround q10, q11
  89. blo 0f @ AES-128: 10 rounds
  90. vld1.8 {q10-q11}, [ip]!
  91. beq 1f @ AES-192: 12 rounds
  92. \dround q12, q13
  93. vld1.8 {q12-q13}, [ip]
  94. \dround q10, q11
  95. 0: \fround q12, q13, q14
  96. bx lr
  97. 1: \dround q12, q13
  98. \fround q10, q11, q14
  99. bx lr
  100. .endm
  101. /*
  102. * Internal, non-AAPCS compliant functions that implement the core AES
  103. * transforms. These should preserve all registers except q0 - q2 and ip
  104. * Arguments:
  105. * q0 : first in/output block
  106. * q1 : second in/output block (_3x version only)
  107. * q2 : third in/output block (_3x version only)
  108. * q8 : first round key
  109. * q9 : secound round key
  110. * ip : address of 3rd round key
  111. * q14 : final round key
  112. * r3 : number of rounds
  113. */
  114. .align 6
  115. aes_encrypt:
  116. add ip, r2, #32 @ 3rd round key
  117. .Laes_encrypt_tweak:
  118. do_block enc_dround, enc_fround
  119. ENDPROC(aes_encrypt)
  120. .align 6
  121. aes_decrypt:
  122. add ip, r2, #32 @ 3rd round key
  123. do_block dec_dround, dec_fround
  124. ENDPROC(aes_decrypt)
  125. .align 6
  126. aes_encrypt_3x:
  127. add ip, r2, #32 @ 3rd round key
  128. do_block enc_dround_3x, enc_fround_3x
  129. ENDPROC(aes_encrypt_3x)
  130. .align 6
  131. aes_decrypt_3x:
  132. add ip, r2, #32 @ 3rd round key
  133. do_block dec_dround_3x, dec_fround_3x
  134. ENDPROC(aes_decrypt_3x)
  135. .macro prepare_key, rk, rounds
  136. add ip, \rk, \rounds, lsl #4
  137. vld1.8 {q8-q9}, [\rk] @ load first 2 round keys
  138. vld1.8 {q14}, [ip] @ load last round key
  139. .endm
  140. /*
  141. * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
  142. * int blocks)
  143. * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
  144. * int blocks)
  145. */
  146. ENTRY(ce_aes_ecb_encrypt)
  147. push {r4, lr}
  148. ldr r4, [sp, #8]
  149. prepare_key r2, r3
  150. .Lecbencloop3x:
  151. subs r4, r4, #3
  152. bmi .Lecbenc1x
  153. vld1.8 {q0-q1}, [r1, :64]!
  154. vld1.8 {q2}, [r1, :64]!
  155. bl aes_encrypt_3x
  156. vst1.8 {q0-q1}, [r0, :64]!
  157. vst1.8 {q2}, [r0, :64]!
  158. b .Lecbencloop3x
  159. .Lecbenc1x:
  160. adds r4, r4, #3
  161. beq .Lecbencout
  162. .Lecbencloop:
  163. vld1.8 {q0}, [r1, :64]!
  164. bl aes_encrypt
  165. vst1.8 {q0}, [r0, :64]!
  166. subs r4, r4, #1
  167. bne .Lecbencloop
  168. .Lecbencout:
  169. pop {r4, pc}
  170. ENDPROC(ce_aes_ecb_encrypt)
  171. ENTRY(ce_aes_ecb_decrypt)
  172. push {r4, lr}
  173. ldr r4, [sp, #8]
  174. prepare_key r2, r3
  175. .Lecbdecloop3x:
  176. subs r4, r4, #3
  177. bmi .Lecbdec1x
  178. vld1.8 {q0-q1}, [r1, :64]!
  179. vld1.8 {q2}, [r1, :64]!
  180. bl aes_decrypt_3x
  181. vst1.8 {q0-q1}, [r0, :64]!
  182. vst1.8 {q2}, [r0, :64]!
  183. b .Lecbdecloop3x
  184. .Lecbdec1x:
  185. adds r4, r4, #3
  186. beq .Lecbdecout
  187. .Lecbdecloop:
  188. vld1.8 {q0}, [r1, :64]!
  189. bl aes_decrypt
  190. vst1.8 {q0}, [r0, :64]!
  191. subs r4, r4, #1
  192. bne .Lecbdecloop
  193. .Lecbdecout:
  194. pop {r4, pc}
  195. ENDPROC(ce_aes_ecb_decrypt)
  196. /*
  197. * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
  198. * int blocks, u8 iv[])
  199. * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
  200. * int blocks, u8 iv[])
  201. */
  202. ENTRY(ce_aes_cbc_encrypt)
  203. push {r4-r6, lr}
  204. ldrd r4, r5, [sp, #16]
  205. vld1.8 {q0}, [r5]
  206. prepare_key r2, r3
  207. .Lcbcencloop:
  208. vld1.8 {q1}, [r1, :64]! @ get next pt block
  209. veor q0, q0, q1 @ ..and xor with iv
  210. bl aes_encrypt
  211. vst1.8 {q0}, [r0, :64]!
  212. subs r4, r4, #1
  213. bne .Lcbcencloop
  214. vst1.8 {q0}, [r5]
  215. pop {r4-r6, pc}
  216. ENDPROC(ce_aes_cbc_encrypt)
  217. ENTRY(ce_aes_cbc_decrypt)
  218. push {r4-r6, lr}
  219. ldrd r4, r5, [sp, #16]
  220. vld1.8 {q6}, [r5] @ keep iv in q6
  221. prepare_key r2, r3
  222. .Lcbcdecloop3x:
  223. subs r4, r4, #3
  224. bmi .Lcbcdec1x
  225. vld1.8 {q0-q1}, [r1, :64]!
  226. vld1.8 {q2}, [r1, :64]!
  227. vmov q3, q0
  228. vmov q4, q1
  229. vmov q5, q2
  230. bl aes_decrypt_3x
  231. veor q0, q0, q6
  232. veor q1, q1, q3
  233. veor q2, q2, q4
  234. vmov q6, q5
  235. vst1.8 {q0-q1}, [r0, :64]!
  236. vst1.8 {q2}, [r0, :64]!
  237. b .Lcbcdecloop3x
  238. .Lcbcdec1x:
  239. adds r4, r4, #3
  240. beq .Lcbcdecout
  241. vmov q15, q14 @ preserve last round key
  242. .Lcbcdecloop:
  243. vld1.8 {q0}, [r1, :64]! @ get next ct block
  244. veor q14, q15, q6 @ combine prev ct with last key
  245. vmov q6, q0
  246. bl aes_decrypt
  247. vst1.8 {q0}, [r0, :64]!
  248. subs r4, r4, #1
  249. bne .Lcbcdecloop
  250. .Lcbcdecout:
  251. vst1.8 {q6}, [r5] @ keep iv in q6
  252. pop {r4-r6, pc}
  253. ENDPROC(ce_aes_cbc_decrypt)
  254. /*
  255. * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
  256. * int blocks, u8 ctr[])
  257. */
  258. ENTRY(ce_aes_ctr_encrypt)
  259. push {r4-r6, lr}
  260. ldrd r4, r5, [sp, #16]
  261. vld1.8 {q6}, [r5] @ load ctr
  262. prepare_key r2, r3
  263. vmov r6, s27 @ keep swabbed ctr in r6
  264. rev r6, r6
  265. cmn r6, r4 @ 32 bit overflow?
  266. bcs .Lctrloop
  267. .Lctrloop3x:
  268. subs r4, r4, #3
  269. bmi .Lctr1x
  270. add r6, r6, #1
  271. vmov q0, q6
  272. vmov q1, q6
  273. rev ip, r6
  274. add r6, r6, #1
  275. vmov q2, q6
  276. vmov s7, ip
  277. rev ip, r6
  278. add r6, r6, #1
  279. vmov s11, ip
  280. vld1.8 {q3-q4}, [r1, :64]!
  281. vld1.8 {q5}, [r1, :64]!
  282. bl aes_encrypt_3x
  283. veor q0, q0, q3
  284. veor q1, q1, q4
  285. veor q2, q2, q5
  286. rev ip, r6
  287. vst1.8 {q0-q1}, [r0, :64]!
  288. vst1.8 {q2}, [r0, :64]!
  289. vmov s27, ip
  290. b .Lctrloop3x
  291. .Lctr1x:
  292. adds r4, r4, #3
  293. beq .Lctrout
  294. .Lctrloop:
  295. vmov q0, q6
  296. bl aes_encrypt
  297. subs r4, r4, #1
  298. bmi .Lctrhalfblock @ blocks < 0 means 1/2 block
  299. vld1.8 {q3}, [r1, :64]!
  300. veor q3, q0, q3
  301. vst1.8 {q3}, [r0, :64]!
  302. adds r6, r6, #1 @ increment BE ctr
  303. rev ip, r6
  304. vmov s27, ip
  305. bcs .Lctrcarry
  306. teq r4, #0
  307. bne .Lctrloop
  308. .Lctrout:
  309. vst1.8 {q6}, [r5]
  310. pop {r4-r6, pc}
  311. .Lctrhalfblock:
  312. vld1.8 {d1}, [r1, :64]
  313. veor d0, d0, d1
  314. vst1.8 {d0}, [r0, :64]
  315. pop {r4-r6, pc}
  316. .Lctrcarry:
  317. .irp sreg, s26, s25, s24
  318. vmov ip, \sreg @ load next word of ctr
  319. rev ip, ip @ ... to handle the carry
  320. adds ip, ip, #1
  321. rev ip, ip
  322. vmov \sreg, ip
  323. bcc 0f
  324. .endr
  325. 0: teq r4, #0
  326. beq .Lctrout
  327. b .Lctrloop
  328. ENDPROC(ce_aes_ctr_encrypt)
  329. /*
  330. * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
  331. * int blocks, u8 iv[], u8 const rk2[], int first)
  332. * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
  333. * int blocks, u8 iv[], u8 const rk2[], int first)
  334. */
  335. .macro next_tweak, out, in, const, tmp
  336. vshr.s64 \tmp, \in, #63
  337. vand \tmp, \tmp, \const
  338. vadd.u64 \out, \in, \in
  339. vext.8 \tmp, \tmp, \tmp, #8
  340. veor \out, \out, \tmp
  341. .endm
  342. .align 3
  343. .Lxts_mul_x:
  344. .quad 1, 0x87
  345. ce_aes_xts_init:
  346. vldr d14, .Lxts_mul_x
  347. vldr d15, .Lxts_mul_x + 8
  348. ldrd r4, r5, [sp, #16] @ load args
  349. ldr r6, [sp, #28]
  350. vld1.8 {q0}, [r5] @ load iv
  351. teq r6, #1 @ start of a block?
  352. bxne lr
  353. @ Encrypt the IV in q0 with the second AES key. This should only
  354. @ be done at the start of a block.
  355. ldr r6, [sp, #24] @ load AES key 2
  356. prepare_key r6, r3
  357. add ip, r6, #32 @ 3rd round key of key 2
  358. b .Laes_encrypt_tweak @ tail call
  359. ENDPROC(ce_aes_xts_init)
  360. ENTRY(ce_aes_xts_encrypt)
  361. push {r4-r6, lr}
  362. bl ce_aes_xts_init @ run shared prologue
  363. prepare_key r2, r3
  364. vmov q3, q0
  365. teq r6, #0 @ start of a block?
  366. bne .Lxtsenc3x
  367. .Lxtsencloop3x:
  368. next_tweak q3, q3, q7, q6
  369. .Lxtsenc3x:
  370. subs r4, r4, #3
  371. bmi .Lxtsenc1x
  372. vld1.8 {q0-q1}, [r1, :64]! @ get 3 pt blocks
  373. vld1.8 {q2}, [r1, :64]!
  374. next_tweak q4, q3, q7, q6
  375. veor q0, q0, q3
  376. next_tweak q5, q4, q7, q6
  377. veor q1, q1, q4
  378. veor q2, q2, q5
  379. bl aes_encrypt_3x
  380. veor q0, q0, q3
  381. veor q1, q1, q4
  382. veor q2, q2, q5
  383. vst1.8 {q0-q1}, [r0, :64]! @ write 3 ct blocks
  384. vst1.8 {q2}, [r0, :64]!
  385. vmov q3, q5
  386. teq r4, #0
  387. beq .Lxtsencout
  388. b .Lxtsencloop3x
  389. .Lxtsenc1x:
  390. adds r4, r4, #3
  391. beq .Lxtsencout
  392. .Lxtsencloop:
  393. vld1.8 {q0}, [r1, :64]!
  394. veor q0, q0, q3
  395. bl aes_encrypt
  396. veor q0, q0, q3
  397. vst1.8 {q0}, [r0, :64]!
  398. subs r4, r4, #1
  399. beq .Lxtsencout
  400. next_tweak q3, q3, q7, q6
  401. b .Lxtsencloop
  402. .Lxtsencout:
  403. vst1.8 {q3}, [r5]
  404. pop {r4-r6, pc}
  405. ENDPROC(ce_aes_xts_encrypt)
  406. ENTRY(ce_aes_xts_decrypt)
  407. push {r4-r6, lr}
  408. bl ce_aes_xts_init @ run shared prologue
  409. prepare_key r2, r3
  410. vmov q3, q0
  411. teq r6, #0 @ start of a block?
  412. bne .Lxtsdec3x
  413. .Lxtsdecloop3x:
  414. next_tweak q3, q3, q7, q6
  415. .Lxtsdec3x:
  416. subs r4, r4, #3
  417. bmi .Lxtsdec1x
  418. vld1.8 {q0-q1}, [r1, :64]! @ get 3 ct blocks
  419. vld1.8 {q2}, [r1, :64]!
  420. next_tweak q4, q3, q7, q6
  421. veor q0, q0, q3
  422. next_tweak q5, q4, q7, q6
  423. veor q1, q1, q4
  424. veor q2, q2, q5
  425. bl aes_decrypt_3x
  426. veor q0, q0, q3
  427. veor q1, q1, q4
  428. veor q2, q2, q5
  429. vst1.8 {q0-q1}, [r0, :64]! @ write 3 pt blocks
  430. vst1.8 {q2}, [r0, :64]!
  431. vmov q3, q5
  432. teq r4, #0
  433. beq .Lxtsdecout
  434. b .Lxtsdecloop3x
  435. .Lxtsdec1x:
  436. adds r4, r4, #3
  437. beq .Lxtsdecout
  438. .Lxtsdecloop:
  439. vld1.8 {q0}, [r1, :64]!
  440. veor q0, q0, q3
  441. add ip, r2, #32 @ 3rd round key
  442. bl aes_decrypt
  443. veor q0, q0, q3
  444. vst1.8 {q0}, [r0, :64]!
  445. subs r4, r4, #1
  446. beq .Lxtsdecout
  447. next_tweak q3, q3, q7, q6
  448. b .Lxtsdecloop
  449. .Lxtsdecout:
  450. vst1.8 {q3}, [r5]
  451. pop {r4-r6, pc}
  452. ENDPROC(ce_aes_xts_decrypt)
  453. /*
  454. * u32 ce_aes_sub(u32 input) - use the aese instruction to perform the
  455. * AES sbox substitution on each byte in
  456. * 'input'
  457. */
  458. ENTRY(ce_aes_sub)
  459. vdup.32 q1, r0
  460. veor q0, q0, q0
  461. aese.8 q0, q1
  462. vmov r0, s0
  463. bx lr
  464. ENDPROC(ce_aes_sub)
  465. /*
  466. * void ce_aes_invert(u8 *dst, u8 *src) - perform the Inverse MixColumns
  467. * operation on round key *src
  468. */
  469. ENTRY(ce_aes_invert)
  470. vld1.8 {q0}, [r1]
  471. aesimc.8 q0, q0
  472. vst1.8 {q0}, [r0]
  473. bx lr
  474. ENDPROC(ce_aes_invert)