
crypto: arm64/aes-ccm-ce: fix for big endian

The AES-CCM implementation that uses ARMv8 Crypto Extensions instructions
refers to the AES round keys as pairs of 64-bit quantities, which causes
failures when building the code for big endian. In addition, it byte swaps
the input counter unconditionally, while this is only required for little
endian builds. So fix both issues.

Fixes: 12ac3efe74f8 ("arm64/crypto: use crypto instructions to generate AES key schedule")
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
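
For context, a rough user-space C analogy (not part of the patch, and not the exact semantics of ld1; the array contents below are made up) of why the change from 64-bit element loads (ld1 {vN.2d}) to byte loads (ld1 {vN.16b}) matters: the round keys sit in memory as a fixed byte sequence, so interpreting them as 64-bit quantities gives different register contents on little- and big-endian kernels, while a byte-wise view is the same on both.

	/*
	 * Illustration only: a 16-byte AES round key is a fixed byte sequence
	 * in memory.  Viewing it byte-by-byte (analogous to ld1 {v.16b}) is
	 * endian-neutral; viewing it as two 64-bit integers (analogous to
	 * ld1 {v.2d}) depends on the CPU's byte order.
	 */
	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		const uint8_t round_key[16] = {
			0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
			0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
		};
		uint64_t halves[2];

		memcpy(halves, round_key, sizeof(halves));

		/* Little endian prints 0x0706050403020100,
		 * big endian prints    0x0001020304050607. */
		printf("first half as u64: 0x%016llx\n",
		       (unsigned long long)halves[0]);
		return 0;
	}

The patch therefore switches the round-key, mac and ctriv loads and stores to the .16b arrangement, which matches the in-memory byte order regardless of kernel endianness.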
Ard Biesheuvel authored 8 years ago
commit 56e4e76c68

arch/arm64/crypto/aes-ce-ccm-core.S: 27 insertions(+), 26 deletions(-)

@@ -9,6 +9,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/assembler.h>
 
 	.text
 	.arch	armv8-a+crypto
@@ -19,7 +20,7 @@
 	 */
 ENTRY(ce_aes_ccm_auth_data)
 	ldr	w8, [x3]			/* leftover from prev round? */
-	ld1	{v0.2d}, [x0]			/* load mac */
+	ld1	{v0.16b}, [x0]			/* load mac */
 	cbz	w8, 1f
 	sub	w8, w8, #16
 	eor	v1.16b, v1.16b, v1.16b
@@ -31,7 +32,7 @@ ENTRY(ce_aes_ccm_auth_data)
 	beq	8f				/* out of input? */
 	cbnz	w8, 0b
 	eor	v0.16b, v0.16b, v1.16b
-1:	ld1	{v3.2d}, [x4]			/* load first round key */
+1:	ld1	{v3.16b}, [x4]			/* load first round key */
 	prfm	pldl1strm, [x1]
 	cmp	w5, #12				/* which key size? */
 	add	x6, x4, #16
@@ -41,17 +42,17 @@ ENTRY(ce_aes_ccm_auth_data)
 	mov	v5.16b, v3.16b
 	b	4f
 2:	mov	v4.16b, v3.16b
-	ld1	{v5.2d}, [x6], #16		/* load 2nd round key */
+	ld1	{v5.16b}, [x6], #16		/* load 2nd round key */
 3:	aese	v0.16b, v4.16b
 	aesmc	v0.16b, v0.16b
-4:	ld1	{v3.2d}, [x6], #16		/* load next round key */
+4:	ld1	{v3.16b}, [x6], #16		/* load next round key */
 	aese	v0.16b, v5.16b
 	aesmc	v0.16b, v0.16b
-5:	ld1	{v4.2d}, [x6], #16		/* load next round key */
+5:	ld1	{v4.16b}, [x6], #16		/* load next round key */
 	subs	w7, w7, #3
 	aese	v0.16b, v3.16b
 	aesmc	v0.16b, v0.16b
-	ld1	{v5.2d}, [x6], #16		/* load next round key */
+	ld1	{v5.16b}, [x6], #16		/* load next round key */
 	bpl	3b
 	aese	v0.16b, v4.16b
 	subs	w2, w2, #16			/* last data? */
@@ -60,7 +61,7 @@ ENTRY(ce_aes_ccm_auth_data)
 	ld1	{v1.16b}, [x1], #16		/* load next input block */
 	eor	v0.16b, v0.16b, v1.16b		/* xor with mac */
 	bne	1b
-6:	st1	{v0.2d}, [x0]			/* store mac */
+6:	st1	{v0.16b}, [x0]			/* store mac */
 	beq	10f
 	adds	w2, w2, #16
 	beq	10f
@@ -79,7 +80,7 @@ ENTRY(ce_aes_ccm_auth_data)
 	adds	w7, w7, #1
 	bne	9b
 	eor	v0.16b, v0.16b, v1.16b
-	st1	{v0.2d}, [x0]
+	st1	{v0.16b}, [x0]
 10:	str	w8, [x3]
 	ret
 ENDPROC(ce_aes_ccm_auth_data)
@@ -89,27 +90,27 @@ ENDPROC(ce_aes_ccm_auth_data)
 	 * 			 u32 rounds);
 	 */
 ENTRY(ce_aes_ccm_final)
-	ld1	{v3.2d}, [x2], #16		/* load first round key */
-	ld1	{v0.2d}, [x0]			/* load mac */
+	ld1	{v3.16b}, [x2], #16		/* load first round key */
+	ld1	{v0.16b}, [x0]			/* load mac */
 	cmp	w3, #12				/* which key size? */
 	sub	w3, w3, #2			/* modified # of rounds */
-	ld1	{v1.2d}, [x1]			/* load 1st ctriv */
+	ld1	{v1.16b}, [x1]			/* load 1st ctriv */
 	bmi	0f
 	bne	3f
 	mov	v5.16b, v3.16b
 	b	2f
 0:	mov	v4.16b, v3.16b
-1:	ld1	{v5.2d}, [x2], #16		/* load next round key */
+1:	ld1	{v5.16b}, [x2], #16		/* load next round key */
 	aese	v0.16b, v4.16b
 	aesmc	v0.16b, v0.16b
 	aese	v1.16b, v4.16b
 	aesmc	v1.16b, v1.16b
-2:	ld1	{v3.2d}, [x2], #16		/* load next round key */
+2:	ld1	{v3.16b}, [x2], #16		/* load next round key */
 	aese	v0.16b, v5.16b
 	aesmc	v0.16b, v0.16b
 	aese	v1.16b, v5.16b
 	aesmc	v1.16b, v1.16b
-3:	ld1	{v4.2d}, [x2], #16		/* load next round key */
+3:	ld1	{v4.16b}, [x2], #16		/* load next round key */
 	subs	w3, w3, #3
 	aese	v0.16b, v3.16b
 	aesmc	v0.16b, v0.16b
@@ -120,47 +121,47 @@ ENTRY(ce_aes_ccm_final)
 	aese	v1.16b, v4.16b
 	/* final round key cancels out */
 	eor	v0.16b, v0.16b, v1.16b		/* en-/decrypt the mac */
-	st1	{v0.2d}, [x0]			/* store result */
+	st1	{v0.16b}, [x0]			/* store result */
 	ret
 ENDPROC(ce_aes_ccm_final)
 
 	.macro	aes_ccm_do_crypt,enc
 	ldr	x8, [x6, #8]			/* load lower ctr */
-	ld1	{v0.2d}, [x5]			/* load mac */
-	rev	x8, x8				/* keep swabbed ctr in reg */
+	ld1	{v0.16b}, [x5]			/* load mac */
+CPU_LE(	rev	x8, x8			)	/* keep swabbed ctr in reg */
 0:	/* outer loop */
-	ld1	{v1.1d}, [x6]			/* load upper ctr */
+	ld1	{v1.8b}, [x6]			/* load upper ctr */
 	prfm	pldl1strm, [x1]
 	add	x8, x8, #1
 	rev	x9, x8
 	cmp	w4, #12				/* which key size? */
 	sub	w7, w4, #2			/* get modified # of rounds */
 	ins	v1.d[1], x9			/* no carry in lower ctr */
-	ld1	{v3.2d}, [x3]			/* load first round key */
+	ld1	{v3.16b}, [x3]			/* load first round key */
 	add	x10, x3, #16
 	bmi	1f
 	bne	4f
 	mov	v5.16b, v3.16b
 	b	3f
 1:	mov	v4.16b, v3.16b
-	ld1	{v5.2d}, [x10], #16		/* load 2nd round key */
+	ld1	{v5.16b}, [x10], #16		/* load 2nd round key */
 2:	/* inner loop: 3 rounds, 2x interleaved */
 	aese	v0.16b, v4.16b
 	aesmc	v0.16b, v0.16b
 	aese	v1.16b, v4.16b
 	aesmc	v1.16b, v1.16b
-3:	ld1	{v3.2d}, [x10], #16		/* load next round key */
+3:	ld1	{v3.16b}, [x10], #16		/* load next round key */
 	aese	v0.16b, v5.16b
 	aesmc	v0.16b, v0.16b
 	aese	v1.16b, v5.16b
 	aesmc	v1.16b, v1.16b
-4:	ld1	{v4.2d}, [x10], #16		/* load next round key */
+4:	ld1	{v4.16b}, [x10], #16		/* load next round key */
 	subs	w7, w7, #3
 	aese	v0.16b, v3.16b
 	aesmc	v0.16b, v0.16b
 	aese	v1.16b, v3.16b
 	aesmc	v1.16b, v1.16b
-	ld1	{v5.2d}, [x10], #16		/* load next round key */
+	ld1	{v5.16b}, [x10], #16		/* load next round key */
 	bpl	2b
 	aese	v0.16b, v4.16b
 	aese	v1.16b, v4.16b
@@ -177,14 +178,14 @@ ENDPROC(ce_aes_ccm_final)
 	eor	v0.16b, v0.16b, v2.16b		/* xor mac with pt ^ rk[last] */
 	st1	{v1.16b}, [x0], #16		/* write output block */
 	bne	0b
-	rev	x8, x8
-	st1	{v0.2d}, [x5]			/* store mac */
+CPU_LE(	rev	x8, x8			)
+	st1	{v0.16b}, [x5]			/* store mac */
 	str	x8, [x6, #8]			/* store lsb end of ctr (BE) */
5:	ret
 
 6:	eor	v0.16b, v0.16b, v5.16b		/* final round mac */
 	eor	v1.16b, v1.16b, v5.16b		/* final round enc */
-	st1	{v0.2d}, [x5]			/* store mac */
+	st1	{v0.16b}, [x5]			/* store mac */
 	add	w2, w2, #16			/* process partial tail block */
 7:	ldrb	w9, [x1], #1			/* get 1 byte of input */
 	umov	w6, v1.b[0]			/* get top crypted ctr byte */
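
A note on the CPU_LE() wrapper introduced above (the reason for the new #include <asm/assembler.h>): it emits its argument only when the kernel is built little endian, so the rev that byte-swaps the counter disappears from big-endian builds, where the in-memory counter is already in the CPU's native order. The helpers in arch/arm64/include/asm/assembler.h have roughly this shape (a paraphrase, not a verbatim copy):

	#ifdef CONFIG_CPU_BIG_ENDIAN
	#define CPU_BE(code...)	code
	#define CPU_LE(code...)
	#else
	#define CPU_BE(code...)
	#define CPU_LE(code...)	code
	#endif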