浏览代码

crypto: arm/aes-ce - remove cra_alignmask

Remove the unnecessary alignmask: it is much more efficient to deal with
the misalignment in the core algorithm than to rely on the crypto API to
copy the data to a suitably aligned buffer.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Ard Biesheuvel 8 年之前
父节点
当前提交
1465fb13d3
共有 2 个文件被更改,包括 47 次插入和 52 次删除
  1. 41 43
      arch/arm/crypto/aes-ce-core.S
  2. 6 9
      arch/arm/crypto/aes-ce-glue.c

+ 41 - 43
arch/arm/crypto/aes-ce-core.S

@@ -169,19 +169,19 @@ ENTRY(ce_aes_ecb_encrypt)
 .Lecbencloop3x:
 .Lecbencloop3x:
 	subs		r4, r4, #3
 	subs		r4, r4, #3
 	bmi		.Lecbenc1x
 	bmi		.Lecbenc1x
-	vld1.8		{q0-q1}, [r1, :64]!
-	vld1.8		{q2}, [r1, :64]!
+	vld1.8		{q0-q1}, [r1]!
+	vld1.8		{q2}, [r1]!
 	bl		aes_encrypt_3x
 	bl		aes_encrypt_3x
-	vst1.8		{q0-q1}, [r0, :64]!
-	vst1.8		{q2}, [r0, :64]!
+	vst1.8		{q0-q1}, [r0]!
+	vst1.8		{q2}, [r0]!
 	b		.Lecbencloop3x
 	b		.Lecbencloop3x
 .Lecbenc1x:
 .Lecbenc1x:
 	adds		r4, r4, #3
 	adds		r4, r4, #3
 	beq		.Lecbencout
 	beq		.Lecbencout
 .Lecbencloop:
 .Lecbencloop:
-	vld1.8		{q0}, [r1, :64]!
+	vld1.8		{q0}, [r1]!
 	bl		aes_encrypt
 	bl		aes_encrypt
-	vst1.8		{q0}, [r0, :64]!
+	vst1.8		{q0}, [r0]!
 	subs		r4, r4, #1
 	subs		r4, r4, #1
 	bne		.Lecbencloop
 	bne		.Lecbencloop
 .Lecbencout:
 .Lecbencout:
@@ -195,19 +195,19 @@ ENTRY(ce_aes_ecb_decrypt)
 .Lecbdecloop3x:
 .Lecbdecloop3x:
 	subs		r4, r4, #3
 	subs		r4, r4, #3
 	bmi		.Lecbdec1x
 	bmi		.Lecbdec1x
-	vld1.8		{q0-q1}, [r1, :64]!
-	vld1.8		{q2}, [r1, :64]!
+	vld1.8		{q0-q1}, [r1]!
+	vld1.8		{q2}, [r1]!
 	bl		aes_decrypt_3x
 	bl		aes_decrypt_3x
-	vst1.8		{q0-q1}, [r0, :64]!
-	vst1.8		{q2}, [r0, :64]!
+	vst1.8		{q0-q1}, [r0]!
+	vst1.8		{q2}, [r0]!
 	b		.Lecbdecloop3x
 	b		.Lecbdecloop3x
 .Lecbdec1x:
 .Lecbdec1x:
 	adds		r4, r4, #3
 	adds		r4, r4, #3
 	beq		.Lecbdecout
 	beq		.Lecbdecout
 .Lecbdecloop:
 .Lecbdecloop:
-	vld1.8		{q0}, [r1, :64]!
+	vld1.8		{q0}, [r1]!
 	bl		aes_decrypt
 	bl		aes_decrypt
-	vst1.8		{q0}, [r0, :64]!
+	vst1.8		{q0}, [r0]!
 	subs		r4, r4, #1
 	subs		r4, r4, #1
 	bne		.Lecbdecloop
 	bne		.Lecbdecloop
 .Lecbdecout:
 .Lecbdecout:
@@ -226,10 +226,10 @@ ENTRY(ce_aes_cbc_encrypt)
 	vld1.8		{q0}, [r5]
 	vld1.8		{q0}, [r5]
 	prepare_key	r2, r3
 	prepare_key	r2, r3
 .Lcbcencloop:
 .Lcbcencloop:
-	vld1.8		{q1}, [r1, :64]!	@ get next pt block
+	vld1.8		{q1}, [r1]!		@ get next pt block
 	veor		q0, q0, q1		@ ..and xor with iv
 	veor		q0, q0, q1		@ ..and xor with iv
 	bl		aes_encrypt
 	bl		aes_encrypt
-	vst1.8		{q0}, [r0, :64]!
+	vst1.8		{q0}, [r0]!
 	subs		r4, r4, #1
 	subs		r4, r4, #1
 	bne		.Lcbcencloop
 	bne		.Lcbcencloop
 	vst1.8		{q0}, [r5]
 	vst1.8		{q0}, [r5]
@@ -244,8 +244,8 @@ ENTRY(ce_aes_cbc_decrypt)
 .Lcbcdecloop3x:
 .Lcbcdecloop3x:
 	subs		r4, r4, #3
 	subs		r4, r4, #3
 	bmi		.Lcbcdec1x
 	bmi		.Lcbcdec1x
-	vld1.8		{q0-q1}, [r1, :64]!
-	vld1.8		{q2}, [r1, :64]!
+	vld1.8		{q0-q1}, [r1]!
+	vld1.8		{q2}, [r1]!
 	vmov		q3, q0
 	vmov		q3, q0
 	vmov		q4, q1
 	vmov		q4, q1
 	vmov		q5, q2
 	vmov		q5, q2
@@ -254,19 +254,19 @@ ENTRY(ce_aes_cbc_decrypt)
 	veor		q1, q1, q3
 	veor		q1, q1, q3
 	veor		q2, q2, q4
 	veor		q2, q2, q4
 	vmov		q6, q5
 	vmov		q6, q5
-	vst1.8		{q0-q1}, [r0, :64]!
-	vst1.8		{q2}, [r0, :64]!
+	vst1.8		{q0-q1}, [r0]!
+	vst1.8		{q2}, [r0]!
 	b		.Lcbcdecloop3x
 	b		.Lcbcdecloop3x
 .Lcbcdec1x:
 .Lcbcdec1x:
 	adds		r4, r4, #3
 	adds		r4, r4, #3
 	beq		.Lcbcdecout
 	beq		.Lcbcdecout
 	vmov		q15, q14		@ preserve last round key
 	vmov		q15, q14		@ preserve last round key
 .Lcbcdecloop:
 .Lcbcdecloop:
-	vld1.8		{q0}, [r1, :64]!	@ get next ct block
+	vld1.8		{q0}, [r1]!		@ get next ct block
 	veor		q14, q15, q6		@ combine prev ct with last key
 	veor		q14, q15, q6		@ combine prev ct with last key
 	vmov		q6, q0
 	vmov		q6, q0
 	bl		aes_decrypt
 	bl		aes_decrypt
-	vst1.8		{q0}, [r0, :64]!
+	vst1.8		{q0}, [r0]!
 	subs		r4, r4, #1
 	subs		r4, r4, #1
 	bne		.Lcbcdecloop
 	bne		.Lcbcdecloop
 .Lcbcdecout:
 .Lcbcdecout:
@@ -300,15 +300,15 @@ ENTRY(ce_aes_ctr_encrypt)
 	rev		ip, r6
 	rev		ip, r6
 	add		r6, r6, #1
 	add		r6, r6, #1
 	vmov		s11, ip
 	vmov		s11, ip
-	vld1.8		{q3-q4}, [r1, :64]!
-	vld1.8		{q5}, [r1, :64]!
+	vld1.8		{q3-q4}, [r1]!
+	vld1.8		{q5}, [r1]!
 	bl		aes_encrypt_3x
 	bl		aes_encrypt_3x
 	veor		q0, q0, q3
 	veor		q0, q0, q3
 	veor		q1, q1, q4
 	veor		q1, q1, q4
 	veor		q2, q2, q5
 	veor		q2, q2, q5
 	rev		ip, r6
 	rev		ip, r6
-	vst1.8		{q0-q1}, [r0, :64]!
-	vst1.8		{q2}, [r0, :64]!
+	vst1.8		{q0-q1}, [r0]!
+	vst1.8		{q2}, [r0]!
 	vmov		s27, ip
 	vmov		s27, ip
 	b		.Lctrloop3x
 	b		.Lctrloop3x
 .Lctr1x:
 .Lctr1x:
@@ -318,10 +318,10 @@ ENTRY(ce_aes_ctr_encrypt)
 	vmov		q0, q6
 	vmov		q0, q6
 	bl		aes_encrypt
 	bl		aes_encrypt
 	subs		r4, r4, #1
 	subs		r4, r4, #1
-	bmi		.Lctrhalfblock		@ blocks < 0 means 1/2 block
-	vld1.8		{q3}, [r1, :64]!
+	bmi		.Lctrtailblock		@ blocks < 0 means tail block
+	vld1.8		{q3}, [r1]!
 	veor		q3, q0, q3
 	veor		q3, q0, q3
-	vst1.8		{q3}, [r0, :64]!
+	vst1.8		{q3}, [r0]!
 
 
 	adds		r6, r6, #1		@ increment BE ctr
 	adds		r6, r6, #1		@ increment BE ctr
 	rev		ip, r6
 	rev		ip, r6
@@ -333,10 +333,8 @@ ENTRY(ce_aes_ctr_encrypt)
 	vst1.8		{q6}, [r5]
 	vst1.8		{q6}, [r5]
 	pop		{r4-r6, pc}
 	pop		{r4-r6, pc}
 
 
-.Lctrhalfblock:
-	vld1.8		{d1}, [r1, :64]
-	veor		d0, d0, d1
-	vst1.8		{d0}, [r0, :64]
+.Lctrtailblock:
+	vst1.8		{q0}, [r0, :64]		@ return just the key stream
 	pop		{r4-r6, pc}
 	pop		{r4-r6, pc}
 
 
 .Lctrcarry:
 .Lctrcarry:
@@ -405,8 +403,8 @@ ENTRY(ce_aes_xts_encrypt)
 .Lxtsenc3x:
 .Lxtsenc3x:
 	subs		r4, r4, #3
 	subs		r4, r4, #3
 	bmi		.Lxtsenc1x
 	bmi		.Lxtsenc1x
-	vld1.8		{q0-q1}, [r1, :64]!	@ get 3 pt blocks
-	vld1.8		{q2}, [r1, :64]!
+	vld1.8		{q0-q1}, [r1]!		@ get 3 pt blocks
+	vld1.8		{q2}, [r1]!
 	next_tweak	q4, q3, q7, q6
 	next_tweak	q4, q3, q7, q6
 	veor		q0, q0, q3
 	veor		q0, q0, q3
 	next_tweak	q5, q4, q7, q6
 	next_tweak	q5, q4, q7, q6
@@ -416,8 +414,8 @@ ENTRY(ce_aes_xts_encrypt)
 	veor		q0, q0, q3
 	veor		q0, q0, q3
 	veor		q1, q1, q4
 	veor		q1, q1, q4
 	veor		q2, q2, q5
 	veor		q2, q2, q5
-	vst1.8		{q0-q1}, [r0, :64]!	@ write 3 ct blocks
-	vst1.8		{q2}, [r0, :64]!
+	vst1.8		{q0-q1}, [r0]!		@ write 3 ct blocks
+	vst1.8		{q2}, [r0]!
 	vmov		q3, q5
 	vmov		q3, q5
 	teq		r4, #0
 	teq		r4, #0
 	beq		.Lxtsencout
 	beq		.Lxtsencout
@@ -426,11 +424,11 @@ ENTRY(ce_aes_xts_encrypt)
 	adds		r4, r4, #3
 	adds		r4, r4, #3
 	beq		.Lxtsencout
 	beq		.Lxtsencout
 .Lxtsencloop:
 .Lxtsencloop:
-	vld1.8		{q0}, [r1, :64]!
+	vld1.8		{q0}, [r1]!
 	veor		q0, q0, q3
 	veor		q0, q0, q3
 	bl		aes_encrypt
 	bl		aes_encrypt
 	veor		q0, q0, q3
 	veor		q0, q0, q3
-	vst1.8		{q0}, [r0, :64]!
+	vst1.8		{q0}, [r0]!
 	subs		r4, r4, #1
 	subs		r4, r4, #1
 	beq		.Lxtsencout
 	beq		.Lxtsencout
 	next_tweak	q3, q3, q7, q6
 	next_tweak	q3, q3, q7, q6
@@ -456,8 +454,8 @@ ENTRY(ce_aes_xts_decrypt)
 .Lxtsdec3x:
 .Lxtsdec3x:
 	subs		r4, r4, #3
 	subs		r4, r4, #3
 	bmi		.Lxtsdec1x
 	bmi		.Lxtsdec1x
-	vld1.8		{q0-q1}, [r1, :64]!	@ get 3 ct blocks
-	vld1.8		{q2}, [r1, :64]!
+	vld1.8		{q0-q1}, [r1]!		@ get 3 ct blocks
+	vld1.8		{q2}, [r1]!
 	next_tweak	q4, q3, q7, q6
 	next_tweak	q4, q3, q7, q6
 	veor		q0, q0, q3
 	veor		q0, q0, q3
 	next_tweak	q5, q4, q7, q6
 	next_tweak	q5, q4, q7, q6
@@ -467,8 +465,8 @@ ENTRY(ce_aes_xts_decrypt)
 	veor		q0, q0, q3
 	veor		q0, q0, q3
 	veor		q1, q1, q4
 	veor		q1, q1, q4
 	veor		q2, q2, q5
 	veor		q2, q2, q5
-	vst1.8		{q0-q1}, [r0, :64]!	@ write 3 pt blocks
-	vst1.8		{q2}, [r0, :64]!
+	vst1.8		{q0-q1}, [r0]!		@ write 3 pt blocks
+	vst1.8		{q2}, [r0]!
 	vmov		q3, q5
 	vmov		q3, q5
 	teq		r4, #0
 	teq		r4, #0
 	beq		.Lxtsdecout
 	beq		.Lxtsdecout
@@ -477,12 +475,12 @@ ENTRY(ce_aes_xts_decrypt)
 	adds		r4, r4, #3
 	adds		r4, r4, #3
 	beq		.Lxtsdecout
 	beq		.Lxtsdecout
 .Lxtsdecloop:
 .Lxtsdecloop:
-	vld1.8		{q0}, [r1, :64]!
+	vld1.8		{q0}, [r1]!
 	veor		q0, q0, q3
 	veor		q0, q0, q3
 	add		ip, r2, #32		@ 3rd round key
 	add		ip, r2, #32		@ 3rd round key
 	bl		aes_decrypt
 	bl		aes_decrypt
 	veor		q0, q0, q3
 	veor		q0, q0, q3
-	vst1.8		{q0}, [r0, :64]!
+	vst1.8		{q0}, [r0]!
 	subs		r4, r4, #1
 	subs		r4, r4, #1
 	beq		.Lxtsdecout
 	beq		.Lxtsdecout
 	next_tweak	q3, q3, q7, q6
 	next_tweak	q3, q3, q7, q6

+ 6 - 9
arch/arm/crypto/aes-ce-glue.c

@@ -278,14 +278,15 @@ static int ctr_encrypt(struct skcipher_request *req)
 		u8 *tsrc = walk.src.virt.addr;
 		u8 *tsrc = walk.src.virt.addr;
 
 
 		/*
 		/*
-		 * Minimum alignment is 8 bytes, so if nbytes is <= 8, we need
-		 * to tell aes_ctr_encrypt() to only read half a block.
+		 * Tell aes_ctr_encrypt() to process a tail block.
 		 */
 		 */
-		blocks = (nbytes <= 8) ? -1 : 1;
+		blocks = -1;
 
 
-		ce_aes_ctr_encrypt(tail, tsrc, (u8 *)ctx->key_enc,
+		ce_aes_ctr_encrypt(tail, NULL, (u8 *)ctx->key_enc,
 				   num_rounds(ctx), blocks, walk.iv);
 				   num_rounds(ctx), blocks, walk.iv);
-		memcpy(tdst, tail, nbytes);
+		if (tdst != tsrc)
+			memcpy(tdst, tsrc, nbytes);
+		crypto_xor(tdst, tail, nbytes);
 		err = skcipher_walk_done(&walk, 0);
 		err = skcipher_walk_done(&walk, 0);
 	}
 	}
 	kernel_neon_end();
 	kernel_neon_end();
@@ -345,7 +346,6 @@ static struct skcipher_alg aes_algs[] = { {
 		.cra_flags		= CRYPTO_ALG_INTERNAL,
 		.cra_flags		= CRYPTO_ALG_INTERNAL,
 		.cra_blocksize		= AES_BLOCK_SIZE,
 		.cra_blocksize		= AES_BLOCK_SIZE,
 		.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
 		.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
-		.cra_alignmask		= 7,
 		.cra_module		= THIS_MODULE,
 		.cra_module		= THIS_MODULE,
 	},
 	},
 	.min_keysize	= AES_MIN_KEY_SIZE,
 	.min_keysize	= AES_MIN_KEY_SIZE,
@@ -361,7 +361,6 @@ static struct skcipher_alg aes_algs[] = { {
 		.cra_flags		= CRYPTO_ALG_INTERNAL,
 		.cra_flags		= CRYPTO_ALG_INTERNAL,
 		.cra_blocksize		= AES_BLOCK_SIZE,
 		.cra_blocksize		= AES_BLOCK_SIZE,
 		.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
 		.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
-		.cra_alignmask		= 7,
 		.cra_module		= THIS_MODULE,
 		.cra_module		= THIS_MODULE,
 	},
 	},
 	.min_keysize	= AES_MIN_KEY_SIZE,
 	.min_keysize	= AES_MIN_KEY_SIZE,
@@ -378,7 +377,6 @@ static struct skcipher_alg aes_algs[] = { {
 		.cra_flags		= CRYPTO_ALG_INTERNAL,
 		.cra_flags		= CRYPTO_ALG_INTERNAL,
 		.cra_blocksize		= 1,
 		.cra_blocksize		= 1,
 		.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
 		.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
-		.cra_alignmask		= 7,
 		.cra_module		= THIS_MODULE,
 		.cra_module		= THIS_MODULE,
 	},
 	},
 	.min_keysize	= AES_MIN_KEY_SIZE,
 	.min_keysize	= AES_MIN_KEY_SIZE,
@@ -396,7 +394,6 @@ static struct skcipher_alg aes_algs[] = { {
 		.cra_flags		= CRYPTO_ALG_INTERNAL,
 		.cra_flags		= CRYPTO_ALG_INTERNAL,
 		.cra_blocksize		= AES_BLOCK_SIZE,
 		.cra_blocksize		= AES_BLOCK_SIZE,
 		.cra_ctxsize		= sizeof(struct crypto_aes_xts_ctx),
 		.cra_ctxsize		= sizeof(struct crypto_aes_xts_ctx),
-		.cra_alignmask		= 7,
 		.cra_module		= THIS_MODULE,
 		.cra_module		= THIS_MODULE,
 	},
 	},
 	.min_keysize	= 2 * AES_MIN_KEY_SIZE,
 	.min_keysize	= 2 * AES_MIN_KEY_SIZE,