
Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6

Pull crypto updates from Herbert Xu:
 "API:
   - Remove VLA usage
   - Add cryptostat user-space interface
   - Add notifier for new crypto algorithms

  Algorithms:
   - Add OFB mode
   - Remove speck

  Drivers:
   - Remove x86/sha*-mb as they are buggy
   - Remove pcbc(aes) from x86/aesni
   - Improve performance of arm/ghash-ce by up to 85%
   - Implement CTS-CBC in arm64/aes-blk, faster by up to 50%
   - Remove PMULL based arm64/crc32 driver
   - Use PMULL in arm64/crct10dif
   - Add aes-ctr support in s5p-sss
   - Add caam/qi2 driver

  Others:
   - Pick better transform if one becomes available in crc-t10dif"

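As a point of reference for the new OFB support listed under "Algorithms" above: once the generic "ofb" template from crypto/ofb.c is registered, kernel code can instantiate it over AES through the ordinary skcipher API. The sketch below is illustrative only; the function name, buffer handling and error paths are assumptions rather than code from this series, but the allocate/setkey/request/wait pattern it shows is the standard synchronous use of the API.

#include <crypto/skcipher.h>
#include <linux/crypto.h>
#include <linux/scatterlist.h>
#include <linux/err.h>

/* Hypothetical helper: OFB-AES over a linearly mapped buffer (not stack memory). */
static int ofb_aes_demo(const u8 *key, unsigned int keylen,
			u8 *buf, unsigned int len, u8 *iv)
{
	struct crypto_skcipher *tfm;
	struct skcipher_request *req;
	struct scatterlist sg;
	DECLARE_CRYPTO_WAIT(wait);
	int err;

	/* "ofb(aes)" is served by the new generic OFB template */
	tfm = crypto_alloc_skcipher("ofb(aes)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_skcipher_setkey(tfm, key, keylen);
	if (err)
		goto out_free_tfm;

	req = skcipher_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		err = -ENOMEM;
		goto out_free_tfm;
	}

	sg_init_one(&sg, buf, len);
	skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP,
				      crypto_req_done, &wait);
	skcipher_request_set_crypt(req, &sg, &sg, len, iv);

	/* OFB is a stream mode: encryption and decryption are the same operation */
	err = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);

	skcipher_request_free(req);
out_free_tfm:
	crypto_free_skcipher(tfm);
	return err;
}
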
* 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (124 commits)
  crypto: chelsio - Update ntx queue received from cxgb4
  crypto: ccree - avoid implicit enum conversion
  crypto: caam - add SPDX license identifier to all files
  crypto: caam/qi - simplify CGR allocation, freeing
  crypto: mxs-dcp - make symbols 'sha1_null_hash' and 'sha256_null_hash' static
  crypto: arm64/aes-blk - ensure XTS mask is always loaded
  crypto: testmgr - fix sizeof() on COMP_BUF_SIZE
  crypto: chtls - remove set but not used variable 'csk'
  crypto: axis - fix platform_no_drv_owner.cocci warnings
  crypto: x86/aes-ni - fix build error following fpu template removal
  crypto: arm64/aes - fix handling sub-block CTS-CBC inputs
  crypto: caam/qi2 - avoid double export
  crypto: mxs-dcp - Fix AES issues
  crypto: mxs-dcp - Fix SHA null hashes and output length
  crypto: mxs-dcp - Implement sha import/export
  crypto: aegis/generic - fix for big endian systems
  crypto: morus/generic - fix for big endian systems
  crypto: lrw - fix rebase error after out of bounds fix
  crypto: cavium/nitrox - use pci_alloc_irq_vectors() while enabling MSI-X.
  crypto: cavium/nitrox - NITROX command queue changes.
  ...
Linus Torvalds
commit 62606c224d
100 changed files with 1955 additions and 12201 deletions
  1. Documentation/filesystems/fscrypt.rst (+0, -10)
  2. MAINTAINERS (+0, -8)
  3. arch/arm/crypto/Kconfig (+1, -6)
  4. arch/arm/crypto/Makefile (+0, -2)
  5. arch/arm/crypto/chacha20-neon-core.S (+143, -134)
  6. arch/arm/crypto/crc32-ce-glue.c (+1, -1)
  7. arch/arm/crypto/ghash-ce-core.S (+103, -5)
  8. arch/arm/crypto/ghash-ce-glue.c (+27, -11)
  9. arch/arm/crypto/speck-neon-core.S (+0, -434)
 10. arch/arm/crypto/speck-neon-glue.c (+0, -288)
 11. arch/arm64/configs/defconfig (+1, -1)
 12. arch/arm64/crypto/Kconfig (+0, -11)
 13. arch/arm64/crypto/Makefile (+0, -6)
 14. arch/arm64/crypto/aes-ce.S (+5, -0)
 15. arch/arm64/crypto/aes-glue.c (+193, -24)
 16. arch/arm64/crypto/aes-modes.S (+216, -200)
 17. arch/arm64/crypto/aes-neon.S (+6, -0)
 18. arch/arm64/crypto/crc32-ce-core.S (+0, -287)
 19. arch/arm64/crypto/crc32-ce-glue.c (+0, -244)
 20. arch/arm64/crypto/crct10dif-ce-core.S (+239, -75)
 21. arch/arm64/crypto/crct10dif-ce-glue.c (+12, -2)
 22. arch/arm64/crypto/speck-neon-core.S (+0, -352)
 23. arch/arm64/crypto/speck-neon-glue.c (+0, -282)
 24. arch/m68k/configs/amiga_defconfig (+0, -2)
 25. arch/m68k/configs/apollo_defconfig (+0, -2)
 26. arch/m68k/configs/atari_defconfig (+0, -2)
 27. arch/m68k/configs/bvme6000_defconfig (+0, -2)
 28. arch/m68k/configs/hp300_defconfig (+0, -2)
 29. arch/m68k/configs/mac_defconfig (+0, -2)
 30. arch/m68k/configs/multi_defconfig (+0, -2)
 31. arch/m68k/configs/mvme147_defconfig (+0, -2)
 32. arch/m68k/configs/mvme16x_defconfig (+0, -2)
 33. arch/m68k/configs/q40_defconfig (+0, -2)
 34. arch/m68k/configs/sun3_defconfig (+0, -2)
 35. arch/m68k/configs/sun3x_defconfig (+0, -2)
 36. arch/s390/configs/debug_defconfig (+0, -1)
 37. arch/s390/configs/performance_defconfig (+0, -1)
 38. arch/s390/crypto/aes_s390.c (+24, -24)
 39. arch/s390/defconfig (+0, -1)
 40. arch/x86/crypto/Makefile (+1, -4)
 41. arch/x86/crypto/aesni-intel_glue.c (+2, -45)
 42. arch/x86/crypto/fpu.c (+0, -207)
 43. arch/x86/crypto/sha1-mb/Makefile (+0, -14)
 44. arch/x86/crypto/sha1-mb/sha1_mb.c (+0, -1011)
 45. arch/x86/crypto/sha1-mb/sha1_mb_ctx.h (+0, -134)
 46. arch/x86/crypto/sha1-mb/sha1_mb_mgr.h (+0, -110)
 47. arch/x86/crypto/sha1-mb/sha1_mb_mgr_datastruct.S (+0, -287)
 48. arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S (+0, -304)
 49. arch/x86/crypto/sha1-mb/sha1_mb_mgr_init_avx2.c (+0, -64)
 50. arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S (+0, -209)
 51. arch/x86/crypto/sha1-mb/sha1_x8_avx2.S (+0, -492)
 52. arch/x86/crypto/sha256-mb/Makefile (+0, -14)
 53. arch/x86/crypto/sha256-mb/sha256_mb.c (+0, -1013)
 54. arch/x86/crypto/sha256-mb/sha256_mb_ctx.h (+0, -134)
 55. arch/x86/crypto/sha256-mb/sha256_mb_mgr.h (+0, -108)
 56. arch/x86/crypto/sha256-mb/sha256_mb_mgr_datastruct.S (+0, -304)
 57. arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S (+0, -307)
 58. arch/x86/crypto/sha256-mb/sha256_mb_mgr_init_avx2.c (+0, -65)
 59. arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S (+0, -214)
 60. arch/x86/crypto/sha256-mb/sha256_x8_avx2.S (+0, -598)
 61. arch/x86/crypto/sha512-mb/Makefile (+0, -12)
 62. arch/x86/crypto/sha512-mb/sha512_mb.c (+0, -1047)
 63. arch/x86/crypto/sha512-mb/sha512_mb_ctx.h (+0, -128)
 64. arch/x86/crypto/sha512-mb/sha512_mb_mgr.h (+0, -104)
 65. arch/x86/crypto/sha512-mb/sha512_mb_mgr_datastruct.S (+0, -281)
 66. arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S (+0, -297)
 67. arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c (+0, -69)
 68. arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S (+0, -224)
 69. arch/x86/crypto/sha512-mb/sha512_x4_avx2.S (+0, -531)
 70. crypto/Kconfig (+24, -77)
 71. crypto/Makefile (+2, -2)
 72. crypto/aegis.h (+9, -11)
 73. crypto/ahash.c (+18, -7)
 74. crypto/algapi.c (+16, -1)
 75. crypto/algboss.c (+2, -0)
 76. crypto/algif_aead.c (+6, -6)
 77. crypto/algif_hash.c (+1, -1)
 78. crypto/authenc.c (+4, -4)
 79. crypto/authencesn.c (+4, -4)
 80. crypto/ccm.c (+6, -3)
 81. crypto/chacha20_generic.c (+4, -3)
 82. crypto/cryptd.c (+17, -15)
 83. crypto/crypto_null.c (+5, -6)
 84. crypto/crypto_user_base.c (+7, -2)
 85. crypto/crypto_user_stat.c (+463, -0)
 86. crypto/echainiv.c (+2, -2)
 87. crypto/gcm.c (+4, -4)
 88. crypto/internal.h (+0, -8)
 89. crypto/lrw.c (+89, -250)
 90. crypto/mcryptd.c (+0, -675)
 91. crypto/morus1280.c (+2, -5)
 92. crypto/morus640.c (+4, -12)
 93. crypto/ofb.c (+225, -0)
 94. crypto/rng.c (+1, -0)
 95. crypto/rsa-pkcs1pad.c (+0, -9)
 96. crypto/seqiv.c (+2, -2)
 97. crypto/shash.c (+19, -14)
 98. crypto/skcipher.c (+24, -0)
 99. crypto/speck.c (+0, -307)
100. crypto/tcrypt.c (+21, -6)

+ 0 - 10
Documentation/filesystems/fscrypt.rst

@@ -191,21 +191,11 @@ Currently, the following pairs of encryption modes are supported:

 - AES-256-XTS for contents and AES-256-CTS-CBC for filenames
 - AES-128-CBC for contents and AES-128-CTS-CBC for filenames
-- Speck128/256-XTS for contents and Speck128/256-CTS-CBC for filenames

 It is strongly recommended to use AES-256-XTS for contents encryption.
 AES-128-CBC was added only for low-powered embedded devices with
 crypto accelerators such as CAAM or CESA that do not support XTS.

-Similarly, Speck128/256 support was only added for older or low-end
-CPUs which cannot do AES fast enough -- especially ARM CPUs which have
-NEON instructions but not the Cryptography Extensions -- and for which
-it would not otherwise be feasible to use encryption at all.  It is
-not recommended to use Speck on CPUs that have AES instructions.
-Speck support is only available if it has been enabled in the crypto
-API via CONFIG_CRYPTO_SPECK.  Also, on ARM platforms, to get
-acceptable performance CONFIG_CRYPTO_SPECK_NEON must be enabled.
-
 New encryption modes can be added relatively easily, without changes
 to individual filesystems.  However, authenticated encryption (AE)
 modes are not currently supported because of the difficulty of dealing

+ 0 - 8
MAINTAINERS

@@ -7578,14 +7578,6 @@ S:	Supported
 F:	drivers/infiniband/hw/i40iw/
 F:	include/uapi/rdma/i40iw-abi.h

-INTEL SHA MULTIBUFFER DRIVER
-M:	Megha Dey <megha.dey@linux.intel.com>
-R:	Tim Chen <tim.c.chen@linux.intel.com>
-L:	linux-crypto@vger.kernel.org
-S:	Supported
-F:	arch/x86/crypto/sha*-mb/
-F:	crypto/mcryptd.c
-
 INTEL TELEMETRY DRIVER
 M:	Souvik Kumar Chakravarty <souvik.k.chakravarty@intel.com>
 L:	platform-driver-x86@vger.kernel.org

+ 1 - 6
arch/arm/crypto/Kconfig

@@ -99,6 +99,7 @@ config CRYPTO_GHASH_ARM_CE
 	depends on KERNEL_MODE_NEON
 	select CRYPTO_HASH
 	select CRYPTO_CRYPTD
+	select CRYPTO_GF128MUL
 	help
 	  Use an implementation of GHASH (used by the GCM AEAD chaining mode)
 	  that uses the 64x64 to 128 bit polynomial multiplication (vmull.p64)
@@ -121,10 +122,4 @@ config CRYPTO_CHACHA20_NEON
 	select CRYPTO_BLKCIPHER
 	select CRYPTO_CHACHA20

-config CRYPTO_SPECK_NEON
-	tristate "NEON accelerated Speck cipher algorithms"
-	depends on KERNEL_MODE_NEON
-	select CRYPTO_BLKCIPHER
-	select CRYPTO_SPECK
-
 endif

+ 0 - 2
arch/arm/crypto/Makefile

@@ -10,7 +10,6 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o
 obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o
 obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o
 obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o
-obj-$(CONFIG_CRYPTO_SPECK_NEON) += speck-neon.o

 ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o
 ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o
@@ -54,7 +53,6 @@ ghash-arm-ce-y	:= ghash-ce-core.o ghash-ce-glue.o
 crct10dif-arm-ce-y	:= crct10dif-ce-core.o crct10dif-ce-glue.o
 crc32-arm-ce-y:= crc32-ce-core.o crc32-ce-glue.o
 chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o
-speck-neon-y := speck-neon-core.o speck-neon-glue.o

 ifdef REGENERATE_ARM_CRYPTO
 quiet_cmd_perl = PERL    $@

+ 143 - 134
arch/arm/crypto/chacha20-neon-core.S

@@ -18,6 +18,34 @@
  * (at your option) any later version.
  * (at your option) any later version.
  */
  */
 
 
+ /*
+  * NEON doesn't have a rotate instruction.  The alternatives are, more or less:
+  *
+  * (a)  vshl.u32 + vsri.u32		(needs temporary register)
+  * (b)  vshl.u32 + vshr.u32 + vorr	(needs temporary register)
+  * (c)  vrev32.16			(16-bit rotations only)
+  * (d)  vtbl.8 + vtbl.8		(multiple of 8 bits rotations only,
+  *					 needs index vector)
+  *
+  * ChaCha20 has 16, 12, 8, and 7-bit rotations.  For the 12 and 7-bit
+  * rotations, the only choices are (a) and (b).  We use (a) since it takes
+  * two-thirds the cycles of (b) on both Cortex-A7 and Cortex-A53.
+  *
+  * For the 16-bit rotation, we use vrev32.16 since it's consistently fastest
+  * and doesn't need a temporary register.
+  *
+  * For the 8-bit rotation, we use vtbl.8 + vtbl.8.  On Cortex-A7, this sequence
+  * is twice as fast as (a), even when doing (a) on multiple registers
+  * simultaneously to eliminate the stall between vshl and vsri.  Also, it
+  * parallelizes better when temporary registers are scarce.
+  *
+  * A disadvantage is that on Cortex-A53, the vtbl sequence is the same speed as
+  * (a), so the need to load the rotation table actually makes the vtbl method
+  * slightly slower overall on that CPU (~1.3% slower ChaCha20).  Still, it
+  * seems to be a good compromise to get a more significant speed boost on some
+  * CPUs, e.g. ~4.8% faster ChaCha20 on Cortex-A7.
+  */
+
 #include <linux/linkage.h>
 #include <linux/linkage.h>
 
 
 	.text
 	.text
@@ -46,7 +74,9 @@ ENTRY(chacha20_block_xor_neon)
 	vmov		q10, q2
 	vmov		q10, q2
 	vmov		q11, q3
 	vmov		q11, q3
 
 
+	adr		ip, .Lrol8_table
 	mov		r3, #10
 	mov		r3, #10
+	vld1.8		{d10}, [ip, :64]
 
 
 .Ldoubleround:
 .Ldoubleround:
 	// x0 += x1, x3 = rotl32(x3 ^ x0, 16)
 	// x0 += x1, x3 = rotl32(x3 ^ x0, 16)
@@ -62,9 +92,9 @@ ENTRY(chacha20_block_xor_neon)
 
 
 	// x0 += x1, x3 = rotl32(x3 ^ x0, 8)
 	// x0 += x1, x3 = rotl32(x3 ^ x0, 8)
 	vadd.i32	q0, q0, q1
 	vadd.i32	q0, q0, q1
-	veor		q4, q3, q0
-	vshl.u32	q3, q4, #8
-	vsri.u32	q3, q4, #24
+	veor		q3, q3, q0
+	vtbl.8		d6, {d6}, d10
+	vtbl.8		d7, {d7}, d10
 
 
 	// x2 += x3, x1 = rotl32(x1 ^ x2, 7)
 	// x2 += x3, x1 = rotl32(x1 ^ x2, 7)
 	vadd.i32	q2, q2, q3
 	vadd.i32	q2, q2, q3
@@ -92,9 +122,9 @@ ENTRY(chacha20_block_xor_neon)
 
 
 	// x0 += x1, x3 = rotl32(x3 ^ x0, 8)
 	// x0 += x1, x3 = rotl32(x3 ^ x0, 8)
 	vadd.i32	q0, q0, q1
 	vadd.i32	q0, q0, q1
-	veor		q4, q3, q0
-	vshl.u32	q3, q4, #8
-	vsri.u32	q3, q4, #24
+	veor		q3, q3, q0
+	vtbl.8		d6, {d6}, d10
+	vtbl.8		d7, {d7}, d10
 
 
 	// x2 += x3, x1 = rotl32(x1 ^ x2, 7)
 	// x2 += x3, x1 = rotl32(x1 ^ x2, 7)
 	vadd.i32	q2, q2, q3
 	vadd.i32	q2, q2, q3
@@ -139,13 +169,17 @@ ENTRY(chacha20_block_xor_neon)
 	bx		lr
 	bx		lr
 ENDPROC(chacha20_block_xor_neon)
 ENDPROC(chacha20_block_xor_neon)
 
 
+	.align		4
+.Lctrinc:	.word	0, 1, 2, 3
+.Lrol8_table:	.byte	3, 0, 1, 2, 7, 4, 5, 6
+
 	.align		5
 	.align		5
 ENTRY(chacha20_4block_xor_neon)
 ENTRY(chacha20_4block_xor_neon)
-	push		{r4-r6, lr}
-	mov		ip, sp			// preserve the stack pointer
-	sub		r3, sp, #0x20		// allocate a 32 byte buffer
-	bic		r3, r3, #0x1f		// aligned to 32 bytes
-	mov		sp, r3
+	push		{r4-r5}
+	mov		r4, sp			// preserve the stack pointer
+	sub		ip, sp, #0x20		// allocate a 32 byte buffer
+	bic		ip, ip, #0x1f		// aligned to 32 bytes
+	mov		sp, ip
 
 
 	// r0: Input state matrix, s
 	// r0: Input state matrix, s
 	// r1: 4 data blocks output, o
 	// r1: 4 data blocks output, o
@@ -155,25 +189,24 @@ ENTRY(chacha20_4block_xor_neon)
 	// This function encrypts four consecutive ChaCha20 blocks by loading
 	// This function encrypts four consecutive ChaCha20 blocks by loading
 	// the state matrix in NEON registers four times. The algorithm performs
 	// the state matrix in NEON registers four times. The algorithm performs
 	// each operation on the corresponding word of each state matrix, hence
 	// each operation on the corresponding word of each state matrix, hence
-	// requires no word shuffling. For final XORing step we transpose the
-	// matrix by interleaving 32- and then 64-bit words, which allows us to
-	// do XOR in NEON registers.
+	// requires no word shuffling. The words are re-interleaved before the
+	// final addition of the original state and the XORing step.
 	//
 	//
 
 
-	// x0..15[0-3] = s0..3[0..3]
-	add		r3, r0, #0x20
+	// x0..15[0-3] = s0..15[0-3]
+	add		ip, r0, #0x20
 	vld1.32		{q0-q1}, [r0]
 	vld1.32		{q0-q1}, [r0]
-	vld1.32		{q2-q3}, [r3]
+	vld1.32		{q2-q3}, [ip]
 
 
-	adr		r3, CTRINC
+	adr		r5, .Lctrinc
 	vdup.32		q15, d7[1]
 	vdup.32		q15, d7[1]
 	vdup.32		q14, d7[0]
 	vdup.32		q14, d7[0]
-	vld1.32		{q11}, [r3, :128]
+	vld1.32		{q4}, [r5, :128]
 	vdup.32		q13, d6[1]
 	vdup.32		q13, d6[1]
 	vdup.32		q12, d6[0]
 	vdup.32		q12, d6[0]
-	vadd.i32	q12, q12, q11		// x12 += counter values 0-3
 	vdup.32		q11, d5[1]
 	vdup.32		q11, d5[1]
 	vdup.32		q10, d5[0]
 	vdup.32		q10, d5[0]
+	vadd.u32	q12, q12, q4		// x12 += counter values 0-3
 	vdup.32		q9, d4[1]
 	vdup.32		q9, d4[1]
 	vdup.32		q8, d4[0]
 	vdup.32		q8, d4[0]
 	vdup.32		q7, d3[1]
 	vdup.32		q7, d3[1]
@@ -185,9 +218,13 @@ ENTRY(chacha20_4block_xor_neon)
 	vdup.32		q1, d0[1]
 	vdup.32		q1, d0[1]
 	vdup.32		q0, d0[0]
 	vdup.32		q0, d0[0]
 
 
+	adr		ip, .Lrol8_table
 	mov		r3, #10
 	mov		r3, #10
+	b		1f
 
 
 .Ldoubleround4:
 .Ldoubleround4:
+	vld1.32		{q8-q9}, [sp, :256]
+1:
 	// x0 += x4, x12 = rotl32(x12 ^ x0, 16)
 	// x0 += x4, x12 = rotl32(x12 ^ x0, 16)
 	// x1 += x5, x13 = rotl32(x13 ^ x1, 16)
 	// x1 += x5, x13 = rotl32(x13 ^ x1, 16)
 	// x2 += x6, x14 = rotl32(x14 ^ x2, 16)
 	// x2 += x6, x14 = rotl32(x14 ^ x2, 16)
@@ -236,24 +273,25 @@ ENTRY(chacha20_4block_xor_neon)
 	// x1 += x5, x13 = rotl32(x13 ^ x1, 8)
 	// x1 += x5, x13 = rotl32(x13 ^ x1, 8)
 	// x2 += x6, x14 = rotl32(x14 ^ x2, 8)
 	// x2 += x6, x14 = rotl32(x14 ^ x2, 8)
 	// x3 += x7, x15 = rotl32(x15 ^ x3, 8)
 	// x3 += x7, x15 = rotl32(x15 ^ x3, 8)
+	vld1.8		{d16}, [ip, :64]
 	vadd.i32	q0, q0, q4
 	vadd.i32	q0, q0, q4
 	vadd.i32	q1, q1, q5
 	vadd.i32	q1, q1, q5
 	vadd.i32	q2, q2, q6
 	vadd.i32	q2, q2, q6
 	vadd.i32	q3, q3, q7
 	vadd.i32	q3, q3, q7
 
 
-	veor		q8, q12, q0
-	veor		q9, q13, q1
-	vshl.u32	q12, q8, #8
-	vshl.u32	q13, q9, #8
-	vsri.u32	q12, q8, #24
-	vsri.u32	q13, q9, #24
+	veor		q12, q12, q0
+	veor		q13, q13, q1
+	veor		q14, q14, q2
+	veor		q15, q15, q3
 
 
-	veor		q8, q14, q2
-	veor		q9, q15, q3
-	vshl.u32	q14, q8, #8
-	vshl.u32	q15, q9, #8
-	vsri.u32	q14, q8, #24
-	vsri.u32	q15, q9, #24
+	vtbl.8		d24, {d24}, d16
+	vtbl.8		d25, {d25}, d16
+	vtbl.8		d26, {d26}, d16
+	vtbl.8		d27, {d27}, d16
+	vtbl.8		d28, {d28}, d16
+	vtbl.8		d29, {d29}, d16
+	vtbl.8		d30, {d30}, d16
+	vtbl.8		d31, {d31}, d16
 
 
 	vld1.32		{q8-q9}, [sp, :256]
 	vld1.32		{q8-q9}, [sp, :256]
 
 
@@ -332,24 +370,25 @@ ENTRY(chacha20_4block_xor_neon)
 	// x1 += x6, x12 = rotl32(x12 ^ x1, 8)
 	// x1 += x6, x12 = rotl32(x12 ^ x1, 8)
 	// x2 += x7, x13 = rotl32(x13 ^ x2, 8)
 	// x2 += x7, x13 = rotl32(x13 ^ x2, 8)
 	// x3 += x4, x14 = rotl32(x14 ^ x3, 8)
 	// x3 += x4, x14 = rotl32(x14 ^ x3, 8)
+	vld1.8		{d16}, [ip, :64]
 	vadd.i32	q0, q0, q5
 	vadd.i32	q0, q0, q5
 	vadd.i32	q1, q1, q6
 	vadd.i32	q1, q1, q6
 	vadd.i32	q2, q2, q7
 	vadd.i32	q2, q2, q7
 	vadd.i32	q3, q3, q4
 	vadd.i32	q3, q3, q4
 
 
-	veor		q8, q15, q0
-	veor		q9, q12, q1
-	vshl.u32	q15, q8, #8
-	vshl.u32	q12, q9, #8
-	vsri.u32	q15, q8, #24
-	vsri.u32	q12, q9, #24
+	veor		q15, q15, q0
+	veor		q12, q12, q1
+	veor		q13, q13, q2
+	veor		q14, q14, q3
 
 
-	veor		q8, q13, q2
-	veor		q9, q14, q3
-	vshl.u32	q13, q8, #8
-	vshl.u32	q14, q9, #8
-	vsri.u32	q13, q8, #24
-	vsri.u32	q14, q9, #24
+	vtbl.8		d30, {d30}, d16
+	vtbl.8		d31, {d31}, d16
+	vtbl.8		d24, {d24}, d16
+	vtbl.8		d25, {d25}, d16
+	vtbl.8		d26, {d26}, d16
+	vtbl.8		d27, {d27}, d16
+	vtbl.8		d28, {d28}, d16
+	vtbl.8		d29, {d29}, d16
 
 
 	vld1.32		{q8-q9}, [sp, :256]
 	vld1.32		{q8-q9}, [sp, :256]
 
 
@@ -379,104 +418,76 @@ ENTRY(chacha20_4block_xor_neon)
 	vsri.u32	q6, q9, #25
 	vsri.u32	q6, q9, #25
 
 
 	subs		r3, r3, #1
 	subs		r3, r3, #1
-	beq		0f
-
-	vld1.32		{q8-q9}, [sp, :256]
-	b		.Ldoubleround4
-
-	// x0[0-3] += s0[0]
-	// x1[0-3] += s0[1]
-	// x2[0-3] += s0[2]
-	// x3[0-3] += s0[3]
-0:	ldmia		r0!, {r3-r6}
-	vdup.32		q8, r3
-	vdup.32		q9, r4
-	vadd.i32	q0, q0, q8
-	vadd.i32	q1, q1, q9
-	vdup.32		q8, r5
-	vdup.32		q9, r6
-	vadd.i32	q2, q2, q8
-	vadd.i32	q3, q3, q9
-
-	// x4[0-3] += s1[0]
-	// x5[0-3] += s1[1]
-	// x6[0-3] += s1[2]
-	// x7[0-3] += s1[3]
-	ldmia		r0!, {r3-r6}
-	vdup.32		q8, r3
-	vdup.32		q9, r4
-	vadd.i32	q4, q4, q8
-	vadd.i32	q5, q5, q9
-	vdup.32		q8, r5
-	vdup.32		q9, r6
-	vadd.i32	q6, q6, q8
-	vadd.i32	q7, q7, q9
-
-	// interleave 32-bit words in state n, n+1
-	vzip.32		q0, q1
-	vzip.32		q2, q3
-	vzip.32		q4, q5
-	vzip.32		q6, q7
-
-	// interleave 64-bit words in state n, n+2
+	bne		.Ldoubleround4
+
+	// x0..7[0-3] are in q0-q7, x10..15[0-3] are in q10-q15.
+	// x8..9[0-3] are on the stack.
+
+	// Re-interleave the words in the first two rows of each block (x0..7).
+	// Also add the counter values 0-3 to x12[0-3].
+	  vld1.32	{q8}, [r5, :128]	// load counter values 0-3
+	vzip.32		q0, q1			// => (0 1 0 1) (0 1 0 1)
+	vzip.32		q2, q3			// => (2 3 2 3) (2 3 2 3)
+	vzip.32		q4, q5			// => (4 5 4 5) (4 5 4 5)
+	vzip.32		q6, q7			// => (6 7 6 7) (6 7 6 7)
+	  vadd.u32	q12, q8			// x12 += counter values 0-3
 	vswp		d1, d4
 	vswp		d1, d4
 	vswp		d3, d6
 	vswp		d3, d6
+	  vld1.32	{q8-q9}, [r0]!		// load s0..7
 	vswp		d9, d12
 	vswp		d9, d12
 	vswp		d11, d14
 	vswp		d11, d14
 
 
-	// xor with corresponding input, write to output
+	// Swap q1 and q4 so that we'll free up consecutive registers (q0-q1)
+	// after XORing the first 32 bytes.
+	vswp		q1, q4
+
+	// First two rows of each block are (q0 q1) (q2 q6) (q4 q5) (q3 q7)
+
+	// x0..3[0-3] += s0..3[0-3]	(add orig state to 1st row of each block)
+	vadd.u32	q0, q0, q8
+	vadd.u32	q2, q2, q8
+	vadd.u32	q4, q4, q8
+	vadd.u32	q3, q3, q8
+
+	// x4..7[0-3] += s4..7[0-3]	(add orig state to 2nd row of each block)
+	vadd.u32	q1, q1, q9
+	vadd.u32	q6, q6, q9
+	vadd.u32	q5, q5, q9
+	vadd.u32	q7, q7, q9
+
+	// XOR first 32 bytes using keystream from first two rows of first block
 	vld1.8		{q8-q9}, [r2]!
 	vld1.8		{q8-q9}, [r2]!
 	veor		q8, q8, q0
 	veor		q8, q8, q0
-	veor		q9, q9, q4
+	veor		q9, q9, q1
 	vst1.8		{q8-q9}, [r1]!
 	vst1.8		{q8-q9}, [r1]!
 
 
+	// Re-interleave the words in the last two rows of each block (x8..15).
 	vld1.32		{q8-q9}, [sp, :256]
 	vld1.32		{q8-q9}, [sp, :256]
-
-	// x8[0-3] += s2[0]
-	// x9[0-3] += s2[1]
-	// x10[0-3] += s2[2]
-	// x11[0-3] += s2[3]
-	ldmia		r0!, {r3-r6}
-	vdup.32		q0, r3
-	vdup.32		q4, r4
-	vadd.i32	q8, q8, q0
-	vadd.i32	q9, q9, q4
-	vdup.32		q0, r5
-	vdup.32		q4, r6
-	vadd.i32	q10, q10, q0
-	vadd.i32	q11, q11, q4
-
-	// x12[0-3] += s3[0]
-	// x13[0-3] += s3[1]
-	// x14[0-3] += s3[2]
-	// x15[0-3] += s3[3]
-	ldmia		r0!, {r3-r6}
-	vdup.32		q0, r3
-	vdup.32		q4, r4
-	adr		r3, CTRINC
-	vadd.i32	q12, q12, q0
-	vld1.32		{q0}, [r3, :128]
-	vadd.i32	q13, q13, q4
-	vadd.i32	q12, q12, q0		// x12 += counter values 0-3
-
-	vdup.32		q0, r5
-	vdup.32		q4, r6
-	vadd.i32	q14, q14, q0
-	vadd.i32	q15, q15, q4
-
-	// interleave 32-bit words in state n, n+1
-	vzip.32		q8, q9
-	vzip.32		q10, q11
-	vzip.32		q12, q13
-	vzip.32		q14, q15
-
-	// interleave 64-bit words in state n, n+2
-	vswp		d17, d20
-	vswp		d19, d22
+	vzip.32		q12, q13	// => (12 13 12 13) (12 13 12 13)
+	vzip.32		q14, q15	// => (14 15 14 15) (14 15 14 15)
+	vzip.32		q8, q9		// => (8 9 8 9) (8 9 8 9)
+	vzip.32		q10, q11	// => (10 11 10 11) (10 11 10 11)
+	  vld1.32	{q0-q1}, [r0]	// load s8..15
 	vswp		d25, d28
 	vswp		d25, d28
 	vswp		d27, d30
 	vswp		d27, d30
+	vswp		d17, d20
+	vswp		d19, d22
+
+	// Last two rows of each block are (q8 q12) (q10 q14) (q9 q13) (q11 q15)
+
+	// x8..11[0-3] += s8..11[0-3]	(add orig state to 3rd row of each block)
+	vadd.u32	q8,  q8,  q0
+	vadd.u32	q10, q10, q0
+	vadd.u32	q9,  q9,  q0
+	vadd.u32	q11, q11, q0
+
+	// x12..15[0-3] += s12..15[0-3] (add orig state to 4th row of each block)
+	vadd.u32	q12, q12, q1
+	vadd.u32	q14, q14, q1
+	vadd.u32	q13, q13, q1
+	vadd.u32	q15, q15, q1
 
 
-	vmov		q4, q1
+	// XOR the rest of the data with the keystream
 
 
 	vld1.8		{q0-q1}, [r2]!
 	vld1.8		{q0-q1}, [r2]!
 	veor		q0, q0, q8
 	veor		q0, q0, q8
@@ -509,13 +520,11 @@ ENTRY(chacha20_4block_xor_neon)
 	vst1.8		{q0-q1}, [r1]!
 	vst1.8		{q0-q1}, [r1]!
 
 
 	vld1.8		{q0-q1}, [r2]
 	vld1.8		{q0-q1}, [r2]
+	  mov		sp, r4		// restore original stack pointer
 	veor		q0, q0, q11
 	veor		q0, q0, q11
 	veor		q1, q1, q15
 	veor		q1, q1, q15
 	vst1.8		{q0-q1}, [r1]
 	vst1.8		{q0-q1}, [r1]
 
 
-	mov		sp, ip
-	pop		{r4-r6, pc}
+	pop		{r4-r5}
+	bx		lr
 ENDPROC(chacha20_4block_xor_neon)
 ENDPROC(chacha20_4block_xor_neon)
-
-	.align		4
-CTRINC:	.word		0, 1, 2, 3

+ 1 - 1
arch/arm/crypto/crc32-ce-glue.c

@@ -236,7 +236,7 @@ static void __exit crc32_pmull_mod_exit(void)
 				  ARRAY_SIZE(crc32_pmull_algs));
 }

-static const struct cpu_feature crc32_cpu_feature[] = {
+static const struct cpu_feature __maybe_unused crc32_cpu_feature[] = {
 	{ cpu_feature(CRC32) }, { cpu_feature(PMULL) }, { }
 };
 MODULE_DEVICE_TABLE(cpu, crc32_cpu_feature);

+ 103 - 5
arch/arm/crypto/ghash-ce-core.S

@@ -63,6 +63,33 @@
 	k48		.req	d31
 	SHASH2_p64	.req	d31
 
 
+	HH		.req	q10
+	HH3		.req	q11
+	HH4		.req	q12
+	HH34		.req	q13
+
+	HH_L		.req	d20
+	HH_H		.req	d21
+	HH3_L		.req	d22
+	HH3_H		.req	d23
+	HH4_L		.req	d24
+	HH4_H		.req	d25
+	HH34_L		.req	d26
+	HH34_H		.req	d27
+	SHASH2_H	.req	d29
+
+	XL2		.req	q5
+	XM2		.req	q6
+	XH2		.req	q7
+	T3		.req	q8
+
+	XL2_L		.req	d10
+	XL2_H		.req	d11
+	XM2_L		.req	d12
+	XM2_H		.req	d13
+	T3_L		.req	d16
+	T3_H		.req	d17
+
 	.text
 	.fpu		crypto-neon-fp-armv8
 
 
@@ -175,12 +202,77 @@
 	beq		0f
 	vld1.64		{T1}, [ip]
 	teq		r0, #0
-	b		1f
+	b		3f
+
+0:	.ifc		\pn, p64
+	tst		r0, #3			// skip until #blocks is a
+	bne		2f			// round multiple of 4
+
+	vld1.8		{XL2-XM2}, [r2]!
+1:	vld1.8		{T3-T2}, [r2]!
+	vrev64.8	XL2, XL2
+	vrev64.8	XM2, XM2
+
+	subs		r0, r0, #4
+
+	vext.8		T1, XL2, XL2, #8
+	veor		XL2_H, XL2_H, XL_L
+	veor		XL, XL, T1
+
+	vrev64.8	T3, T3
+	vrev64.8	T1, T2
+
+	vmull.p64	XH, HH4_H, XL_H			// a1 * b1
+	veor		XL2_H, XL2_H, XL_H
+	vmull.p64	XL, HH4_L, XL_L			// a0 * b0
+	vmull.p64	XM, HH34_H, XL2_H		// (a1 + a0)(b1 + b0)
+
+	vmull.p64	XH2, HH3_H, XM2_L		// a1 * b1
+	veor		XM2_L, XM2_L, XM2_H
+	vmull.p64	XL2, HH3_L, XM2_H		// a0 * b0
+	vmull.p64	XM2, HH34_L, XM2_L		// (a1 + a0)(b1 + b0)
+
+	veor		XH, XH, XH2
+	veor		XL, XL, XL2
+	veor		XM, XM, XM2
+
+	vmull.p64	XH2, HH_H, T3_L			// a1 * b1
+	veor		T3_L, T3_L, T3_H
+	vmull.p64	XL2, HH_L, T3_H			// a0 * b0
+	vmull.p64	XM2, SHASH2_H, T3_L		// (a1 + a0)(b1 + b0)
+
+	veor		XH, XH, XH2
+	veor		XL, XL, XL2
+	veor		XM, XM, XM2
+
+	vmull.p64	XH2, SHASH_H, T1_L		// a1 * b1
+	veor		T1_L, T1_L, T1_H
+	vmull.p64	XL2, SHASH_L, T1_H		// a0 * b0
+	vmull.p64	XM2, SHASH2_p64, T1_L		// (a1 + a0)(b1 + b0)
+
+	veor		XH, XH, XH2
+	veor		XL, XL, XL2
+	veor		XM, XM, XM2
 
 
-0:	vld1.64		{T1}, [r2]!
+	beq		4f
+
+	vld1.8		{XL2-XM2}, [r2]!
+
+	veor		T1, XL, XH
+	veor		XM, XM, T1
+
+	__pmull_reduce_p64
+
+	veor		T1, T1, XH
+	veor		XL, XL, T1
+
+	b		1b
+	.endif
+
+2:	vld1.64		{T1}, [r2]!
 	subs		r0, r0, #1
 
 
-1:	/* multiply XL by SHASH in GF(2^128) */
+3:	/* multiply XL by SHASH in GF(2^128) */
 #ifndef CONFIG_CPU_BIG_ENDIAN
 	vrev64.8	T1, T1
 #endif
@@ -193,7 +285,7 @@
 	__pmull_\pn	XL, XL_L, SHASH_L, s1l, s2l, s3l, s4l	@ a0 * b0
 	__pmull_\pn	XM, T1_L, SHASH2_\pn			@ (a1+a0)(b1+b0)
 
 
-	veor		T1, XL, XH
+4:	veor		T1, XL, XH
 	veor		XM, XM, T1
 
 
 	__pmull_reduce_\pn
@@ -212,8 +304,14 @@
 	 *			   struct ghash_key const *k, const char *head)
 	 */
 ENTRY(pmull_ghash_update_p64)
-	vld1.64		{SHASH}, [r3]
+	vld1.64		{SHASH}, [r3]!
+	vld1.64		{HH}, [r3]!
+	vld1.64		{HH3-HH4}, [r3]
+
 	veor		SHASH2_p64, SHASH_L, SHASH_H
+	veor		SHASH2_H, HH_L, HH_H
+	veor		HH34_L, HH3_L, HH3_H
+	veor		HH34_H, HH4_L, HH4_H
 
 
 	vmov.i8		MASK, #0xe1
 	vshl.u64	MASK, MASK, #57

+ 27 - 11
arch/arm/crypto/ghash-ce-glue.c

@@ -1,7 +1,7 @@
 /*
  * Accelerated GHASH implementation with ARMv8 vmull.p64 instructions.
  *
- * Copyright (C) 2015 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2015 - 2018 Linaro Ltd. <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 as published
@@ -28,8 +28,10 @@ MODULE_ALIAS_CRYPTO("ghash");
 #define GHASH_DIGEST_SIZE	16

 struct ghash_key {
-	u64	a;
-	u64	b;
+	u64	h[2];
+	u64	h2[2];
+	u64	h3[2];
+	u64	h4[2];
 };

 struct ghash_desc_ctx {
@@ -117,26 +119,40 @@ static int ghash_final(struct shash_desc *desc, u8 *dst)
 	return 0;
 }

+static void ghash_reflect(u64 h[], const be128 *k)
+{
+	u64 carry = be64_to_cpu(k->a) >> 63;
+
+	h[0] = (be64_to_cpu(k->b) << 1) | carry;
+	h[1] = (be64_to_cpu(k->a) << 1) | (be64_to_cpu(k->b) >> 63);
+
+	if (carry)
+		h[1] ^= 0xc200000000000000UL;
+}
+
 static int ghash_setkey(struct crypto_shash *tfm,
 			const u8 *inkey, unsigned int keylen)
 {
 	struct ghash_key *key = crypto_shash_ctx(tfm);
-	u64 a, b;
+	be128 h, k;

 	if (keylen != GHASH_BLOCK_SIZE) {
 		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
 		return -EINVAL;
 	}

-	/* perform multiplication by 'x' in GF(2^128) */
-	b = get_unaligned_be64(inkey);
-	a = get_unaligned_be64(inkey + 8);
+	memcpy(&k, inkey, GHASH_BLOCK_SIZE);
+	ghash_reflect(key->h, &k);
+
+	h = k;
+	gf128mul_lle(&h, &k);
+	ghash_reflect(key->h2, &h);

-	key->a = (a << 1) | (b >> 63);
-	key->b = (b << 1) | (a >> 63);
+	gf128mul_lle(&h, &k);
+	ghash_reflect(key->h3, &h);

-	if (b >> 63)
-		key->b ^= 0xc200000000000000UL;
+	gf128mul_lle(&h, &k);
+	ghash_reflect(key->h4, &h);

 	return 0;
 }

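The ghash-ce changes above precompute H, H^2, H^3 and H^4 in ghash_setkey() so the NEON code can process four blocks per round. The snippet below is not part of this series; it is a hypothetical, generic-C illustration of the aggregation identity behind those powers, written with the existing gf128mul_lle() and be128_xor() helpers rather than the reflected layout the assembly expects. Four Horner steps unroll into four independent multiplications that can be interleaved, which is what helps enable the "up to 85%" figure quoted in the pull text.

#include <crypto/gf128mul.h>
#include <crypto/b128ops.h>

/*
 * Illustration only: four-block GHASH by Horner's rule,
 *     Y = ((((Y ^ b1)*H ^ b2)*H ^ b3)*H ^ b4)*H,
 * unrolls to the equivalent
 *     Y = (Y ^ b1)*H^4 ^ b2*H^3 ^ b3*H^2 ^ b4*H,
 * so the four multiplications no longer depend on each other.
 * h_pow[0] = H, h_pow[1] = H^2, h_pow[2] = H^3, h_pow[3] = H^4.
 */
static void ghash_4blk_aggregated(be128 *y, const be128 h_pow[4],
				  const be128 blk[4])
{
	be128 t[4];
	int i;

	/* t[0] = (Y ^ b1), t[i] = b(i+1) for i > 0 */
	be128_xor(&t[0], y, &blk[0]);
	for (i = 1; i < 4; i++)
		t[i] = blk[i];

	/* multiply t[0] by H^4, t[1] by H^3, t[2] by H^2, t[3] by H */
	for (i = 0; i < 4; i++)
		gf128mul_lle(&t[i], &h_pow[3 - i]);

	/* accumulate the four partial products */
	*y = t[0];
	for (i = 1; i < 4; i++)
		be128_xor(y, y, &t[i]);
}
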
+ 0 - 434
arch/arm/crypto/speck-neon-core.S

@@ -1,434 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * NEON-accelerated implementation of Speck128-XTS and Speck64-XTS
- *
- * Copyright (c) 2018 Google, Inc
- *
- * Author: Eric Biggers <ebiggers@google.com>
- */
-
-#include <linux/linkage.h>
-
-	.text
-	.fpu		neon
-
-	// arguments
-	ROUND_KEYS	.req	r0	// const {u64,u32} *round_keys
-	NROUNDS		.req	r1	// int nrounds
-	DST		.req	r2	// void *dst
-	SRC		.req	r3	// const void *src
-	NBYTES		.req	r4	// unsigned int nbytes
-	TWEAK		.req	r5	// void *tweak
-
-	// registers which hold the data being encrypted/decrypted
-	X0		.req	q0
-	X0_L		.req	d0
-	X0_H		.req	d1
-	Y0		.req	q1
-	Y0_H		.req	d3
-	X1		.req	q2
-	X1_L		.req	d4
-	X1_H		.req	d5
-	Y1		.req	q3
-	Y1_H		.req	d7
-	X2		.req	q4
-	X2_L		.req	d8
-	X2_H		.req	d9
-	Y2		.req	q5
-	Y2_H		.req	d11
-	X3		.req	q6
-	X3_L		.req	d12
-	X3_H		.req	d13
-	Y3		.req	q7
-	Y3_H		.req	d15
-
-	// the round key, duplicated in all lanes
-	ROUND_KEY	.req	q8
-	ROUND_KEY_L	.req	d16
-	ROUND_KEY_H	.req	d17
-
-	// index vector for vtbl-based 8-bit rotates
-	ROTATE_TABLE	.req	d18
-
-	// multiplication table for updating XTS tweaks
-	GF128MUL_TABLE	.req	d19
-	GF64MUL_TABLE	.req	d19
-
-	// current XTS tweak value(s)
-	TWEAKV		.req	q10
-	TWEAKV_L	.req	d20
-	TWEAKV_H	.req	d21
-
-	TMP0		.req	q12
-	TMP0_L		.req	d24
-	TMP0_H		.req	d25
-	TMP1		.req	q13
-	TMP2		.req	q14
-	TMP3		.req	q15
-
-	.align		4
-.Lror64_8_table:
-	.byte		1, 2, 3, 4, 5, 6, 7, 0
-.Lror32_8_table:
-	.byte		1, 2, 3, 0, 5, 6, 7, 4
-.Lrol64_8_table:
-	.byte		7, 0, 1, 2, 3, 4, 5, 6
-.Lrol32_8_table:
-	.byte		3, 0, 1, 2, 7, 4, 5, 6
-.Lgf128mul_table:
-	.byte		0, 0x87
-	.fill		14
-.Lgf64mul_table:
-	.byte		0, 0x1b, (0x1b << 1), (0x1b << 1) ^ 0x1b
-	.fill		12
-
-/*
- * _speck_round_128bytes() - Speck encryption round on 128 bytes at a time
- *
- * Do one Speck encryption round on the 128 bytes (8 blocks for Speck128, 16 for
- * Speck64) stored in X0-X3 and Y0-Y3, using the round key stored in all lanes
- * of ROUND_KEY.  'n' is the lane size: 64 for Speck128, or 32 for Speck64.
- *
- * The 8-bit rotates are implemented using vtbl instead of vshr + vsli because
- * the vtbl approach is faster on some processors and the same speed on others.
- */
-.macro _speck_round_128bytes	n
-
-	// x = ror(x, 8)
-	vtbl.8		X0_L, {X0_L}, ROTATE_TABLE
-	vtbl.8		X0_H, {X0_H}, ROTATE_TABLE
-	vtbl.8		X1_L, {X1_L}, ROTATE_TABLE
-	vtbl.8		X1_H, {X1_H}, ROTATE_TABLE
-	vtbl.8		X2_L, {X2_L}, ROTATE_TABLE
-	vtbl.8		X2_H, {X2_H}, ROTATE_TABLE
-	vtbl.8		X3_L, {X3_L}, ROTATE_TABLE
-	vtbl.8		X3_H, {X3_H}, ROTATE_TABLE
-
-	// x += y
-	vadd.u\n	X0, Y0
-	vadd.u\n	X1, Y1
-	vadd.u\n	X2, Y2
-	vadd.u\n	X3, Y3
-
-	// x ^= k
-	veor		X0, ROUND_KEY
-	veor		X1, ROUND_KEY
-	veor		X2, ROUND_KEY
-	veor		X3, ROUND_KEY
-
-	// y = rol(y, 3)
-	vshl.u\n	TMP0, Y0, #3
-	vshl.u\n	TMP1, Y1, #3
-	vshl.u\n	TMP2, Y2, #3
-	vshl.u\n	TMP3, Y3, #3
-	vsri.u\n	TMP0, Y0, #(\n - 3)
-	vsri.u\n	TMP1, Y1, #(\n - 3)
-	vsri.u\n	TMP2, Y2, #(\n - 3)
-	vsri.u\n	TMP3, Y3, #(\n - 3)
-
-	// y ^= x
-	veor		Y0, TMP0, X0
-	veor		Y1, TMP1, X1
-	veor		Y2, TMP2, X2
-	veor		Y3, TMP3, X3
-.endm
-
-/*
- * _speck_unround_128bytes() - Speck decryption round on 128 bytes at a time
- *
- * This is the inverse of _speck_round_128bytes().
- */
-.macro _speck_unround_128bytes	n
-
-	// y ^= x
-	veor		TMP0, Y0, X0
-	veor		TMP1, Y1, X1
-	veor		TMP2, Y2, X2
-	veor		TMP3, Y3, X3
-
-	// y = ror(y, 3)
-	vshr.u\n	Y0, TMP0, #3
-	vshr.u\n	Y1, TMP1, #3
-	vshr.u\n	Y2, TMP2, #3
-	vshr.u\n	Y3, TMP3, #3
-	vsli.u\n	Y0, TMP0, #(\n - 3)
-	vsli.u\n	Y1, TMP1, #(\n - 3)
-	vsli.u\n	Y2, TMP2, #(\n - 3)
-	vsli.u\n	Y3, TMP3, #(\n - 3)
-
-	// x ^= k
-	veor		X0, ROUND_KEY
-	veor		X1, ROUND_KEY
-	veor		X2, ROUND_KEY
-	veor		X3, ROUND_KEY
-
-	// x -= y
-	vsub.u\n	X0, Y0
-	vsub.u\n	X1, Y1
-	vsub.u\n	X2, Y2
-	vsub.u\n	X3, Y3
-
-	// x = rol(x, 8);
-	vtbl.8		X0_L, {X0_L}, ROTATE_TABLE
-	vtbl.8		X0_H, {X0_H}, ROTATE_TABLE
-	vtbl.8		X1_L, {X1_L}, ROTATE_TABLE
-	vtbl.8		X1_H, {X1_H}, ROTATE_TABLE
-	vtbl.8		X2_L, {X2_L}, ROTATE_TABLE
-	vtbl.8		X2_H, {X2_H}, ROTATE_TABLE
-	vtbl.8		X3_L, {X3_L}, ROTATE_TABLE
-	vtbl.8		X3_H, {X3_H}, ROTATE_TABLE
-.endm
-
-.macro _xts128_precrypt_one	dst_reg, tweak_buf, tmp
-
-	// Load the next source block
-	vld1.8		{\dst_reg}, [SRC]!
-
-	// Save the current tweak in the tweak buffer
-	vst1.8		{TWEAKV}, [\tweak_buf:128]!
-
-	// XOR the next source block with the current tweak
-	veor		\dst_reg, TWEAKV
-
-	/*
-	 * Calculate the next tweak by multiplying the current one by x,
-	 * modulo p(x) = x^128 + x^7 + x^2 + x + 1.
-	 */
-	vshr.u64	\tmp, TWEAKV, #63
-	vshl.u64	TWEAKV, #1
-	veor		TWEAKV_H, \tmp\()_L
-	vtbl.8		\tmp\()_H, {GF128MUL_TABLE}, \tmp\()_H
-	veor		TWEAKV_L, \tmp\()_H
-.endm
-
-.macro _xts64_precrypt_two	dst_reg, tweak_buf, tmp
-
-	// Load the next two source blocks
-	vld1.8		{\dst_reg}, [SRC]!
-
-	// Save the current two tweaks in the tweak buffer
-	vst1.8		{TWEAKV}, [\tweak_buf:128]!
-
-	// XOR the next two source blocks with the current two tweaks
-	veor		\dst_reg, TWEAKV
-
-	/*
-	 * Calculate the next two tweaks by multiplying the current ones by x^2,
-	 * modulo p(x) = x^64 + x^4 + x^3 + x + 1.
-	 */
-	vshr.u64	\tmp, TWEAKV, #62
-	vshl.u64	TWEAKV, #2
-	vtbl.8		\tmp\()_L, {GF64MUL_TABLE}, \tmp\()_L
-	vtbl.8		\tmp\()_H, {GF64MUL_TABLE}, \tmp\()_H
-	veor		TWEAKV, \tmp
-.endm
-
-/*
- * _speck_xts_crypt() - Speck-XTS encryption/decryption
- *
- * Encrypt or decrypt NBYTES bytes of data from the SRC buffer to the DST buffer
- * using Speck-XTS, specifically the variant with a block size of '2n' and round
- * count given by NROUNDS.  The expanded round keys are given in ROUND_KEYS, and
- * the current XTS tweak value is given in TWEAK.  It's assumed that NBYTES is a
- * nonzero multiple of 128.
- */
-.macro _speck_xts_crypt	n, decrypting
-	push		{r4-r7}
-	mov		r7, sp
-
-	/*
-	 * The first four parameters were passed in registers r0-r3.  Load the
-	 * additional parameters, which were passed on the stack.
-	 */
-	ldr		NBYTES, [sp, #16]
-	ldr		TWEAK, [sp, #20]
-
-	/*
-	 * If decrypting, modify the ROUND_KEYS parameter to point to the last
-	 * round key rather than the first, since for decryption the round keys
-	 * are used in reverse order.
-	 */
-.if \decrypting
-.if \n == 64
-	add		ROUND_KEYS, ROUND_KEYS, NROUNDS, lsl #3
-	sub		ROUND_KEYS, #8
-.else
-	add		ROUND_KEYS, ROUND_KEYS, NROUNDS, lsl #2
-	sub		ROUND_KEYS, #4
-.endif
-.endif
-
-	// Load the index vector for vtbl-based 8-bit rotates
-.if \decrypting
-	ldr		r12, =.Lrol\n\()_8_table
-.else
-	ldr		r12, =.Lror\n\()_8_table
-.endif
-	vld1.8		{ROTATE_TABLE}, [r12:64]
-
-	// One-time XTS preparation
-
-	/*
-	 * Allocate stack space to store 128 bytes worth of tweaks.  For
-	 * performance, this space is aligned to a 16-byte boundary so that we
-	 * can use the load/store instructions that declare 16-byte alignment.
-	 * For Thumb2 compatibility, don't do the 'bic' directly on 'sp'.
-	 */
-	sub		r12, sp, #128
-	bic		r12, #0xf
-	mov		sp, r12
-
-.if \n == 64
-	// Load first tweak
-	vld1.8		{TWEAKV}, [TWEAK]
-
-	// Load GF(2^128) multiplication table
-	ldr		r12, =.Lgf128mul_table
-	vld1.8		{GF128MUL_TABLE}, [r12:64]
-.else
-	// Load first tweak
-	vld1.8		{TWEAKV_L}, [TWEAK]
-
-	// Load GF(2^64) multiplication table
-	ldr		r12, =.Lgf64mul_table
-	vld1.8		{GF64MUL_TABLE}, [r12:64]
-
-	// Calculate second tweak, packing it together with the first
-	vshr.u64	TMP0_L, TWEAKV_L, #63
-	vtbl.u8		TMP0_L, {GF64MUL_TABLE}, TMP0_L
-	vshl.u64	TWEAKV_H, TWEAKV_L, #1
-	veor		TWEAKV_H, TMP0_L
-.endif
-
-.Lnext_128bytes_\@:
-
-	/*
-	 * Load the source blocks into {X,Y}[0-3], XOR them with their XTS tweak
-	 * values, and save the tweaks on the stack for later.  Then
-	 * de-interleave the 'x' and 'y' elements of each block, i.e. make it so
-	 * that the X[0-3] registers contain only the second halves of blocks,
-	 * and the Y[0-3] registers contain only the first halves of blocks.
-	 * (Speck uses the order (y, x) rather than the more intuitive (x, y).)
-	 */
-	mov		r12, sp
-.if \n == 64
-	_xts128_precrypt_one	X0, r12, TMP0
-	_xts128_precrypt_one	Y0, r12, TMP0
-	_xts128_precrypt_one	X1, r12, TMP0
-	_xts128_precrypt_one	Y1, r12, TMP0
-	_xts128_precrypt_one	X2, r12, TMP0
-	_xts128_precrypt_one	Y2, r12, TMP0
-	_xts128_precrypt_one	X3, r12, TMP0
-	_xts128_precrypt_one	Y3, r12, TMP0
-	vswp		X0_L, Y0_H
-	vswp		X1_L, Y1_H
-	vswp		X2_L, Y2_H
-	vswp		X3_L, Y3_H
-.else
-	_xts64_precrypt_two	X0, r12, TMP0
-	_xts64_precrypt_two	Y0, r12, TMP0
-	_xts64_precrypt_two	X1, r12, TMP0
-	_xts64_precrypt_two	Y1, r12, TMP0
-	_xts64_precrypt_two	X2, r12, TMP0
-	_xts64_precrypt_two	Y2, r12, TMP0
-	_xts64_precrypt_two	X3, r12, TMP0
-	_xts64_precrypt_two	Y3, r12, TMP0
-	vuzp.32		Y0, X0
-	vuzp.32		Y1, X1
-	vuzp.32		Y2, X2
-	vuzp.32		Y3, X3
-.endif
-
-	// Do the cipher rounds
-
-	mov		r12, ROUND_KEYS
-	mov		r6, NROUNDS
-
-.Lnext_round_\@:
-.if \decrypting
-.if \n == 64
-	vld1.64		ROUND_KEY_L, [r12]
-	sub		r12, #8
-	vmov		ROUND_KEY_H, ROUND_KEY_L
-.else
-	vld1.32		{ROUND_KEY_L[],ROUND_KEY_H[]}, [r12]
-	sub		r12, #4
-.endif
-	_speck_unround_128bytes	\n
-.else
-.if \n == 64
-	vld1.64		ROUND_KEY_L, [r12]!
-	vmov		ROUND_KEY_H, ROUND_KEY_L
-.else
-	vld1.32		{ROUND_KEY_L[],ROUND_KEY_H[]}, [r12]!
-.endif
-	_speck_round_128bytes	\n
-.endif
-	subs		r6, r6, #1
-	bne		.Lnext_round_\@
-
-	// Re-interleave the 'x' and 'y' elements of each block
-.if \n == 64
-	vswp		X0_L, Y0_H
-	vswp		X1_L, Y1_H
-	vswp		X2_L, Y2_H
-	vswp		X3_L, Y3_H
-.else
-	vzip.32		Y0, X0
-	vzip.32		Y1, X1
-	vzip.32		Y2, X2
-	vzip.32		Y3, X3
-.endif
-
-	// XOR the encrypted/decrypted blocks with the tweaks we saved earlier
-	mov		r12, sp
-	vld1.8		{TMP0, TMP1}, [r12:128]!
-	vld1.8		{TMP2, TMP3}, [r12:128]!
-	veor		X0, TMP0
-	veor		Y0, TMP1
-	veor		X1, TMP2
-	veor		Y1, TMP3
-	vld1.8		{TMP0, TMP1}, [r12:128]!
-	vld1.8		{TMP2, TMP3}, [r12:128]!
-	veor		X2, TMP0
-	veor		Y2, TMP1
-	veor		X3, TMP2
-	veor		Y3, TMP3
-
-	// Store the ciphertext in the destination buffer
-	vst1.8		{X0, Y0}, [DST]!
-	vst1.8		{X1, Y1}, [DST]!
-	vst1.8		{X2, Y2}, [DST]!
-	vst1.8		{X3, Y3}, [DST]!
-
-	// Continue if there are more 128-byte chunks remaining, else return
-	subs		NBYTES, #128
-	bne		.Lnext_128bytes_\@
-
-	// Store the next tweak
-.if \n == 64
-	vst1.8		{TWEAKV}, [TWEAK]
-.else
-	vst1.8		{TWEAKV_L}, [TWEAK]
-.endif
-
-	mov		sp, r7
-	pop		{r4-r7}
-	bx		lr
-.endm
-
-ENTRY(speck128_xts_encrypt_neon)
-	_speck_xts_crypt	n=64, decrypting=0
-ENDPROC(speck128_xts_encrypt_neon)
-
-ENTRY(speck128_xts_decrypt_neon)
-	_speck_xts_crypt	n=64, decrypting=1
-ENDPROC(speck128_xts_decrypt_neon)
-
-ENTRY(speck64_xts_encrypt_neon)
-	_speck_xts_crypt	n=32, decrypting=0
-ENDPROC(speck64_xts_encrypt_neon)
-
-ENTRY(speck64_xts_decrypt_neon)
-	_speck_xts_crypt	n=32, decrypting=1
-ENDPROC(speck64_xts_decrypt_neon)

+ 0 - 288
arch/arm/crypto/speck-neon-glue.c

@@ -1,288 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * NEON-accelerated implementation of Speck128-XTS and Speck64-XTS
- *
- * Copyright (c) 2018 Google, Inc
- *
- * Note: the NIST recommendation for XTS only specifies a 128-bit block size,
- * but a 64-bit version (needed for Speck64) is fairly straightforward; the math
- * is just done in GF(2^64) instead of GF(2^128), with the reducing polynomial
- * x^64 + x^4 + x^3 + x + 1 from the original XEX paper (Rogaway, 2004:
- * "Efficient Instantiations of Tweakable Blockciphers and Refinements to Modes
- * OCB and PMAC"), represented as 0x1B.
- */
-
-#include <asm/hwcap.h>
-#include <asm/neon.h>
-#include <asm/simd.h>
-#include <crypto/algapi.h>
-#include <crypto/gf128mul.h>
-#include <crypto/internal/skcipher.h>
-#include <crypto/speck.h>
-#include <crypto/xts.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-
-/* The assembly functions only handle multiples of 128 bytes */
-#define SPECK_NEON_CHUNK_SIZE	128
-
-/* Speck128 */
-
-struct speck128_xts_tfm_ctx {
-	struct speck128_tfm_ctx main_key;
-	struct speck128_tfm_ctx tweak_key;
-};
-
-asmlinkage void speck128_xts_encrypt_neon(const u64 *round_keys, int nrounds,
-					  void *dst, const void *src,
-					  unsigned int nbytes, void *tweak);
-
-asmlinkage void speck128_xts_decrypt_neon(const u64 *round_keys, int nrounds,
-					  void *dst, const void *src,
-					  unsigned int nbytes, void *tweak);
-
-typedef void (*speck128_crypt_one_t)(const struct speck128_tfm_ctx *,
-				     u8 *, const u8 *);
-typedef void (*speck128_xts_crypt_many_t)(const u64 *, int, void *,
-					  const void *, unsigned int, void *);
-
-static __always_inline int
-__speck128_xts_crypt(struct skcipher_request *req,
-		     speck128_crypt_one_t crypt_one,
-		     speck128_xts_crypt_many_t crypt_many)
-{
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	const struct speck128_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
-	struct skcipher_walk walk;
-	le128 tweak;
-	int err;
-
-	err = skcipher_walk_virt(&walk, req, true);
-
-	crypto_speck128_encrypt(&ctx->tweak_key, (u8 *)&tweak, walk.iv);
-
-	while (walk.nbytes > 0) {
-		unsigned int nbytes = walk.nbytes;
-		u8 *dst = walk.dst.virt.addr;
-		const u8 *src = walk.src.virt.addr;
-
-		if (nbytes >= SPECK_NEON_CHUNK_SIZE && may_use_simd()) {
-			unsigned int count;
-
-			count = round_down(nbytes, SPECK_NEON_CHUNK_SIZE);
-			kernel_neon_begin();
-			(*crypt_many)(ctx->main_key.round_keys,
-				      ctx->main_key.nrounds,
-				      dst, src, count, &tweak);
-			kernel_neon_end();
-			dst += count;
-			src += count;
-			nbytes -= count;
-		}
-
-		/* Handle any remainder with generic code */
-		while (nbytes >= sizeof(tweak)) {
-			le128_xor((le128 *)dst, (const le128 *)src, &tweak);
-			(*crypt_one)(&ctx->main_key, dst, dst);
-			le128_xor((le128 *)dst, (const le128 *)dst, &tweak);
-			gf128mul_x_ble(&tweak, &tweak);
-
-			dst += sizeof(tweak);
-			src += sizeof(tweak);
-			nbytes -= sizeof(tweak);
-		}
-		err = skcipher_walk_done(&walk, nbytes);
-	}
-
-	return err;
-}
-
-static int speck128_xts_encrypt(struct skcipher_request *req)
-{
-	return __speck128_xts_crypt(req, crypto_speck128_encrypt,
-				    speck128_xts_encrypt_neon);
-}
-
-static int speck128_xts_decrypt(struct skcipher_request *req)
-{
-	return __speck128_xts_crypt(req, crypto_speck128_decrypt,
-				    speck128_xts_decrypt_neon);
-}
-
-static int speck128_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
-			       unsigned int keylen)
-{
-	struct speck128_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
-	int err;
-
-	err = xts_verify_key(tfm, key, keylen);
-	if (err)
-		return err;
-
-	keylen /= 2;
-
-	err = crypto_speck128_setkey(&ctx->main_key, key, keylen);
-	if (err)
-		return err;
-
-	return crypto_speck128_setkey(&ctx->tweak_key, key + keylen, keylen);
-}
-
-/* Speck64 */
-
-struct speck64_xts_tfm_ctx {
-	struct speck64_tfm_ctx main_key;
-	struct speck64_tfm_ctx tweak_key;
-};
-
-asmlinkage void speck64_xts_encrypt_neon(const u32 *round_keys, int nrounds,
-					 void *dst, const void *src,
-					 unsigned int nbytes, void *tweak);
-
-asmlinkage void speck64_xts_decrypt_neon(const u32 *round_keys, int nrounds,
-					 void *dst, const void *src,
-					 unsigned int nbytes, void *tweak);
-
-typedef void (*speck64_crypt_one_t)(const struct speck64_tfm_ctx *,
-				    u8 *, const u8 *);
-typedef void (*speck64_xts_crypt_many_t)(const u32 *, int, void *,
-					 const void *, unsigned int, void *);
-
-static __always_inline int
-__speck64_xts_crypt(struct skcipher_request *req, speck64_crypt_one_t crypt_one,
-		    speck64_xts_crypt_many_t crypt_many)
-{
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	const struct speck64_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
-	struct skcipher_walk walk;
-	__le64 tweak;
-	int err;
-
-	err = skcipher_walk_virt(&walk, req, true);
-
-	crypto_speck64_encrypt(&ctx->tweak_key, (u8 *)&tweak, walk.iv);
-
-	while (walk.nbytes > 0) {
-		unsigned int nbytes = walk.nbytes;
-		u8 *dst = walk.dst.virt.addr;
-		const u8 *src = walk.src.virt.addr;
-
-		if (nbytes >= SPECK_NEON_CHUNK_SIZE && may_use_simd()) {
-			unsigned int count;
-
-			count = round_down(nbytes, SPECK_NEON_CHUNK_SIZE);
-			kernel_neon_begin();
-			(*crypt_many)(ctx->main_key.round_keys,
-				      ctx->main_key.nrounds,
-				      dst, src, count, &tweak);
-			kernel_neon_end();
-			dst += count;
-			src += count;
-			nbytes -= count;
-		}
-
-		/* Handle any remainder with generic code */
-		while (nbytes >= sizeof(tweak)) {
-			*(__le64 *)dst = *(__le64 *)src ^ tweak;
-			(*crypt_one)(&ctx->main_key, dst, dst);
-			*(__le64 *)dst ^= tweak;
-			tweak = cpu_to_le64((le64_to_cpu(tweak) << 1) ^
-					    ((tweak & cpu_to_le64(1ULL << 63)) ?
-					     0x1B : 0));
-			dst += sizeof(tweak);
-			src += sizeof(tweak);
-			nbytes -= sizeof(tweak);
-		}
-		err = skcipher_walk_done(&walk, nbytes);
-	}
-
-	return err;
-}
-
-static int speck64_xts_encrypt(struct skcipher_request *req)
-{
-	return __speck64_xts_crypt(req, crypto_speck64_encrypt,
-				   speck64_xts_encrypt_neon);
-}
-
-static int speck64_xts_decrypt(struct skcipher_request *req)
-{
-	return __speck64_xts_crypt(req, crypto_speck64_decrypt,
-				   speck64_xts_decrypt_neon);
-}
-
-static int speck64_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
-			      unsigned int keylen)
-{
-	struct speck64_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
-	int err;
-
-	err = xts_verify_key(tfm, key, keylen);
-	if (err)
-		return err;
-
-	keylen /= 2;
-
-	err = crypto_speck64_setkey(&ctx->main_key, key, keylen);
-	if (err)
-		return err;
-
-	return crypto_speck64_setkey(&ctx->tweak_key, key + keylen, keylen);
-}
-
-static struct skcipher_alg speck_algs[] = {
-	{
-		.base.cra_name		= "xts(speck128)",
-		.base.cra_driver_name	= "xts-speck128-neon",
-		.base.cra_priority	= 300,
-		.base.cra_blocksize	= SPECK128_BLOCK_SIZE,
-		.base.cra_ctxsize	= sizeof(struct speck128_xts_tfm_ctx),
-		.base.cra_alignmask	= 7,
-		.base.cra_module	= THIS_MODULE,
-		.min_keysize		= 2 * SPECK128_128_KEY_SIZE,
-		.max_keysize		= 2 * SPECK128_256_KEY_SIZE,
-		.ivsize			= SPECK128_BLOCK_SIZE,
-		.walksize		= SPECK_NEON_CHUNK_SIZE,
-		.setkey			= speck128_xts_setkey,
-		.encrypt		= speck128_xts_encrypt,
-		.decrypt		= speck128_xts_decrypt,
-	}, {
-		.base.cra_name		= "xts(speck64)",
-		.base.cra_driver_name	= "xts-speck64-neon",
-		.base.cra_priority	= 300,
-		.base.cra_blocksize	= SPECK64_BLOCK_SIZE,
-		.base.cra_ctxsize	= sizeof(struct speck64_xts_tfm_ctx),
-		.base.cra_alignmask	= 7,
-		.base.cra_module	= THIS_MODULE,
-		.min_keysize		= 2 * SPECK64_96_KEY_SIZE,
-		.max_keysize		= 2 * SPECK64_128_KEY_SIZE,
-		.ivsize			= SPECK64_BLOCK_SIZE,
-		.walksize		= SPECK_NEON_CHUNK_SIZE,
-		.setkey			= speck64_xts_setkey,
-		.encrypt		= speck64_xts_encrypt,
-		.decrypt		= speck64_xts_decrypt,
-	}
-};
-
-static int __init speck_neon_module_init(void)
-{
-	if (!(elf_hwcap & HWCAP_NEON))
-		return -ENODEV;
-	return crypto_register_skciphers(speck_algs, ARRAY_SIZE(speck_algs));
-}
-
-static void __exit speck_neon_module_exit(void)
-{
-	crypto_unregister_skciphers(speck_algs, ARRAY_SIZE(speck_algs));
-}
-
-module_init(speck_neon_module_init);
-module_exit(speck_neon_module_exit);
-
-MODULE_DESCRIPTION("Speck block cipher (NEON-accelerated)");
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>");
-MODULE_ALIAS_CRYPTO("xts(speck128)");
-MODULE_ALIAS_CRYPTO("xts-speck128-neon");
-MODULE_ALIAS_CRYPTO("xts(speck64)");
-MODULE_ALIAS_CRYPTO("xts-speck64-neon");

+ 1 - 1
arch/arm64/configs/defconfig

@@ -698,6 +698,7 @@ CONFIG_MEMTEST=y
 CONFIG_SECURITY=y
 CONFIG_CRYPTO_ECHAINIV=y
 CONFIG_CRYPTO_ANSI_CPRNG=y
+CONFIG_CRYPTO_DEV_FSL_DPAA2_CAAM=y
 CONFIG_ARM64_CRYPTO=y
 CONFIG_CRYPTO_SHA1_ARM64_CE=y
 CONFIG_CRYPTO_SHA2_ARM64_CE=y
@@ -706,7 +707,6 @@ CONFIG_CRYPTO_SHA3_ARM64=m
 CONFIG_CRYPTO_SM3_ARM64_CE=m
 CONFIG_CRYPTO_GHASH_ARM64_CE=y
 CONFIG_CRYPTO_CRCT10DIF_ARM64_CE=m
-CONFIG_CRYPTO_CRC32_ARM64_CE=m
 CONFIG_CRYPTO_AES_ARM64_CE_CCM=y
 CONFIG_CRYPTO_AES_ARM64_CE_BLK=y
 CONFIG_CRYPTO_CHACHA20_NEON=m

+ 0 - 11
arch/arm64/crypto/Kconfig

@@ -66,11 +66,6 @@ config CRYPTO_CRCT10DIF_ARM64_CE
 	depends on KERNEL_MODE_NEON && CRC_T10DIF
 	select CRYPTO_HASH

-config CRYPTO_CRC32_ARM64_CE
-	tristate "CRC32 and CRC32C digest algorithms using ARMv8 extensions"
-	depends on CRC32
-	select CRYPTO_HASH
-
 config CRYPTO_AES_ARM64
 	tristate "AES core cipher using scalar instructions"
 	select CRYPTO_AES
@@ -119,10 +114,4 @@ config CRYPTO_AES_ARM64_BS
 	select CRYPTO_AES_ARM64
 	select CRYPTO_SIMD

-config CRYPTO_SPECK_NEON
-	tristate "NEON accelerated Speck cipher algorithms"
-	depends on KERNEL_MODE_NEON
-	select CRYPTO_BLKCIPHER
-	select CRYPTO_SPECK
-
 endif

+ 0 - 6
arch/arm64/crypto/Makefile

@@ -32,9 +32,6 @@ ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o
 obj-$(CONFIG_CRYPTO_CRCT10DIF_ARM64_CE) += crct10dif-ce.o
 crct10dif-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o

-obj-$(CONFIG_CRYPTO_CRC32_ARM64_CE) += crc32-ce.o
-crc32-ce-y:= crc32-ce-core.o crc32-ce-glue.o
-
 obj-$(CONFIG_CRYPTO_AES_ARM64_CE) += aes-ce-cipher.o
 aes-ce-cipher-y := aes-ce-core.o aes-ce-glue.o

@@ -56,9 +53,6 @@ sha512-arm64-y := sha512-glue.o sha512-core.o
 obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o
 chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o

-obj-$(CONFIG_CRYPTO_SPECK_NEON) += speck-neon.o
-speck-neon-y := speck-neon-core.o speck-neon-glue.o
-
 obj-$(CONFIG_CRYPTO_AES_ARM64) += aes-arm64.o
 aes-arm64-y := aes-cipher-core.o aes-cipher-glue.o

+ 5 - 0
arch/arm64/crypto/aes-ce.S

@@ -17,6 +17,11 @@

 	.arch		armv8-a+crypto

+	xtsmask		.req	v16
+
+	.macro		xts_reload_mask, tmp
+	.endm
+
 	/* preload all round keys */
 	.macro		load_round_keys, rounds, rk
 	cmp		\rounds, #12

+ 193 - 24
arch/arm64/crypto/aes-glue.c

@@ -15,6 +15,7 @@
 #include <crypto/internal/hash.h>
 #include <crypto/internal/hash.h>
 #include <crypto/internal/simd.h>
 #include <crypto/internal/simd.h>
 #include <crypto/internal/skcipher.h>
 #include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
 #include <linux/module.h>
 #include <linux/module.h>
 #include <linux/cpufeature.h>
 #include <linux/cpufeature.h>
 #include <crypto/xts.h>
 #include <crypto/xts.h>
@@ -31,6 +32,8 @@
 #define aes_ecb_decrypt		ce_aes_ecb_decrypt
 #define aes_ecb_decrypt		ce_aes_ecb_decrypt
 #define aes_cbc_encrypt		ce_aes_cbc_encrypt
 #define aes_cbc_encrypt		ce_aes_cbc_encrypt
 #define aes_cbc_decrypt		ce_aes_cbc_decrypt
 #define aes_cbc_decrypt		ce_aes_cbc_decrypt
+#define aes_cbc_cts_encrypt	ce_aes_cbc_cts_encrypt
+#define aes_cbc_cts_decrypt	ce_aes_cbc_cts_decrypt
 #define aes_ctr_encrypt		ce_aes_ctr_encrypt
 #define aes_ctr_encrypt		ce_aes_ctr_encrypt
 #define aes_xts_encrypt		ce_aes_xts_encrypt
 #define aes_xts_encrypt		ce_aes_xts_encrypt
 #define aes_xts_decrypt		ce_aes_xts_decrypt
 #define aes_xts_decrypt		ce_aes_xts_decrypt
@@ -45,6 +48,8 @@ MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions");
 #define aes_ecb_decrypt		neon_aes_ecb_decrypt
 #define aes_ecb_decrypt		neon_aes_ecb_decrypt
 #define aes_cbc_encrypt		neon_aes_cbc_encrypt
 #define aes_cbc_encrypt		neon_aes_cbc_encrypt
 #define aes_cbc_decrypt		neon_aes_cbc_decrypt
 #define aes_cbc_decrypt		neon_aes_cbc_decrypt
+#define aes_cbc_cts_encrypt	neon_aes_cbc_cts_encrypt
+#define aes_cbc_cts_decrypt	neon_aes_cbc_cts_decrypt
 #define aes_ctr_encrypt		neon_aes_ctr_encrypt
 #define aes_ctr_encrypt		neon_aes_ctr_encrypt
 #define aes_xts_encrypt		neon_aes_xts_encrypt
 #define aes_xts_encrypt		neon_aes_xts_encrypt
 #define aes_xts_decrypt		neon_aes_xts_decrypt
 #define aes_xts_decrypt		neon_aes_xts_decrypt
@@ -63,30 +68,41 @@ MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
 MODULE_LICENSE("GPL v2");
 MODULE_LICENSE("GPL v2");
 
 
 /* defined in aes-modes.S */
 /* defined in aes-modes.S */
-asmlinkage void aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[],
+asmlinkage void aes_ecb_encrypt(u8 out[], u8 const in[], u32 const rk[],
 				int rounds, int blocks);
 				int rounds, int blocks);
-asmlinkage void aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[],
+asmlinkage void aes_ecb_decrypt(u8 out[], u8 const in[], u32 const rk[],
 				int rounds, int blocks);
 				int rounds, int blocks);
 
 
-asmlinkage void aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[],
+asmlinkage void aes_cbc_encrypt(u8 out[], u8 const in[], u32 const rk[],
 				int rounds, int blocks, u8 iv[]);
 				int rounds, int blocks, u8 iv[]);
-asmlinkage void aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[],
+asmlinkage void aes_cbc_decrypt(u8 out[], u8 const in[], u32 const rk[],
 				int rounds, int blocks, u8 iv[]);
 				int rounds, int blocks, u8 iv[]);
 
 
-asmlinkage void aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
+asmlinkage void aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[],
+				int rounds, int bytes, u8 const iv[]);
+asmlinkage void aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
+				int rounds, int bytes, u8 const iv[]);
+
+asmlinkage void aes_ctr_encrypt(u8 out[], u8 const in[], u32 const rk[],
 				int rounds, int blocks, u8 ctr[]);
 
-asmlinkage void aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[],
-				int rounds, int blocks, u8 const rk2[], u8 iv[],
+asmlinkage void aes_xts_encrypt(u8 out[], u8 const in[], u32 const rk1[],
+				int rounds, int blocks, u32 const rk2[], u8 iv[],
 				int first);
-asmlinkage void aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[],
-				int rounds, int blocks, u8 const rk2[], u8 iv[],
+asmlinkage void aes_xts_decrypt(u8 out[], u8 const in[], u32 const rk1[],
+				int rounds, int blocks, u32 const rk2[], u8 iv[],
 				int first);
 
 asmlinkage void aes_mac_update(u8 const in[], u32 const rk[], int rounds,
 			       int blocks, u8 dg[], int enc_before,
 			       int enc_after);
 
+struct cts_cbc_req_ctx {
+	struct scatterlist sg_src[2];
+	struct scatterlist sg_dst[2];
+	struct skcipher_request subreq;
+};
+
 struct crypto_aes_xts_ctx {
 	struct crypto_aes_ctx key1;
 	struct crypto_aes_ctx __aligned(8) key2;
@@ -142,7 +158,7 @@ static int ecb_encrypt(struct skcipher_request *req)
 	while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
 		kernel_neon_begin();
 		aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
-				(u8 *)ctx->key_enc, rounds, blocks);
+				ctx->key_enc, rounds, blocks);
 		kernel_neon_end();
 		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
@@ -162,7 +178,7 @@ static int ecb_decrypt(struct skcipher_request *req)
 	while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
 		kernel_neon_begin();
 		aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
-				(u8 *)ctx->key_dec, rounds, blocks);
+				ctx->key_dec, rounds, blocks);
 		kernel_neon_end();
 		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
@@ -182,7 +198,7 @@ static int cbc_encrypt(struct skcipher_request *req)
 	while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
 		kernel_neon_begin();
 		aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
-				(u8 *)ctx->key_enc, rounds, blocks, walk.iv);
+				ctx->key_enc, rounds, blocks, walk.iv);
 		kernel_neon_end();
 		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
@@ -202,13 +218,149 @@ static int cbc_decrypt(struct skcipher_request *req)
 	while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
 		kernel_neon_begin();
 		aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
-				(u8 *)ctx->key_dec, rounds, blocks, walk.iv);
+				ctx->key_dec, rounds, blocks, walk.iv);
 		kernel_neon_end();
 		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
 	return err;
 }
 
+static int cts_cbc_init_tfm(struct crypto_skcipher *tfm)
+{
+	crypto_skcipher_set_reqsize(tfm, sizeof(struct cts_cbc_req_ctx));
+	return 0;
+}
+
+static int cts_cbc_encrypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct cts_cbc_req_ctx *rctx = skcipher_request_ctx(req);
+	int err, rounds = 6 + ctx->key_length / 4;
+	int cbc_blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2;
+	struct scatterlist *src = req->src, *dst = req->dst;
+	struct skcipher_walk walk;
+
+	skcipher_request_set_tfm(&rctx->subreq, tfm);
+
+	if (req->cryptlen <= AES_BLOCK_SIZE) {
+		if (req->cryptlen < AES_BLOCK_SIZE)
+			return -EINVAL;
+		cbc_blocks = 1;
+	}
+
+	if (cbc_blocks > 0) {
+		unsigned int blocks;
+
+		skcipher_request_set_crypt(&rctx->subreq, req->src, req->dst,
+					   cbc_blocks * AES_BLOCK_SIZE,
+					   req->iv);
+
+		err = skcipher_walk_virt(&walk, &rctx->subreq, false);
+
+		while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
+			kernel_neon_begin();
+			aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+					ctx->key_enc, rounds, blocks, walk.iv);
+			kernel_neon_end();
+			err = skcipher_walk_done(&walk,
+						 walk.nbytes % AES_BLOCK_SIZE);
+		}
+		if (err)
+			return err;
+
+		if (req->cryptlen == AES_BLOCK_SIZE)
+			return 0;
+
+		dst = src = scatterwalk_ffwd(rctx->sg_src, req->src,
+					     rctx->subreq.cryptlen);
+		if (req->dst != req->src)
+			dst = scatterwalk_ffwd(rctx->sg_dst, req->dst,
+					       rctx->subreq.cryptlen);
+	}
+
+	/* handle ciphertext stealing */
+	skcipher_request_set_crypt(&rctx->subreq, src, dst,
+				   req->cryptlen - cbc_blocks * AES_BLOCK_SIZE,
+				   req->iv);
+
+	err = skcipher_walk_virt(&walk, &rctx->subreq, false);
+	if (err)
+		return err;
+
+	kernel_neon_begin();
+	aes_cbc_cts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+			    ctx->key_enc, rounds, walk.nbytes, walk.iv);
+	kernel_neon_end();
+
+	return skcipher_walk_done(&walk, 0);
+}
+
+static int cts_cbc_decrypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct cts_cbc_req_ctx *rctx = skcipher_request_ctx(req);
+	int err, rounds = 6 + ctx->key_length / 4;
+	int cbc_blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2;
+	struct scatterlist *src = req->src, *dst = req->dst;
+	struct skcipher_walk walk;
+
+	skcipher_request_set_tfm(&rctx->subreq, tfm);
+
+	if (req->cryptlen <= AES_BLOCK_SIZE) {
+		if (req->cryptlen < AES_BLOCK_SIZE)
+			return -EINVAL;
+		cbc_blocks = 1;
+	}
+
+	if (cbc_blocks > 0) {
+		unsigned int blocks;
+
+		skcipher_request_set_crypt(&rctx->subreq, req->src, req->dst,
+					   cbc_blocks * AES_BLOCK_SIZE,
+					   req->iv);
+
+		err = skcipher_walk_virt(&walk, &rctx->subreq, false);
+
+		while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
+			kernel_neon_begin();
+			aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+					ctx->key_dec, rounds, blocks, walk.iv);
+			kernel_neon_end();
+			err = skcipher_walk_done(&walk,
+						 walk.nbytes % AES_BLOCK_SIZE);
+		}
+		if (err)
+			return err;
+
+		if (req->cryptlen == AES_BLOCK_SIZE)
+			return 0;
+
+		dst = src = scatterwalk_ffwd(rctx->sg_src, req->src,
+					     rctx->subreq.cryptlen);
+		if (req->dst != req->src)
+			dst = scatterwalk_ffwd(rctx->sg_dst, req->dst,
+					       rctx->subreq.cryptlen);
+	}
+
+	/* handle ciphertext stealing */
+	skcipher_request_set_crypt(&rctx->subreq, src, dst,
+				   req->cryptlen - cbc_blocks * AES_BLOCK_SIZE,
+				   req->iv);
+
+	err = skcipher_walk_virt(&walk, &rctx->subreq, false);
+	if (err)
+		return err;
+
+	kernel_neon_begin();
+	aes_cbc_cts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+			    ctx->key_dec, rounds, walk.nbytes, walk.iv);
+	kernel_neon_end();
+
+	return skcipher_walk_done(&walk, 0);
+}
+
 static int ctr_encrypt(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
@@ -222,7 +374,7 @@ static int ctr_encrypt(struct skcipher_request *req)
 	while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
 		kernel_neon_begin();
 		aes_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
-				(u8 *)ctx->key_enc, rounds, blocks, walk.iv);
+				ctx->key_enc, rounds, blocks, walk.iv);
 		kernel_neon_end();
 		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
@@ -238,7 +390,7 @@ static int ctr_encrypt(struct skcipher_request *req)
 		blocks = -1;
 
 		kernel_neon_begin();
-		aes_ctr_encrypt(tail, NULL, (u8 *)ctx->key_enc, rounds,
+		aes_ctr_encrypt(tail, NULL, ctx->key_enc, rounds,
 				blocks, walk.iv);
 		kernel_neon_end();
 		crypto_xor_cpy(tdst, tsrc, tail, nbytes);
@@ -272,8 +424,8 @@ static int xts_encrypt(struct skcipher_request *req)
 	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
 		kernel_neon_begin();
 		aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
-				(u8 *)ctx->key1.key_enc, rounds, blocks,
-				(u8 *)ctx->key2.key_enc, walk.iv, first);
+				ctx->key1.key_enc, rounds, blocks,
+				ctx->key2.key_enc, walk.iv, first);
 		kernel_neon_end();
 		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
@@ -294,8 +446,8 @@ static int xts_decrypt(struct skcipher_request *req)
 	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
 		kernel_neon_begin();
 		aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
-				(u8 *)ctx->key1.key_dec, rounds, blocks,
-				(u8 *)ctx->key2.key_enc, walk.iv, first);
+				ctx->key1.key_dec, rounds, blocks,
+				ctx->key2.key_enc, walk.iv, first);
 		kernel_neon_end();
 		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
@@ -334,6 +486,24 @@ static struct skcipher_alg aes_algs[] = { {
 	.setkey		= skcipher_aes_setkey,
 	.encrypt	= cbc_encrypt,
 	.decrypt	= cbc_decrypt,
+}, {
+	.base = {
+		.cra_name		= "__cts(cbc(aes))",
+		.cra_driver_name	= "__cts-cbc-aes-" MODE,
+		.cra_priority		= PRIO,
+		.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.cra_blocksize		= AES_BLOCK_SIZE,
+		.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
+		.cra_module		= THIS_MODULE,
+	},
+	.min_keysize	= AES_MIN_KEY_SIZE,
+	.max_keysize	= AES_MAX_KEY_SIZE,
+	.ivsize		= AES_BLOCK_SIZE,
+	.walksize	= 2 * AES_BLOCK_SIZE,
+	.setkey		= skcipher_aes_setkey,
+	.encrypt	= cts_cbc_encrypt,
+	.decrypt	= cts_cbc_decrypt,
+	.init		= cts_cbc_init_tfm,
 }, {
 	.base = {
 		.cra_name		= "__ctr(aes)",
@@ -412,7 +582,6 @@ static int cmac_setkey(struct crypto_shash *tfm, const u8 *in_key,
 {
 	struct mac_tfm_ctx *ctx = crypto_shash_ctx(tfm);
 	be128 *consts = (be128 *)ctx->consts;
-	u8 *rk = (u8 *)ctx->key.key_enc;
 	int rounds = 6 + key_len / 4;
 	int err;
 
@@ -422,7 +591,8 @@ static int cmac_setkey(struct crypto_shash *tfm, const u8 *in_key,
 
 	/* encrypt the zero vector */
 	kernel_neon_begin();
-	aes_ecb_encrypt(ctx->consts, (u8[AES_BLOCK_SIZE]){}, rk, rounds, 1);
+	aes_ecb_encrypt(ctx->consts, (u8[AES_BLOCK_SIZE]){}, ctx->key.key_enc,
+			rounds, 1);
 	kernel_neon_end();
 
 	cmac_gf128_mul_by_x(consts, consts);
@@ -441,7 +611,6 @@ static int xcbc_setkey(struct crypto_shash *tfm, const u8 *in_key,
 	};
 
 	struct mac_tfm_ctx *ctx = crypto_shash_ctx(tfm);
-	u8 *rk = (u8 *)ctx->key.key_enc;
 	int rounds = 6 + key_len / 4;
 	u8 key[AES_BLOCK_SIZE];
 	int err;
@@ -451,8 +620,8 @@ static int xcbc_setkey(struct crypto_shash *tfm, const u8 *in_key,
 		return err;
 
 	kernel_neon_begin();
-	aes_ecb_encrypt(key, ks[0], rk, rounds, 1);
-	aes_ecb_encrypt(ctx->consts, ks[1], rk, rounds, 2);
+	aes_ecb_encrypt(key, ks[0], ctx->key.key_enc, rounds, 1);
+	aes_ecb_encrypt(ctx->consts, ks[1], ctx->key.key_enc, rounds, 2);
 	kernel_neon_end();
 
 	return cbcmac_setkey(tfm, key, sizeof(key));

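The "__cts(cbc(aes))" entries added above are internal algorithms; this driver exposes them to the rest of the kernel through the simd wrappers it already registers, so callers request plain "cts(cbc(aes))". Purely as an illustration, and not part of this commit (the function name, buffer handling and error paths are hypothetical), an in-kernel user might drive the new mode like this:

#include <crypto/skcipher.h>
#include <linux/scatterlist.h>
#include <linux/err.h>

/* Encrypt a buffer in place with cts(cbc(aes)); buf must be sg-addressable
 * (e.g. kmalloc'd) and iv must be one AES block (16 bytes) long. */
static int cts_cbc_demo(const u8 *key, unsigned int keylen,
			u8 *buf, unsigned int len, u8 *iv)
{
	struct crypto_skcipher *tfm;
	struct skcipher_request *req;
	struct scatterlist sg;
	DECLARE_CRYPTO_WAIT(wait);
	int err;

	tfm = crypto_alloc_skcipher("cts(cbc(aes))", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_skcipher_setkey(tfm, key, keylen);
	if (err)
		goto out_free_tfm;

	req = skcipher_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		err = -ENOMEM;
		goto out_free_tfm;
	}

	sg_init_one(&sg, buf, len);
	skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG |
					   CRYPTO_TFM_REQ_MAY_SLEEP,
				      crypto_req_done, &wait);
	/* len may be any value of at least one AES block; no padding needed */
	skcipher_request_set_crypt(req, &sg, &sg, len, iv);

	err = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);

	skcipher_request_free(req);
out_free_tfm:
	crypto_free_skcipher(tfm);
	return err;
}

Because ciphertext stealing absorbs the final partial block, no padding is ever required; the walksize of two AES blocks in the algorithm definition lets the last two (possibly partial) blocks be handled together.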
+ 216 - 200
arch/arm64/crypto/aes-modes.S

@@ -14,12 +14,12 @@
 	.align		4
 	.align		4
 
 
 aes_encrypt_block4x:
 aes_encrypt_block4x:
-	encrypt_block4x	v0, v1, v2, v3, w22, x21, x8, w7
+	encrypt_block4x	v0, v1, v2, v3, w3, x2, x8, w7
 	ret
 	ret
 ENDPROC(aes_encrypt_block4x)
 ENDPROC(aes_encrypt_block4x)
 
 
 aes_decrypt_block4x:
 aes_decrypt_block4x:
-	decrypt_block4x	v0, v1, v2, v3, w22, x21, x8, w7
+	decrypt_block4x	v0, v1, v2, v3, w3, x2, x8, w7
 	ret
 	ret
 ENDPROC(aes_decrypt_block4x)
 ENDPROC(aes_decrypt_block4x)
 
 
@@ -31,71 +31,57 @@ ENDPROC(aes_decrypt_block4x)
 	 */
 	 */
 
 
 AES_ENTRY(aes_ecb_encrypt)
 AES_ENTRY(aes_ecb_encrypt)
-	frame_push	5
+	stp		x29, x30, [sp, #-16]!
+	mov		x29, sp
 
 
-	mov		x19, x0
-	mov		x20, x1
-	mov		x21, x2
-	mov		x22, x3
-	mov		x23, x4
-
-.Lecbencrestart:
-	enc_prepare	w22, x21, x5
+	enc_prepare	w3, x2, x5
 
 
 .LecbencloopNx:
 .LecbencloopNx:
-	subs		w23, w23, #4
+	subs		w4, w4, #4
 	bmi		.Lecbenc1x
 	bmi		.Lecbenc1x
-	ld1		{v0.16b-v3.16b}, [x20], #64	/* get 4 pt blocks */
+	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
 	bl		aes_encrypt_block4x
 	bl		aes_encrypt_block4x
-	st1		{v0.16b-v3.16b}, [x19], #64
-	cond_yield_neon	.Lecbencrestart
+	st1		{v0.16b-v3.16b}, [x0], #64
 	b		.LecbencloopNx
 	b		.LecbencloopNx
 .Lecbenc1x:
 .Lecbenc1x:
-	adds		w23, w23, #4
+	adds		w4, w4, #4
 	beq		.Lecbencout
 	beq		.Lecbencout
 .Lecbencloop:
 .Lecbencloop:
-	ld1		{v0.16b}, [x20], #16		/* get next pt block */
-	encrypt_block	v0, w22, x21, x5, w6
-	st1		{v0.16b}, [x19], #16
-	subs		w23, w23, #1
+	ld1		{v0.16b}, [x1], #16		/* get next pt block */
+	encrypt_block	v0, w3, x2, x5, w6
+	st1		{v0.16b}, [x0], #16
+	subs		w4, w4, #1
 	bne		.Lecbencloop
 	bne		.Lecbencloop
 .Lecbencout:
 .Lecbencout:
-	frame_pop
+	ldp		x29, x30, [sp], #16
 	ret
 	ret
 AES_ENDPROC(aes_ecb_encrypt)
 AES_ENDPROC(aes_ecb_encrypt)
 
 
 
 
 AES_ENTRY(aes_ecb_decrypt)
 AES_ENTRY(aes_ecb_decrypt)
-	frame_push	5
+	stp		x29, x30, [sp, #-16]!
+	mov		x29, sp
 
 
-	mov		x19, x0
-	mov		x20, x1
-	mov		x21, x2
-	mov		x22, x3
-	mov		x23, x4
-
-.Lecbdecrestart:
-	dec_prepare	w22, x21, x5
+	dec_prepare	w3, x2, x5
 
 
 .LecbdecloopNx:
 .LecbdecloopNx:
-	subs		w23, w23, #4
+	subs		w4, w4, #4
 	bmi		.Lecbdec1x
 	bmi		.Lecbdec1x
-	ld1		{v0.16b-v3.16b}, [x20], #64	/* get 4 ct blocks */
+	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
 	bl		aes_decrypt_block4x
 	bl		aes_decrypt_block4x
-	st1		{v0.16b-v3.16b}, [x19], #64
-	cond_yield_neon	.Lecbdecrestart
+	st1		{v0.16b-v3.16b}, [x0], #64
 	b		.LecbdecloopNx
 	b		.LecbdecloopNx
 .Lecbdec1x:
 .Lecbdec1x:
-	adds		w23, w23, #4
+	adds		w4, w4, #4
 	beq		.Lecbdecout
 	beq		.Lecbdecout
 .Lecbdecloop:
 .Lecbdecloop:
-	ld1		{v0.16b}, [x20], #16		/* get next ct block */
-	decrypt_block	v0, w22, x21, x5, w6
-	st1		{v0.16b}, [x19], #16
-	subs		w23, w23, #1
+	ld1		{v0.16b}, [x1], #16		/* get next ct block */
+	decrypt_block	v0, w3, x2, x5, w6
+	st1		{v0.16b}, [x0], #16
+	subs		w4, w4, #1
 	bne		.Lecbdecloop
 	bne		.Lecbdecloop
 .Lecbdecout:
 .Lecbdecout:
-	frame_pop
+	ldp		x29, x30, [sp], #16
 	ret
 	ret
 AES_ENDPROC(aes_ecb_decrypt)
 AES_ENDPROC(aes_ecb_decrypt)
 
 
@@ -108,162 +94,211 @@ AES_ENDPROC(aes_ecb_decrypt)
 	 */
 	 */
 
 
 AES_ENTRY(aes_cbc_encrypt)
 AES_ENTRY(aes_cbc_encrypt)
-	frame_push	6
-
-	mov		x19, x0
-	mov		x20, x1
-	mov		x21, x2
-	mov		x22, x3
-	mov		x23, x4
-	mov		x24, x5
-
-.Lcbcencrestart:
-	ld1		{v4.16b}, [x24]			/* get iv */
-	enc_prepare	w22, x21, x6
+	ld1		{v4.16b}, [x5]			/* get iv */
+	enc_prepare	w3, x2, x6
 
 
 .Lcbcencloop4x:
 .Lcbcencloop4x:
-	subs		w23, w23, #4
+	subs		w4, w4, #4
 	bmi		.Lcbcenc1x
 	bmi		.Lcbcenc1x
-	ld1		{v0.16b-v3.16b}, [x20], #64	/* get 4 pt blocks */
+	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
 	eor		v0.16b, v0.16b, v4.16b		/* ..and xor with iv */
 	eor		v0.16b, v0.16b, v4.16b		/* ..and xor with iv */
-	encrypt_block	v0, w22, x21, x6, w7
+	encrypt_block	v0, w3, x2, x6, w7
 	eor		v1.16b, v1.16b, v0.16b
 	eor		v1.16b, v1.16b, v0.16b
-	encrypt_block	v1, w22, x21, x6, w7
+	encrypt_block	v1, w3, x2, x6, w7
 	eor		v2.16b, v2.16b, v1.16b
 	eor		v2.16b, v2.16b, v1.16b
-	encrypt_block	v2, w22, x21, x6, w7
+	encrypt_block	v2, w3, x2, x6, w7
 	eor		v3.16b, v3.16b, v2.16b
 	eor		v3.16b, v3.16b, v2.16b
-	encrypt_block	v3, w22, x21, x6, w7
-	st1		{v0.16b-v3.16b}, [x19], #64
+	encrypt_block	v3, w3, x2, x6, w7
+	st1		{v0.16b-v3.16b}, [x0], #64
 	mov		v4.16b, v3.16b
 	mov		v4.16b, v3.16b
-	st1		{v4.16b}, [x24]			/* return iv */
-	cond_yield_neon	.Lcbcencrestart
 	b		.Lcbcencloop4x
 	b		.Lcbcencloop4x
 .Lcbcenc1x:
 .Lcbcenc1x:
-	adds		w23, w23, #4
+	adds		w4, w4, #4
 	beq		.Lcbcencout
 	beq		.Lcbcencout
 .Lcbcencloop:
 .Lcbcencloop:
-	ld1		{v0.16b}, [x20], #16		/* get next pt block */
+	ld1		{v0.16b}, [x1], #16		/* get next pt block */
 	eor		v4.16b, v4.16b, v0.16b		/* ..and xor with iv */
 	eor		v4.16b, v4.16b, v0.16b		/* ..and xor with iv */
-	encrypt_block	v4, w22, x21, x6, w7
-	st1		{v4.16b}, [x19], #16
-	subs		w23, w23, #1
+	encrypt_block	v4, w3, x2, x6, w7
+	st1		{v4.16b}, [x0], #16
+	subs		w4, w4, #1
 	bne		.Lcbcencloop
 	bne		.Lcbcencloop
 .Lcbcencout:
 .Lcbcencout:
-	st1		{v4.16b}, [x24]			/* return iv */
-	frame_pop
+	st1		{v4.16b}, [x5]			/* return iv */
 	ret
 	ret
 AES_ENDPROC(aes_cbc_encrypt)
 AES_ENDPROC(aes_cbc_encrypt)
 
 
 
 
 AES_ENTRY(aes_cbc_decrypt)
 AES_ENTRY(aes_cbc_decrypt)
-	frame_push	6
-
-	mov		x19, x0
-	mov		x20, x1
-	mov		x21, x2
-	mov		x22, x3
-	mov		x23, x4
-	mov		x24, x5
+	stp		x29, x30, [sp, #-16]!
+	mov		x29, sp
 
 
-.Lcbcdecrestart:
-	ld1		{v7.16b}, [x24]			/* get iv */
-	dec_prepare	w22, x21, x6
+	ld1		{v7.16b}, [x5]			/* get iv */
+	dec_prepare	w3, x2, x6
 
 
 .LcbcdecloopNx:
 .LcbcdecloopNx:
-	subs		w23, w23, #4
+	subs		w4, w4, #4
 	bmi		.Lcbcdec1x
 	bmi		.Lcbcdec1x
-	ld1		{v0.16b-v3.16b}, [x20], #64	/* get 4 ct blocks */
+	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
 	mov		v4.16b, v0.16b
 	mov		v4.16b, v0.16b
 	mov		v5.16b, v1.16b
 	mov		v5.16b, v1.16b
 	mov		v6.16b, v2.16b
 	mov		v6.16b, v2.16b
 	bl		aes_decrypt_block4x
 	bl		aes_decrypt_block4x
-	sub		x20, x20, #16
+	sub		x1, x1, #16
 	eor		v0.16b, v0.16b, v7.16b
 	eor		v0.16b, v0.16b, v7.16b
 	eor		v1.16b, v1.16b, v4.16b
 	eor		v1.16b, v1.16b, v4.16b
-	ld1		{v7.16b}, [x20], #16		/* reload 1 ct block */
+	ld1		{v7.16b}, [x1], #16		/* reload 1 ct block */
 	eor		v2.16b, v2.16b, v5.16b
 	eor		v2.16b, v2.16b, v5.16b
 	eor		v3.16b, v3.16b, v6.16b
 	eor		v3.16b, v3.16b, v6.16b
-	st1		{v0.16b-v3.16b}, [x19], #64
-	st1		{v7.16b}, [x24]			/* return iv */
-	cond_yield_neon	.Lcbcdecrestart
+	st1		{v0.16b-v3.16b}, [x0], #64
 	b		.LcbcdecloopNx
 	b		.LcbcdecloopNx
 .Lcbcdec1x:
 .Lcbcdec1x:
-	adds		w23, w23, #4
+	adds		w4, w4, #4
 	beq		.Lcbcdecout
 	beq		.Lcbcdecout
 .Lcbcdecloop:
 .Lcbcdecloop:
-	ld1		{v1.16b}, [x20], #16		/* get next ct block */
+	ld1		{v1.16b}, [x1], #16		/* get next ct block */
 	mov		v0.16b, v1.16b			/* ...and copy to v0 */
 	mov		v0.16b, v1.16b			/* ...and copy to v0 */
-	decrypt_block	v0, w22, x21, x6, w7
+	decrypt_block	v0, w3, x2, x6, w7
 	eor		v0.16b, v0.16b, v7.16b		/* xor with iv => pt */
 	eor		v0.16b, v0.16b, v7.16b		/* xor with iv => pt */
 	mov		v7.16b, v1.16b			/* ct is next iv */
 	mov		v7.16b, v1.16b			/* ct is next iv */
-	st1		{v0.16b}, [x19], #16
-	subs		w23, w23, #1
+	st1		{v0.16b}, [x0], #16
+	subs		w4, w4, #1
 	bne		.Lcbcdecloop
 	bne		.Lcbcdecloop
 .Lcbcdecout:
 .Lcbcdecout:
-	st1		{v7.16b}, [x24]			/* return iv */
-	frame_pop
+	st1		{v7.16b}, [x5]			/* return iv */
+	ldp		x29, x30, [sp], #16
 	ret
 	ret
 AES_ENDPROC(aes_cbc_decrypt)
 AES_ENDPROC(aes_cbc_decrypt)
 
 
 
 
+	/*
+	 * aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[],
+	 *		       int rounds, int bytes, u8 const iv[])
+	 * aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
+	 *		       int rounds, int bytes, u8 const iv[])
+	 */
+
+AES_ENTRY(aes_cbc_cts_encrypt)
+	adr_l		x8, .Lcts_permute_table
+	sub		x4, x4, #16
+	add		x9, x8, #32
+	add		x8, x8, x4
+	sub		x9, x9, x4
+	ld1		{v3.16b}, [x8]
+	ld1		{v4.16b}, [x9]
+
+	ld1		{v0.16b}, [x1], x4		/* overlapping loads */
+	ld1		{v1.16b}, [x1]
+
+	ld1		{v5.16b}, [x5]			/* get iv */
+	enc_prepare	w3, x2, x6
+
+	eor		v0.16b, v0.16b, v5.16b		/* xor with iv */
+	tbl		v1.16b, {v1.16b}, v4.16b
+	encrypt_block	v0, w3, x2, x6, w7
+
+	eor		v1.16b, v1.16b, v0.16b
+	tbl		v0.16b, {v0.16b}, v3.16b
+	encrypt_block	v1, w3, x2, x6, w7
+
+	add		x4, x0, x4
+	st1		{v0.16b}, [x4]			/* overlapping stores */
+	st1		{v1.16b}, [x0]
+	ret
+AES_ENDPROC(aes_cbc_cts_encrypt)
+
+AES_ENTRY(aes_cbc_cts_decrypt)
+	adr_l		x8, .Lcts_permute_table
+	sub		x4, x4, #16
+	add		x9, x8, #32
+	add		x8, x8, x4
+	sub		x9, x9, x4
+	ld1		{v3.16b}, [x8]
+	ld1		{v4.16b}, [x9]
+
+	ld1		{v0.16b}, [x1], x4		/* overlapping loads */
+	ld1		{v1.16b}, [x1]
+
+	ld1		{v5.16b}, [x5]			/* get iv */
+	dec_prepare	w3, x2, x6
+
+	tbl		v2.16b, {v1.16b}, v4.16b
+	decrypt_block	v0, w3, x2, x6, w7
+	eor		v2.16b, v2.16b, v0.16b
+
+	tbx		v0.16b, {v1.16b}, v4.16b
+	tbl		v2.16b, {v2.16b}, v3.16b
+	decrypt_block	v0, w3, x2, x6, w7
+	eor		v0.16b, v0.16b, v5.16b		/* xor with iv */
+
+	add		x4, x0, x4
+	st1		{v2.16b}, [x4]			/* overlapping stores */
+	st1		{v0.16b}, [x0]
+	ret
+AES_ENDPROC(aes_cbc_cts_decrypt)
+
+	.section	".rodata", "a"
+	.align		6
+.Lcts_permute_table:
+	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+	.byte		 0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
+	.byte		 0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe,  0xf
+	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+	.previous
+
+
 	/*
 	/*
 	 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 	 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 	 *		   int blocks, u8 ctr[])
 	 *		   int blocks, u8 ctr[])
 	 */
 	 */
 
 
 AES_ENTRY(aes_ctr_encrypt)
 AES_ENTRY(aes_ctr_encrypt)
-	frame_push	6
+	stp		x29, x30, [sp, #-16]!
+	mov		x29, sp
 
 
-	mov		x19, x0
-	mov		x20, x1
-	mov		x21, x2
-	mov		x22, x3
-	mov		x23, x4
-	mov		x24, x5
-
-.Lctrrestart:
-	enc_prepare	w22, x21, x6
-	ld1		{v4.16b}, [x24]
+	enc_prepare	w3, x2, x6
+	ld1		{v4.16b}, [x5]
 
 
 	umov		x6, v4.d[1]		/* keep swabbed ctr in reg */
 	umov		x6, v4.d[1]		/* keep swabbed ctr in reg */
 	rev		x6, x6
 	rev		x6, x6
+	cmn		w6, w4			/* 32 bit overflow? */
+	bcs		.Lctrloop
 .LctrloopNx:
 .LctrloopNx:
-	subs		w23, w23, #4
+	subs		w4, w4, #4
 	bmi		.Lctr1x
 	bmi		.Lctr1x
-	cmn		w6, #4			/* 32 bit overflow? */
-	bcs		.Lctr1x
-	ldr		q8, =0x30000000200000001	/* addends 1,2,3[,0] */
-	dup		v7.4s, w6
+	add		w7, w6, #1
 	mov		v0.16b, v4.16b
 	mov		v0.16b, v4.16b
-	add		v7.4s, v7.4s, v8.4s
+	add		w8, w6, #2
 	mov		v1.16b, v4.16b
 	mov		v1.16b, v4.16b
-	rev32		v8.16b, v7.16b
+	add		w9, w6, #3
 	mov		v2.16b, v4.16b
 	mov		v2.16b, v4.16b
+	rev		w7, w7
 	mov		v3.16b, v4.16b
 	mov		v3.16b, v4.16b
-	mov		v1.s[3], v8.s[0]
-	mov		v2.s[3], v8.s[1]
-	mov		v3.s[3], v8.s[2]
-	ld1		{v5.16b-v7.16b}, [x20], #48	/* get 3 input blocks */
+	rev		w8, w8
+	mov		v1.s[3], w7
+	rev		w9, w9
+	mov		v2.s[3], w8
+	mov		v3.s[3], w9
+	ld1		{v5.16b-v7.16b}, [x1], #48	/* get 3 input blocks */
 	bl		aes_encrypt_block4x
 	bl		aes_encrypt_block4x
 	eor		v0.16b, v5.16b, v0.16b
 	eor		v0.16b, v5.16b, v0.16b
-	ld1		{v5.16b}, [x20], #16		/* get 1 input block  */
+	ld1		{v5.16b}, [x1], #16		/* get 1 input block  */
 	eor		v1.16b, v6.16b, v1.16b
 	eor		v1.16b, v6.16b, v1.16b
 	eor		v2.16b, v7.16b, v2.16b
 	eor		v2.16b, v7.16b, v2.16b
 	eor		v3.16b, v5.16b, v3.16b
 	eor		v3.16b, v5.16b, v3.16b
-	st1		{v0.16b-v3.16b}, [x19], #64
+	st1		{v0.16b-v3.16b}, [x0], #64
 	add		x6, x6, #4
 	add		x6, x6, #4
 	rev		x7, x6
 	rev		x7, x6
 	ins		v4.d[1], x7
 	ins		v4.d[1], x7
-	cbz		w23, .Lctrout
-	st1		{v4.16b}, [x24]		/* return next CTR value */
-	cond_yield_neon	.Lctrrestart
+	cbz		w4, .Lctrout
 	b		.LctrloopNx
 	b		.LctrloopNx
 .Lctr1x:
 .Lctr1x:
-	adds		w23, w23, #4
+	adds		w4, w4, #4
 	beq		.Lctrout
 	beq		.Lctrout
 .Lctrloop:
 .Lctrloop:
 	mov		v0.16b, v4.16b
 	mov		v0.16b, v4.16b
-	encrypt_block	v0, w22, x21, x8, w7
+	encrypt_block	v0, w3, x2, x8, w7
 
 
 	adds		x6, x6, #1		/* increment BE ctr */
 	adds		x6, x6, #1		/* increment BE ctr */
 	rev		x7, x6
 	rev		x7, x6
@@ -271,22 +306,22 @@ AES_ENTRY(aes_ctr_encrypt)
 	bcs		.Lctrcarry		/* overflow? */
 	bcs		.Lctrcarry		/* overflow? */
 
 
 .Lctrcarrydone:
 .Lctrcarrydone:
-	subs		w23, w23, #1
+	subs		w4, w4, #1
 	bmi		.Lctrtailblock		/* blocks <0 means tail block */
 	bmi		.Lctrtailblock		/* blocks <0 means tail block */
-	ld1		{v3.16b}, [x20], #16
+	ld1		{v3.16b}, [x1], #16
 	eor		v3.16b, v0.16b, v3.16b
 	eor		v3.16b, v0.16b, v3.16b
-	st1		{v3.16b}, [x19], #16
+	st1		{v3.16b}, [x0], #16
 	bne		.Lctrloop
 	bne		.Lctrloop
 
 
 .Lctrout:
 .Lctrout:
-	st1		{v4.16b}, [x24]		/* return next CTR value */
-.Lctrret:
-	frame_pop
+	st1		{v4.16b}, [x5]		/* return next CTR value */
+	ldp		x29, x30, [sp], #16
 	ret
 	ret
 
 
 .Lctrtailblock:
 .Lctrtailblock:
-	st1		{v0.16b}, [x19]
-	b		.Lctrret
+	st1		{v0.16b}, [x0]
+	ldp		x29, x30, [sp], #16
+	ret
 
 
 .Lctrcarry:
 .Lctrcarry:
 	umov		x7, v4.d[0]		/* load upper word of ctr  */
 	umov		x7, v4.d[0]		/* load upper word of ctr  */
@@ -296,7 +331,6 @@ AES_ENTRY(aes_ctr_encrypt)
 	ins		v4.d[0], x7
 	ins		v4.d[0], x7
 	b		.Lctrcarrydone
 	b		.Lctrcarrydone
 AES_ENDPROC(aes_ctr_encrypt)
 AES_ENDPROC(aes_ctr_encrypt)
-	.ltorg
 
 
 
 
 	/*
 	/*
@@ -306,150 +340,132 @@ AES_ENDPROC(aes_ctr_encrypt)
 	 *		   int blocks, u8 const rk2[], u8 iv[], int first)
 	 *		   int blocks, u8 const rk2[], u8 iv[], int first)
 	 */
 	 */
 
 
-	.macro		next_tweak, out, in, const, tmp
+	.macro		next_tweak, out, in, tmp
 	sshr		\tmp\().2d,  \in\().2d,   #63
 	sshr		\tmp\().2d,  \in\().2d,   #63
-	and		\tmp\().16b, \tmp\().16b, \const\().16b
+	and		\tmp\().16b, \tmp\().16b, xtsmask.16b
 	add		\out\().2d,  \in\().2d,   \in\().2d
 	add		\out\().2d,  \in\().2d,   \in\().2d
 	ext		\tmp\().16b, \tmp\().16b, \tmp\().16b, #8
 	ext		\tmp\().16b, \tmp\().16b, \tmp\().16b, #8
 	eor		\out\().16b, \out\().16b, \tmp\().16b
 	eor		\out\().16b, \out\().16b, \tmp\().16b
 	.endm
 	.endm
 
 
-.Lxts_mul_x:
-CPU_LE(	.quad		1, 0x87		)
-CPU_BE(	.quad		0x87, 1		)
+	.macro		xts_load_mask, tmp
+	movi		xtsmask.2s, #0x1
+	movi		\tmp\().2s, #0x87
+	uzp1		xtsmask.4s, xtsmask.4s, \tmp\().4s
+	.endm
 
 
 AES_ENTRY(aes_xts_encrypt)
 AES_ENTRY(aes_xts_encrypt)
-	frame_push	6
+	stp		x29, x30, [sp, #-16]!
+	mov		x29, sp
 
 
-	mov		x19, x0
-	mov		x20, x1
-	mov		x21, x2
-	mov		x22, x3
-	mov		x23, x4
-	mov		x24, x6
-
-	ld1		{v4.16b}, [x24]
+	ld1		{v4.16b}, [x6]
+	xts_load_mask	v8
 	cbz		w7, .Lxtsencnotfirst
 	cbz		w7, .Lxtsencnotfirst
 
 
 	enc_prepare	w3, x5, x8
 	enc_prepare	w3, x5, x8
 	encrypt_block	v4, w3, x5, x8, w7		/* first tweak */
 	encrypt_block	v4, w3, x5, x8, w7		/* first tweak */
 	enc_switch_key	w3, x2, x8
 	enc_switch_key	w3, x2, x8
-	ldr		q7, .Lxts_mul_x
 	b		.LxtsencNx
 	b		.LxtsencNx
 
 
-.Lxtsencrestart:
-	ld1		{v4.16b}, [x24]
 .Lxtsencnotfirst:
 .Lxtsencnotfirst:
-	enc_prepare	w22, x21, x8
+	enc_prepare	w3, x2, x8
 .LxtsencloopNx:
 .LxtsencloopNx:
-	ldr		q7, .Lxts_mul_x
-	next_tweak	v4, v4, v7, v8
+	next_tweak	v4, v4, v8
 .LxtsencNx:
 .LxtsencNx:
-	subs		w23, w23, #4
+	subs		w4, w4, #4
 	bmi		.Lxtsenc1x
 	bmi		.Lxtsenc1x
-	ld1		{v0.16b-v3.16b}, [x20], #64	/* get 4 pt blocks */
-	next_tweak	v5, v4, v7, v8
+	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
+	next_tweak	v5, v4, v8
 	eor		v0.16b, v0.16b, v4.16b
 	eor		v0.16b, v0.16b, v4.16b
-	next_tweak	v6, v5, v7, v8
+	next_tweak	v6, v5, v8
 	eor		v1.16b, v1.16b, v5.16b
 	eor		v1.16b, v1.16b, v5.16b
 	eor		v2.16b, v2.16b, v6.16b
 	eor		v2.16b, v2.16b, v6.16b
-	next_tweak	v7, v6, v7, v8
+	next_tweak	v7, v6, v8
 	eor		v3.16b, v3.16b, v7.16b
 	eor		v3.16b, v3.16b, v7.16b
 	bl		aes_encrypt_block4x
 	bl		aes_encrypt_block4x
 	eor		v3.16b, v3.16b, v7.16b
 	eor		v3.16b, v3.16b, v7.16b
 	eor		v0.16b, v0.16b, v4.16b
 	eor		v0.16b, v0.16b, v4.16b
 	eor		v1.16b, v1.16b, v5.16b
 	eor		v1.16b, v1.16b, v5.16b
 	eor		v2.16b, v2.16b, v6.16b
 	eor		v2.16b, v2.16b, v6.16b
-	st1		{v0.16b-v3.16b}, [x19], #64
+	st1		{v0.16b-v3.16b}, [x0], #64
 	mov		v4.16b, v7.16b
 	mov		v4.16b, v7.16b
-	cbz		w23, .Lxtsencout
-	st1		{v4.16b}, [x24]
-	cond_yield_neon	.Lxtsencrestart
+	cbz		w4, .Lxtsencout
+	xts_reload_mask	v8
 	b		.LxtsencloopNx
 	b		.LxtsencloopNx
 .Lxtsenc1x:
 .Lxtsenc1x:
-	adds		w23, w23, #4
+	adds		w4, w4, #4
 	beq		.Lxtsencout
 	beq		.Lxtsencout
 .Lxtsencloop:
 .Lxtsencloop:
-	ld1		{v1.16b}, [x20], #16
+	ld1		{v1.16b}, [x1], #16
 	eor		v0.16b, v1.16b, v4.16b
 	eor		v0.16b, v1.16b, v4.16b
-	encrypt_block	v0, w22, x21, x8, w7
+	encrypt_block	v0, w3, x2, x8, w7
 	eor		v0.16b, v0.16b, v4.16b
 	eor		v0.16b, v0.16b, v4.16b
-	st1		{v0.16b}, [x19], #16
-	subs		w23, w23, #1
+	st1		{v0.16b}, [x0], #16
+	subs		w4, w4, #1
 	beq		.Lxtsencout
 	beq		.Lxtsencout
-	next_tweak	v4, v4, v7, v8
+	next_tweak	v4, v4, v8
 	b		.Lxtsencloop
 	b		.Lxtsencloop
 .Lxtsencout:
 .Lxtsencout:
-	st1		{v4.16b}, [x24]
-	frame_pop
+	st1		{v4.16b}, [x6]
+	ldp		x29, x30, [sp], #16
 	ret
 	ret
 AES_ENDPROC(aes_xts_encrypt)
 AES_ENDPROC(aes_xts_encrypt)
 
 
 
 
 AES_ENTRY(aes_xts_decrypt)
 AES_ENTRY(aes_xts_decrypt)
-	frame_push	6
+	stp		x29, x30, [sp, #-16]!
+	mov		x29, sp
 
 
-	mov		x19, x0
-	mov		x20, x1
-	mov		x21, x2
-	mov		x22, x3
-	mov		x23, x4
-	mov		x24, x6
-
-	ld1		{v4.16b}, [x24]
+	ld1		{v4.16b}, [x6]
+	xts_load_mask	v8
 	cbz		w7, .Lxtsdecnotfirst
 	cbz		w7, .Lxtsdecnotfirst
 
 
 	enc_prepare	w3, x5, x8
 	enc_prepare	w3, x5, x8
 	encrypt_block	v4, w3, x5, x8, w7		/* first tweak */
 	encrypt_block	v4, w3, x5, x8, w7		/* first tweak */
 	dec_prepare	w3, x2, x8
 	dec_prepare	w3, x2, x8
-	ldr		q7, .Lxts_mul_x
 	b		.LxtsdecNx
 	b		.LxtsdecNx
 
 
-.Lxtsdecrestart:
-	ld1		{v4.16b}, [x24]
 .Lxtsdecnotfirst:
 .Lxtsdecnotfirst:
-	dec_prepare	w22, x21, x8
+	dec_prepare	w3, x2, x8
 .LxtsdecloopNx:
 .LxtsdecloopNx:
-	ldr		q7, .Lxts_mul_x
-	next_tweak	v4, v4, v7, v8
+	next_tweak	v4, v4, v8
 .LxtsdecNx:
 .LxtsdecNx:
-	subs		w23, w23, #4
+	subs		w4, w4, #4
 	bmi		.Lxtsdec1x
 	bmi		.Lxtsdec1x
-	ld1		{v0.16b-v3.16b}, [x20], #64	/* get 4 ct blocks */
-	next_tweak	v5, v4, v7, v8
+	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
+	next_tweak	v5, v4, v8
 	eor		v0.16b, v0.16b, v4.16b
 	eor		v0.16b, v0.16b, v4.16b
-	next_tweak	v6, v5, v7, v8
+	next_tweak	v6, v5, v8
 	eor		v1.16b, v1.16b, v5.16b
 	eor		v1.16b, v1.16b, v5.16b
 	eor		v2.16b, v2.16b, v6.16b
 	eor		v2.16b, v2.16b, v6.16b
-	next_tweak	v7, v6, v7, v8
+	next_tweak	v7, v6, v8
 	eor		v3.16b, v3.16b, v7.16b
 	eor		v3.16b, v3.16b, v7.16b
 	bl		aes_decrypt_block4x
 	bl		aes_decrypt_block4x
 	eor		v3.16b, v3.16b, v7.16b
 	eor		v3.16b, v3.16b, v7.16b
 	eor		v0.16b, v0.16b, v4.16b
 	eor		v0.16b, v0.16b, v4.16b
 	eor		v1.16b, v1.16b, v5.16b
 	eor		v1.16b, v1.16b, v5.16b
 	eor		v2.16b, v2.16b, v6.16b
 	eor		v2.16b, v2.16b, v6.16b
-	st1		{v0.16b-v3.16b}, [x19], #64
+	st1		{v0.16b-v3.16b}, [x0], #64
 	mov		v4.16b, v7.16b
 	mov		v4.16b, v7.16b
-	cbz		w23, .Lxtsdecout
-	st1		{v4.16b}, [x24]
-	cond_yield_neon	.Lxtsdecrestart
+	cbz		w4, .Lxtsdecout
+	xts_reload_mask	v8
 	b		.LxtsdecloopNx
 	b		.LxtsdecloopNx
 .Lxtsdec1x:
 .Lxtsdec1x:
-	adds		w23, w23, #4
+	adds		w4, w4, #4
 	beq		.Lxtsdecout
 	beq		.Lxtsdecout
 .Lxtsdecloop:
 .Lxtsdecloop:
-	ld1		{v1.16b}, [x20], #16
+	ld1		{v1.16b}, [x1], #16
 	eor		v0.16b, v1.16b, v4.16b
 	eor		v0.16b, v1.16b, v4.16b
-	decrypt_block	v0, w22, x21, x8, w7
+	decrypt_block	v0, w3, x2, x8, w7
 	eor		v0.16b, v0.16b, v4.16b
 	eor		v0.16b, v0.16b, v4.16b
-	st1		{v0.16b}, [x19], #16
-	subs		w23, w23, #1
+	st1		{v0.16b}, [x0], #16
+	subs		w4, w4, #1
 	beq		.Lxtsdecout
 	beq		.Lxtsdecout
-	next_tweak	v4, v4, v7, v8
+	next_tweak	v4, v4, v8
 	b		.Lxtsdecloop
 	b		.Lxtsdecloop
 .Lxtsdecout:
 .Lxtsdecout:
-	st1		{v4.16b}, [x24]
-	frame_pop
+	st1		{v4.16b}, [x6]
+	ldp		x29, x30, [sp], #16
 	ret
 	ret
 AES_ENDPROC(aes_xts_decrypt)
 AES_ENDPROC(aes_xts_decrypt)
 
 

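For reference, and not part of the patch: the net effect of the next_tweak macro above, using the constant that xts_load_mask now builds in registers instead of loading from the removed .Lxts_mul_x literal, is the usual multiply-by-x in GF(2^128). A C sketch, with t[0]/t[1] as the little-endian low/high 64-bit halves of the tweak:

#include <stdint.h>

/* Multiply the 128-bit XTS tweak by x, reducing with the polynomial 0x87. */
static void next_tweak(uint64_t t[2])
{
	uint64_t carry = (t[1] >> 63) * 0x87;	/* 0x87 iff the top bit is set */

	t[1] = (t[1] << 1) | (t[0] >> 63);	/* shift the 128-bit value left */
	t[0] = (t[0] << 1) ^ carry;		/* fold the reduction back in */
}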
+ 6 - 0
arch/arm64/crypto/aes-neon.S

@@ -14,6 +14,12 @@
 #define AES_ENTRY(func)		ENTRY(neon_ ## func)
 #define AES_ENDPROC(func)	ENDPROC(neon_ ## func)
 
+	xtsmask		.req	v7
+
+	.macro		xts_reload_mask, tmp
+	xts_load_mask	\tmp
+	.endm
+
 	/* multiply by polynomial 'x' in GF(2^8) */
 	.macro		mul_by_x, out, in, temp, const
 	sshr		\temp, \in, #7

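The mul_by_x macro whose context is shown above is the AES "xtime" step. A rough C equivalent, not from the patch and assuming the usual 0x1b reduction constant held in \const:

#include <stdint.h>

/* Multiply a field element by x in GF(2^8) with the AES polynomial 0x11b. */
static inline uint8_t mul_by_x(uint8_t b)
{
	return (uint8_t)((b << 1) ^ ((b >> 7) * 0x1b));
}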
+ 0 - 287
arch/arm64/crypto/crc32-ce-core.S

@@ -1,287 +0,0 @@
-/*
- * Accelerated CRC32(C) using arm64 CRC, NEON and Crypto Extensions instructions
- *
- * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-/* GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see http://www.gnu.org/licenses
- *
- * Please  visit http://www.xyratex.com/contact if you need additional
- * information or have any questions.
- *
- * GPL HEADER END
- */
-
-/*
- * Copyright 2012 Xyratex Technology Limited
- *
- * Using hardware provided PCLMULQDQ instruction to accelerate the CRC32
- * calculation.
- * CRC32 polynomial:0x04c11db7(BE)/0xEDB88320(LE)
- * PCLMULQDQ is a new instruction in Intel SSE4.2, the reference can be found
- * at:
- * http://www.intel.com/products/processor/manuals/
- * Intel(R) 64 and IA-32 Architectures Software Developer's Manual
- * Volume 2B: Instruction Set Reference, N-Z
- *
- * Authors:   Gregory Prestas <Gregory_Prestas@us.xyratex.com>
- *	      Alexander Boyko <Alexander_Boyko@xyratex.com>
- */
-
-#include <linux/linkage.h>
-#include <asm/assembler.h>
-
-	.section	".rodata", "a"
-	.align		6
-	.cpu		generic+crypto+crc
-
-.Lcrc32_constants:
-	/*
-	 * [x4*128+32 mod P(x) << 32)]'  << 1   = 0x154442bd4
-	 * #define CONSTANT_R1  0x154442bd4LL
-	 *
-	 * [(x4*128-32 mod P(x) << 32)]' << 1   = 0x1c6e41596
-	 * #define CONSTANT_R2  0x1c6e41596LL
-	 */
-	.octa		0x00000001c6e415960000000154442bd4
-
-	/*
-	 * [(x128+32 mod P(x) << 32)]'   << 1   = 0x1751997d0
-	 * #define CONSTANT_R3  0x1751997d0LL
-	 *
-	 * [(x128-32 mod P(x) << 32)]'   << 1   = 0x0ccaa009e
-	 * #define CONSTANT_R4  0x0ccaa009eLL
-	 */
-	.octa		0x00000000ccaa009e00000001751997d0
-
-	/*
-	 * [(x64 mod P(x) << 32)]'       << 1   = 0x163cd6124
-	 * #define CONSTANT_R5  0x163cd6124LL
-	 */
-	.quad		0x0000000163cd6124
-	.quad		0x00000000FFFFFFFF
-
-	/*
-	 * #define CRCPOLY_TRUE_LE_FULL 0x1DB710641LL
-	 *
-	 * Barrett Reduction constant (u64`) = u` = (x**64 / P(x))`
-	 *                                                      = 0x1F7011641LL
-	 * #define CONSTANT_RU  0x1F7011641LL
-	 */
-	.octa		0x00000001F701164100000001DB710641
-
-.Lcrc32c_constants:
-	.octa		0x000000009e4addf800000000740eef02
-	.octa		0x000000014cd00bd600000000f20c0dfe
-	.quad		0x00000000dd45aab8
-	.quad		0x00000000FFFFFFFF
-	.octa		0x00000000dea713f10000000105ec76f0
-
-	vCONSTANT	.req	v0
-	dCONSTANT	.req	d0
-	qCONSTANT	.req	q0
-
-	BUF		.req	x19
-	LEN		.req	x20
-	CRC		.req	x21
-	CONST		.req	x22
-
-	vzr		.req	v9
-
-	/**
-	 * Calculate crc32
-	 * BUF - buffer
-	 * LEN - sizeof buffer (multiple of 16 bytes), LEN should be > 63
-	 * CRC - initial crc32
-	 * return %eax crc32
-	 * uint crc32_pmull_le(unsigned char const *buffer,
-	 *                     size_t len, uint crc32)
-	 */
-	.text
-ENTRY(crc32_pmull_le)
-	adr_l		x3, .Lcrc32_constants
-	b		0f
-
-ENTRY(crc32c_pmull_le)
-	adr_l		x3, .Lcrc32c_constants
-
-0:	frame_push	4, 64
-
-	mov		BUF, x0
-	mov		LEN, x1
-	mov		CRC, x2
-	mov		CONST, x3
-
-	bic		LEN, LEN, #15
-	ld1		{v1.16b-v4.16b}, [BUF], #0x40
-	movi		vzr.16b, #0
-	fmov		dCONSTANT, CRC
-	eor		v1.16b, v1.16b, vCONSTANT.16b
-	sub		LEN, LEN, #0x40
-	cmp		LEN, #0x40
-	b.lt		less_64
-
-	ldr		qCONSTANT, [CONST]
-
-loop_64:		/* 64 bytes Full cache line folding */
-	sub		LEN, LEN, #0x40
-
-	pmull2		v5.1q, v1.2d, vCONSTANT.2d
-	pmull2		v6.1q, v2.2d, vCONSTANT.2d
-	pmull2		v7.1q, v3.2d, vCONSTANT.2d
-	pmull2		v8.1q, v4.2d, vCONSTANT.2d
-
-	pmull		v1.1q, v1.1d, vCONSTANT.1d
-	pmull		v2.1q, v2.1d, vCONSTANT.1d
-	pmull		v3.1q, v3.1d, vCONSTANT.1d
-	pmull		v4.1q, v4.1d, vCONSTANT.1d
-
-	eor		v1.16b, v1.16b, v5.16b
-	ld1		{v5.16b}, [BUF], #0x10
-	eor		v2.16b, v2.16b, v6.16b
-	ld1		{v6.16b}, [BUF], #0x10
-	eor		v3.16b, v3.16b, v7.16b
-	ld1		{v7.16b}, [BUF], #0x10
-	eor		v4.16b, v4.16b, v8.16b
-	ld1		{v8.16b}, [BUF], #0x10
-
-	eor		v1.16b, v1.16b, v5.16b
-	eor		v2.16b, v2.16b, v6.16b
-	eor		v3.16b, v3.16b, v7.16b
-	eor		v4.16b, v4.16b, v8.16b
-
-	cmp		LEN, #0x40
-	b.lt		less_64
-
-	if_will_cond_yield_neon
-	stp		q1, q2, [sp, #.Lframe_local_offset]
-	stp		q3, q4, [sp, #.Lframe_local_offset + 32]
-	do_cond_yield_neon
-	ldp		q1, q2, [sp, #.Lframe_local_offset]
-	ldp		q3, q4, [sp, #.Lframe_local_offset + 32]
-	ldr		qCONSTANT, [CONST]
-	movi		vzr.16b, #0
-	endif_yield_neon
-	b		loop_64
-
-less_64:		/* Folding cache line into 128bit */
-	ldr		qCONSTANT, [CONST, #16]
-
-	pmull2		v5.1q, v1.2d, vCONSTANT.2d
-	pmull		v1.1q, v1.1d, vCONSTANT.1d
-	eor		v1.16b, v1.16b, v5.16b
-	eor		v1.16b, v1.16b, v2.16b
-
-	pmull2		v5.1q, v1.2d, vCONSTANT.2d
-	pmull		v1.1q, v1.1d, vCONSTANT.1d
-	eor		v1.16b, v1.16b, v5.16b
-	eor		v1.16b, v1.16b, v3.16b
-
-	pmull2		v5.1q, v1.2d, vCONSTANT.2d
-	pmull		v1.1q, v1.1d, vCONSTANT.1d
-	eor		v1.16b, v1.16b, v5.16b
-	eor		v1.16b, v1.16b, v4.16b
-
-	cbz		LEN, fold_64
-
-loop_16:		/* Folding rest buffer into 128bit */
-	subs		LEN, LEN, #0x10
-
-	ld1		{v2.16b}, [BUF], #0x10
-	pmull2		v5.1q, v1.2d, vCONSTANT.2d
-	pmull		v1.1q, v1.1d, vCONSTANT.1d
-	eor		v1.16b, v1.16b, v5.16b
-	eor		v1.16b, v1.16b, v2.16b
-
-	b.ne		loop_16
-
-fold_64:
-	/* perform the last 64 bit fold, also adds 32 zeroes
-	 * to the input stream */
-	ext		v2.16b, v1.16b, v1.16b, #8
-	pmull2		v2.1q, v2.2d, vCONSTANT.2d
-	ext		v1.16b, v1.16b, vzr.16b, #8
-	eor		v1.16b, v1.16b, v2.16b
-
-	/* final 32-bit fold */
-	ldr		dCONSTANT, [CONST, #32]
-	ldr		d3, [CONST, #40]
-
-	ext		v2.16b, v1.16b, vzr.16b, #4
-	and		v1.16b, v1.16b, v3.16b
-	pmull		v1.1q, v1.1d, vCONSTANT.1d
-	eor		v1.16b, v1.16b, v2.16b
-
-	/* Finish up with the bit-reversed barrett reduction 64 ==> 32 bits */
-	ldr		qCONSTANT, [CONST, #48]
-
-	and		v2.16b, v1.16b, v3.16b
-	ext		v2.16b, vzr.16b, v2.16b, #8
-	pmull2		v2.1q, v2.2d, vCONSTANT.2d
-	and		v2.16b, v2.16b, v3.16b
-	pmull		v2.1q, v2.1d, vCONSTANT.1d
-	eor		v1.16b, v1.16b, v2.16b
-	mov		w0, v1.s[1]
-
-	frame_pop
-	ret
-ENDPROC(crc32_pmull_le)
-ENDPROC(crc32c_pmull_le)
-
-	.macro		__crc32, c
-0:	subs		x2, x2, #16
-	b.mi		8f
-	ldp		x3, x4, [x1], #16
-CPU_BE(	rev		x3, x3		)
-CPU_BE(	rev		x4, x4		)
-	crc32\c\()x	w0, w0, x3
-	crc32\c\()x	w0, w0, x4
-	b.ne		0b
-	ret
-
-8:	tbz		x2, #3, 4f
-	ldr		x3, [x1], #8
-CPU_BE(	rev		x3, x3		)
-	crc32\c\()x	w0, w0, x3
-4:	tbz		x2, #2, 2f
-	ldr		w3, [x1], #4
-CPU_BE(	rev		w3, w3		)
-	crc32\c\()w	w0, w0, w3
-2:	tbz		x2, #1, 1f
-	ldrh		w3, [x1], #2
-CPU_BE(	rev16		w3, w3		)
-	crc32\c\()h	w0, w0, w3
-1:	tbz		x2, #0, 0f
-	ldrb		w3, [x1]
-	crc32\c\()b	w0, w0, w3
-0:	ret
-	.endm
-
-	.align		5
-ENTRY(crc32_armv8_le)
-	__crc32
-ENDPROC(crc32_armv8_le)
-
-	.align		5
-ENTRY(crc32c_armv8_le)
-	__crc32		c
-ENDPROC(crc32c_armv8_le)

+ 0 - 244
arch/arm64/crypto/crc32-ce-glue.c

@@ -1,244 +0,0 @@
-/*
- * Accelerated CRC32(C) using arm64 NEON and Crypto Extensions instructions
- *
- * Copyright (C) 2016 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/cpufeature.h>
-#include <linux/crc32.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/string.h>
-
-#include <crypto/internal/hash.h>
-
-#include <asm/hwcap.h>
-#include <asm/neon.h>
-#include <asm/simd.h>
-#include <asm/unaligned.h>
-
-#define PMULL_MIN_LEN		64L	/* minimum size of buffer
-					 * for crc32_pmull_le_16 */
-#define SCALE_F			16L	/* size of NEON register */
-
-asmlinkage u32 crc32_pmull_le(const u8 buf[], u64 len, u32 init_crc);
-asmlinkage u32 crc32_armv8_le(u32 init_crc, const u8 buf[], size_t len);
-
-asmlinkage u32 crc32c_pmull_le(const u8 buf[], u64 len, u32 init_crc);
-asmlinkage u32 crc32c_armv8_le(u32 init_crc, const u8 buf[], size_t len);
-
-static u32 (*fallback_crc32)(u32 init_crc, const u8 buf[], size_t len);
-static u32 (*fallback_crc32c)(u32 init_crc, const u8 buf[], size_t len);
-
-static int crc32_pmull_cra_init(struct crypto_tfm *tfm)
-{
-	u32 *key = crypto_tfm_ctx(tfm);
-
-	*key = 0;
-	return 0;
-}
-
-static int crc32c_pmull_cra_init(struct crypto_tfm *tfm)
-{
-	u32 *key = crypto_tfm_ctx(tfm);
-
-	*key = ~0;
-	return 0;
-}
-
-static int crc32_pmull_setkey(struct crypto_shash *hash, const u8 *key,
-			      unsigned int keylen)
-{
-	u32 *mctx = crypto_shash_ctx(hash);
-
-	if (keylen != sizeof(u32)) {
-		crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN);
-		return -EINVAL;
-	}
-	*mctx = le32_to_cpup((__le32 *)key);
-	return 0;
-}
-
-static int crc32_pmull_init(struct shash_desc *desc)
-{
-	u32 *mctx = crypto_shash_ctx(desc->tfm);
-	u32 *crc = shash_desc_ctx(desc);
-
-	*crc = *mctx;
-	return 0;
-}
-
-static int crc32_update(struct shash_desc *desc, const u8 *data,
-			unsigned int length)
-{
-	u32 *crc = shash_desc_ctx(desc);
-
-	*crc = crc32_armv8_le(*crc, data, length);
-	return 0;
-}
-
-static int crc32c_update(struct shash_desc *desc, const u8 *data,
-			 unsigned int length)
-{
-	u32 *crc = shash_desc_ctx(desc);
-
-	*crc = crc32c_armv8_le(*crc, data, length);
-	return 0;
-}
-
-static int crc32_pmull_update(struct shash_desc *desc, const u8 *data,
-			 unsigned int length)
-{
-	u32 *crc = shash_desc_ctx(desc);
-	unsigned int l;
-
-	if ((u64)data % SCALE_F) {
-		l = min_t(u32, length, SCALE_F - ((u64)data % SCALE_F));
-
-		*crc = fallback_crc32(*crc, data, l);
-
-		data += l;
-		length -= l;
-	}
-
-	if (length >= PMULL_MIN_LEN && may_use_simd()) {
-		l = round_down(length, SCALE_F);
-
-		kernel_neon_begin();
-		*crc = crc32_pmull_le(data, l, *crc);
-		kernel_neon_end();
-
-		data += l;
-		length -= l;
-	}
-
-	if (length > 0)
-		*crc = fallback_crc32(*crc, data, length);
-
-	return 0;
-}
-
-static int crc32c_pmull_update(struct shash_desc *desc, const u8 *data,
-			 unsigned int length)
-{
-	u32 *crc = shash_desc_ctx(desc);
-	unsigned int l;
-
-	if ((u64)data % SCALE_F) {
-		l = min_t(u32, length, SCALE_F - ((u64)data % SCALE_F));
-
-		*crc = fallback_crc32c(*crc, data, l);
-
-		data += l;
-		length -= l;
-	}
-
-	if (length >= PMULL_MIN_LEN && may_use_simd()) {
-		l = round_down(length, SCALE_F);
-
-		kernel_neon_begin();
-		*crc = crc32c_pmull_le(data, l, *crc);
-		kernel_neon_end();
-
-		data += l;
-		length -= l;
-	}
-
-	if (length > 0) {
-		*crc = fallback_crc32c(*crc, data, length);
-	}
-
-	return 0;
-}
-
-static int crc32_pmull_final(struct shash_desc *desc, u8 *out)
-{
-	u32 *crc = shash_desc_ctx(desc);
-
-	put_unaligned_le32(*crc, out);
-	return 0;
-}
-
-static int crc32c_pmull_final(struct shash_desc *desc, u8 *out)
-{
-	u32 *crc = shash_desc_ctx(desc);
-
-	put_unaligned_le32(~*crc, out);
-	return 0;
-}
-
-static struct shash_alg crc32_pmull_algs[] = { {
-	.setkey			= crc32_pmull_setkey,
-	.init			= crc32_pmull_init,
-	.update			= crc32_update,
-	.final			= crc32_pmull_final,
-	.descsize		= sizeof(u32),
-	.digestsize		= sizeof(u32),
-
-	.base.cra_ctxsize	= sizeof(u32),
-	.base.cra_init		= crc32_pmull_cra_init,
-	.base.cra_name		= "crc32",
-	.base.cra_driver_name	= "crc32-arm64-ce",
-	.base.cra_priority	= 200,
-	.base.cra_flags		= CRYPTO_ALG_OPTIONAL_KEY,
-	.base.cra_blocksize	= 1,
-	.base.cra_module	= THIS_MODULE,
-}, {
-	.setkey			= crc32_pmull_setkey,
-	.init			= crc32_pmull_init,
-	.update			= crc32c_update,
-	.final			= crc32c_pmull_final,
-	.descsize		= sizeof(u32),
-	.digestsize		= sizeof(u32),
-
-	.base.cra_ctxsize	= sizeof(u32),
-	.base.cra_init		= crc32c_pmull_cra_init,
-	.base.cra_name		= "crc32c",
-	.base.cra_driver_name	= "crc32c-arm64-ce",
-	.base.cra_priority	= 200,
-	.base.cra_flags		= CRYPTO_ALG_OPTIONAL_KEY,
-	.base.cra_blocksize	= 1,
-	.base.cra_module	= THIS_MODULE,
-} };
-
-static int __init crc32_pmull_mod_init(void)
-{
-	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_PMULL)) {
-		crc32_pmull_algs[0].update = crc32_pmull_update;
-		crc32_pmull_algs[1].update = crc32c_pmull_update;
-
-		if (elf_hwcap & HWCAP_CRC32) {
-			fallback_crc32 = crc32_armv8_le;
-			fallback_crc32c = crc32c_armv8_le;
-		} else {
-			fallback_crc32 = crc32_le;
-			fallback_crc32c = __crc32c_le;
-		}
-	} else if (!(elf_hwcap & HWCAP_CRC32)) {
-		return -ENODEV;
-	}
-	return crypto_register_shashes(crc32_pmull_algs,
-				       ARRAY_SIZE(crc32_pmull_algs));
-}
-
-static void __exit crc32_pmull_mod_exit(void)
-{
-	crypto_unregister_shashes(crc32_pmull_algs,
-				  ARRAY_SIZE(crc32_pmull_algs));
-}
-
-static const struct cpu_feature crc32_cpu_feature[] = {
-	{ cpu_feature(CRC32) }, { cpu_feature(PMULL) }, { }
-};
-MODULE_DEVICE_TABLE(cpu, crc32_cpu_feature);
-
-module_init(crc32_pmull_mod_init);
-module_exit(crc32_pmull_mod_exit);
-
-MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
-MODULE_LICENSE("GPL v2");

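Removing the PMULL-based driver does not take crc32/crc32c away from in-kernel users; requests simply resolve to the best remaining implementation (the CRC32 instructions where available, otherwise the generic code). As an illustration only, not part of the commit, a caller of the library interface is unchanged:

#include <linux/types.h>
#include <linux/crc32c.h>

/* Hypothetical helper: CRC-32C of a buffer via the library interface. */
static u32 checksum_block(const void *data, unsigned int len)
{
	return crc32c(~0U, data, len);
}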
+ 239 - 75
arch/arm64/crypto/crct10dif-ce-core.S

@@ -80,7 +80,186 @@
 
 
 	vzr		.req	v13
 	vzr		.req	v13
 
 
-ENTRY(crc_t10dif_pmull)
+	ad		.req	v14
+	bd		.req	v10
+
+	k00_16		.req	v15
+	k32_48		.req	v16
+
+	t3		.req	v17
+	t4		.req	v18
+	t5		.req	v19
+	t6		.req	v20
+	t7		.req	v21
+	t8		.req	v22
+	t9		.req	v23
+
+	perm1		.req	v24
+	perm2		.req	v25
+	perm3		.req	v26
+	perm4		.req	v27
+
+	bd1		.req	v28
+	bd2		.req	v29
+	bd3		.req	v30
+	bd4		.req	v31
+
+	.macro		__pmull_init_p64
+	.endm
+
+	.macro		__pmull_pre_p64, bd
+	.endm
+
+	.macro		__pmull_init_p8
+	// k00_16 := 0x0000000000000000_000000000000ffff
+	// k32_48 := 0x00000000ffffffff_0000ffffffffffff
+	movi		k32_48.2d, #0xffffffff
+	mov		k32_48.h[2], k32_48.h[0]
+	ushr		k00_16.2d, k32_48.2d, #32
+
+	// prepare the permutation vectors
+	mov_q		x5, 0x080f0e0d0c0b0a09
+	movi		perm4.8b, #8
+	dup		perm1.2d, x5
+	eor		perm1.16b, perm1.16b, perm4.16b
+	ushr		perm2.2d, perm1.2d, #8
+	ushr		perm3.2d, perm1.2d, #16
+	ushr		perm4.2d, perm1.2d, #24
+	sli		perm2.2d, perm1.2d, #56
+	sli		perm3.2d, perm1.2d, #48
+	sli		perm4.2d, perm1.2d, #40
+	.endm
+
+	.macro		__pmull_pre_p8, bd
+	tbl		bd1.16b, {\bd\().16b}, perm1.16b
+	tbl		bd2.16b, {\bd\().16b}, perm2.16b
+	tbl		bd3.16b, {\bd\().16b}, perm3.16b
+	tbl		bd4.16b, {\bd\().16b}, perm4.16b
+	.endm
+
+__pmull_p8_core:
+.L__pmull_p8_core:
+	ext		t4.8b, ad.8b, ad.8b, #1			// A1
+	ext		t5.8b, ad.8b, ad.8b, #2			// A2
+	ext		t6.8b, ad.8b, ad.8b, #3			// A3
+
+	pmull		t4.8h, t4.8b, bd.8b			// F = A1*B
+	pmull		t8.8h, ad.8b, bd1.8b			// E = A*B1
+	pmull		t5.8h, t5.8b, bd.8b			// H = A2*B
+	pmull		t7.8h, ad.8b, bd2.8b			// G = A*B2
+	pmull		t6.8h, t6.8b, bd.8b			// J = A3*B
+	pmull		t9.8h, ad.8b, bd3.8b			// I = A*B3
+	pmull		t3.8h, ad.8b, bd4.8b			// K = A*B4
+	b		0f
+
+.L__pmull_p8_core2:
+	tbl		t4.16b, {ad.16b}, perm1.16b		// A1
+	tbl		t5.16b, {ad.16b}, perm2.16b		// A2
+	tbl		t6.16b, {ad.16b}, perm3.16b		// A3
+
+	pmull2		t4.8h, t4.16b, bd.16b			// F = A1*B
+	pmull2		t8.8h, ad.16b, bd1.16b			// E = A*B1
+	pmull2		t5.8h, t5.16b, bd.16b			// H = A2*B
+	pmull2		t7.8h, ad.16b, bd2.16b			// G = A*B2
+	pmull2		t6.8h, t6.16b, bd.16b			// J = A3*B
+	pmull2		t9.8h, ad.16b, bd3.16b			// I = A*B3
+	pmull2		t3.8h, ad.16b, bd4.16b			// K = A*B4
+
+0:	eor		t4.16b, t4.16b, t8.16b			// L = E + F
+	eor		t5.16b, t5.16b, t7.16b			// M = G + H
+	eor		t6.16b, t6.16b, t9.16b			// N = I + J
+
+	uzp1		t8.2d, t4.2d, t5.2d
+	uzp2		t4.2d, t4.2d, t5.2d
+	uzp1		t7.2d, t6.2d, t3.2d
+	uzp2		t6.2d, t6.2d, t3.2d
+
+	// t4 = (L) (P0 + P1) << 8
+	// t5 = (M) (P2 + P3) << 16
+	eor		t8.16b, t8.16b, t4.16b
+	and		t4.16b, t4.16b, k32_48.16b
+
+	// t6 = (N) (P4 + P5) << 24
+	// t7 = (K) (P6 + P7) << 32
+	eor		t7.16b, t7.16b, t6.16b
+	and		t6.16b, t6.16b, k00_16.16b
+
+	eor		t8.16b, t8.16b, t4.16b
+	eor		t7.16b, t7.16b, t6.16b
+
+	zip2		t5.2d, t8.2d, t4.2d
+	zip1		t4.2d, t8.2d, t4.2d
+	zip2		t3.2d, t7.2d, t6.2d
+	zip1		t6.2d, t7.2d, t6.2d
+
+	ext		t4.16b, t4.16b, t4.16b, #15
+	ext		t5.16b, t5.16b, t5.16b, #14
+	ext		t6.16b, t6.16b, t6.16b, #13
+	ext		t3.16b, t3.16b, t3.16b, #12
+
+	eor		t4.16b, t4.16b, t5.16b
+	eor		t6.16b, t6.16b, t3.16b
+	ret
+ENDPROC(__pmull_p8_core)
+
+	.macro		__pmull_p8, rq, ad, bd, i
+	.ifnc		\bd, v10
+	.err
+	.endif
+	mov		ad.16b, \ad\().16b
+	.ifb		\i
+	pmull		\rq\().8h, \ad\().8b, bd.8b		// D = A*B
+	.else
+	pmull2		\rq\().8h, \ad\().16b, bd.16b		// D = A*B
+	.endif
+
+	bl		.L__pmull_p8_core\i
+
+	eor		\rq\().16b, \rq\().16b, t4.16b
+	eor		\rq\().16b, \rq\().16b, t6.16b
+	.endm
+
+	.macro		fold64, p, reg1, reg2
+	ldp		q11, q12, [arg2], #0x20
+
+	__pmull_\p	v8, \reg1, v10, 2
+	__pmull_\p	\reg1, \reg1, v10
+
+CPU_LE(	rev64		v11.16b, v11.16b		)
+CPU_LE(	rev64		v12.16b, v12.16b		)
+
+	__pmull_\p	v9, \reg2, v10, 2
+	__pmull_\p	\reg2, \reg2, v10
+
+CPU_LE(	ext		v11.16b, v11.16b, v11.16b, #8	)
+CPU_LE(	ext		v12.16b, v12.16b, v12.16b, #8	)
+
+	eor		\reg1\().16b, \reg1\().16b, v8.16b
+	eor		\reg2\().16b, \reg2\().16b, v9.16b
+	eor		\reg1\().16b, \reg1\().16b, v11.16b
+	eor		\reg2\().16b, \reg2\().16b, v12.16b
+	.endm
+
+	.macro		fold16, p, reg, rk
+	__pmull_\p	v8, \reg, v10
+	__pmull_\p	\reg, \reg, v10, 2
+	.ifnb		\rk
+	ldr_l		q10, \rk, x8
+	__pmull_pre_\p	v10
+	.endif
+	eor		v7.16b, v7.16b, v8.16b
+	eor		v7.16b, v7.16b, \reg\().16b
+	.endm
+
+	.macro		__pmull_p64, rd, rn, rm, n
+	.ifb		\n
+	pmull		\rd\().1q, \rn\().1d, \rm\().1d
+	.else
+	pmull2		\rd\().1q, \rn\().2d, \rm\().2d
+	.endif
+	.endm
+
+	.macro		crc_t10dif_pmull, p
 	frame_push	3, 128
 	frame_push	3, 128
 
 
 	mov		arg1_low32, w0
 	mov		arg1_low32, w0
@@ -89,6 +268,8 @@ ENTRY(crc_t10dif_pmull)
 
 
 	movi		vzr.16b, #0		// init zero register
 	movi		vzr.16b, #0		// init zero register
 
 
+	__pmull_init_\p
+
 	// adjust the 16-bit initial_crc value, scale it to 32 bits
 	// adjust the 16-bit initial_crc value, scale it to 32 bits
 	lsl		arg1_low32, arg1_low32, #16
 	lsl		arg1_low32, arg1_low32, #16
 
 
@@ -96,7 +277,7 @@ ENTRY(crc_t10dif_pmull)
 	cmp		arg3, #256
 	cmp		arg3, #256
 
 
 	// for sizes less than 128, we can't fold 64B at a time...
 	// for sizes less than 128, we can't fold 64B at a time...
-	b.lt		_less_than_128
+	b.lt		.L_less_than_128_\@
 
 
 	// load the initial crc value
 	// load the initial crc value
 	// crc value does not need to be byte-reflected, but it needs
 	// crc value does not need to be byte-reflected, but it needs
@@ -137,6 +318,7 @@ CPU_LE(	ext		v7.16b, v7.16b, v7.16b, #8	)
 	ldr_l		q10, rk3, x8	// xmm10 has rk3 and rk4
 	ldr_l		q10, rk3, x8	// xmm10 has rk3 and rk4
 					// type of pmull instruction
 					// type of pmull instruction
 					// will determine which constant to use
 					// will determine which constant to use
+	__pmull_pre_\p	v10
 
 
 	//
 	//
 	// we subtract 256 instead of 128 to save one instruction from the loop
 	// we subtract 256 instead of 128 to save one instruction from the loop
@@ -147,41 +329,19 @@ CPU_LE(	ext		v7.16b, v7.16b, v7.16b, #8	)
 	// buffer. The _fold_64_B_loop will fold 64B at a time
 	// buffer. The _fold_64_B_loop will fold 64B at a time
 	// until we have 64+y Bytes of buffer
 	// until we have 64+y Bytes of buffer
 
 
-
 	// fold 64B at a time. This section of the code folds 4 vector
 	// fold 64B at a time. This section of the code folds 4 vector
 	// registers in parallel
 	// registers in parallel
-_fold_64_B_loop:
+.L_fold_64_B_loop_\@:
 
 
-	.macro		fold64, reg1, reg2
-	ldp		q11, q12, [arg2], #0x20
-
-	pmull2		v8.1q, \reg1\().2d, v10.2d
-	pmull		\reg1\().1q, \reg1\().1d, v10.1d
-
-CPU_LE(	rev64		v11.16b, v11.16b		)
-CPU_LE(	rev64		v12.16b, v12.16b		)
-
-	pmull2		v9.1q, \reg2\().2d, v10.2d
-	pmull		\reg2\().1q, \reg2\().1d, v10.1d
-
-CPU_LE(	ext		v11.16b, v11.16b, v11.16b, #8	)
-CPU_LE(	ext		v12.16b, v12.16b, v12.16b, #8	)
-
-	eor		\reg1\().16b, \reg1\().16b, v8.16b
-	eor		\reg2\().16b, \reg2\().16b, v9.16b
-	eor		\reg1\().16b, \reg1\().16b, v11.16b
-	eor		\reg2\().16b, \reg2\().16b, v12.16b
-	.endm
-
-	fold64		v0, v1
-	fold64		v2, v3
-	fold64		v4, v5
-	fold64		v6, v7
+	fold64		\p, v0, v1
+	fold64		\p, v2, v3
+	fold64		\p, v4, v5
+	fold64		\p, v6, v7
 
 
 	subs		arg3, arg3, #128
 	subs		arg3, arg3, #128
 
 
 	// check if there is another 64B in the buffer to be able to fold
 	// check if there is another 64B in the buffer to be able to fold
-	b.lt		_fold_64_B_end
+	b.lt		.L_fold_64_B_end_\@
 
 
 	if_will_cond_yield_neon
 	if_will_cond_yield_neon
 	stp		q0, q1, [sp, #.Lframe_local_offset]
 	stp		q0, q1, [sp, #.Lframe_local_offset]
@@ -195,11 +355,13 @@ CPU_LE(	ext		v12.16b, v12.16b, v12.16b, #8	)
 	ldp		q6, q7, [sp, #.Lframe_local_offset + 96]
 	ldp		q6, q7, [sp, #.Lframe_local_offset + 96]
 	ldr_l		q10, rk3, x8
 	ldr_l		q10, rk3, x8
 	movi		vzr.16b, #0		// init zero register
 	movi		vzr.16b, #0		// init zero register
+	__pmull_init_\p
+	__pmull_pre_\p	v10
 	endif_yield_neon
 	endif_yield_neon
 
 
-	b		_fold_64_B_loop
+	b		.L_fold_64_B_loop_\@
 
 
-_fold_64_B_end:
+.L_fold_64_B_end_\@:
 	// at this point, the buffer pointer is pointing at the last y Bytes
 	// at this point, the buffer pointer is pointing at the last y Bytes
 	// of the buffer the 64B of folded data is in 4 of the vector
 	// of the buffer the 64B of folded data is in 4 of the vector
 	// registers: v0, v1, v2, v3
 	// registers: v0, v1, v2, v3
@@ -208,38 +370,29 @@ _fold_64_B_end:
 	// constants
 	// constants
 
 
 	ldr_l		q10, rk9, x8
 	ldr_l		q10, rk9, x8
+	__pmull_pre_\p	v10
 
 
-	.macro		fold16, reg, rk
-	pmull		v8.1q, \reg\().1d, v10.1d
-	pmull2		\reg\().1q, \reg\().2d, v10.2d
-	.ifnb		\rk
-	ldr_l		q10, \rk, x8
-	.endif
-	eor		v7.16b, v7.16b, v8.16b
-	eor		v7.16b, v7.16b, \reg\().16b
-	.endm
-
-	fold16		v0, rk11
-	fold16		v1, rk13
-	fold16		v2, rk15
-	fold16		v3, rk17
-	fold16		v4, rk19
-	fold16		v5, rk1
-	fold16		v6
+	fold16		\p, v0, rk11
+	fold16		\p, v1, rk13
+	fold16		\p, v2, rk15
+	fold16		\p, v3, rk17
+	fold16		\p, v4, rk19
+	fold16		\p, v5, rk1
+	fold16		\p, v6
 
 
 	// instead of 64, we add 48 to the loop counter to save 1 instruction
 	// instead of 64, we add 48 to the loop counter to save 1 instruction
 	// from the loop instead of a cmp instruction, we use the negative
 	// from the loop instead of a cmp instruction, we use the negative
 	// flag with the jl instruction
 	// flag with the jl instruction
 	adds		arg3, arg3, #(128-16)
 	adds		arg3, arg3, #(128-16)
-	b.lt		_final_reduction_for_128
+	b.lt		.L_final_reduction_for_128_\@
 
 	// now we have 16+y bytes left to reduce. 16 Bytes is in register v7
 	// and the rest is in memory. We can fold 16 bytes at a time if y>=16
 	// continue folding 16B at a time
 
-_16B_reduction_loop:
-	pmull		v8.1q, v7.1d, v10.1d
-	pmull2		v7.1q, v7.2d, v10.2d
+.L_16B_reduction_loop_\@:
+	__pmull_\p	v8, v7, v10
+	__pmull_\p	v7, v7, v10, 2
 	eor		v7.16b, v7.16b, v8.16b
 
 	ldr		q0, [arg2], #16
@@ -251,22 +404,22 @@ CPU_LE(	ext		v0.16b, v0.16b, v0.16b, #8	)
 	// instead of a cmp instruction, we utilize the flags with the
 	// jge instruction equivalent of: cmp arg3, 16-16
 	// check if there is any more 16B in the buffer to be able to fold
-	b.ge		_16B_reduction_loop
+	b.ge		.L_16B_reduction_loop_\@
 
 	// now we have 16+z bytes left to reduce, where 0<= z < 16.
 	// first, we reduce the data in the xmm7 register
 
-_final_reduction_for_128:
+.L_final_reduction_for_128_\@:
 	// check if any more data to fold. If not, compute the CRC of
 	// the final 128 bits
 	adds		arg3, arg3, #16
-	b.eq		_128_done
+	b.eq		.L_128_done_\@
 
 	// here we are getting data that is less than 16 bytes.
 	// since we know that there was data before the pointer, we can
 	// offset the input pointer before the actual point, to receive
 	// exactly 16 bytes. after that the registers need to be adjusted.
-_get_last_two_regs:
+.L_get_last_two_regs_\@:
 	add		arg2, arg2, arg3
 	ldr		q1, [arg2, #-16]
 CPU_LE(	rev64		v1.16b, v1.16b			)
@@ -291,47 +444,48 @@ CPU_LE(	ext		v1.16b, v1.16b, v1.16b, #8	)
 	bsl		v0.16b, v2.16b, v1.16b
 
 	// fold 16 Bytes
-	pmull		v8.1q, v7.1d, v10.1d
-	pmull2		v7.1q, v7.2d, v10.2d
+	__pmull_\p	v8, v7, v10
+	__pmull_\p	v7, v7, v10, 2
 	eor		v7.16b, v7.16b, v8.16b
 	eor		v7.16b, v7.16b, v0.16b
 
-_128_done:
+.L_128_done_\@:
 	// compute crc of a 128-bit value
 	ldr_l		q10, rk5, x8		// rk5 and rk6 in xmm10
+	__pmull_pre_\p	v10
 
 	// 64b fold
 	ext		v0.16b, vzr.16b, v7.16b, #8
 	mov		v7.d[0], v7.d[1]
-	pmull		v7.1q, v7.1d, v10.1d
+	__pmull_\p	v7, v7, v10
 	eor		v7.16b, v7.16b, v0.16b
 
 	// 32b fold
 	ext		v0.16b, v7.16b, vzr.16b, #4
 	mov		v7.s[3], vzr.s[0]
-	pmull2		v0.1q, v0.2d, v10.2d
+	__pmull_\p	v0, v0, v10, 2
 	eor		v7.16b, v7.16b, v0.16b
 
 	// barrett reduction
-_barrett:
 	ldr_l		q10, rk7, x8
+	__pmull_pre_\p	v10
 	mov		v0.d[0], v7.d[1]
 
-	pmull		v0.1q, v0.1d, v10.1d
+	__pmull_\p	v0, v0, v10
 	ext		v0.16b, vzr.16b, v0.16b, #12
-	pmull2		v0.1q, v0.2d, v10.2d
+	__pmull_\p	v0, v0, v10, 2
 	ext		v0.16b, vzr.16b, v0.16b, #12
 	eor		v7.16b, v7.16b, v0.16b
 	mov		w0, v7.s[1]
 
-_cleanup:
+.L_cleanup_\@:
 	// scale the result back to 16 bits
 	lsr		x0, x0, #16
 	frame_pop
 	ret
 
-_less_than_128:
-	cbz		arg3, _cleanup
+.L_less_than_128_\@:
+	cbz		arg3, .L_cleanup_\@
 
 	movi		v0.16b, #0
 	mov		v0.s[3], arg1_low32	// get the initial crc value
@@ -342,20 +496,21 @@ CPU_LE(	ext		v7.16b, v7.16b, v7.16b, #8	)
 	eor		v7.16b, v7.16b, v0.16b	// xor the initial crc value
 
 	cmp		arg3, #16
-	b.eq		_128_done		// exactly 16 left
-	b.lt		_less_than_16_left
+	b.eq		.L_128_done_\@		// exactly 16 left
+	b.lt		.L_less_than_16_left_\@
 
 	ldr_l		q10, rk1, x8		// rk1 and rk2 in xmm10
+	__pmull_pre_\p	v10
 
 	// update the counter. subtract 32 instead of 16 to save one
 	// instruction from the loop
 	subs		arg3, arg3, #32
-	b.ge		_16B_reduction_loop
+	b.ge		.L_16B_reduction_loop_\@
 
 	add		arg3, arg3, #16
-	b		_get_last_two_regs
+	b		.L_get_last_two_regs_\@
 
-_less_than_16_left:
+.L_less_than_16_left_\@:
 	// shl r9, 4
 	adr_l		x0, tbl_shf_table + 16
 	sub		x0, x0, arg3
@@ -363,8 +518,17 @@ _less_than_16_left:
 	movi		v9.16b, #0x80
 	eor		v0.16b, v0.16b, v9.16b
 	tbl		v7.16b, {v7.16b}, v0.16b
-	b		_128_done
-ENDPROC(crc_t10dif_pmull)
+	b		.L_128_done_\@
+	.endm
+
+ENTRY(crc_t10dif_pmull_p8)
+	crc_t10dif_pmull	p8
+ENDPROC(crc_t10dif_pmull_p8)
+
+	.align		5
+ENTRY(crc_t10dif_pmull_p64)
+	crc_t10dif_pmull	p64
+ENDPROC(crc_t10dif_pmull_p64)
 
 // precomputed constants
 // these constants are precomputed from the poly:

+ 12 - 2
arch/arm64/crypto/crct10dif-ce-glue.c

@@ -22,7 +22,10 @@
 
 #define CRC_T10DIF_PMULL_CHUNK_SIZE	16U
 
-asmlinkage u16 crc_t10dif_pmull(u16 init_crc, const u8 buf[], u64 len);
+asmlinkage u16 crc_t10dif_pmull_p64(u16 init_crc, const u8 buf[], u64 len);
+asmlinkage u16 crc_t10dif_pmull_p8(u16 init_crc, const u8 buf[], u64 len);
+
+static u16 (*crc_t10dif_pmull)(u16 init_crc, const u8 buf[], u64 len);
 
 static int crct10dif_init(struct shash_desc *desc)
 {
@@ -85,6 +88,11 @@ static struct shash_alg crc_t10dif_alg = {
 
 static int __init crc_t10dif_mod_init(void)
 {
+	if (elf_hwcap & HWCAP_PMULL)
+		crc_t10dif_pmull = crc_t10dif_pmull_p64;
+	else
+		crc_t10dif_pmull = crc_t10dif_pmull_p8;
+
 	return crypto_register_shash(&crc_t10dif_alg);
 }
 
@@ -93,8 +101,10 @@ static void __exit crc_t10dif_mod_exit(void)
 	crypto_unregister_shash(&crc_t10dif_alg);
 }
 
-module_cpu_feature_match(PMULL, crc_t10dif_mod_init);
+module_cpu_feature_match(ASIMD, crc_t10dif_mod_init);
 module_exit(crc_t10dif_mod_exit);
 
 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
 MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("crct10dif");
+MODULE_ALIAS_CRYPTO("crct10dif-arm64-ce");

+ 0 - 352
arch/arm64/crypto/speck-neon-core.S

@@ -1,352 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * ARM64 NEON-accelerated implementation of Speck128-XTS and Speck64-XTS
- *
- * Copyright (c) 2018 Google, Inc
- *
- * Author: Eric Biggers <ebiggers@google.com>
- */
-
-#include <linux/linkage.h>
-
-	.text
-
-	// arguments
-	ROUND_KEYS	.req	x0	// const {u64,u32} *round_keys
-	NROUNDS		.req	w1	// int nrounds
-	NROUNDS_X	.req	x1
-	DST		.req	x2	// void *dst
-	SRC		.req	x3	// const void *src
-	NBYTES		.req	w4	// unsigned int nbytes
-	TWEAK		.req	x5	// void *tweak
-
-	// registers which hold the data being encrypted/decrypted
-	// (underscores avoid a naming collision with ARM64 registers x0-x3)
-	X_0		.req	v0
-	Y_0		.req	v1
-	X_1		.req	v2
-	Y_1		.req	v3
-	X_2		.req	v4
-	Y_2		.req	v5
-	X_3		.req	v6
-	Y_3		.req	v7
-
-	// the round key, duplicated in all lanes
-	ROUND_KEY	.req	v8
-
-	// index vector for tbl-based 8-bit rotates
-	ROTATE_TABLE	.req	v9
-	ROTATE_TABLE_Q	.req	q9
-
-	// temporary registers
-	TMP0		.req	v10
-	TMP1		.req	v11
-	TMP2		.req	v12
-	TMP3		.req	v13
-
-	// multiplication table for updating XTS tweaks
-	GFMUL_TABLE	.req	v14
-	GFMUL_TABLE_Q	.req	q14
-
-	// next XTS tweak value(s)
-	TWEAKV_NEXT	.req	v15
-
-	// XTS tweaks for the blocks currently being encrypted/decrypted
-	TWEAKV0		.req	v16
-	TWEAKV1		.req	v17
-	TWEAKV2		.req	v18
-	TWEAKV3		.req	v19
-	TWEAKV4		.req	v20
-	TWEAKV5		.req	v21
-	TWEAKV6		.req	v22
-	TWEAKV7		.req	v23
-
-	.align		4
-.Lror64_8_table:
-	.octa		0x080f0e0d0c0b0a090007060504030201
-.Lror32_8_table:
-	.octa		0x0c0f0e0d080b0a090407060500030201
-.Lrol64_8_table:
-	.octa		0x0e0d0c0b0a09080f0605040302010007
-.Lrol32_8_table:
-	.octa		0x0e0d0c0f0a09080b0605040702010003
-.Lgf128mul_table:
-	.octa		0x00000000000000870000000000000001
-.Lgf64mul_table:
-	.octa		0x0000000000000000000000002d361b00
-
-/*
- * _speck_round_128bytes() - Speck encryption round on 128 bytes at a time
- *
- * Do one Speck encryption round on the 128 bytes (8 blocks for Speck128, 16 for
- * Speck64) stored in X0-X3 and Y0-Y3, using the round key stored in all lanes
- * of ROUND_KEY.  'n' is the lane size: 64 for Speck128, or 32 for Speck64.
- * 'lanes' is the lane specifier: "2d" for Speck128 or "4s" for Speck64.
- */
-.macro _speck_round_128bytes	n, lanes
-
-	// x = ror(x, 8)
-	tbl		X_0.16b, {X_0.16b}, ROTATE_TABLE.16b
-	tbl		X_1.16b, {X_1.16b}, ROTATE_TABLE.16b
-	tbl		X_2.16b, {X_2.16b}, ROTATE_TABLE.16b
-	tbl		X_3.16b, {X_3.16b}, ROTATE_TABLE.16b
-
-	// x += y
-	add		X_0.\lanes, X_0.\lanes, Y_0.\lanes
-	add		X_1.\lanes, X_1.\lanes, Y_1.\lanes
-	add		X_2.\lanes, X_2.\lanes, Y_2.\lanes
-	add		X_3.\lanes, X_3.\lanes, Y_3.\lanes
-
-	// x ^= k
-	eor		X_0.16b, X_0.16b, ROUND_KEY.16b
-	eor		X_1.16b, X_1.16b, ROUND_KEY.16b
-	eor		X_2.16b, X_2.16b, ROUND_KEY.16b
-	eor		X_3.16b, X_3.16b, ROUND_KEY.16b
-
-	// y = rol(y, 3)
-	shl		TMP0.\lanes, Y_0.\lanes, #3
-	shl		TMP1.\lanes, Y_1.\lanes, #3
-	shl		TMP2.\lanes, Y_2.\lanes, #3
-	shl		TMP3.\lanes, Y_3.\lanes, #3
-	sri		TMP0.\lanes, Y_0.\lanes, #(\n - 3)
-	sri		TMP1.\lanes, Y_1.\lanes, #(\n - 3)
-	sri		TMP2.\lanes, Y_2.\lanes, #(\n - 3)
-	sri		TMP3.\lanes, Y_3.\lanes, #(\n - 3)
-
-	// y ^= x
-	eor		Y_0.16b, TMP0.16b, X_0.16b
-	eor		Y_1.16b, TMP1.16b, X_1.16b
-	eor		Y_2.16b, TMP2.16b, X_2.16b
-	eor		Y_3.16b, TMP3.16b, X_3.16b
-.endm
-
-/*
- * _speck_unround_128bytes() - Speck decryption round on 128 bytes at a time
- *
- * This is the inverse of _speck_round_128bytes().
- */
-.macro _speck_unround_128bytes	n, lanes
-
-	// y ^= x
-	eor		TMP0.16b, Y_0.16b, X_0.16b
-	eor		TMP1.16b, Y_1.16b, X_1.16b
-	eor		TMP2.16b, Y_2.16b, X_2.16b
-	eor		TMP3.16b, Y_3.16b, X_3.16b
-
-	// y = ror(y, 3)
-	ushr		Y_0.\lanes, TMP0.\lanes, #3
-	ushr		Y_1.\lanes, TMP1.\lanes, #3
-	ushr		Y_2.\lanes, TMP2.\lanes, #3
-	ushr		Y_3.\lanes, TMP3.\lanes, #3
-	sli		Y_0.\lanes, TMP0.\lanes, #(\n - 3)
-	sli		Y_1.\lanes, TMP1.\lanes, #(\n - 3)
-	sli		Y_2.\lanes, TMP2.\lanes, #(\n - 3)
-	sli		Y_3.\lanes, TMP3.\lanes, #(\n - 3)
-
-	// x ^= k
-	eor		X_0.16b, X_0.16b, ROUND_KEY.16b
-	eor		X_1.16b, X_1.16b, ROUND_KEY.16b
-	eor		X_2.16b, X_2.16b, ROUND_KEY.16b
-	eor		X_3.16b, X_3.16b, ROUND_KEY.16b
-
-	// x -= y
-	sub		X_0.\lanes, X_0.\lanes, Y_0.\lanes
-	sub		X_1.\lanes, X_1.\lanes, Y_1.\lanes
-	sub		X_2.\lanes, X_2.\lanes, Y_2.\lanes
-	sub		X_3.\lanes, X_3.\lanes, Y_3.\lanes
-
-	// x = rol(x, 8)
-	tbl		X_0.16b, {X_0.16b}, ROTATE_TABLE.16b
-	tbl		X_1.16b, {X_1.16b}, ROTATE_TABLE.16b
-	tbl		X_2.16b, {X_2.16b}, ROTATE_TABLE.16b
-	tbl		X_3.16b, {X_3.16b}, ROTATE_TABLE.16b
-.endm
-
-.macro _next_xts_tweak	next, cur, tmp, n
-.if \n == 64
-	/*
-	 * Calculate the next tweak by multiplying the current one by x,
-	 * modulo p(x) = x^128 + x^7 + x^2 + x + 1.
-	 */
-	sshr		\tmp\().2d, \cur\().2d, #63
-	and		\tmp\().16b, \tmp\().16b, GFMUL_TABLE.16b
-	shl		\next\().2d, \cur\().2d, #1
-	ext		\tmp\().16b, \tmp\().16b, \tmp\().16b, #8
-	eor		\next\().16b, \next\().16b, \tmp\().16b
-.else
-	/*
-	 * Calculate the next two tweaks by multiplying the current ones by x^2,
-	 * modulo p(x) = x^64 + x^4 + x^3 + x + 1.
-	 */
-	ushr		\tmp\().2d, \cur\().2d, #62
-	shl		\next\().2d, \cur\().2d, #2
-	tbl		\tmp\().16b, {GFMUL_TABLE.16b}, \tmp\().16b
-	eor		\next\().16b, \next\().16b, \tmp\().16b
-.endif
-.endm
-
-/*
- * _speck_xts_crypt() - Speck-XTS encryption/decryption
- *
- * Encrypt or decrypt NBYTES bytes of data from the SRC buffer to the DST buffer
- * using Speck-XTS, specifically the variant with a block size of '2n' and round
- * count given by NROUNDS.  The expanded round keys are given in ROUND_KEYS, and
- * the current XTS tweak value is given in TWEAK.  It's assumed that NBYTES is a
- * nonzero multiple of 128.
- */
-.macro _speck_xts_crypt	n, lanes, decrypting
-
-	/*
-	 * If decrypting, modify the ROUND_KEYS parameter to point to the last
-	 * round key rather than the first, since for decryption the round keys
-	 * are used in reverse order.
-	 */
-.if \decrypting
-	mov		NROUNDS, NROUNDS	/* zero the high 32 bits */
-.if \n == 64
-	add		ROUND_KEYS, ROUND_KEYS, NROUNDS_X, lsl #3
-	sub		ROUND_KEYS, ROUND_KEYS, #8
-.else
-	add		ROUND_KEYS, ROUND_KEYS, NROUNDS_X, lsl #2
-	sub		ROUND_KEYS, ROUND_KEYS, #4
-.endif
-.endif
-
-	// Load the index vector for tbl-based 8-bit rotates
-.if \decrypting
-	ldr		ROTATE_TABLE_Q, .Lrol\n\()_8_table
-.else
-	ldr		ROTATE_TABLE_Q, .Lror\n\()_8_table
-.endif
-
-	// One-time XTS preparation
-.if \n == 64
-	// Load first tweak
-	ld1		{TWEAKV0.16b}, [TWEAK]
-
-	// Load GF(2^128) multiplication table
-	ldr		GFMUL_TABLE_Q, .Lgf128mul_table
-.else
-	// Load first tweak
-	ld1		{TWEAKV0.8b}, [TWEAK]
-
-	// Load GF(2^64) multiplication table
-	ldr		GFMUL_TABLE_Q, .Lgf64mul_table
-
-	// Calculate second tweak, packing it together with the first
-	ushr		TMP0.2d, TWEAKV0.2d, #63
-	shl		TMP1.2d, TWEAKV0.2d, #1
-	tbl		TMP0.8b, {GFMUL_TABLE.16b}, TMP0.8b
-	eor		TMP0.8b, TMP0.8b, TMP1.8b
-	mov		TWEAKV0.d[1], TMP0.d[0]
-.endif
-
-.Lnext_128bytes_\@:
-
-	// Calculate XTS tweaks for next 128 bytes
-	_next_xts_tweak	TWEAKV1, TWEAKV0, TMP0, \n
-	_next_xts_tweak	TWEAKV2, TWEAKV1, TMP0, \n
-	_next_xts_tweak	TWEAKV3, TWEAKV2, TMP0, \n
-	_next_xts_tweak	TWEAKV4, TWEAKV3, TMP0, \n
-	_next_xts_tweak	TWEAKV5, TWEAKV4, TMP0, \n
-	_next_xts_tweak	TWEAKV6, TWEAKV5, TMP0, \n
-	_next_xts_tweak	TWEAKV7, TWEAKV6, TMP0, \n
-	_next_xts_tweak	TWEAKV_NEXT, TWEAKV7, TMP0, \n
-
-	// Load the next source blocks into {X,Y}[0-3]
-	ld1		{X_0.16b-Y_1.16b}, [SRC], #64
-	ld1		{X_2.16b-Y_3.16b}, [SRC], #64
-
-	// XOR the source blocks with their XTS tweaks
-	eor		TMP0.16b, X_0.16b, TWEAKV0.16b
-	eor		Y_0.16b,  Y_0.16b, TWEAKV1.16b
-	eor		TMP1.16b, X_1.16b, TWEAKV2.16b
-	eor		Y_1.16b,  Y_1.16b, TWEAKV3.16b
-	eor		TMP2.16b, X_2.16b, TWEAKV4.16b
-	eor		Y_2.16b,  Y_2.16b, TWEAKV5.16b
-	eor		TMP3.16b, X_3.16b, TWEAKV6.16b
-	eor		Y_3.16b,  Y_3.16b, TWEAKV7.16b
-
-	/*
-	 * De-interleave the 'x' and 'y' elements of each block, i.e. make it so
-	 * that the X[0-3] registers contain only the second halves of blocks,
-	 * and the Y[0-3] registers contain only the first halves of blocks.
-	 * (Speck uses the order (y, x) rather than the more intuitive (x, y).)
-	 */
-	uzp2		X_0.\lanes, TMP0.\lanes, Y_0.\lanes
-	uzp1		Y_0.\lanes, TMP0.\lanes, Y_0.\lanes
-	uzp2		X_1.\lanes, TMP1.\lanes, Y_1.\lanes
-	uzp1		Y_1.\lanes, TMP1.\lanes, Y_1.\lanes
-	uzp2		X_2.\lanes, TMP2.\lanes, Y_2.\lanes
-	uzp1		Y_2.\lanes, TMP2.\lanes, Y_2.\lanes
-	uzp2		X_3.\lanes, TMP3.\lanes, Y_3.\lanes
-	uzp1		Y_3.\lanes, TMP3.\lanes, Y_3.\lanes
-
-	// Do the cipher rounds
-	mov		x6, ROUND_KEYS
-	mov		w7, NROUNDS
-.Lnext_round_\@:
-.if \decrypting
-	ld1r		{ROUND_KEY.\lanes}, [x6]
-	sub		x6, x6, #( \n / 8 )
-	_speck_unround_128bytes	\n, \lanes
-.else
-	ld1r		{ROUND_KEY.\lanes}, [x6], #( \n / 8 )
-	_speck_round_128bytes	\n, \lanes
-.endif
-	subs		w7, w7, #1
-	bne		.Lnext_round_\@
-
-	// Re-interleave the 'x' and 'y' elements of each block
-	zip1		TMP0.\lanes, Y_0.\lanes, X_0.\lanes
-	zip2		Y_0.\lanes,  Y_0.\lanes, X_0.\lanes
-	zip1		TMP1.\lanes, Y_1.\lanes, X_1.\lanes
-	zip2		Y_1.\lanes,  Y_1.\lanes, X_1.\lanes
-	zip1		TMP2.\lanes, Y_2.\lanes, X_2.\lanes
-	zip2		Y_2.\lanes,  Y_2.\lanes, X_2.\lanes
-	zip1		TMP3.\lanes, Y_3.\lanes, X_3.\lanes
-	zip2		Y_3.\lanes,  Y_3.\lanes, X_3.\lanes
-
-	// XOR the encrypted/decrypted blocks with the tweaks calculated earlier
-	eor		X_0.16b, TMP0.16b, TWEAKV0.16b
-	eor		Y_0.16b, Y_0.16b,  TWEAKV1.16b
-	eor		X_1.16b, TMP1.16b, TWEAKV2.16b
-	eor		Y_1.16b, Y_1.16b,  TWEAKV3.16b
-	eor		X_2.16b, TMP2.16b, TWEAKV4.16b
-	eor		Y_2.16b, Y_2.16b,  TWEAKV5.16b
-	eor		X_3.16b, TMP3.16b, TWEAKV6.16b
-	eor		Y_3.16b, Y_3.16b,  TWEAKV7.16b
-	mov		TWEAKV0.16b, TWEAKV_NEXT.16b
-
-	// Store the ciphertext in the destination buffer
-	st1		{X_0.16b-Y_1.16b}, [DST], #64
-	st1		{X_2.16b-Y_3.16b}, [DST], #64
-
-	// Continue if there are more 128-byte chunks remaining
-	subs		NBYTES, NBYTES, #128
-	bne		.Lnext_128bytes_\@
-
-	// Store the next tweak and return
-.if \n == 64
-	st1		{TWEAKV_NEXT.16b}, [TWEAK]
-.else
-	st1		{TWEAKV_NEXT.8b}, [TWEAK]
-.endif
-	ret
-.endm
-
-ENTRY(speck128_xts_encrypt_neon)
-	_speck_xts_crypt	n=64, lanes=2d, decrypting=0
-ENDPROC(speck128_xts_encrypt_neon)
-
-ENTRY(speck128_xts_decrypt_neon)
-	_speck_xts_crypt	n=64, lanes=2d, decrypting=1
-ENDPROC(speck128_xts_decrypt_neon)
-
-ENTRY(speck64_xts_encrypt_neon)
-	_speck_xts_crypt	n=32, lanes=4s, decrypting=0
-ENDPROC(speck64_xts_encrypt_neon)
-
-ENTRY(speck64_xts_decrypt_neon)
-	_speck_xts_crypt	n=32, lanes=4s, decrypting=1
-ENDPROC(speck64_xts_decrypt_neon)

+ 0 - 282
arch/arm64/crypto/speck-neon-glue.c

@@ -1,282 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * NEON-accelerated implementation of Speck128-XTS and Speck64-XTS
- * (64-bit version; based on the 32-bit version)
- *
- * Copyright (c) 2018 Google, Inc
- */
-
-#include <asm/hwcap.h>
-#include <asm/neon.h>
-#include <asm/simd.h>
-#include <crypto/algapi.h>
-#include <crypto/gf128mul.h>
-#include <crypto/internal/skcipher.h>
-#include <crypto/speck.h>
-#include <crypto/xts.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-
-/* The assembly functions only handle multiples of 128 bytes */
-#define SPECK_NEON_CHUNK_SIZE	128
-
-/* Speck128 */
-
-struct speck128_xts_tfm_ctx {
-	struct speck128_tfm_ctx main_key;
-	struct speck128_tfm_ctx tweak_key;
-};
-
-asmlinkage void speck128_xts_encrypt_neon(const u64 *round_keys, int nrounds,
-					  void *dst, const void *src,
-					  unsigned int nbytes, void *tweak);
-
-asmlinkage void speck128_xts_decrypt_neon(const u64 *round_keys, int nrounds,
-					  void *dst, const void *src,
-					  unsigned int nbytes, void *tweak);
-
-typedef void (*speck128_crypt_one_t)(const struct speck128_tfm_ctx *,
-				     u8 *, const u8 *);
-typedef void (*speck128_xts_crypt_many_t)(const u64 *, int, void *,
-					  const void *, unsigned int, void *);
-
-static __always_inline int
-__speck128_xts_crypt(struct skcipher_request *req,
-		     speck128_crypt_one_t crypt_one,
-		     speck128_xts_crypt_many_t crypt_many)
-{
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	const struct speck128_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
-	struct skcipher_walk walk;
-	le128 tweak;
-	int err;
-
-	err = skcipher_walk_virt(&walk, req, true);
-
-	crypto_speck128_encrypt(&ctx->tweak_key, (u8 *)&tweak, walk.iv);
-
-	while (walk.nbytes > 0) {
-		unsigned int nbytes = walk.nbytes;
-		u8 *dst = walk.dst.virt.addr;
-		const u8 *src = walk.src.virt.addr;
-
-		if (nbytes >= SPECK_NEON_CHUNK_SIZE && may_use_simd()) {
-			unsigned int count;
-
-			count = round_down(nbytes, SPECK_NEON_CHUNK_SIZE);
-			kernel_neon_begin();
-			(*crypt_many)(ctx->main_key.round_keys,
-				      ctx->main_key.nrounds,
-				      dst, src, count, &tweak);
-			kernel_neon_end();
-			dst += count;
-			src += count;
-			nbytes -= count;
-		}
-
-		/* Handle any remainder with generic code */
-		while (nbytes >= sizeof(tweak)) {
-			le128_xor((le128 *)dst, (const le128 *)src, &tweak);
-			(*crypt_one)(&ctx->main_key, dst, dst);
-			le128_xor((le128 *)dst, (const le128 *)dst, &tweak);
-			gf128mul_x_ble(&tweak, &tweak);
-
-			dst += sizeof(tweak);
-			src += sizeof(tweak);
-			nbytes -= sizeof(tweak);
-		}
-		err = skcipher_walk_done(&walk, nbytes);
-	}
-
-	return err;
-}
-
-static int speck128_xts_encrypt(struct skcipher_request *req)
-{
-	return __speck128_xts_crypt(req, crypto_speck128_encrypt,
-				    speck128_xts_encrypt_neon);
-}
-
-static int speck128_xts_decrypt(struct skcipher_request *req)
-{
-	return __speck128_xts_crypt(req, crypto_speck128_decrypt,
-				    speck128_xts_decrypt_neon);
-}
-
-static int speck128_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
-			       unsigned int keylen)
-{
-	struct speck128_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
-	int err;
-
-	err = xts_verify_key(tfm, key, keylen);
-	if (err)
-		return err;
-
-	keylen /= 2;
-
-	err = crypto_speck128_setkey(&ctx->main_key, key, keylen);
-	if (err)
-		return err;
-
-	return crypto_speck128_setkey(&ctx->tweak_key, key + keylen, keylen);
-}
-
-/* Speck64 */
-
-struct speck64_xts_tfm_ctx {
-	struct speck64_tfm_ctx main_key;
-	struct speck64_tfm_ctx tweak_key;
-};
-
-asmlinkage void speck64_xts_encrypt_neon(const u32 *round_keys, int nrounds,
-					 void *dst, const void *src,
-					 unsigned int nbytes, void *tweak);
-
-asmlinkage void speck64_xts_decrypt_neon(const u32 *round_keys, int nrounds,
-					 void *dst, const void *src,
-					 unsigned int nbytes, void *tweak);
-
-typedef void (*speck64_crypt_one_t)(const struct speck64_tfm_ctx *,
-				    u8 *, const u8 *);
-typedef void (*speck64_xts_crypt_many_t)(const u32 *, int, void *,
-					 const void *, unsigned int, void *);
-
-static __always_inline int
-__speck64_xts_crypt(struct skcipher_request *req, speck64_crypt_one_t crypt_one,
-		    speck64_xts_crypt_many_t crypt_many)
-{
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	const struct speck64_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
-	struct skcipher_walk walk;
-	__le64 tweak;
-	int err;
-
-	err = skcipher_walk_virt(&walk, req, true);
-
-	crypto_speck64_encrypt(&ctx->tweak_key, (u8 *)&tweak, walk.iv);
-
-	while (walk.nbytes > 0) {
-		unsigned int nbytes = walk.nbytes;
-		u8 *dst = walk.dst.virt.addr;
-		const u8 *src = walk.src.virt.addr;
-
-		if (nbytes >= SPECK_NEON_CHUNK_SIZE && may_use_simd()) {
-			unsigned int count;
-
-			count = round_down(nbytes, SPECK_NEON_CHUNK_SIZE);
-			kernel_neon_begin();
-			(*crypt_many)(ctx->main_key.round_keys,
-				      ctx->main_key.nrounds,
-				      dst, src, count, &tweak);
-			kernel_neon_end();
-			dst += count;
-			src += count;
-			nbytes -= count;
-		}
-
-		/* Handle any remainder with generic code */
-		while (nbytes >= sizeof(tweak)) {
-			*(__le64 *)dst = *(__le64 *)src ^ tweak;
-			(*crypt_one)(&ctx->main_key, dst, dst);
-			*(__le64 *)dst ^= tweak;
-			tweak = cpu_to_le64((le64_to_cpu(tweak) << 1) ^
-					    ((tweak & cpu_to_le64(1ULL << 63)) ?
-					     0x1B : 0));
-			dst += sizeof(tweak);
-			src += sizeof(tweak);
-			nbytes -= sizeof(tweak);
-		}
-		err = skcipher_walk_done(&walk, nbytes);
-	}
-
-	return err;
-}
-
-static int speck64_xts_encrypt(struct skcipher_request *req)
-{
-	return __speck64_xts_crypt(req, crypto_speck64_encrypt,
-				   speck64_xts_encrypt_neon);
-}
-
-static int speck64_xts_decrypt(struct skcipher_request *req)
-{
-	return __speck64_xts_crypt(req, crypto_speck64_decrypt,
-				   speck64_xts_decrypt_neon);
-}
-
-static int speck64_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
-			      unsigned int keylen)
-{
-	struct speck64_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
-	int err;
-
-	err = xts_verify_key(tfm, key, keylen);
-	if (err)
-		return err;
-
-	keylen /= 2;
-
-	err = crypto_speck64_setkey(&ctx->main_key, key, keylen);
-	if (err)
-		return err;
-
-	return crypto_speck64_setkey(&ctx->tweak_key, key + keylen, keylen);
-}
-
-static struct skcipher_alg speck_algs[] = {
-	{
-		.base.cra_name		= "xts(speck128)",
-		.base.cra_driver_name	= "xts-speck128-neon",
-		.base.cra_priority	= 300,
-		.base.cra_blocksize	= SPECK128_BLOCK_SIZE,
-		.base.cra_ctxsize	= sizeof(struct speck128_xts_tfm_ctx),
-		.base.cra_alignmask	= 7,
-		.base.cra_module	= THIS_MODULE,
-		.min_keysize		= 2 * SPECK128_128_KEY_SIZE,
-		.max_keysize		= 2 * SPECK128_256_KEY_SIZE,
-		.ivsize			= SPECK128_BLOCK_SIZE,
-		.walksize		= SPECK_NEON_CHUNK_SIZE,
-		.setkey			= speck128_xts_setkey,
-		.encrypt		= speck128_xts_encrypt,
-		.decrypt		= speck128_xts_decrypt,
-	}, {
-		.base.cra_name		= "xts(speck64)",
-		.base.cra_driver_name	= "xts-speck64-neon",
-		.base.cra_priority	= 300,
-		.base.cra_blocksize	= SPECK64_BLOCK_SIZE,
-		.base.cra_ctxsize	= sizeof(struct speck64_xts_tfm_ctx),
-		.base.cra_alignmask	= 7,
-		.base.cra_module	= THIS_MODULE,
-		.min_keysize		= 2 * SPECK64_96_KEY_SIZE,
-		.max_keysize		= 2 * SPECK64_128_KEY_SIZE,
-		.ivsize			= SPECK64_BLOCK_SIZE,
-		.walksize		= SPECK_NEON_CHUNK_SIZE,
-		.setkey			= speck64_xts_setkey,
-		.encrypt		= speck64_xts_encrypt,
-		.decrypt		= speck64_xts_decrypt,
-	}
-};
-
-static int __init speck_neon_module_init(void)
-{
-	if (!(elf_hwcap & HWCAP_ASIMD))
-		return -ENODEV;
-	return crypto_register_skciphers(speck_algs, ARRAY_SIZE(speck_algs));
-}
-
-static void __exit speck_neon_module_exit(void)
-{
-	crypto_unregister_skciphers(speck_algs, ARRAY_SIZE(speck_algs));
-}
-
-module_init(speck_neon_module_init);
-module_exit(speck_neon_module_exit);
-
-MODULE_DESCRIPTION("Speck block cipher (NEON-accelerated)");
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>");
-MODULE_ALIAS_CRYPTO("xts(speck128)");
-MODULE_ALIAS_CRYPTO("xts-speck128-neon");
-MODULE_ALIAS_CRYPTO("xts(speck64)");
-MODULE_ALIAS_CRYPTO("xts-speck64-neon");

+ 0 - 2
arch/m68k/configs/amiga_defconfig

@@ -621,7 +621,6 @@ CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_AEGIS128=m
@@ -657,7 +656,6 @@ CONFIG_CRYPTO_SALSA20=m
 CONFIG_CRYPTO_SEED=m
 CONFIG_CRYPTO_SERPENT=m
 CONFIG_CRYPTO_SM4=m
-CONFIG_CRYPTO_SPECK=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_LZO=m

+ 0 - 2
arch/m68k/configs/apollo_defconfig

@@ -578,7 +578,6 @@ CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_AEGIS128=m
@@ -614,7 +613,6 @@ CONFIG_CRYPTO_SALSA20=m
 CONFIG_CRYPTO_SEED=m
 CONFIG_CRYPTO_SERPENT=m
 CONFIG_CRYPTO_SM4=m
-CONFIG_CRYPTO_SPECK=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_LZO=m

+ 0 - 2
arch/m68k/configs/atari_defconfig

@@ -599,7 +599,6 @@ CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_AEGIS128=m
@@ -635,7 +634,6 @@ CONFIG_CRYPTO_SALSA20=m
 CONFIG_CRYPTO_SEED=m
 CONFIG_CRYPTO_SERPENT=m
 CONFIG_CRYPTO_SM4=m
-CONFIG_CRYPTO_SPECK=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_LZO=m

+ 0 - 2
arch/m68k/configs/bvme6000_defconfig

@@ -570,7 +570,6 @@ CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_AEGIS128=m
@@ -606,7 +605,6 @@ CONFIG_CRYPTO_SALSA20=m
 CONFIG_CRYPTO_SEED=m
 CONFIG_CRYPTO_SERPENT=m
 CONFIG_CRYPTO_SM4=m
-CONFIG_CRYPTO_SPECK=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_LZO=m

+ 0 - 2
arch/m68k/configs/hp300_defconfig

@@ -580,7 +580,6 @@ CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_AEGIS128=m
@@ -616,7 +615,6 @@ CONFIG_CRYPTO_SALSA20=m
 CONFIG_CRYPTO_SEED=m
 CONFIG_CRYPTO_SERPENT=m
 CONFIG_CRYPTO_SM4=m
-CONFIG_CRYPTO_SPECK=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_LZO=m

+ 0 - 2
arch/m68k/configs/mac_defconfig

@@ -602,7 +602,6 @@ CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_AEGIS128=m
@@ -638,7 +637,6 @@ CONFIG_CRYPTO_SALSA20=m
 CONFIG_CRYPTO_SEED=m
 CONFIG_CRYPTO_SERPENT=m
 CONFIG_CRYPTO_SM4=m
-CONFIG_CRYPTO_SPECK=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_LZO=m

+ 0 - 2
arch/m68k/configs/multi_defconfig

@@ -684,7 +684,6 @@ CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_AEGIS128=m
@@ -720,7 +719,6 @@ CONFIG_CRYPTO_SALSA20=m
 CONFIG_CRYPTO_SEED=m
 CONFIG_CRYPTO_SERPENT=m
 CONFIG_CRYPTO_SM4=m
-CONFIG_CRYPTO_SPECK=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_LZO=m

+ 0 - 2
arch/m68k/configs/mvme147_defconfig

@@ -570,7 +570,6 @@ CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_AEGIS128=m
@@ -606,7 +605,6 @@ CONFIG_CRYPTO_SALSA20=m
 CONFIG_CRYPTO_SEED=m
 CONFIG_CRYPTO_SERPENT=m
 CONFIG_CRYPTO_SM4=m
-CONFIG_CRYPTO_SPECK=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_LZO=m

+ 0 - 2
arch/m68k/configs/mvme16x_defconfig

@@ -570,7 +570,6 @@ CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_AEGIS128=m
@@ -606,7 +605,6 @@ CONFIG_CRYPTO_SALSA20=m
 CONFIG_CRYPTO_SEED=m
 CONFIG_CRYPTO_SERPENT=m
 CONFIG_CRYPTO_SM4=m
-CONFIG_CRYPTO_SPECK=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_LZO=m

+ 0 - 2
arch/m68k/configs/q40_defconfig

@@ -593,7 +593,6 @@ CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_AEGIS128=m
@@ -629,7 +628,6 @@ CONFIG_CRYPTO_SALSA20=m
 CONFIG_CRYPTO_SEED=m
 CONFIG_CRYPTO_SERPENT=m
 CONFIG_CRYPTO_SM4=m
-CONFIG_CRYPTO_SPECK=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_LZO=m

+ 0 - 2
arch/m68k/configs/sun3_defconfig

@@ -571,7 +571,6 @@ CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_AEGIS128=m
@@ -607,7 +606,6 @@ CONFIG_CRYPTO_SALSA20=m
 CONFIG_CRYPTO_SEED=m
 CONFIG_CRYPTO_SERPENT=m
 CONFIG_CRYPTO_SM4=m
-CONFIG_CRYPTO_SPECK=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_LZO=m

+ 0 - 2
arch/m68k/configs/sun3x_defconfig

@@ -572,7 +572,6 @@ CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_AEGIS128=m
@@ -608,7 +607,6 @@ CONFIG_CRYPTO_SALSA20=m
 CONFIG_CRYPTO_SEED=m
 CONFIG_CRYPTO_SERPENT=m
 CONFIG_CRYPTO_SM4=m
-CONFIG_CRYPTO_SPECK=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_LZO=m

+ 0 - 1
arch/s390/configs/debug_defconfig

@@ -668,7 +668,6 @@ CONFIG_CRYPTO_USER=m
 # CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
 CONFIG_CRYPTO_PCRYPT=m
 CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_LRW=m

+ 0 - 1
arch/s390/configs/performance_defconfig

@@ -610,7 +610,6 @@ CONFIG_CRYPTO_USER=m
 # CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
 CONFIG_CRYPTO_PCRYPT=m
 CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_LRW=m

+ 24 - 24
arch/s390/crypto/aes_s390.c

@@ -44,7 +44,7 @@ struct s390_aes_ctx {
 	int key_len;
 	int key_len;
 	unsigned long fc;
 	unsigned long fc;
 	union {
 	union {
-		struct crypto_skcipher *blk;
+		struct crypto_sync_skcipher *blk;
 		struct crypto_cipher *cip;
 		struct crypto_cipher *cip;
 	} fallback;
 	} fallback;
 };
 };
@@ -54,7 +54,7 @@ struct s390_xts_ctx {
 	u8 pcc_key[32];
 	u8 pcc_key[32];
 	int key_len;
 	int key_len;
 	unsigned long fc;
 	unsigned long fc;
-	struct crypto_skcipher *fallback;
+	struct crypto_sync_skcipher *fallback;
 };
 };
 
 
 struct gcm_sg_walk {
 struct gcm_sg_walk {
@@ -184,14 +184,15 @@ static int setkey_fallback_blk(struct crypto_tfm *tfm, const u8 *key,
 	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
 	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
 	unsigned int ret;
 	unsigned int ret;
 
 
-	crypto_skcipher_clear_flags(sctx->fallback.blk, CRYPTO_TFM_REQ_MASK);
-	crypto_skcipher_set_flags(sctx->fallback.blk, tfm->crt_flags &
+	crypto_sync_skcipher_clear_flags(sctx->fallback.blk,
+					 CRYPTO_TFM_REQ_MASK);
+	crypto_sync_skcipher_set_flags(sctx->fallback.blk, tfm->crt_flags &
 						      CRYPTO_TFM_REQ_MASK);
 						      CRYPTO_TFM_REQ_MASK);
 
 
-	ret = crypto_skcipher_setkey(sctx->fallback.blk, key, len);
+	ret = crypto_sync_skcipher_setkey(sctx->fallback.blk, key, len);
 
 
 	tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK;
 	tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK;
-	tfm->crt_flags |= crypto_skcipher_get_flags(sctx->fallback.blk) &
+	tfm->crt_flags |= crypto_sync_skcipher_get_flags(sctx->fallback.blk) &
 			  CRYPTO_TFM_RES_MASK;
 			  CRYPTO_TFM_RES_MASK;
 
 
 	return ret;
 	return ret;
@@ -204,9 +205,9 @@ static int fallback_blk_dec(struct blkcipher_desc *desc,
 	unsigned int ret;
 	unsigned int ret;
 	struct crypto_blkcipher *tfm = desc->tfm;
 	struct crypto_blkcipher *tfm = desc->tfm;
 	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(tfm);
 	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(tfm);
-	SKCIPHER_REQUEST_ON_STACK(req, sctx->fallback.blk);
+	SYNC_SKCIPHER_REQUEST_ON_STACK(req, sctx->fallback.blk);
 
 
-	skcipher_request_set_tfm(req, sctx->fallback.blk);
+	skcipher_request_set_sync_tfm(req, sctx->fallback.blk);
 	skcipher_request_set_callback(req, desc->flags, NULL, NULL);
 	skcipher_request_set_callback(req, desc->flags, NULL, NULL);
 	skcipher_request_set_crypt(req, src, dst, nbytes, desc->info);
 	skcipher_request_set_crypt(req, src, dst, nbytes, desc->info);
 
 
@@ -223,9 +224,9 @@ static int fallback_blk_enc(struct blkcipher_desc *desc,
 	unsigned int ret;
 	unsigned int ret;
 	struct crypto_blkcipher *tfm = desc->tfm;
 	struct crypto_blkcipher *tfm = desc->tfm;
 	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(tfm);
 	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(tfm);
-	SKCIPHER_REQUEST_ON_STACK(req, sctx->fallback.blk);
+	SYNC_SKCIPHER_REQUEST_ON_STACK(req, sctx->fallback.blk);
 
 
-	skcipher_request_set_tfm(req, sctx->fallback.blk);
+	skcipher_request_set_sync_tfm(req, sctx->fallback.blk);
 	skcipher_request_set_callback(req, desc->flags, NULL, NULL);
 	skcipher_request_set_callback(req, desc->flags, NULL, NULL);
 	skcipher_request_set_crypt(req, src, dst, nbytes, desc->info);
 	skcipher_request_set_crypt(req, src, dst, nbytes, desc->info);
 
 
@@ -306,8 +307,7 @@ static int fallback_init_blk(struct crypto_tfm *tfm)
 	const char *name = tfm->__crt_alg->cra_name;
 	const char *name = tfm->__crt_alg->cra_name;
 	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
 	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
 
 
-	sctx->fallback.blk = crypto_alloc_skcipher(name, 0,
-						   CRYPTO_ALG_ASYNC |
+	sctx->fallback.blk = crypto_alloc_sync_skcipher(name, 0,
 						   CRYPTO_ALG_NEED_FALLBACK);
 						   CRYPTO_ALG_NEED_FALLBACK);
 
 
 	if (IS_ERR(sctx->fallback.blk)) {
 	if (IS_ERR(sctx->fallback.blk)) {
@@ -323,7 +323,7 @@ static void fallback_exit_blk(struct crypto_tfm *tfm)
 {
 {
 	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
 	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
 
 
-	crypto_free_skcipher(sctx->fallback.blk);
+	crypto_free_sync_skcipher(sctx->fallback.blk);
 }
 }
 
 
 static struct crypto_alg ecb_aes_alg = {
 static struct crypto_alg ecb_aes_alg = {
@@ -453,14 +453,15 @@ static int xts_fallback_setkey(struct crypto_tfm *tfm, const u8 *key,
 	struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
 	struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
 	unsigned int ret;
 	unsigned int ret;
 
 
-	crypto_skcipher_clear_flags(xts_ctx->fallback, CRYPTO_TFM_REQ_MASK);
-	crypto_skcipher_set_flags(xts_ctx->fallback, tfm->crt_flags &
+	crypto_sync_skcipher_clear_flags(xts_ctx->fallback,
+					 CRYPTO_TFM_REQ_MASK);
+	crypto_sync_skcipher_set_flags(xts_ctx->fallback, tfm->crt_flags &
 						     CRYPTO_TFM_REQ_MASK);
 						     CRYPTO_TFM_REQ_MASK);
 
 
-	ret = crypto_skcipher_setkey(xts_ctx->fallback, key, len);
+	ret = crypto_sync_skcipher_setkey(xts_ctx->fallback, key, len);
 
 
 	tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK;
 	tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK;
-	tfm->crt_flags |= crypto_skcipher_get_flags(xts_ctx->fallback) &
+	tfm->crt_flags |= crypto_sync_skcipher_get_flags(xts_ctx->fallback) &
 			  CRYPTO_TFM_RES_MASK;
 			  CRYPTO_TFM_RES_MASK;
 
 
 	return ret;
 	return ret;
@@ -472,10 +473,10 @@ static int xts_fallback_decrypt(struct blkcipher_desc *desc,
 {
 {
 	struct crypto_blkcipher *tfm = desc->tfm;
 	struct crypto_blkcipher *tfm = desc->tfm;
 	struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(tfm);
 	struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(tfm);
-	SKCIPHER_REQUEST_ON_STACK(req, xts_ctx->fallback);
+	SYNC_SKCIPHER_REQUEST_ON_STACK(req, xts_ctx->fallback);
 	unsigned int ret;
 	unsigned int ret;
 
 
-	skcipher_request_set_tfm(req, xts_ctx->fallback);
+	skcipher_request_set_sync_tfm(req, xts_ctx->fallback);
 	skcipher_request_set_callback(req, desc->flags, NULL, NULL);
 	skcipher_request_set_callback(req, desc->flags, NULL, NULL);
 	skcipher_request_set_crypt(req, src, dst, nbytes, desc->info);
 	skcipher_request_set_crypt(req, src, dst, nbytes, desc->info);
 
 
@@ -491,10 +492,10 @@ static int xts_fallback_encrypt(struct blkcipher_desc *desc,
 {
 {
 	struct crypto_blkcipher *tfm = desc->tfm;
 	struct crypto_blkcipher *tfm = desc->tfm;
 	struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(tfm);
 	struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(tfm);
-	SKCIPHER_REQUEST_ON_STACK(req, xts_ctx->fallback);
+	SYNC_SKCIPHER_REQUEST_ON_STACK(req, xts_ctx->fallback);
 	unsigned int ret;
 	unsigned int ret;
 
 
-	skcipher_request_set_tfm(req, xts_ctx->fallback);
+	skcipher_request_set_sync_tfm(req, xts_ctx->fallback);
 	skcipher_request_set_callback(req, desc->flags, NULL, NULL);
 	skcipher_request_set_callback(req, desc->flags, NULL, NULL);
 	skcipher_request_set_crypt(req, src, dst, nbytes, desc->info);
 	skcipher_request_set_crypt(req, src, dst, nbytes, desc->info);
 
 
@@ -611,8 +612,7 @@ static int xts_fallback_init(struct crypto_tfm *tfm)
 	const char *name = tfm->__crt_alg->cra_name;
 	const char *name = tfm->__crt_alg->cra_name;
 	struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
 	struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
 
 
-	xts_ctx->fallback = crypto_alloc_skcipher(name, 0,
-						  CRYPTO_ALG_ASYNC |
+	xts_ctx->fallback = crypto_alloc_sync_skcipher(name, 0,
 						  CRYPTO_ALG_NEED_FALLBACK);
 						  CRYPTO_ALG_NEED_FALLBACK);
 
 
 	if (IS_ERR(xts_ctx->fallback)) {
 	if (IS_ERR(xts_ctx->fallback)) {
@@ -627,7 +627,7 @@ static void xts_fallback_exit(struct crypto_tfm *tfm)
 {
 {
 	struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
 	struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
 
 
-	crypto_free_skcipher(xts_ctx->fallback);
+	crypto_free_sync_skcipher(xts_ctx->fallback);
 }
 
 static struct crypto_alg xts_aes_alg = {
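
The aes_s390 hunks above move the software fallback from a plain crypto_skcipher to the crypto_sync_skcipher type, whose requests are guaranteed to complete synchronously and may therefore live on the stack. A minimal sketch of a fallback call under the new API, assembled from the hunks above; the function name and parameter list are illustrative, and the request-issuing and request-zeroing lines (outside the visible context) are shown as the usual crypto_skcipher_encrypt()/skcipher_request_zero() pair:

	/* Sketch only: "fallback" comes from
	 * crypto_alloc_sync_skcipher(name, 0, CRYPTO_ALG_NEED_FALLBACK). */
	static int fallback_encrypt(struct crypto_sync_skcipher *fallback,
				    struct scatterlist *src,
				    struct scatterlist *dst,
				    unsigned int nbytes, void *iv)
	{
		SYNC_SKCIPHER_REQUEST_ON_STACK(req, fallback);
		int ret;

		skcipher_request_set_sync_tfm(req, fallback);
		skcipher_request_set_callback(req, 0, NULL, NULL);
		skcipher_request_set_crypt(req, src, dst, nbytes, iv);

		ret = crypto_skcipher_encrypt(req);	/* runs synchronously */
		skcipher_request_zero(req);
		return ret;
	}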

+ 0 - 1
arch/s390/defconfig

@@ -221,7 +221,6 @@ CONFIG_CRYPTO_SALSA20=m
 CONFIG_CRYPTO_SEED=m
 CONFIG_CRYPTO_SERPENT=m
 CONFIG_CRYPTO_SM4=m
-CONFIG_CRYPTO_SPECK=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_DEFLATE=m

+ 1 - 4
arch/x86/crypto/Makefile

@@ -60,9 +60,6 @@ endif
 ifeq ($(avx2_supported),yes)
 	obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64) += camellia-aesni-avx2.o
 	obj-$(CONFIG_CRYPTO_SERPENT_AVX2_X86_64) += serpent-avx2.o
-	obj-$(CONFIG_CRYPTO_SHA1_MB) += sha1-mb/
-	obj-$(CONFIG_CRYPTO_SHA256_MB) += sha256-mb/
-	obj-$(CONFIG_CRYPTO_SHA512_MB) += sha512-mb/
 
 	obj-$(CONFIG_CRYPTO_MORUS1280_AVX2) += morus1280-avx2.o
 endif
@@ -106,7 +103,7 @@ ifeq ($(avx2_supported),yes)
 	morus1280-avx2-y := morus1280-avx2-asm.o morus1280-avx2-glue.o
 endif
 
-aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o
+aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o
 aesni-intel-$(CONFIG_64BIT) += aesni-intel_avx-x86_64.o aes_ctrby8_avx-x86_64.o
 ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
 sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o

+ 2 - 45
arch/x86/crypto/aesni-intel_glue.c

@@ -102,9 +102,6 @@ asmlinkage void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out,
 asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out,
 asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out,
 			      const u8 *in, unsigned int len, u8 *iv);
 			      const u8 *in, unsigned int len, u8 *iv);
 
 
-int crypto_fpu_init(void);
-void crypto_fpu_exit(void);
-
 #define AVX_GEN2_OPTSIZE 640
 #define AVX_GEN2_OPTSIZE 640
 #define AVX_GEN4_OPTSIZE 4096
 #define AVX_GEN4_OPTSIZE 4096
 
 
@@ -817,7 +814,7 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req,
 	/* Linearize assoc, if not already linear */
 	/* Linearize assoc, if not already linear */
 	if (req->src->length >= assoclen && req->src->length &&
 	if (req->src->length >= assoclen && req->src->length &&
 		(!PageHighMem(sg_page(req->src)) ||
 		(!PageHighMem(sg_page(req->src)) ||
-			req->src->offset + req->src->length < PAGE_SIZE)) {
+			req->src->offset + req->src->length <= PAGE_SIZE)) {
 		scatterwalk_start(&assoc_sg_walk, req->src);
 		scatterwalk_start(&assoc_sg_walk, req->src);
 		assoc = scatterwalk_map(&assoc_sg_walk);
 		assoc = scatterwalk_map(&assoc_sg_walk);
 	} else {
 	} else {
@@ -1253,22 +1250,6 @@ static struct skcipher_alg aesni_skciphers[] = {
 static
 static
 struct simd_skcipher_alg *aesni_simd_skciphers[ARRAY_SIZE(aesni_skciphers)];
 struct simd_skcipher_alg *aesni_simd_skciphers[ARRAY_SIZE(aesni_skciphers)];
 
 
-static struct {
-	const char *algname;
-	const char *drvname;
-	const char *basename;
-	struct simd_skcipher_alg *simd;
-} aesni_simd_skciphers2[] = {
-#if (defined(MODULE) && IS_ENABLED(CONFIG_CRYPTO_PCBC)) || \
-    IS_BUILTIN(CONFIG_CRYPTO_PCBC)
-	{
-		.algname	= "pcbc(aes)",
-		.drvname	= "pcbc-aes-aesni",
-		.basename	= "fpu(pcbc(__aes-aesni))",
-	},
-#endif
-};
-
 #ifdef CONFIG_X86_64
 #ifdef CONFIG_X86_64
 static int generic_gcmaes_set_key(struct crypto_aead *aead, const u8 *key,
 static int generic_gcmaes_set_key(struct crypto_aead *aead, const u8 *key,
 				  unsigned int key_len)
 				  unsigned int key_len)
@@ -1422,10 +1403,6 @@ static void aesni_free_simds(void)
 	for (i = 0; i < ARRAY_SIZE(aesni_simd_skciphers) &&
 	for (i = 0; i < ARRAY_SIZE(aesni_simd_skciphers) &&
 		    aesni_simd_skciphers[i]; i++)
 		    aesni_simd_skciphers[i]; i++)
 		simd_skcipher_free(aesni_simd_skciphers[i]);
 		simd_skcipher_free(aesni_simd_skciphers[i]);
-
-	for (i = 0; i < ARRAY_SIZE(aesni_simd_skciphers2); i++)
-		if (aesni_simd_skciphers2[i].simd)
-			simd_skcipher_free(aesni_simd_skciphers2[i].simd);
 }
 }
 
 
 static int __init aesni_init(void)
 static int __init aesni_init(void)
@@ -1469,13 +1446,9 @@ static int __init aesni_init(void)
 #endif
 #endif
 #endif
 #endif
 
 
-	err = crypto_fpu_init();
-	if (err)
-		return err;
-
 	err = crypto_register_algs(aesni_algs, ARRAY_SIZE(aesni_algs));
 	err = crypto_register_algs(aesni_algs, ARRAY_SIZE(aesni_algs));
 	if (err)
 	if (err)
-		goto fpu_exit;
+		return err;
 
 
 	err = crypto_register_skciphers(aesni_skciphers,
 	err = crypto_register_skciphers(aesni_skciphers,
 					ARRAY_SIZE(aesni_skciphers));
 					ARRAY_SIZE(aesni_skciphers));
@@ -1499,18 +1472,6 @@ static int __init aesni_init(void)
 		aesni_simd_skciphers[i] = simd;
 		aesni_simd_skciphers[i] = simd;
 	}
 	}
 
 
-	for (i = 0; i < ARRAY_SIZE(aesni_simd_skciphers2); i++) {
-		algname = aesni_simd_skciphers2[i].algname;
-		drvname = aesni_simd_skciphers2[i].drvname;
-		basename = aesni_simd_skciphers2[i].basename;
-		simd = simd_skcipher_create_compat(algname, drvname, basename);
-		err = PTR_ERR(simd);
-		if (IS_ERR(simd))
-			continue;
-
-		aesni_simd_skciphers2[i].simd = simd;
-	}
-
 	return 0;
 	return 0;
 
 
 unregister_simds:
 unregister_simds:
@@ -1521,8 +1482,6 @@ unregister_skciphers:
 				    ARRAY_SIZE(aesni_skciphers));
 				    ARRAY_SIZE(aesni_skciphers));
 unregister_algs:
 unregister_algs:
 	crypto_unregister_algs(aesni_algs, ARRAY_SIZE(aesni_algs));
 	crypto_unregister_algs(aesni_algs, ARRAY_SIZE(aesni_algs));
-fpu_exit:
-	crypto_fpu_exit();
 	return err;
 	return err;
 }
 }
 
 
@@ -1533,8 +1492,6 @@ static void __exit aesni_exit(void)
 	crypto_unregister_skciphers(aesni_skciphers,
 	crypto_unregister_skciphers(aesni_skciphers,
 				    ARRAY_SIZE(aesni_skciphers));
 				    ARRAY_SIZE(aesni_skciphers));
 	crypto_unregister_algs(aesni_algs, ARRAY_SIZE(aesni_algs));
 	crypto_unregister_algs(aesni_algs, ARRAY_SIZE(aesni_algs));
-
-	crypto_fpu_exit();
 }
 
 late_initcall(aesni_init);
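
Besides removing the fpu template plumbing and the pcbc(aes) registration, the aesni hunk relaxes the check that decides whether the associated data can be mapped in place: with a 4 KiB page, assoc data of 0x200 bytes starting at offset 0xe00 ends at byte 0xfff, still inside the page, yet offset + length equals PAGE_SIZE and the old '<' test excluded it from the direct-mapping path. A tiny illustration of the corrected bound (the macro and function names, and the numbers, are made up for the example):

	#define EXAMPLE_PAGE_SIZE 0x1000u

	/* True when [offset, offset + length) lies within one page. */
	static int fits_in_one_page(unsigned int offset, unsigned int length)
	{
		return offset + length <= EXAMPLE_PAGE_SIZE;	/* 0xe00 + 0x200 == 0x1000: still OK */
	}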

+ 0 - 207
arch/x86/crypto/fpu.c

@@ -1,207 +0,0 @@
-/*
- * FPU: Wrapper for blkcipher touching fpu
- *
- * Copyright (c) Intel Corp.
- *   Author: Huang Ying <ying.huang@intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- */
-
-#include <crypto/internal/skcipher.h>
-#include <linux/err.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <asm/fpu/api.h>
-
-struct crypto_fpu_ctx {
-	struct crypto_skcipher *child;
-};
-
-static int crypto_fpu_setkey(struct crypto_skcipher *parent, const u8 *key,
-			     unsigned int keylen)
-{
-	struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(parent);
-	struct crypto_skcipher *child = ctx->child;
-	int err;
-
-	crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
-	crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(parent) &
-					 CRYPTO_TFM_REQ_MASK);
-	err = crypto_skcipher_setkey(child, key, keylen);
-	crypto_skcipher_set_flags(parent, crypto_skcipher_get_flags(child) &
-					  CRYPTO_TFM_RES_MASK);
-	return err;
-}
-
-static int crypto_fpu_encrypt(struct skcipher_request *req)
-{
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(tfm);
-	struct crypto_skcipher *child = ctx->child;
-	SKCIPHER_REQUEST_ON_STACK(subreq, child);
-	int err;
-
-	skcipher_request_set_tfm(subreq, child);
-	skcipher_request_set_callback(subreq, 0, NULL, NULL);
-	skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen,
-				   req->iv);
-
-	kernel_fpu_begin();
-	err = crypto_skcipher_encrypt(subreq);
-	kernel_fpu_end();
-
-	skcipher_request_zero(subreq);
-	return err;
-}
-
-static int crypto_fpu_decrypt(struct skcipher_request *req)
-{
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(tfm);
-	struct crypto_skcipher *child = ctx->child;
-	SKCIPHER_REQUEST_ON_STACK(subreq, child);
-	int err;
-
-	skcipher_request_set_tfm(subreq, child);
-	skcipher_request_set_callback(subreq, 0, NULL, NULL);
-	skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen,
-				   req->iv);
-
-	kernel_fpu_begin();
-	err = crypto_skcipher_decrypt(subreq);
-	kernel_fpu_end();
-
-	skcipher_request_zero(subreq);
-	return err;
-}
-
-static int crypto_fpu_init_tfm(struct crypto_skcipher *tfm)
-{
-	struct skcipher_instance *inst = skcipher_alg_instance(tfm);
-	struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(tfm);
-	struct crypto_skcipher_spawn *spawn;
-	struct crypto_skcipher *cipher;
-
-	spawn = skcipher_instance_ctx(inst);
-	cipher = crypto_spawn_skcipher(spawn);
-	if (IS_ERR(cipher))
-		return PTR_ERR(cipher);
-
-	ctx->child = cipher;
-
-	return 0;
-}
-
-static void crypto_fpu_exit_tfm(struct crypto_skcipher *tfm)
-{
-	struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(tfm);
-
-	crypto_free_skcipher(ctx->child);
-}
-
-static void crypto_fpu_free(struct skcipher_instance *inst)
-{
-	crypto_drop_skcipher(skcipher_instance_ctx(inst));
-	kfree(inst);
-}
-
-static int crypto_fpu_create(struct crypto_template *tmpl, struct rtattr **tb)
-{
-	struct crypto_skcipher_spawn *spawn;
-	struct skcipher_instance *inst;
-	struct crypto_attr_type *algt;
-	struct skcipher_alg *alg;
-	const char *cipher_name;
-	int err;
-
-	algt = crypto_get_attr_type(tb);
-	if (IS_ERR(algt))
-		return PTR_ERR(algt);
-
-	if ((algt->type ^ (CRYPTO_ALG_INTERNAL | CRYPTO_ALG_TYPE_SKCIPHER)) &
-	    algt->mask)
-		return -EINVAL;
-
-	if (!(algt->mask & CRYPTO_ALG_INTERNAL))
-		return -EINVAL;
-
-	cipher_name = crypto_attr_alg_name(tb[1]);
-	if (IS_ERR(cipher_name))
-		return PTR_ERR(cipher_name);
-
-	inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
-	if (!inst)
-		return -ENOMEM;
-
-	spawn = skcipher_instance_ctx(inst);
-
-	crypto_set_skcipher_spawn(spawn, skcipher_crypto_instance(inst));
-	err = crypto_grab_skcipher(spawn, cipher_name, CRYPTO_ALG_INTERNAL,
-				   CRYPTO_ALG_INTERNAL | CRYPTO_ALG_ASYNC);
-	if (err)
-		goto out_free_inst;
-
-	alg = crypto_skcipher_spawn_alg(spawn);
-
-	err = crypto_inst_setname(skcipher_crypto_instance(inst), "fpu",
-				  &alg->base);
-	if (err)
-		goto out_drop_skcipher;
-
-	inst->alg.base.cra_flags = CRYPTO_ALG_INTERNAL;
-	inst->alg.base.cra_priority = alg->base.cra_priority;
-	inst->alg.base.cra_blocksize = alg->base.cra_blocksize;
-	inst->alg.base.cra_alignmask = alg->base.cra_alignmask;
-
-	inst->alg.ivsize = crypto_skcipher_alg_ivsize(alg);
-	inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(alg);
-	inst->alg.max_keysize = crypto_skcipher_alg_max_keysize(alg);
-
-	inst->alg.base.cra_ctxsize = sizeof(struct crypto_fpu_ctx);
-
-	inst->alg.init = crypto_fpu_init_tfm;
-	inst->alg.exit = crypto_fpu_exit_tfm;
-
-	inst->alg.setkey = crypto_fpu_setkey;
-	inst->alg.encrypt = crypto_fpu_encrypt;
-	inst->alg.decrypt = crypto_fpu_decrypt;
-
-	inst->free = crypto_fpu_free;
-
-	err = skcipher_register_instance(tmpl, inst);
-	if (err)
-		goto out_drop_skcipher;
-
-out:
-	return err;
-
-out_drop_skcipher:
-	crypto_drop_skcipher(spawn);
-out_free_inst:
-	kfree(inst);
-	goto out;
-}
-
-static struct crypto_template crypto_fpu_tmpl = {
-	.name = "fpu",
-	.create = crypto_fpu_create,
-	.module = THIS_MODULE,
-};
-
-int __init crypto_fpu_init(void)
-{
-	return crypto_register_template(&crypto_fpu_tmpl);
-}
-
-void crypto_fpu_exit(void)
-{
-	crypto_unregister_template(&crypto_fpu_tmpl);
-}
-
-MODULE_ALIAS_CRYPTO("fpu");

+ 0 - 14
arch/x86/crypto/sha1-mb/Makefile

@@ -1,14 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# Arch-specific CryptoAPI modules.
-#
-
-OBJECT_FILES_NON_STANDARD := y
-
-avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
-                                $(comma)4)$(comma)%ymm2,yes,no)
-ifeq ($(avx2_supported),yes)
-	obj-$(CONFIG_CRYPTO_SHA1_MB) += sha1-mb.o
-	sha1-mb-y := sha1_mb.o sha1_mb_mgr_flush_avx2.o \
-	     sha1_mb_mgr_init_avx2.o sha1_mb_mgr_submit_avx2.o sha1_x8_avx2.o
-endif

+ 0 - 1011
arch/x86/crypto/sha1-mb/sha1_mb.c

@@ -1,1011 +0,0 @@
-/*
- * Multi buffer SHA1 algorithm Glue Code
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- *  Copyright(c) 2014 Intel Corporation.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of version 2 of the GNU General Public License as
- *  published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  General Public License for more details.
- *
- *  Contact Information:
- *	Tim Chen <tim.c.chen@linux.intel.com>
- *
- *  BSD LICENSE
- *
- *  Copyright(c) 2014 Intel Corporation.
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *    * Redistributions of source code must retain the above copyright
- *      notice, this list of conditions and the following disclaimer.
- *    * Redistributions in binary form must reproduce the above copyright
- *      notice, this list of conditions and the following disclaimer in
- *      the documentation and/or other materials provided with the
- *      distribution.
- *    * Neither the name of Intel Corporation nor the names of its
- *      contributors may be used to endorse or promote products derived
- *      from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
-
-#include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/cryptohash.h>
-#include <linux/types.h>
-#include <linux/list.h>
-#include <crypto/scatterwalk.h>
-#include <crypto/sha.h>
-#include <crypto/mcryptd.h>
-#include <crypto/crypto_wq.h>
-#include <asm/byteorder.h>
-#include <linux/hardirq.h>
-#include <asm/fpu/api.h>
-#include "sha1_mb_ctx.h"
-
-#define FLUSH_INTERVAL 1000 /* in usec */
-
-static struct mcryptd_alg_state sha1_mb_alg_state;
-
-struct sha1_mb_ctx {
-	struct mcryptd_ahash *mcryptd_tfm;
-};
-
-static inline struct mcryptd_hash_request_ctx
-		*cast_hash_to_mcryptd_ctx(struct sha1_hash_ctx *hash_ctx)
-{
-	struct ahash_request *areq;
-
-	areq = container_of((void *) hash_ctx, struct ahash_request, __ctx);
-	return container_of(areq, struct mcryptd_hash_request_ctx, areq);
-}
-
-static inline struct ahash_request
-		*cast_mcryptd_ctx_to_req(struct mcryptd_hash_request_ctx *ctx)
-{
-	return container_of((void *) ctx, struct ahash_request, __ctx);
-}
-
-static void req_ctx_init(struct mcryptd_hash_request_ctx *rctx,
-				struct ahash_request *areq)
-{
-	rctx->flag = HASH_UPDATE;
-}
-
-static asmlinkage void (*sha1_job_mgr_init)(struct sha1_mb_mgr *state);
-static asmlinkage struct job_sha1* (*sha1_job_mgr_submit)
-			(struct sha1_mb_mgr *state, struct job_sha1 *job);
-static asmlinkage struct job_sha1* (*sha1_job_mgr_flush)
-						(struct sha1_mb_mgr *state);
-static asmlinkage struct job_sha1* (*sha1_job_mgr_get_comp_job)
-						(struct sha1_mb_mgr *state);
-
-static inline uint32_t sha1_pad(uint8_t padblock[SHA1_BLOCK_SIZE * 2],
-			 uint64_t total_len)
-{
-	uint32_t i = total_len & (SHA1_BLOCK_SIZE - 1);
-
-	memset(&padblock[i], 0, SHA1_BLOCK_SIZE);
-	padblock[i] = 0x80;
-
-	i += ((SHA1_BLOCK_SIZE - 1) &
-	      (0 - (total_len + SHA1_PADLENGTHFIELD_SIZE + 1)))
-	     + 1 + SHA1_PADLENGTHFIELD_SIZE;
-
-#if SHA1_PADLENGTHFIELD_SIZE == 16
-	*((uint64_t *) &padblock[i - 16]) = 0;
-#endif
-
-	*((uint64_t *) &padblock[i - 8]) = cpu_to_be64(total_len << 3);
-
-	/* Number of extra blocks to hash */
-	return i >> SHA1_LOG2_BLOCK_SIZE;
-}
-
-static struct sha1_hash_ctx *sha1_ctx_mgr_resubmit(struct sha1_ctx_mgr *mgr,
-						struct sha1_hash_ctx *ctx)
-{
-	while (ctx) {
-		if (ctx->status & HASH_CTX_STS_COMPLETE) {
-			/* Clear PROCESSING bit */
-			ctx->status = HASH_CTX_STS_COMPLETE;
-			return ctx;
-		}
-
-		/*
-		 * If the extra blocks are empty, begin hashing what remains
-		 * in the user's buffer.
-		 */
-		if (ctx->partial_block_buffer_length == 0 &&
-		    ctx->incoming_buffer_length) {
-
-			const void *buffer = ctx->incoming_buffer;
-			uint32_t len = ctx->incoming_buffer_length;
-			uint32_t copy_len;
-
-			/*
-			 * Only entire blocks can be hashed.
-			 * Copy remainder to extra blocks buffer.
-			 */
-			copy_len = len & (SHA1_BLOCK_SIZE-1);
-
-			if (copy_len) {
-				len -= copy_len;
-				memcpy(ctx->partial_block_buffer,
-				       ((const char *) buffer + len),
-				       copy_len);
-				ctx->partial_block_buffer_length = copy_len;
-			}
-
-			ctx->incoming_buffer_length = 0;
-
-			/* len should be a multiple of the block size now */
-			assert((len % SHA1_BLOCK_SIZE) == 0);
-
-			/* Set len to the number of blocks to be hashed */
-			len >>= SHA1_LOG2_BLOCK_SIZE;
-
-			if (len) {
-
-				ctx->job.buffer = (uint8_t *) buffer;
-				ctx->job.len = len;
-				ctx = (struct sha1_hash_ctx *)sha1_job_mgr_submit(&mgr->mgr,
-										&ctx->job);
-				continue;
-			}
-		}
-
-		/*
-		 * If the extra blocks are not empty, then we are
-		 * either on the last block(s) or we need more
-		 * user input before continuing.
-		 */
-		if (ctx->status & HASH_CTX_STS_LAST) {
-
-			uint8_t *buf = ctx->partial_block_buffer;
-			uint32_t n_extra_blocks =
-					sha1_pad(buf, ctx->total_length);
-
-			ctx->status = (HASH_CTX_STS_PROCESSING |
-				       HASH_CTX_STS_COMPLETE);
-			ctx->job.buffer = buf;
-			ctx->job.len = (uint32_t) n_extra_blocks;
-			ctx = (struct sha1_hash_ctx *)
-				sha1_job_mgr_submit(&mgr->mgr, &ctx->job);
-			continue;
-		}
-
-		ctx->status = HASH_CTX_STS_IDLE;
-		return ctx;
-	}
-
-	return NULL;
-}
-
-static struct sha1_hash_ctx
-			*sha1_ctx_mgr_get_comp_ctx(struct sha1_ctx_mgr *mgr)
-{
-	/*
-	 * If get_comp_job returns NULL, there are no jobs complete.
-	 * If get_comp_job returns a job, verify that it is safe to return to
-	 * the user.
-	 * If it is not ready, resubmit the job to finish processing.
-	 * If sha1_ctx_mgr_resubmit returned a job, it is ready to be returned.
-	 * Otherwise, all jobs currently being managed by the hash_ctx_mgr
-	 * still need processing.
-	 */
-	struct sha1_hash_ctx *ctx;
-
-	ctx = (struct sha1_hash_ctx *) sha1_job_mgr_get_comp_job(&mgr->mgr);
-	return sha1_ctx_mgr_resubmit(mgr, ctx);
-}
-
-static void sha1_ctx_mgr_init(struct sha1_ctx_mgr *mgr)
-{
-	sha1_job_mgr_init(&mgr->mgr);
-}
-
-static struct sha1_hash_ctx *sha1_ctx_mgr_submit(struct sha1_ctx_mgr *mgr,
-					  struct sha1_hash_ctx *ctx,
-					  const void *buffer,
-					  uint32_t len,
-					  int flags)
-{
-	if (flags & ~(HASH_UPDATE | HASH_LAST)) {
-		/* User should not pass anything other than UPDATE or LAST */
-		ctx->error = HASH_CTX_ERROR_INVALID_FLAGS;
-		return ctx;
-	}
-
-	if (ctx->status & HASH_CTX_STS_PROCESSING) {
-		/* Cannot submit to a currently processing job. */
-		ctx->error = HASH_CTX_ERROR_ALREADY_PROCESSING;
-		return ctx;
-	}
-
-	if (ctx->status & HASH_CTX_STS_COMPLETE) {
-		/* Cannot update a finished job. */
-		ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED;
-		return ctx;
-	}
-
-	/*
-	 * If we made it here, there were no errors during this call to
-	 * submit
-	 */
-	ctx->error = HASH_CTX_ERROR_NONE;
-
-	/* Store buffer ptr info from user */
-	ctx->incoming_buffer = buffer;
-	ctx->incoming_buffer_length = len;
-
-	/*
-	 * Store the user's request flags and mark this ctx as currently
-	 * being processed.
-	 */
-	ctx->status = (flags & HASH_LAST) ?
-			(HASH_CTX_STS_PROCESSING | HASH_CTX_STS_LAST) :
-			HASH_CTX_STS_PROCESSING;
-
-	/* Advance byte counter */
-	ctx->total_length += len;
-
-	/*
-	 * If there is anything currently buffered in the extra blocks,
-	 * append to it until it contains a whole block.
-	 * Or if the user's buffer contains less than a whole block,
-	 * append as much as possible to the extra block.
-	 */
-	if (ctx->partial_block_buffer_length || len < SHA1_BLOCK_SIZE) {
-		/*
-		 * Compute how many bytes to copy from user buffer into
-		 * extra block
-		 */
-		uint32_t copy_len = SHA1_BLOCK_SIZE -
-					ctx->partial_block_buffer_length;
-		if (len < copy_len)
-			copy_len = len;
-
-		if (copy_len) {
-			/* Copy and update relevant pointers and counters */
-			memcpy(&ctx->partial_block_buffer[ctx->partial_block_buffer_length],
-				buffer, copy_len);
-
-			ctx->partial_block_buffer_length += copy_len;
-			ctx->incoming_buffer = (const void *)
-					((const char *)buffer + copy_len);
-			ctx->incoming_buffer_length = len - copy_len;
-		}
-
-		/*
-		 * The extra block should never contain more than 1 block
-		 * here
-		 */
-		assert(ctx->partial_block_buffer_length <= SHA1_BLOCK_SIZE);
-
-		/*
-		 * If the extra block buffer contains exactly 1 block, it can
-		 * be hashed.
-		 */
-		if (ctx->partial_block_buffer_length >= SHA1_BLOCK_SIZE) {
-			ctx->partial_block_buffer_length = 0;
-
-			ctx->job.buffer = ctx->partial_block_buffer;
-			ctx->job.len = 1;
-			ctx = (struct sha1_hash_ctx *)
-				sha1_job_mgr_submit(&mgr->mgr, &ctx->job);
-		}
-	}
-
-	return sha1_ctx_mgr_resubmit(mgr, ctx);
-}
-
-static struct sha1_hash_ctx *sha1_ctx_mgr_flush(struct sha1_ctx_mgr *mgr)
-{
-	struct sha1_hash_ctx *ctx;
-
-	while (1) {
-		ctx = (struct sha1_hash_ctx *) sha1_job_mgr_flush(&mgr->mgr);
-
-		/* If flush returned 0, there are no more jobs in flight. */
-		if (!ctx)
-			return NULL;
-
-		/*
-		 * If flush returned a job, resubmit the job to finish
-		 * processing.
-		 */
-		ctx = sha1_ctx_mgr_resubmit(mgr, ctx);
-
-		/*
-		 * If sha1_ctx_mgr_resubmit returned a job, it is ready to be
-		 * returned. Otherwise, all jobs currently being managed by the
-		 * sha1_ctx_mgr still need processing. Loop.
-		 */
-		if (ctx)
-			return ctx;
-	}
-}
-
-static int sha1_mb_init(struct ahash_request *areq)
-{
-	struct sha1_hash_ctx *sctx = ahash_request_ctx(areq);
-
-	hash_ctx_init(sctx);
-	sctx->job.result_digest[0] = SHA1_H0;
-	sctx->job.result_digest[1] = SHA1_H1;
-	sctx->job.result_digest[2] = SHA1_H2;
-	sctx->job.result_digest[3] = SHA1_H3;
-	sctx->job.result_digest[4] = SHA1_H4;
-	sctx->total_length = 0;
-	sctx->partial_block_buffer_length = 0;
-	sctx->status = HASH_CTX_STS_IDLE;
-
-	return 0;
-}
-
-static int sha1_mb_set_results(struct mcryptd_hash_request_ctx *rctx)
-{
-	int	i;
-	struct	sha1_hash_ctx *sctx = ahash_request_ctx(&rctx->areq);
-	__be32	*dst = (__be32 *) rctx->out;
-
-	for (i = 0; i < 5; ++i)
-		dst[i] = cpu_to_be32(sctx->job.result_digest[i]);
-
-	return 0;
-}
-
-static int sha_finish_walk(struct mcryptd_hash_request_ctx **ret_rctx,
-			struct mcryptd_alg_cstate *cstate, bool flush)
-{
-	int	flag = HASH_UPDATE;
-	int	nbytes, err = 0;
-	struct mcryptd_hash_request_ctx *rctx = *ret_rctx;
-	struct sha1_hash_ctx *sha_ctx;
-
-	/* more work ? */
-	while (!(rctx->flag & HASH_DONE)) {
-		nbytes = crypto_ahash_walk_done(&rctx->walk, 0);
-		if (nbytes < 0) {
-			err = nbytes;
-			goto out;
-		}
-		/* check if the walk is done */
-		if (crypto_ahash_walk_last(&rctx->walk)) {
-			rctx->flag |= HASH_DONE;
-			if (rctx->flag & HASH_FINAL)
-				flag |= HASH_LAST;
-
-		}
-		sha_ctx = (struct sha1_hash_ctx *)
-						ahash_request_ctx(&rctx->areq);
-		kernel_fpu_begin();
-		sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx,
-						rctx->walk.data, nbytes, flag);
-		if (!sha_ctx) {
-			if (flush)
-				sha_ctx = sha1_ctx_mgr_flush(cstate->mgr);
-		}
-		kernel_fpu_end();
-		if (sha_ctx)
-			rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-		else {
-			rctx = NULL;
-			goto out;
-		}
-	}
-
-	/* copy the results */
-	if (rctx->flag & HASH_FINAL)
-		sha1_mb_set_results(rctx);
-
-out:
-	*ret_rctx = rctx;
-	return err;
-}
-
-static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx,
-			    struct mcryptd_alg_cstate *cstate,
-			    int err)
-{
-	struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
-	struct sha1_hash_ctx *sha_ctx;
-	struct mcryptd_hash_request_ctx *req_ctx;
-	int ret;
-
-	/* remove from work list */
-	spin_lock(&cstate->work_lock);
-	list_del(&rctx->waiter);
-	spin_unlock(&cstate->work_lock);
-
-	if (irqs_disabled())
-		rctx->complete(&req->base, err);
-	else {
-		local_bh_disable();
-		rctx->complete(&req->base, err);
-		local_bh_enable();
-	}
-
-	/* check to see if there are other jobs that are done */
-	sha_ctx = sha1_ctx_mgr_get_comp_ctx(cstate->mgr);
-	while (sha_ctx) {
-		req_ctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-		ret = sha_finish_walk(&req_ctx, cstate, false);
-		if (req_ctx) {
-			spin_lock(&cstate->work_lock);
-			list_del(&req_ctx->waiter);
-			spin_unlock(&cstate->work_lock);
-
-			req = cast_mcryptd_ctx_to_req(req_ctx);
-			if (irqs_disabled())
-				req_ctx->complete(&req->base, ret);
-			else {
-				local_bh_disable();
-				req_ctx->complete(&req->base, ret);
-				local_bh_enable();
-			}
-		}
-		sha_ctx = sha1_ctx_mgr_get_comp_ctx(cstate->mgr);
-	}
-
-	return 0;
-}
-
-static void sha1_mb_add_list(struct mcryptd_hash_request_ctx *rctx,
-			     struct mcryptd_alg_cstate *cstate)
-{
-	unsigned long next_flush;
-	unsigned long delay = usecs_to_jiffies(FLUSH_INTERVAL);
-
-	/* initialize tag */
-	rctx->tag.arrival = jiffies;    /* tag the arrival time */
-	rctx->tag.seq_num = cstate->next_seq_num++;
-	next_flush = rctx->tag.arrival + delay;
-	rctx->tag.expire = next_flush;
-
-	spin_lock(&cstate->work_lock);
-	list_add_tail(&rctx->waiter, &cstate->work_list);
-	spin_unlock(&cstate->work_lock);
-
-	mcryptd_arm_flusher(cstate, delay);
-}
-
-static int sha1_mb_update(struct ahash_request *areq)
-{
-	struct mcryptd_hash_request_ctx *rctx =
-		container_of(areq, struct mcryptd_hash_request_ctx, areq);
-	struct mcryptd_alg_cstate *cstate =
-				this_cpu_ptr(sha1_mb_alg_state.alg_cstate);
-
-	struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
-	struct sha1_hash_ctx *sha_ctx;
-	int ret = 0, nbytes;
-
-
-	/* sanity check */
-	if (rctx->tag.cpu != smp_processor_id()) {
-		pr_err("mcryptd error: cpu clash\n");
-		goto done;
-	}
-
-	/* need to init context */
-	req_ctx_init(rctx, areq);
-
-	nbytes = crypto_ahash_walk_first(req, &rctx->walk);
-
-	if (nbytes < 0) {
-		ret = nbytes;
-		goto done;
-	}
-
-	if (crypto_ahash_walk_last(&rctx->walk))
-		rctx->flag |= HASH_DONE;
-
-	/* submit */
-	sha_ctx = (struct sha1_hash_ctx *) ahash_request_ctx(areq);
-	sha1_mb_add_list(rctx, cstate);
-	kernel_fpu_begin();
-	sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data,
-							nbytes, HASH_UPDATE);
-	kernel_fpu_end();
-
-	/* check if anything is returned */
-	if (!sha_ctx)
-		return -EINPROGRESS;
-
-	if (sha_ctx->error) {
-		ret = sha_ctx->error;
-		rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-		goto done;
-	}
-
-	rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-	ret = sha_finish_walk(&rctx, cstate, false);
-
-	if (!rctx)
-		return -EINPROGRESS;
-done:
-	sha_complete_job(rctx, cstate, ret);
-	return ret;
-}
-
-static int sha1_mb_finup(struct ahash_request *areq)
-{
-	struct mcryptd_hash_request_ctx *rctx =
-		container_of(areq, struct mcryptd_hash_request_ctx, areq);
-	struct mcryptd_alg_cstate *cstate =
-				this_cpu_ptr(sha1_mb_alg_state.alg_cstate);
-
-	struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
-	struct sha1_hash_ctx *sha_ctx;
-	int ret = 0, flag = HASH_UPDATE, nbytes;
-
-	/* sanity check */
-	if (rctx->tag.cpu != smp_processor_id()) {
-		pr_err("mcryptd error: cpu clash\n");
-		goto done;
-	}
-
-	/* need to init context */
-	req_ctx_init(rctx, areq);
-
-	nbytes = crypto_ahash_walk_first(req, &rctx->walk);
-
-	if (nbytes < 0) {
-		ret = nbytes;
-		goto done;
-	}
-
-	if (crypto_ahash_walk_last(&rctx->walk)) {
-		rctx->flag |= HASH_DONE;
-		flag = HASH_LAST;
-	}
-
-	/* submit */
-	rctx->flag |= HASH_FINAL;
-	sha_ctx = (struct sha1_hash_ctx *) ahash_request_ctx(areq);
-	sha1_mb_add_list(rctx, cstate);
-
-	kernel_fpu_begin();
-	sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data,
-								nbytes, flag);
-	kernel_fpu_end();
-
-	/* check if anything is returned */
-	if (!sha_ctx)
-		return -EINPROGRESS;
-
-	if (sha_ctx->error) {
-		ret = sha_ctx->error;
-		goto done;
-	}
-
-	rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-	ret = sha_finish_walk(&rctx, cstate, false);
-	if (!rctx)
-		return -EINPROGRESS;
-done:
-	sha_complete_job(rctx, cstate, ret);
-	return ret;
-}
-
-static int sha1_mb_final(struct ahash_request *areq)
-{
-	struct mcryptd_hash_request_ctx *rctx =
-		container_of(areq, struct mcryptd_hash_request_ctx, areq);
-	struct mcryptd_alg_cstate *cstate =
-				this_cpu_ptr(sha1_mb_alg_state.alg_cstate);
-
-	struct sha1_hash_ctx *sha_ctx;
-	int ret = 0;
-	u8 data;
-
-	/* sanity check */
-	if (rctx->tag.cpu != smp_processor_id()) {
-		pr_err("mcryptd error: cpu clash\n");
-		goto done;
-	}
-
-	/* need to init context */
-	req_ctx_init(rctx, areq);
-
-	rctx->flag |= HASH_DONE | HASH_FINAL;
-
-	sha_ctx = (struct sha1_hash_ctx *) ahash_request_ctx(areq);
-	/* flag HASH_FINAL and 0 data size */
-	sha1_mb_add_list(rctx, cstate);
-	kernel_fpu_begin();
-	sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, &data, 0,
-								HASH_LAST);
-	kernel_fpu_end();
-
-	/* check if anything is returned */
-	if (!sha_ctx)
-		return -EINPROGRESS;
-
-	if (sha_ctx->error) {
-		ret = sha_ctx->error;
-		rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-		goto done;
-	}
-
-	rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-	ret = sha_finish_walk(&rctx, cstate, false);
-	if (!rctx)
-		return -EINPROGRESS;
-done:
-	sha_complete_job(rctx, cstate, ret);
-	return ret;
-}
-
-static int sha1_mb_export(struct ahash_request *areq, void *out)
-{
-	struct sha1_hash_ctx *sctx = ahash_request_ctx(areq);
-
-	memcpy(out, sctx, sizeof(*sctx));
-
-	return 0;
-}
-
-static int sha1_mb_import(struct ahash_request *areq, const void *in)
-{
-	struct sha1_hash_ctx *sctx = ahash_request_ctx(areq);
-
-	memcpy(sctx, in, sizeof(*sctx));
-
-	return 0;
-}
-
-static int sha1_mb_async_init_tfm(struct crypto_tfm *tfm)
-{
-	struct mcryptd_ahash *mcryptd_tfm;
-	struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm);
-	struct mcryptd_hash_ctx *mctx;
-
-	mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha1-mb",
-						CRYPTO_ALG_INTERNAL,
-						CRYPTO_ALG_INTERNAL);
-	if (IS_ERR(mcryptd_tfm))
-		return PTR_ERR(mcryptd_tfm);
-	mctx = crypto_ahash_ctx(&mcryptd_tfm->base);
-	mctx->alg_state = &sha1_mb_alg_state;
-	ctx->mcryptd_tfm = mcryptd_tfm;
-	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
-				sizeof(struct ahash_request) +
-				crypto_ahash_reqsize(&mcryptd_tfm->base));
-
-	return 0;
-}
-
-static void sha1_mb_async_exit_tfm(struct crypto_tfm *tfm)
-{
-	struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm);
-
-	mcryptd_free_ahash(ctx->mcryptd_tfm);
-}
-
-static int sha1_mb_areq_init_tfm(struct crypto_tfm *tfm)
-{
-	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
-				sizeof(struct ahash_request) +
-				sizeof(struct sha1_hash_ctx));
-
-	return 0;
-}
-
-static void sha1_mb_areq_exit_tfm(struct crypto_tfm *tfm)
-{
-	struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm);
-
-	mcryptd_free_ahash(ctx->mcryptd_tfm);
-}
-
-static struct ahash_alg sha1_mb_areq_alg = {
-	.init		=	sha1_mb_init,
-	.update		=	sha1_mb_update,
-	.final		=	sha1_mb_final,
-	.finup		=	sha1_mb_finup,
-	.export		=	sha1_mb_export,
-	.import		=	sha1_mb_import,
-	.halg		=	{
-		.digestsize	=	SHA1_DIGEST_SIZE,
-		.statesize	=	sizeof(struct sha1_hash_ctx),
-		.base		=	{
-			.cra_name	 = "__sha1-mb",
-			.cra_driver_name = "__intel_sha1-mb",
-			.cra_priority	 = 100,
-			/*
-			 * use ASYNC flag as some buffers in multi-buffer
-			 * algo may not have completed before hashing thread
-			 * sleep
-			 */
-			.cra_flags	= CRYPTO_ALG_ASYNC |
-					  CRYPTO_ALG_INTERNAL,
-			.cra_blocksize	= SHA1_BLOCK_SIZE,
-			.cra_module	= THIS_MODULE,
-			.cra_list	= LIST_HEAD_INIT
-					(sha1_mb_areq_alg.halg.base.cra_list),
-			.cra_init	= sha1_mb_areq_init_tfm,
-			.cra_exit	= sha1_mb_areq_exit_tfm,
-			.cra_ctxsize	= sizeof(struct sha1_hash_ctx),
-		}
-	}
-};
-
-static int sha1_mb_async_init(struct ahash_request *req)
-{
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
-	memcpy(mcryptd_req, req, sizeof(*req));
-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
-	return crypto_ahash_init(mcryptd_req);
-}
-
-static int sha1_mb_async_update(struct ahash_request *req)
-{
-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
-	memcpy(mcryptd_req, req, sizeof(*req));
-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
-	return crypto_ahash_update(mcryptd_req);
-}
-
-static int sha1_mb_async_finup(struct ahash_request *req)
-{
-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
-	memcpy(mcryptd_req, req, sizeof(*req));
-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
-	return crypto_ahash_finup(mcryptd_req);
-}
-
-static int sha1_mb_async_final(struct ahash_request *req)
-{
-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
-	memcpy(mcryptd_req, req, sizeof(*req));
-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
-	return crypto_ahash_final(mcryptd_req);
-}
-
-static int sha1_mb_async_digest(struct ahash_request *req)
-{
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
-	memcpy(mcryptd_req, req, sizeof(*req));
-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
-	return crypto_ahash_digest(mcryptd_req);
-}
-
-static int sha1_mb_async_export(struct ahash_request *req, void *out)
-{
-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
-	memcpy(mcryptd_req, req, sizeof(*req));
-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
-	return crypto_ahash_export(mcryptd_req, out);
-}
-
-static int sha1_mb_async_import(struct ahash_request *req, const void *in)
-{
-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-	struct crypto_ahash *child = mcryptd_ahash_child(mcryptd_tfm);
-	struct mcryptd_hash_request_ctx *rctx;
-	struct ahash_request *areq;
-
-	memcpy(mcryptd_req, req, sizeof(*req));
-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
-	rctx = ahash_request_ctx(mcryptd_req);
-	areq = &rctx->areq;
-
-	ahash_request_set_tfm(areq, child);
-	ahash_request_set_callback(areq, CRYPTO_TFM_REQ_MAY_SLEEP,
-					rctx->complete, req);
-
-	return crypto_ahash_import(mcryptd_req, in);
-}
-
-static struct ahash_alg sha1_mb_async_alg = {
-	.init           = sha1_mb_async_init,
-	.update         = sha1_mb_async_update,
-	.final          = sha1_mb_async_final,
-	.finup          = sha1_mb_async_finup,
-	.digest         = sha1_mb_async_digest,
-	.export		= sha1_mb_async_export,
-	.import		= sha1_mb_async_import,
-	.halg = {
-		.digestsize     = SHA1_DIGEST_SIZE,
-		.statesize	= sizeof(struct sha1_hash_ctx),
-		.base = {
-			.cra_name               = "sha1",
-			.cra_driver_name        = "sha1_mb",
-			/*
-			 * Low priority, since with few concurrent hash requests
-			 * this is extremely slow due to the flush delay.  Users
-			 * whose workloads would benefit from this can request
-			 * it explicitly by driver name, or can increase its
-			 * priority at runtime using NETLINK_CRYPTO.
-			 */
-			.cra_priority           = 50,
-			.cra_flags              = CRYPTO_ALG_ASYNC,
-			.cra_blocksize          = SHA1_BLOCK_SIZE,
-			.cra_module             = THIS_MODULE,
-			.cra_list               = LIST_HEAD_INIT(sha1_mb_async_alg.halg.base.cra_list),
-			.cra_init               = sha1_mb_async_init_tfm,
-			.cra_exit               = sha1_mb_async_exit_tfm,
-			.cra_ctxsize		= sizeof(struct sha1_mb_ctx),
-			.cra_alignmask		= 0,
-		},
-	},
-};
-
-static unsigned long sha1_mb_flusher(struct mcryptd_alg_cstate *cstate)
-{
-	struct mcryptd_hash_request_ctx *rctx;
-	unsigned long cur_time;
-	unsigned long next_flush = 0;
-	struct sha1_hash_ctx *sha_ctx;
-
-
-	cur_time = jiffies;
-
-	while (!list_empty(&cstate->work_list)) {
-		rctx = list_entry(cstate->work_list.next,
-				struct mcryptd_hash_request_ctx, waiter);
-		if (time_before(cur_time, rctx->tag.expire))
-			break;
-		kernel_fpu_begin();
-		sha_ctx = (struct sha1_hash_ctx *)
-					sha1_ctx_mgr_flush(cstate->mgr);
-		kernel_fpu_end();
-		if (!sha_ctx) {
-			pr_err("sha1_mb error: nothing got flushed for non-empty list\n");
-			break;
-		}
-		rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-		sha_finish_walk(&rctx, cstate, true);
-		sha_complete_job(rctx, cstate, 0);
-	}
-
-	if (!list_empty(&cstate->work_list)) {
-		rctx = list_entry(cstate->work_list.next,
-				struct mcryptd_hash_request_ctx, waiter);
-		/* get the hash context and then flush time */
-		next_flush = rctx->tag.expire;
-		mcryptd_arm_flusher(cstate, get_delay(next_flush));
-	}
-	return next_flush;
-}
-
-static int __init sha1_mb_mod_init(void)
-{
-
-	int cpu;
-	int err;
-	struct mcryptd_alg_cstate *cpu_state;
-
-	/* check for dependent cpu features */
-	if (!boot_cpu_has(X86_FEATURE_AVX2) ||
-	    !boot_cpu_has(X86_FEATURE_BMI2))
-		return -ENODEV;
-
-	/* initialize multibuffer structures */
-	sha1_mb_alg_state.alg_cstate = alloc_percpu(struct mcryptd_alg_cstate);
-
-	sha1_job_mgr_init = sha1_mb_mgr_init_avx2;
-	sha1_job_mgr_submit = sha1_mb_mgr_submit_avx2;
-	sha1_job_mgr_flush = sha1_mb_mgr_flush_avx2;
-	sha1_job_mgr_get_comp_job = sha1_mb_mgr_get_comp_job_avx2;
-
-	if (!sha1_mb_alg_state.alg_cstate)
-		return -ENOMEM;
-	for_each_possible_cpu(cpu) {
-		cpu_state = per_cpu_ptr(sha1_mb_alg_state.alg_cstate, cpu);
-		cpu_state->next_flush = 0;
-		cpu_state->next_seq_num = 0;
-		cpu_state->flusher_engaged = false;
-		INIT_DELAYED_WORK(&cpu_state->flush, mcryptd_flusher);
-		cpu_state->cpu = cpu;
-		cpu_state->alg_state = &sha1_mb_alg_state;
-		cpu_state->mgr = kzalloc(sizeof(struct sha1_ctx_mgr),
-					GFP_KERNEL);
-		if (!cpu_state->mgr)
-			goto err2;
-		sha1_ctx_mgr_init(cpu_state->mgr);
-		INIT_LIST_HEAD(&cpu_state->work_list);
-		spin_lock_init(&cpu_state->work_lock);
-	}
-	sha1_mb_alg_state.flusher = &sha1_mb_flusher;
-
-	err = crypto_register_ahash(&sha1_mb_areq_alg);
-	if (err)
-		goto err2;
-	err = crypto_register_ahash(&sha1_mb_async_alg);
-	if (err)
-		goto err1;
-
-
-	return 0;
-err1:
-	crypto_unregister_ahash(&sha1_mb_areq_alg);
-err2:
-	for_each_possible_cpu(cpu) {
-		cpu_state = per_cpu_ptr(sha1_mb_alg_state.alg_cstate, cpu);
-		kfree(cpu_state->mgr);
-	}
-	free_percpu(sha1_mb_alg_state.alg_cstate);
-	return -ENODEV;
-}
-
-static void __exit sha1_mb_mod_fini(void)
-{
-	int cpu;
-	struct mcryptd_alg_cstate *cpu_state;
-
-	crypto_unregister_ahash(&sha1_mb_async_alg);
-	crypto_unregister_ahash(&sha1_mb_areq_alg);
-	for_each_possible_cpu(cpu) {
-		cpu_state = per_cpu_ptr(sha1_mb_alg_state.alg_cstate, cpu);
-		kfree(cpu_state->mgr);
-	}
-	free_percpu(sha1_mb_alg_state.alg_cstate);
-}
-
-module_init(sha1_mb_mod_init);
-module_exit(sha1_mb_mod_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, multi buffer accelerated");
-
-MODULE_ALIAS_CRYPTO("sha1");

+ 0 - 134
arch/x86/crypto/sha1-mb/sha1_mb_ctx.h

@@ -1,134 +0,0 @@
-/*
- * Header file for multi buffer SHA context
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- *  Copyright(c) 2014 Intel Corporation.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of version 2 of the GNU General Public License as
- *  published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  General Public License for more details.
- *
- *  Contact Information:
- *	Tim Chen <tim.c.chen@linux.intel.com>
- *
- *  BSD LICENSE
- *
- *  Copyright(c) 2014 Intel Corporation.
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *    * Redistributions of source code must retain the above copyright
- *      notice, this list of conditions and the following disclaimer.
- *    * Redistributions in binary form must reproduce the above copyright
- *      notice, this list of conditions and the following disclaimer in
- *      the documentation and/or other materials provided with the
- *      distribution.
- *    * Neither the name of Intel Corporation nor the names of its
- *      contributors may be used to endorse or promote products derived
- *      from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _SHA_MB_CTX_INTERNAL_H
-#define _SHA_MB_CTX_INTERNAL_H
-
-#include "sha1_mb_mgr.h"
-
-#define HASH_UPDATE          0x00
-#define HASH_LAST            0x01
-#define HASH_DONE	     0x02
-#define HASH_FINAL	     0x04
-
-#define HASH_CTX_STS_IDLE       0x00
-#define HASH_CTX_STS_PROCESSING 0x01
-#define HASH_CTX_STS_LAST       0x02
-#define HASH_CTX_STS_COMPLETE   0x04
-
-enum hash_ctx_error {
-	HASH_CTX_ERROR_NONE               =  0,
-	HASH_CTX_ERROR_INVALID_FLAGS      = -1,
-	HASH_CTX_ERROR_ALREADY_PROCESSING = -2,
-	HASH_CTX_ERROR_ALREADY_COMPLETED  = -3,
-
-#ifdef HASH_CTX_DEBUG
-	HASH_CTX_ERROR_DEBUG_DIGEST_MISMATCH = -4,
-#endif
-};
-
-
-#define hash_ctx_user_data(ctx)  ((ctx)->user_data)
-#define hash_ctx_digest(ctx)     ((ctx)->job.result_digest)
-#define hash_ctx_processing(ctx) ((ctx)->status & HASH_CTX_STS_PROCESSING)
-#define hash_ctx_complete(ctx)   ((ctx)->status == HASH_CTX_STS_COMPLETE)
-#define hash_ctx_status(ctx)     ((ctx)->status)
-#define hash_ctx_error(ctx)      ((ctx)->error)
-#define hash_ctx_init(ctx) \
-	do { \
-		(ctx)->error = HASH_CTX_ERROR_NONE; \
-		(ctx)->status = HASH_CTX_STS_COMPLETE; \
-	} while (0)
-
-
-/* Hash Constants and Typedefs */
-#define SHA1_DIGEST_LENGTH          5
-#define SHA1_LOG2_BLOCK_SIZE        6
-
-#define SHA1_PADLENGTHFIELD_SIZE    8
-
-#ifdef SHA_MB_DEBUG
-#define assert(expr) \
-do { \
-	if (unlikely(!(expr))) { \
-		printk(KERN_ERR "Assertion failed! %s,%s,%s,line=%d\n", \
-		#expr, __FILE__, __func__, __LINE__); \
-	} \
-} while (0)
-#else
-#define assert(expr) do {} while (0)
-#endif
-
-struct sha1_ctx_mgr {
-	struct sha1_mb_mgr mgr;
-};
-
-/* typedef struct sha1_ctx_mgr sha1_ctx_mgr; */
-
-struct sha1_hash_ctx {
-	/* Must be at struct offset 0 */
-	struct job_sha1       job;
-	/* status flag */
-	int status;
-	/* error flag */
-	int error;
-
-	uint64_t	total_length;
-	const void	*incoming_buffer;
-	uint32_t	incoming_buffer_length;
-	uint8_t		partial_block_buffer[SHA1_BLOCK_SIZE * 2];
-	uint32_t	partial_block_buffer_length;
-	void		*user_data;
-};
-
-#endif

+ 0 - 110
arch/x86/crypto/sha1-mb/sha1_mb_mgr.h

@@ -1,110 +0,0 @@
-/*
- * Header file for multi buffer SHA1 algorithm manager
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- *  Copyright(c) 2014 Intel Corporation.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of version 2 of the GNU General Public License as
- *  published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  General Public License for more details.
- *
- *  Contact Information:
- *      James Guilford <james.guilford@intel.com>
- *	Tim Chen <tim.c.chen@linux.intel.com>
- *
- *  BSD LICENSE
- *
- *  Copyright(c) 2014 Intel Corporation.
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *    * Redistributions of source code must retain the above copyright
- *      notice, this list of conditions and the following disclaimer.
- *    * Redistributions in binary form must reproduce the above copyright
- *      notice, this list of conditions and the following disclaimer in
- *      the documentation and/or other materials provided with the
- *      distribution.
- *    * Neither the name of Intel Corporation nor the names of its
- *      contributors may be used to endorse or promote products derived
- *      from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-#ifndef __SHA_MB_MGR_H
-#define __SHA_MB_MGR_H
-
-
-#include <linux/types.h>
-
-#define NUM_SHA1_DIGEST_WORDS 5
-
-enum job_sts {	STS_UNKNOWN = 0,
-		STS_BEING_PROCESSED = 1,
-		STS_COMPLETED = 2,
-		STS_INTERNAL_ERROR = 3,
-		STS_ERROR = 4
-};
-
-struct job_sha1 {
-	u8	*buffer;
-	u32	len;
-	u32	result_digest[NUM_SHA1_DIGEST_WORDS] __aligned(32);
-	enum	job_sts status;
-	void	*user_data;
-};
-
-/* SHA1 out-of-order scheduler */
-
-/* typedef uint32_t sha1_digest_array[5][8]; */
-
-struct sha1_args_x8 {
-	uint32_t	digest[5][8];
-	uint8_t		*data_ptr[8];
-};
-
-struct sha1_lane_data {
-	struct job_sha1 *job_in_lane;
-};
-
-struct sha1_mb_mgr {
-	struct sha1_args_x8 args;
-
-	uint32_t lens[8];
-
-	/* each nibble is the index (0...7) of an unused lane */
-	uint64_t unused_lanes;
-	/* the top nibble (low nibble of byte 4) is set to F as a flag */
-	struct sha1_lane_data ldata[8];
-};
-
-
-#define SHA1_MB_MGR_NUM_LANES_AVX2 8
-
-void sha1_mb_mgr_init_avx2(struct sha1_mb_mgr *state);
-struct job_sha1 *sha1_mb_mgr_submit_avx2(struct sha1_mb_mgr *state,
-					 struct job_sha1 *job);
-struct job_sha1 *sha1_mb_mgr_flush_avx2(struct sha1_mb_mgr *state);
-struct job_sha1 *sha1_mb_mgr_get_comp_job_avx2(struct sha1_mb_mgr *state);
-
-#endif

+ 0 - 287
arch/x86/crypto/sha1-mb/sha1_mb_mgr_datastruct.S

@@ -1,287 +0,0 @@
-/*
- * Header file for multi buffer SHA1 algorithm data structure
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- *  Copyright(c) 2014 Intel Corporation.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of version 2 of the GNU General Public License as
- *  published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  General Public License for more details.
- *
- *  Contact Information:
- *      James Guilford <james.guilford@intel.com>
- *	Tim Chen <tim.c.chen@linux.intel.com>
- *
- *  BSD LICENSE
- *
- *  Copyright(c) 2014 Intel Corporation.
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *    * Redistributions of source code must retain the above copyright
- *      notice, this list of conditions and the following disclaimer.
- *    * Redistributions in binary form must reproduce the above copyright
- *      notice, this list of conditions and the following disclaimer in
- *      the documentation and/or other materials provided with the
- *      distribution.
- *    * Neither the name of Intel Corporation nor the names of its
- *      contributors may be used to endorse or promote products derived
- *      from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-# Macros for defining data structures
-
-# Usage example
-
-#START_FIELDS	# JOB_AES
-###	name		size	align
-#FIELD	_plaintext,	8,	8	# pointer to plaintext
-#FIELD	_ciphertext,	8,	8	# pointer to ciphertext
-#FIELD	_IV,		16,	8	# IV
-#FIELD	_keys,		8,	8	# pointer to keys
-#FIELD	_len,		4,	4	# length in bytes
-#FIELD	_status,	4,	4	# status enumeration
-#FIELD	_user_data,	8,	8	# pointer to user data
-#UNION  _union,         size1,  align1, \
-#	                size2,  align2, \
-#	                size3,  align3, \
-#	                ...
-#END_FIELDS
-#%assign _JOB_AES_size	_FIELD_OFFSET
-#%assign _JOB_AES_align	_STRUCT_ALIGN
-
-#########################################################################
-
-# Alternate "struc-like" syntax:
-#	STRUCT job_aes2
-#	RES_Q	.plaintext,	1
-#	RES_Q	.ciphertext,	1
-#	RES_DQ	.IV,		1
-#	RES_B	.nested,	_JOB_AES_SIZE, _JOB_AES_ALIGN
-#	RES_U	.union,		size1, align1, \
-#				size2, align2, \
-#				...
-#	ENDSTRUCT
-#	# Following only needed if nesting
-#	%assign job_aes2_size	_FIELD_OFFSET
-#	%assign job_aes2_align	_STRUCT_ALIGN
-#
-# RES_* macros take a name, a count and an optional alignment.
-# The count is in terms of the base size of the macro, and the
-# default alignment is the base size.
-# The macros are:
-# Macro    Base size
-# RES_B	    1
-# RES_W	    2
-# RES_D     4
-# RES_Q     8
-# RES_DQ   16
-# RES_Y    32
-# RES_Z    64
-#
-# RES_U defines a union. Its arguments are a name and two or more
-# pairs of "size, alignment"
-#
-# The two assigns are only needed if this structure is being nested
-# within another. Even if the assigns are not done, one can still use
-# STRUCT_NAME_size as the size of the structure.
-#
-# Note that for nesting, you still need to assign to STRUCT_NAME_size.
-#
-# The differences between this and using "struc" directly are that each
-# type is implicitly aligned to its natural length (although this can be
-# over-ridden with an explicit third parameter), and that the structure
-# is padded at the end to its overall alignment.
-#
-
-#########################################################################
-
-#ifndef _SHA1_MB_MGR_DATASTRUCT_ASM_
-#define _SHA1_MB_MGR_DATASTRUCT_ASM_
-
-## START_FIELDS
-.macro START_FIELDS
- _FIELD_OFFSET = 0
- _STRUCT_ALIGN = 0
-.endm
-
-## FIELD name size align
-.macro FIELD name size align
- _FIELD_OFFSET = (_FIELD_OFFSET + (\align) - 1) & (~ ((\align)-1))
- \name	= _FIELD_OFFSET
- _FIELD_OFFSET = _FIELD_OFFSET + (\size)
-.if (\align > _STRUCT_ALIGN)
- _STRUCT_ALIGN = \align
-.endif
-.endm
-
-## END_FIELDS
-.macro END_FIELDS
- _FIELD_OFFSET = (_FIELD_OFFSET + _STRUCT_ALIGN-1) & (~ (_STRUCT_ALIGN-1))
-.endm
-
-########################################################################
-
-.macro STRUCT p1
-START_FIELDS
-.struc \p1
-.endm
-
-.macro ENDSTRUCT
- tmp = _FIELD_OFFSET
- END_FIELDS
- tmp = (_FIELD_OFFSET - %%tmp)
-.if (tmp > 0)
-	.lcomm	tmp
-.endif
-.endstruc
-.endm
-
-## RES_int name size align
-.macro RES_int p1 p2 p3
- name = \p1
- size = \p2
- align = .\p3
-
- _FIELD_OFFSET = (_FIELD_OFFSET + (align) - 1) & (~ ((align)-1))
-.align align
-.lcomm name size
- _FIELD_OFFSET = _FIELD_OFFSET + (size)
-.if (align > _STRUCT_ALIGN)
- _STRUCT_ALIGN = align
-.endif
-.endm
-
-
-
-# macro RES_B name, size [, align]
-.macro RES_B _name, _size, _align=1
-RES_int _name _size _align
-.endm
-
-# macro RES_W name, size [, align]
-.macro RES_W _name, _size, _align=2
-RES_int _name 2*(_size) _align
-.endm
-
-# macro RES_D name, size [, align]
-.macro RES_D _name, _size, _align=4
-RES_int _name 4*(_size) _align
-.endm
-
-# macro RES_Q name, size [, align]
-.macro RES_Q _name, _size, _align=8
-RES_int _name 8*(_size) _align
-.endm
-
-# macro RES_DQ name, size [, align]
-.macro RES_DQ _name, _size, _align=16
-RES_int _name 16*(_size) _align
-.endm
-
-# macro RES_Y name, size [, align]
-.macro RES_Y _name, _size, _align=32
-RES_int _name 32*(_size) _align
-.endm
-
-# macro RES_Z name, size [, align]
-.macro RES_Z _name, _size, _align=64
-RES_int _name 64*(_size) _align
-.endm
-
-
-#endif
-
-########################################################################
-#### Define constants
-########################################################################
-
-########################################################################
-#### Define SHA1 Out Of Order Data Structures
-########################################################################
-
-START_FIELDS    # LANE_DATA
-###     name            size    align
-FIELD   _job_in_lane,   8,      8       # pointer to job object
-END_FIELDS
-
-_LANE_DATA_size = _FIELD_OFFSET
-_LANE_DATA_align = _STRUCT_ALIGN
-
-########################################################################
-
-START_FIELDS    # SHA1_ARGS_X8
-###     name            size    align
-FIELD   _digest,        4*5*8,  16      # transposed digest
-FIELD   _data_ptr,      8*8,    8       # array of pointers to data
-END_FIELDS
-
-_SHA1_ARGS_X4_size =     _FIELD_OFFSET
-_SHA1_ARGS_X4_align =    _STRUCT_ALIGN
-_SHA1_ARGS_X8_size =     _FIELD_OFFSET
-_SHA1_ARGS_X8_align =    _STRUCT_ALIGN
-
-########################################################################
-
-START_FIELDS    # MB_MGR
-###     name            size    align
-FIELD   _args,          _SHA1_ARGS_X4_size, _SHA1_ARGS_X4_align
-FIELD   _lens,          4*8,    8
-FIELD   _unused_lanes,  8,      8
-FIELD   _ldata,         _LANE_DATA_size*8, _LANE_DATA_align
-END_FIELDS
-
-_MB_MGR_size =   _FIELD_OFFSET
-_MB_MGR_align =  _STRUCT_ALIGN
-
-_args_digest    =     _args + _digest
-_args_data_ptr  =     _args + _data_ptr
-
-
-########################################################################
-#### Define constants
-########################################################################
-
-#define STS_UNKNOWN             0
-#define STS_BEING_PROCESSED     1
-#define STS_COMPLETED           2
-
-########################################################################
-#### Define JOB_SHA1 structure
-########################################################################
-
-START_FIELDS    # JOB_SHA1
-
-###     name                            size    align
-FIELD   _buffer,                        8,      8       # pointer to buffer
-FIELD   _len,                           4,      4       # length in bytes
-FIELD   _result_digest,                 5*4,    32      # Digest (output)
-FIELD   _status,                        4,      4
-FIELD   _user_data,                     8,      8
-END_FIELDS
-
-_JOB_SHA1_size =  _FIELD_OFFSET
-_JOB_SHA1_align = _STRUCT_ALIGN
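
Editor's note: the FIELD macro above advances _FIELD_OFFSET by rounding it up to each field's alignment, and END_FIELDS pads the total size to the largest alignment seen. A user-space sketch of the same arithmetic, applied to the JOB_SHA1 layout declared at the end of the file (offsets are derived from the FIELD lines above, not taken from a build):

#include <stdio.h>
#include <stddef.h>

/* Running offset and largest alignment, as in START_FIELDS/FIELD/END_FIELDS. */
static size_t offset, struct_align = 1;

static size_t field(size_t size, size_t align)
{
	size_t pos = (offset + align - 1) & ~(align - 1);	/* align up */

	offset = pos + size;
	if (align > struct_align)
		struct_align = align;
	return pos;
}

int main(void)
{
	printf("_buffer        @ %zu\n", field(8, 8));
	printf("_len           @ %zu\n", field(4, 4));
	printf("_result_digest @ %zu\n", field(5 * 4, 32));
	printf("_status        @ %zu\n", field(4, 4));
	printf("_user_data     @ %zu\n", field(8, 8));
	/* END_FIELDS pads the total size to the largest alignment seen */
	printf("size = %zu, align = %zu\n",
	       (offset + struct_align - 1) & ~(struct_align - 1), struct_align);
	return 0;
}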

+ 0 - 304
arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S

@@ -1,304 +0,0 @@
-/*
- * Flush routine for SHA1 multibuffer
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- *  Copyright(c) 2014 Intel Corporation.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of version 2 of the GNU General Public License as
- *  published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  General Public License for more details.
- *
- *  Contact Information:
- *      James Guilford <james.guilford@intel.com>
- *	Tim Chen <tim.c.chen@linux.intel.com>
- *
- *  BSD LICENSE
- *
- *  Copyright(c) 2014 Intel Corporation.
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *    * Redistributions of source code must retain the above copyright
- *      notice, this list of conditions and the following disclaimer.
- *    * Redistributions in binary form must reproduce the above copyright
- *      notice, this list of conditions and the following disclaimer in
- *      the documentation and/or other materials provided with the
- *      distribution.
- *    * Neither the name of Intel Corporation nor the names of its
- *      contributors may be used to endorse or promote products derived
- *      from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-#include <linux/linkage.h>
-#include <asm/frame.h>
-#include "sha1_mb_mgr_datastruct.S"
-
-
-.extern sha1_x8_avx2
-
-# LINUX register definitions
-#define arg1    %rdi
-#define arg2    %rsi
-
-# Common definitions
-#define state   arg1
-#define job     arg2
-#define len2    arg2
-
-# idx must be a register not clobbered by sha1_x8_avx2
-#define idx		%r8
-#define DWORD_idx	%r8d
-
-#define unused_lanes    %rbx
-#define lane_data       %rbx
-#define tmp2            %rbx
-#define tmp2_w		%ebx
-
-#define job_rax         %rax
-#define tmp1            %rax
-#define size_offset     %rax
-#define tmp             %rax
-#define start_offset    %rax
-
-#define tmp3            %arg1
-
-#define extra_blocks    %arg2
-#define p               %arg2
-
-.macro LABEL prefix n
-\prefix\n\():
-.endm
-
-.macro JNE_SKIP i
-jne     skip_\i
-.endm
-
-.altmacro
-.macro SET_OFFSET _offset
-offset = \_offset
-.endm
-.noaltmacro
-
-# JOB* sha1_mb_mgr_flush_avx2(MB_MGR *state)
-# arg 1 : rcx : state
-ENTRY(sha1_mb_mgr_flush_avx2)
-	FRAME_BEGIN
-	push	%rbx
-
-	# If bit (32+3) is set, then all lanes are empty
-	mov     _unused_lanes(state), unused_lanes
-	bt      $32+3, unused_lanes
-	jc      return_null
-
-	# find a lane with a non-null job
-	xor     idx, idx
-	offset = (_ldata + 1 * _LANE_DATA_size + _job_in_lane)
-	cmpq    $0, offset(state)
-	cmovne  one(%rip), idx
-	offset = (_ldata + 2 * _LANE_DATA_size + _job_in_lane)
-	cmpq    $0, offset(state)
-	cmovne  two(%rip), idx
-	offset = (_ldata + 3 * _LANE_DATA_size + _job_in_lane)
-	cmpq    $0, offset(state)
-	cmovne  three(%rip), idx
-	offset = (_ldata + 4 * _LANE_DATA_size + _job_in_lane)
-	cmpq    $0, offset(state)
-	cmovne  four(%rip), idx
-	offset = (_ldata + 5 * _LANE_DATA_size + _job_in_lane)
-	cmpq    $0, offset(state)
-	cmovne  five(%rip), idx
-	offset = (_ldata + 6 * _LANE_DATA_size + _job_in_lane)
-	cmpq    $0, offset(state)
-	cmovne  six(%rip), idx
-	offset = (_ldata + 7 * _LANE_DATA_size + _job_in_lane)
-	cmpq    $0, offset(state)
-	cmovne  seven(%rip), idx
-
-	# copy idx to empty lanes
-copy_lane_data:
-	offset =  (_args + _data_ptr)
-	mov     offset(state,idx,8), tmp
-
-	I = 0
-.rep 8
-	offset =  (_ldata + I * _LANE_DATA_size + _job_in_lane)
-	cmpq    $0, offset(state)
-.altmacro
-	JNE_SKIP %I
-	offset =  (_args + _data_ptr + 8*I)
-	mov     tmp, offset(state)
-	offset =  (_lens + 4*I)
-	movl    $0xFFFFFFFF, offset(state)
-LABEL skip_ %I
-	I = (I+1)
-.noaltmacro
-.endr
-
-	# Find min length
-	vmovdqu _lens+0*16(state), %xmm0
-	vmovdqu _lens+1*16(state), %xmm1
-
-	vpminud %xmm1, %xmm0, %xmm2     # xmm2 has {D,C,B,A}
-	vpalignr $8, %xmm2, %xmm3, %xmm3   # xmm3 has {x,x,D,C}
-	vpminud %xmm3, %xmm2, %xmm2        # xmm2 has {x,x,E,F}
-	vpalignr $4, %xmm2, %xmm3, %xmm3    # xmm3 has {x,x,x,E}
-	vpminud %xmm3, %xmm2, %xmm2        # xmm2 has min value in low dword
-
-	vmovd   %xmm2, DWORD_idx
-	mov	idx, len2
-	and	$0xF, idx
-	shr	$4, len2
-	jz	len_is_0
-
-	vpand   clear_low_nibble(%rip), %xmm2, %xmm2
-	vpshufd $0, %xmm2, %xmm2
-
-	vpsubd  %xmm2, %xmm0, %xmm0
-	vpsubd  %xmm2, %xmm1, %xmm1
-
-	vmovdqu %xmm0, _lens+0*16(state)
-	vmovdqu %xmm1, _lens+1*16(state)
-
-	# "state" and "args" are the same address, arg1
-	# len is arg2
-	call	sha1_x8_avx2
-	# state and idx are intact
-
-
-len_is_0:
-	# process completed job "idx"
-	imul    $_LANE_DATA_size, idx, lane_data
-	lea     _ldata(state, lane_data), lane_data
-
-	mov     _job_in_lane(lane_data), job_rax
-	movq    $0, _job_in_lane(lane_data)
-	movl    $STS_COMPLETED, _status(job_rax)
-	mov     _unused_lanes(state), unused_lanes
-	shl     $4, unused_lanes
-	or      idx, unused_lanes
-	mov     unused_lanes, _unused_lanes(state)
-
-	movl	$0xFFFFFFFF, _lens(state, idx, 4)
-
-	vmovd    _args_digest(state , idx, 4) , %xmm0
-	vpinsrd  $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0
-	vpinsrd  $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0
-	vpinsrd  $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0
-	movl    _args_digest+4*32(state, idx, 4), tmp2_w
-
-	vmovdqu  %xmm0, _result_digest(job_rax)
-	offset =  (_result_digest + 1*16)
-	mov     tmp2_w, offset(job_rax)
-
-return:
-	pop	%rbx
-	FRAME_END
-	ret
-
-return_null:
-	xor     job_rax, job_rax
-	jmp     return
-ENDPROC(sha1_mb_mgr_flush_avx2)
-
-
-#################################################################
-
-.align 16
-ENTRY(sha1_mb_mgr_get_comp_job_avx2)
-	push    %rbx
-
-	## if bit 32+3 is set, then all lanes are empty
-	mov     _unused_lanes(state), unused_lanes
-	bt      $(32+3), unused_lanes
-	jc      .return_null
-
-	# Find min length
-	vmovdqu _lens(state), %xmm0
-	vmovdqu _lens+1*16(state), %xmm1
-
-	vpminud %xmm1, %xmm0, %xmm2        # xmm2 has {D,C,B,A}
-	vpalignr $8, %xmm2, %xmm3, %xmm3   # xmm3 has {x,x,D,C}
-	vpminud %xmm3, %xmm2, %xmm2        # xmm2 has {x,x,E,F}
-	vpalignr $4, %xmm2, %xmm3, %xmm3    # xmm3 has {x,x,x,E}
-	vpminud %xmm3, %xmm2, %xmm2        # xmm2 has min value in low dword
-
-	vmovd   %xmm2, DWORD_idx
-	test    $~0xF, idx
-	jnz     .return_null
-
-	# process completed job "idx"
-	imul    $_LANE_DATA_size, idx, lane_data
-	lea     _ldata(state, lane_data), lane_data
-
-	mov     _job_in_lane(lane_data), job_rax
-	movq    $0,  _job_in_lane(lane_data)
-	movl    $STS_COMPLETED, _status(job_rax)
-	mov     _unused_lanes(state), unused_lanes
-	shl     $4, unused_lanes
-	or      idx, unused_lanes
-	mov     unused_lanes, _unused_lanes(state)
-
-	movl    $0xFFFFFFFF, _lens(state,  idx, 4)
-
-	vmovd   _args_digest(state, idx, 4), %xmm0
-	vpinsrd $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0
-	vpinsrd $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0
-	vpinsrd $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0
-	movl    _args_digest+4*32(state, idx, 4), tmp2_w
-
-	vmovdqu %xmm0, _result_digest(job_rax)
-	movl    tmp2_w, _result_digest+1*16(job_rax)
-
-	pop     %rbx
-
-	ret
-
-.return_null:
-	xor     job_rax, job_rax
-	pop     %rbx
-	ret
-ENDPROC(sha1_mb_mgr_get_comp_job_avx2)
-
-.section	.rodata.cst16.clear_low_nibble, "aM", @progbits, 16
-.align 16
-clear_low_nibble:
-.octa	0x000000000000000000000000FFFFFFF0
-
-.section	.rodata.cst8, "aM", @progbits, 8
-.align 8
-one:
-.quad  1
-two:
-.quad  2
-three:
-.quad  3
-four:
-.quad  4
-five:
-.quad  5
-six:
-.quad  6
-seven:
-.quad  7
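
Editor's note: the flush and get_comp_job routines above keep each lens[] entry as (block count << 4) | lane, with 0xFFFFFFFF marking an empty lane, so the vpminud reduction yields both the shortest job and its lane in a single value (idx = min & 0xF, length = min >> 4). A scalar sketch of that trick; the sample lane contents are made up for illustration:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* (blocks << 4) | lane, 0xFFFFFFFF for empty lanes */
	uint32_t lens[8] = {
		(12u << 4) | 0, 0xFFFFFFFF, (3u << 4) | 2, (7u << 4) | 3,
		0xFFFFFFFF, 0xFFFFFFFF, (3u << 4) | 6, 0xFFFFFFFF,
	};
	uint32_t min = lens[0];

	/* what vpminud does across all eight lanes at once */
	for (int i = 1; i < 8; i++)
		if (lens[i] < min)
			min = lens[i];

	printf("lane %u has the fewest blocks: %u\n", min & 0xF, min >> 4);
	return 0;
}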

+ 0 - 64
arch/x86/crypto/sha1-mb/sha1_mb_mgr_init_avx2.c

@@ -1,64 +0,0 @@
-/*
- * Initialization code for multi buffer SHA1 algorithm for AVX2
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- *  Copyright(c) 2014 Intel Corporation.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of version 2 of the GNU General Public License as
- *  published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  General Public License for more details.
- *
- *  Contact Information:
- *	Tim Chen <tim.c.chen@linux.intel.com>
- *
- *  BSD LICENSE
- *
- *  Copyright(c) 2014 Intel Corporation.
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *    * Redistributions of source code must retain the above copyright
- *      notice, this list of conditions and the following disclaimer.
- *    * Redistributions in binary form must reproduce the above copyright
- *      notice, this list of conditions and the following disclaimer in
- *      the documentation and/or other materials provided with the
- *      distribution.
- *    * Neither the name of Intel Corporation nor the names of its
- *      contributors may be used to endorse or promote products derived
- *      from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "sha1_mb_mgr.h"
-
-void sha1_mb_mgr_init_avx2(struct sha1_mb_mgr *state)
-{
-	unsigned int j;
-	state->unused_lanes = 0xF76543210ULL;
-	for (j = 0; j < 8; j++) {
-		state->lens[j] = 0xFFFFFFFF;
-		state->ldata[j].job_in_lane = NULL;
-	}
-}
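
The 0xF76543210ULL initializer above is a nibble-packed stack of the eight free lane indices, with 0xF left as an "empty" sentinel in the top position. A minimal C sketch of the pop/push operations the assembly performs with and/shr and shl/or (illustrative only):

#include <stdint.h>

static unsigned int lane_pop(uint64_t *unused_lanes)
{
	unsigned int lane = *unused_lanes & 0xF;	/* and $0xF, lane */

	*unused_lanes >>= 4;				/* shr $4, unused_lanes */
	return lane;
}

static void lane_push(uint64_t *unused_lanes, unsigned int lane)
{
	*unused_lanes = (*unused_lanes << 4) | lane;	/* shl $4 ; or lane */
}

/*
 * Once the value has shrunk to the bare 0xF sentinel, every lane is
 * occupied; sha1_mb_mgr_submit_avx2 compares against 0xF for exactly
 * this before it starts hashing.
 */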

+ 0 - 209
arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S

@@ -1,209 +0,0 @@
-/*
- * Buffer submit code for multi buffer SHA1 algorithm
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- *  Copyright(c) 2014 Intel Corporation.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of version 2 of the GNU General Public License as
- *  published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  General Public License for more details.
- *
- *  Contact Information:
- *      James Guilford <james.guilford@intel.com>
- *	Tim Chen <tim.c.chen@linux.intel.com>
- *
- *  BSD LICENSE
- *
- *  Copyright(c) 2014 Intel Corporation.
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *    * Redistributions of source code must retain the above copyright
- *      notice, this list of conditions and the following disclaimer.
- *    * Redistributions in binary form must reproduce the above copyright
- *      notice, this list of conditions and the following disclaimer in
- *      the documentation and/or other materials provided with the
- *      distribution.
- *    * Neither the name of Intel Corporation nor the names of its
- *      contributors may be used to endorse or promote products derived
- *      from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/linkage.h>
-#include <asm/frame.h>
-#include "sha1_mb_mgr_datastruct.S"
-
-
-.extern sha1_x8_avx2
-
-# LINUX register definitions
-arg1    = %rdi
-arg2    = %rsi
-size_offset	= %rcx
-tmp2		= %rcx
-extra_blocks	= %rdx
-
-# Common definitions
-#define state   arg1
-#define job     %rsi
-#define len2    arg2
-#define p2      arg2
-
-# idx must be a register not clobbered by sha1_x8_avx2
-idx		= %r8
-DWORD_idx	= %r8d
-last_len	= %r8
-
-p               = %r11
-start_offset    = %r11
-
-unused_lanes    = %rbx
-BYTE_unused_lanes = %bl
-
-job_rax         = %rax
-len             = %rax
-DWORD_len	= %eax
-
-lane            = %r12
-tmp3            = %r12
-
-tmp             = %r9
-DWORD_tmp	= %r9d
-
-lane_data       = %r10
-
-# JOB* sha1_mb_mgr_submit_avx2(MB_MGR *state, job_sha1 *job)
-# arg 1 : rdi : state
-# arg 2 : rsi : job
-ENTRY(sha1_mb_mgr_submit_avx2)
-	FRAME_BEGIN
-	push	%rbx
-	push	%r12
-
-	mov     _unused_lanes(state), unused_lanes
-	mov	unused_lanes, lane
-	and	$0xF, lane
-	shr     $4, unused_lanes
-	imul    $_LANE_DATA_size, lane, lane_data
-	movl    $STS_BEING_PROCESSED, _status(job)
-	lea     _ldata(state, lane_data), lane_data
-	mov     unused_lanes, _unused_lanes(state)
-	movl    _len(job),  DWORD_len
-
-	mov	job, _job_in_lane(lane_data)
-	shl	$4, len
-	or	lane, len
-
-	movl    DWORD_len,  _lens(state , lane, 4)
-
-	# Load digest words from result_digest
-	vmovdqu	_result_digest(job), %xmm0
-	mov	_result_digest+1*16(job), DWORD_tmp
-	vmovd    %xmm0, _args_digest(state, lane, 4)
-	vpextrd  $1, %xmm0, _args_digest+1*32(state , lane, 4)
-	vpextrd  $2, %xmm0, _args_digest+2*32(state , lane, 4)
-	vpextrd  $3, %xmm0, _args_digest+3*32(state , lane, 4)
-	movl    DWORD_tmp, _args_digest+4*32(state , lane, 4)
-
-	mov     _buffer(job), p
-	mov     p, _args_data_ptr(state, lane, 8)
-
-	cmp     $0xF, unused_lanes
-	jne     return_null
-
-start_loop:
-	# Find min length
-	vmovdqa _lens(state), %xmm0
-	vmovdqa _lens+1*16(state), %xmm1
-
-	vpminud %xmm1, %xmm0, %xmm2        # xmm2 has {D,C,B,A}
-	vpalignr $8, %xmm2, %xmm3, %xmm3   # xmm3 has {x,x,D,C}
-	vpminud %xmm3, %xmm2, %xmm2        # xmm2 has {x,x,E,F}
-	vpalignr $4, %xmm2, %xmm3, %xmm3   # xmm3 has {x,x,x,E}
-	vpminud %xmm3, %xmm2, %xmm2        # xmm2 has min value in low dword
-
-	vmovd   %xmm2, DWORD_idx
-	mov    idx, len2
-	and    $0xF, idx
-	shr    $4, len2
-	jz     len_is_0
-
-	vpand   clear_low_nibble(%rip), %xmm2, %xmm2
-	vpshufd $0, %xmm2, %xmm2
-
-	vpsubd  %xmm2, %xmm0, %xmm0
-	vpsubd  %xmm2, %xmm1, %xmm1
-
-	vmovdqa %xmm0, _lens + 0*16(state)
-	vmovdqa %xmm1, _lens + 1*16(state)
-
-
-	# "state" and "args" are the same address, arg1
-	# len is arg2
-	call    sha1_x8_avx2
-
-	# state and idx are intact
-
-len_is_0:
-	# process completed job "idx"
-	imul    $_LANE_DATA_size, idx, lane_data
-	lea     _ldata(state, lane_data), lane_data
-
-	mov     _job_in_lane(lane_data), job_rax
-	mov     _unused_lanes(state), unused_lanes
-	movq    $0, _job_in_lane(lane_data)
-	movl    $STS_COMPLETED, _status(job_rax)
-	shl     $4, unused_lanes
-	or      idx, unused_lanes
-	mov     unused_lanes, _unused_lanes(state)
-
-	movl	$0xFFFFFFFF, _lens(state, idx, 4)
-
-	vmovd    _args_digest(state, idx, 4), %xmm0
-	vpinsrd  $1, _args_digest+1*32(state , idx, 4), %xmm0, %xmm0
-	vpinsrd  $2, _args_digest+2*32(state , idx, 4), %xmm0, %xmm0
-	vpinsrd  $3, _args_digest+3*32(state , idx, 4), %xmm0, %xmm0
-	movl     _args_digest+4*32(state, idx, 4), DWORD_tmp
-
-	vmovdqu  %xmm0, _result_digest(job_rax)
-	movl    DWORD_tmp, _result_digest+1*16(job_rax)
-
-return:
-	pop	%r12
-	pop	%rbx
-	FRAME_END
-	ret
-
-return_null:
-	xor     job_rax, job_rax
-	jmp     return
-
-ENDPROC(sha1_mb_mgr_submit_avx2)
-
-.section	.rodata.cst16.clear_low_nibble, "aM", @progbits, 16
-.align 16
-clear_low_nibble:
-	.octa	0x000000000000000000000000FFFFFFF0
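
Each lens[] slot above packs (blocks << 4) | lane, so a single unsigned minimum both finds the shortest outstanding job and says which lane holds it; clear_low_nibble then lets the shared block count be subtracted from every lane at once. A rough C restatement of that bookkeeping (assumed helper names, not the removed code):

#include <stdint.h>

static inline uint32_t encode_len(uint32_t blocks, unsigned int lane)
{
	return (blocks << 4) | lane;		/* shl $4, len ; or lane, len */
}

static void run_shortest_job(uint32_t lens[8])
{
	uint32_t min = lens[0];
	unsigned int i, blocks;

	for (i = 1; i < 8; i++)			/* vpminud over both xmm halves */
		if (lens[i] < min)
			min = lens[i];

	blocks = min >> 4;			/* shr $4, len2 */
	if (!blocks)
		return;				/* len_is_0: job already done */

	/* vpand clear_low_nibble + vpsubd: every lane advances by the same
	 * number of blocks, since sha1_x8_avx2 hashes them in lockstep. */
	min &= ~0xFu;
	for (i = 0; i < 8; i++)
		lens[i] -= min;

	/* here the assembly calls sha1_x8_avx2(state, blocks) */
}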

+ 0 - 492
arch/x86/crypto/sha1-mb/sha1_x8_avx2.S

@@ -1,492 +0,0 @@
-/*
- * Multi-buffer SHA1 algorithm hash compute routine
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- *  Copyright(c) 2014 Intel Corporation.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of version 2 of the GNU General Public License as
- *  published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  General Public License for more details.
- *
- *  Contact Information:
- *      James Guilford <james.guilford@intel.com>
- *	Tim Chen <tim.c.chen@linux.intel.com>
- *
- *  BSD LICENSE
- *
- *  Copyright(c) 2014 Intel Corporation.
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *    * Redistributions of source code must retain the above copyright
- *      notice, this list of conditions and the following disclaimer.
- *    * Redistributions in binary form must reproduce the above copyright
- *      notice, this list of conditions and the following disclaimer in
- *      the documentation and/or other materials provided with the
- *      distribution.
- *    * Neither the name of Intel Corporation nor the names of its
- *      contributors may be used to endorse or promote products derived
- *      from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/linkage.h>
-#include "sha1_mb_mgr_datastruct.S"
-
-## code to compute oct SHA1 using AVX2 (256-bit vectors)
-## outer calling routine takes care of save and restore of XMM registers
-
-## Function clobbers: rax, rcx, rdx, rbx, rsi, rdi, r9-r15; ymm0-15
-##
-## Linux clobbers:    rax rbx rcx rdx rsi            r9 r10 r11 r12 r13 r14 r15
-## Linux preserves:                       rdi rbp r8
-##
-## clobbers ymm0-15
-
-
-# TRANSPOSE8 r0, r1, r2, r3, r4, r5, r6, r7, t0, t1
-# "transpose" data in {r0...r7} using temps {t0...t1}
-# Input looks like: {r0 r1 r2 r3 r4 r5 r6 r7}
-# r0 = {a7 a6 a5 a4   a3 a2 a1 a0}
-# r1 = {b7 b6 b5 b4   b3 b2 b1 b0}
-# r2 = {c7 c6 c5 c4   c3 c2 c1 c0}
-# r3 = {d7 d6 d5 d4   d3 d2 d1 d0}
-# r4 = {e7 e6 e5 e4   e3 e2 e1 e0}
-# r5 = {f7 f6 f5 f4   f3 f2 f1 f0}
-# r6 = {g7 g6 g5 g4   g3 g2 g1 g0}
-# r7 = {h7 h6 h5 h4   h3 h2 h1 h0}
-#
-# Output looks like: {r0 r1 r2 r3 r4 r5 r6 r7}
-# r0 = {h0 g0 f0 e0   d0 c0 b0 a0}
-# r1 = {h1 g1 f1 e1   d1 c1 b1 a1}
-# r2 = {h2 g2 f2 e2   d2 c2 b2 a2}
-# r3 = {h3 g3 f3 e3   d3 c3 b3 a3}
-# r4 = {h4 g4 f4 e4   d4 c4 b4 a4}
-# r5 = {h5 g5 f5 e5   d5 c5 b5 a5}
-# r6 = {h6 g6 f6 e6   d6 c6 b6 a6}
-# r7 = {h7 g7 f7 e7   d7 c7 b7 a7}
-#
-
-.macro TRANSPOSE8 r0 r1 r2 r3 r4 r5 r6 r7 t0 t1
-	# process top half (r0..r3) {a...d}
-	vshufps  $0x44, \r1, \r0, \t0 # t0 = {b5 b4 a5 a4   b1 b0 a1 a0}
-	vshufps  $0xEE, \r1, \r0, \r0 # r0 = {b7 b6 a7 a6   b3 b2 a3 a2}
-	vshufps  $0x44, \r3, \r2, \t1 # t1 = {d5 d4 c5 c4   d1 d0 c1 c0}
-	vshufps  $0xEE, \r3, \r2, \r2 # r2 = {d7 d6 c7 c6   d3 d2 c3 c2}
-	vshufps  $0xDD, \t1, \t0, \r3 # r3 = {d5 c5 b5 a5   d1 c1 b1 a1}
-	vshufps  $0x88, \r2, \r0, \r1 # r1 = {d6 c6 b6 a6   d2 c2 b2 a2}
-	vshufps  $0xDD, \r2, \r0, \r0 # r0 = {d7 c7 b7 a7   d3 c3 b3 a3}
-	vshufps  $0x88, \t1, \t0, \t0 # t0 = {d4 c4 b4 a4   d0 c0 b0 a0}
-
-	# use r2 in place of t0
-	# process bottom half (r4..r7) {e...h}
-	vshufps  $0x44, \r5, \r4, \r2 # r2 = {f5 f4 e5 e4   f1 f0 e1 e0}
-	vshufps  $0xEE, \r5, \r4, \r4 # r4 = {f7 f6 e7 e6   f3 f2 e3 e2}
-	vshufps  $0x44, \r7, \r6, \t1 # t1 = {h5 h4 g5 g4   h1 h0 g1 g0}
-	vshufps  $0xEE, \r7, \r6, \r6 # r6 = {h7 h6 g7 g6   h3 h2 g3 g2}
-	vshufps  $0xDD, \t1, \r2, \r7 # r7 = {h5 g5 f5 e5   h1 g1 f1 e1}
-	vshufps  $0x88, \r6, \r4, \r5 # r5 = {h6 g6 f6 e6   h2 g2 f2 e2}
-	vshufps  $0xDD, \r6, \r4, \r4 # r4 = {h7 g7 f7 e7   h3 g3 f3 e3}
-	vshufps  $0x88, \t1, \r2, \t1 # t1 = {h4 g4 f4 e4   h0 g0 f0 e0}
-
-	vperm2f128      $0x13, \r1, \r5, \r6  # h6...a6
-	vperm2f128      $0x02, \r1, \r5, \r2  # h2...a2
-	vperm2f128      $0x13, \r3, \r7, \r5  # h5...a5
-	vperm2f128      $0x02, \r3, \r7, \r1  # h1...a1
-	vperm2f128      $0x13, \r0, \r4, \r7  # h7...a7
-	vperm2f128      $0x02, \r0, \r4, \r3  # h3...a3
-	vperm2f128      $0x13, \t0, \t1, \r4  # h4...a4
-	vperm2f128      $0x02, \t0, \t1, \r0  # h0...a0
-
-.endm
-##
-## Magic functions defined in FIPS 180-1
-##
-# macro MAGIC_F0 F,B,C,D,T   ## F = (D ^ (B & (C ^ D)))
-.macro MAGIC_F0 regF regB regC regD regT
-    vpxor \regD, \regC, \regF
-    vpand \regB, \regF, \regF
-    vpxor \regD, \regF, \regF
-.endm
-
-# macro MAGIC_F1 F,B,C,D,T   ## F = (B ^ C ^ D)
-.macro MAGIC_F1 regF regB regC regD regT
-    vpxor  \regC, \regD, \regF
-    vpxor  \regB, \regF, \regF
-.endm
-
-# macro MAGIC_F2 F,B,C,D,T   ## F = ((B & C) | (B & D) | (C & D))
-.macro MAGIC_F2 regF regB regC regD regT
-    vpor  \regC, \regB, \regF
-    vpand \regC, \regB, \regT
-    vpand \regD, \regF, \regF
-    vpor  \regT, \regF, \regF
-.endm
-
-# macro MAGIC_F3 F,B,C,D,T   ## F = (B ^ C ^ D)
-.macro MAGIC_F3 regF regB regC regD regT
-    MAGIC_F1 \regF,\regB,\regC,\regD,\regT
-.endm
-
-# PROLD reg, imm, tmp
-.macro PROLD reg imm tmp
-	vpsrld  $(32-\imm), \reg, \tmp
-	vpslld  $\imm, \reg, \reg
-	vpor    \tmp, \reg, \reg
-.endm
-
-.macro PROLD_nd reg imm tmp src
-	vpsrld  $(32-\imm), \src, \tmp
-	vpslld  $\imm, \src, \reg
-	vpor	\tmp, \reg, \reg
-.endm
-
-.macro SHA1_STEP_00_15 regA regB regC regD regE regT regF memW immCNT MAGIC
-	vpaddd	\immCNT, \regE, \regE
-	vpaddd	\memW*32(%rsp), \regE, \regE
-	PROLD_nd \regT, 5, \regF, \regA
-	vpaddd	\regT, \regE, \regE
-	\MAGIC  \regF, \regB, \regC, \regD, \regT
-        PROLD   \regB, 30, \regT
-        vpaddd  \regF, \regE, \regE
-.endm
-
-.macro SHA1_STEP_16_79 regA regB regC regD regE regT regF memW immCNT MAGIC
-	vpaddd	\immCNT, \regE, \regE
-	offset = ((\memW - 14) & 15) * 32
-	vmovdqu offset(%rsp), W14
-	vpxor	W14, W16, W16
-	offset = ((\memW -  8) & 15) * 32
-	vpxor	offset(%rsp), W16, W16
-	offset = ((\memW -  3) & 15) * 32
-	vpxor	offset(%rsp), W16, W16
-	vpsrld	$(32-1), W16, \regF
-	vpslld	$1, W16, W16
-	vpor	W16, \regF, \regF
-
-	ROTATE_W
-
-	offset = ((\memW - 0) & 15) * 32
-	vmovdqu	\regF, offset(%rsp)
-	vpaddd	\regF, \regE, \regE
-	PROLD_nd \regT, 5, \regF, \regA
-	vpaddd	\regT, \regE, \regE
-	\MAGIC \regF,\regB,\regC,\regD,\regT      ## FUN  = MAGIC_Fi(B,C,D)
-	PROLD   \regB,30, \regT
-	vpaddd  \regF, \regE, \regE
-.endm
-
-########################################################################
-########################################################################
-########################################################################
-
-## FRAMESZ plus pushes must be an odd multiple of 8
-YMM_SAVE = (15-15)*32
-FRAMESZ = 32*16 + YMM_SAVE
-_YMM  =   FRAMESZ - YMM_SAVE
-
-#define VMOVPS   vmovups
-
-IDX  = %rax
-inp0 = %r9
-inp1 = %r10
-inp2 = %r11
-inp3 = %r12
-inp4 = %r13
-inp5 = %r14
-inp6 = %r15
-inp7 = %rcx
-arg1 = %rdi
-arg2 = %rsi
-RSP_SAVE = %rdx
-
-# ymm0 A
-# ymm1 B
-# ymm2 C
-# ymm3 D
-# ymm4 E
-# ymm5         F       AA
-# ymm6         T0      BB
-# ymm7         T1      CC
-# ymm8         T2      DD
-# ymm9         T3      EE
-# ymm10                T4      TMP
-# ymm11                T5      FUN
-# ymm12                T6      K
-# ymm13                T7      W14
-# ymm14                T8      W15
-# ymm15                T9      W16
-
-
-A  =     %ymm0
-B  =     %ymm1
-C  =     %ymm2
-D  =     %ymm3
-E  =     %ymm4
-F  =     %ymm5
-T0 =	 %ymm6
-T1 =     %ymm7
-T2 =     %ymm8
-T3 =     %ymm9
-T4 =     %ymm10
-T5 =     %ymm11
-T6 =     %ymm12
-T7 =     %ymm13
-T8  =     %ymm14
-T9  =     %ymm15
-
-AA  =     %ymm5
-BB  =     %ymm6
-CC  =     %ymm7
-DD  =     %ymm8
-EE  =     %ymm9
-TMP =     %ymm10
-FUN =     %ymm11
-K   =     %ymm12
-W14 =     %ymm13
-W15 =     %ymm14
-W16 =     %ymm15
-
-.macro ROTATE_ARGS
- TMP_ = E
- E = D
- D = C
- C = B
- B = A
- A = TMP_
-.endm
-
-.macro ROTATE_W
-TMP_  = W16
-W16  = W15
-W15  = W14
-W14  = TMP_
-.endm
-
-# 8 streams x 5 32bit words per digest x 4 bytes per word
-#define DIGEST_SIZE (8*5*4)
-
-.align 32
-
-# void sha1_x8_avx2(void **input_data, UINT128 *digest, UINT32 size)
-# arg 1 : pointer to array[8] of pointers to input data
-# arg 2 : size (in blocks) ;; assumed to be >= 1
-#
-ENTRY(sha1_x8_avx2)
-
-	# save callee-saved clobbered registers to comply with C function ABI
-	push	%r12
-	push	%r13
-	push	%r14
-	push	%r15
-
-	#save rsp
-	mov	%rsp, RSP_SAVE
-	sub     $FRAMESZ, %rsp
-
-	#align rsp to 32 Bytes
-	and	$~0x1F, %rsp
-
-	## Initialize digests
-	vmovdqu  0*32(arg1), A
-	vmovdqu  1*32(arg1), B
-	vmovdqu  2*32(arg1), C
-	vmovdqu  3*32(arg1), D
-	vmovdqu  4*32(arg1), E
-
-	## transpose input onto stack
-	mov     _data_ptr+0*8(arg1),inp0
-	mov     _data_ptr+1*8(arg1),inp1
-	mov     _data_ptr+2*8(arg1),inp2
-	mov     _data_ptr+3*8(arg1),inp3
-	mov     _data_ptr+4*8(arg1),inp4
-	mov     _data_ptr+5*8(arg1),inp5
-	mov     _data_ptr+6*8(arg1),inp6
-	mov     _data_ptr+7*8(arg1),inp7
-
-	xor     IDX, IDX
-lloop:
-	vmovdqu  PSHUFFLE_BYTE_FLIP_MASK(%rip), F
-	I=0
-.rep 2
-	VMOVPS   (inp0, IDX), T0
-	VMOVPS   (inp1, IDX), T1
-	VMOVPS   (inp2, IDX), T2
-	VMOVPS   (inp3, IDX), T3
-	VMOVPS   (inp4, IDX), T4
-	VMOVPS   (inp5, IDX), T5
-	VMOVPS   (inp6, IDX), T6
-	VMOVPS   (inp7, IDX), T7
-
-	TRANSPOSE8       T0, T1, T2, T3, T4, T5, T6, T7, T8, T9
-	vpshufb  F, T0, T0
-	vmovdqu  T0, (I*8)*32(%rsp)
-	vpshufb  F, T1, T1
-	vmovdqu  T1, (I*8+1)*32(%rsp)
-	vpshufb  F, T2, T2
-	vmovdqu  T2, (I*8+2)*32(%rsp)
-	vpshufb  F, T3, T3
-	vmovdqu  T3, (I*8+3)*32(%rsp)
-	vpshufb  F, T4, T4
-	vmovdqu  T4, (I*8+4)*32(%rsp)
-	vpshufb  F, T5, T5
-	vmovdqu  T5, (I*8+5)*32(%rsp)
-	vpshufb  F, T6, T6
-	vmovdqu  T6, (I*8+6)*32(%rsp)
-	vpshufb  F, T7, T7
-	vmovdqu  T7, (I*8+7)*32(%rsp)
-	add     $32, IDX
-	I = (I+1)
-.endr
-	# save old digests
-	vmovdqu  A,AA
-	vmovdqu  B,BB
-	vmovdqu  C,CC
-	vmovdqu  D,DD
-	vmovdqu  E,EE
-
-##
-## perform 0-79 steps
-##
-	vmovdqu  K00_19(%rip), K
-## do rounds 0...15
-	I = 0
-.rep 16
-	SHA1_STEP_00_15 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0
-	ROTATE_ARGS
-	I = (I+1)
-.endr
-
-## do rounds 16...19
-	vmovdqu  ((16 - 16) & 15) * 32 (%rsp), W16
-	vmovdqu  ((16 - 15) & 15) * 32 (%rsp), W15
-.rep 4
-	SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0
-	ROTATE_ARGS
-	I = (I+1)
-.endr
-
-## do rounds 20...39
-	vmovdqu  K20_39(%rip), K
-.rep 20
-	SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F1
-	ROTATE_ARGS
-	I = (I+1)
-.endr
-
-## do rounds 40...59
-	vmovdqu  K40_59(%rip), K
-.rep 20
-	SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F2
-	ROTATE_ARGS
-	I = (I+1)
-.endr
-
-## do rounds 60...79
-	vmovdqu  K60_79(%rip), K
-.rep 20
-	SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F3
-	ROTATE_ARGS
-	I = (I+1)
-.endr
-
-	vpaddd   AA,A,A
-	vpaddd   BB,B,B
-	vpaddd   CC,C,C
-	vpaddd   DD,D,D
-	vpaddd   EE,E,E
-
-	sub     $1, arg2
-	jne     lloop
-
-	# write out digests
-	vmovdqu  A, 0*32(arg1)
-	vmovdqu  B, 1*32(arg1)
-	vmovdqu  C, 2*32(arg1)
-	vmovdqu  D, 3*32(arg1)
-	vmovdqu  E, 4*32(arg1)
-
-	# update input pointers
-	add     IDX, inp0
-	add     IDX, inp1
-	add     IDX, inp2
-	add     IDX, inp3
-	add     IDX, inp4
-	add     IDX, inp5
-	add     IDX, inp6
-	add     IDX, inp7
-	mov     inp0, _data_ptr (arg1)
-	mov     inp1, _data_ptr + 1*8(arg1)
-	mov     inp2, _data_ptr + 2*8(arg1)
-	mov     inp3, _data_ptr + 3*8(arg1)
-	mov     inp4, _data_ptr + 4*8(arg1)
-	mov     inp5, _data_ptr + 5*8(arg1)
-	mov     inp6, _data_ptr + 6*8(arg1)
-	mov     inp7, _data_ptr + 7*8(arg1)
-
-	################
-	## Postamble
-
-	mov     RSP_SAVE, %rsp
-
-	# restore callee-saved clobbered registers
-	pop	%r15
-	pop	%r14
-	pop	%r13
-	pop	%r12
-
-	ret
-ENDPROC(sha1_x8_avx2)
-
-
-.section	.rodata.cst32.K00_19, "aM", @progbits, 32
-.align 32
-K00_19:
-.octa 0x5A8279995A8279995A8279995A827999
-.octa 0x5A8279995A8279995A8279995A827999
-
-.section	.rodata.cst32.K20_39, "aM", @progbits, 32
-.align 32
-K20_39:
-.octa 0x6ED9EBA16ED9EBA16ED9EBA16ED9EBA1
-.octa 0x6ED9EBA16ED9EBA16ED9EBA16ED9EBA1
-
-.section	.rodata.cst32.K40_59, "aM", @progbits, 32
-.align 32
-K40_59:
-.octa 0x8F1BBCDC8F1BBCDC8F1BBCDC8F1BBCDC
-.octa 0x8F1BBCDC8F1BBCDC8F1BBCDC8F1BBCDC
-
-.section	.rodata.cst32.K60_79, "aM", @progbits, 32
-.align 32
-K60_79:
-.octa 0xCA62C1D6CA62C1D6CA62C1D6CA62C1D6
-.octa 0xCA62C1D6CA62C1D6CA62C1D6CA62C1D6
-
-.section	.rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32
-.align 32
-PSHUFFLE_BYTE_FLIP_MASK:
-.octa 0x0c0d0e0f08090a0b0405060700010203
-.octa 0x0c0d0e0f08090a0b0405060700010203
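
Each ymm register above carries the same SHA-1 working variable for eight independent lanes; per lane the rounds are the standard FIPS 180-1 recurrence. A scalar C rendering of one round, with MAGIC_F0..F3 and the K tables mapped back to their textbook forms (the ROTATE_ARGS macro renames registers instead of moving values):

#include <stdint.h>

static inline uint32_t rol32_(uint32_t x, int n)
{
	return (x << n) | (x >> (32 - n));
}

static uint32_t sha1_f(int round, uint32_t b, uint32_t c, uint32_t d)
{
	if (round < 20)
		return d ^ (b & (c ^ d));		/* MAGIC_F0, K00_19 */
	if (round < 40)
		return b ^ c ^ d;			/* MAGIC_F1, K20_39 */
	if (round < 60)
		return (b & c) | (b & d) | (c & d);	/* MAGIC_F2, K40_59 */
	return b ^ c ^ d;				/* MAGIC_F3, K60_79 */
}

static void sha1_round(int round, uint32_t k, uint32_t w,
		       uint32_t *a, uint32_t *b, uint32_t *c,
		       uint32_t *d, uint32_t *e)
{
	uint32_t t = rol32_(*a, 5) + sha1_f(round, *b, *c, *d) + *e + k + w;

	*e = *d;
	*d = *c;
	*c = rol32_(*b, 30);		/* PROLD \regB, 30 */
	*b = *a;
	*a = t;
}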

+ 0 - 14
arch/x86/crypto/sha256-mb/Makefile

@@ -1,14 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# Arch-specific CryptoAPI modules.
-#
-
-OBJECT_FILES_NON_STANDARD := y
-
-avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
-                                $(comma)4)$(comma)%ymm2,yes,no)
-ifeq ($(avx2_supported),yes)
-	obj-$(CONFIG_CRYPTO_SHA256_MB) += sha256-mb.o
-	sha256-mb-y := sha256_mb.o sha256_mb_mgr_flush_avx2.o \
-	     sha256_mb_mgr_init_avx2.o sha256_mb_mgr_submit_avx2.o sha256_x8_avx2.o
-endif

+ 0 - 1013
arch/x86/crypto/sha256-mb/sha256_mb.c

@@ -1,1013 +0,0 @@
-/*
- * Multi buffer SHA256 algorithm Glue Code
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- *  Copyright(c) 2016 Intel Corporation.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of version 2 of the GNU General Public License as
- *  published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  General Public License for more details.
- *
- *  Contact Information:
- *	Megha Dey <megha.dey@linux.intel.com>
- *
- *  BSD LICENSE
- *
- *  Copyright(c) 2016 Intel Corporation.
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *    * Redistributions of source code must retain the above copyright
- *      notice, this list of conditions and the following disclaimer.
- *    * Redistributions in binary form must reproduce the above copyright
- *      notice, this list of conditions and the following disclaimer in
- *      the documentation and/or other materials provided with the
- *      distribution.
- *    * Neither the name of Intel Corporation nor the names of its
- *      contributors may be used to endorse or promote products derived
- *      from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
-
-#include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/cryptohash.h>
-#include <linux/types.h>
-#include <linux/list.h>
-#include <crypto/scatterwalk.h>
-#include <crypto/sha.h>
-#include <crypto/mcryptd.h>
-#include <crypto/crypto_wq.h>
-#include <asm/byteorder.h>
-#include <linux/hardirq.h>
-#include <asm/fpu/api.h>
-#include "sha256_mb_ctx.h"
-
-#define FLUSH_INTERVAL 1000 /* in usec */
-
-static struct mcryptd_alg_state sha256_mb_alg_state;
-
-struct sha256_mb_ctx {
-	struct mcryptd_ahash *mcryptd_tfm;
-};
-
-static inline struct mcryptd_hash_request_ctx
-		*cast_hash_to_mcryptd_ctx(struct sha256_hash_ctx *hash_ctx)
-{
-	struct ahash_request *areq;
-
-	areq = container_of((void *) hash_ctx, struct ahash_request, __ctx);
-	return container_of(areq, struct mcryptd_hash_request_ctx, areq);
-}
-
-static inline struct ahash_request
-		*cast_mcryptd_ctx_to_req(struct mcryptd_hash_request_ctx *ctx)
-{
-	return container_of((void *) ctx, struct ahash_request, __ctx);
-}
-
-static void req_ctx_init(struct mcryptd_hash_request_ctx *rctx,
-				struct ahash_request *areq)
-{
-	rctx->flag = HASH_UPDATE;
-}
-
-static asmlinkage void (*sha256_job_mgr_init)(struct sha256_mb_mgr *state);
-static asmlinkage struct job_sha256* (*sha256_job_mgr_submit)
-			(struct sha256_mb_mgr *state, struct job_sha256 *job);
-static asmlinkage struct job_sha256* (*sha256_job_mgr_flush)
-			(struct sha256_mb_mgr *state);
-static asmlinkage struct job_sha256* (*sha256_job_mgr_get_comp_job)
-			(struct sha256_mb_mgr *state);
-
-inline uint32_t sha256_pad(uint8_t padblock[SHA256_BLOCK_SIZE * 2],
-			 uint64_t total_len)
-{
-	uint32_t i = total_len & (SHA256_BLOCK_SIZE - 1);
-
-	memset(&padblock[i], 0, SHA256_BLOCK_SIZE);
-	padblock[i] = 0x80;
-
-	i += ((SHA256_BLOCK_SIZE - 1) &
-	      (0 - (total_len + SHA256_PADLENGTHFIELD_SIZE + 1)))
-	     + 1 + SHA256_PADLENGTHFIELD_SIZE;
-
-#if SHA256_PADLENGTHFIELD_SIZE == 16
-	*((uint64_t *) &padblock[i - 16]) = 0;
-#endif
-
-	*((uint64_t *) &padblock[i - 8]) = cpu_to_be64(total_len << 3);
-
-	/* Number of extra blocks to hash */
-	return i >> SHA256_LOG2_BLOCK_SIZE;
-}
-
-static struct sha256_hash_ctx
-		*sha256_ctx_mgr_resubmit(struct sha256_ctx_mgr *mgr,
-					struct sha256_hash_ctx *ctx)
-{
-	while (ctx) {
-		if (ctx->status & HASH_CTX_STS_COMPLETE) {
-			/* Clear PROCESSING bit */
-			ctx->status = HASH_CTX_STS_COMPLETE;
-			return ctx;
-		}
-
-		/*
-		 * If the extra blocks are empty, begin hashing what remains
-		 * in the user's buffer.
-		 */
-		if (ctx->partial_block_buffer_length == 0 &&
-		    ctx->incoming_buffer_length) {
-
-			const void *buffer = ctx->incoming_buffer;
-			uint32_t len = ctx->incoming_buffer_length;
-			uint32_t copy_len;
-
-			/*
-			 * Only entire blocks can be hashed.
-			 * Copy remainder to extra blocks buffer.
-			 */
-			copy_len = len & (SHA256_BLOCK_SIZE-1);
-
-			if (copy_len) {
-				len -= copy_len;
-				memcpy(ctx->partial_block_buffer,
-				       ((const char *) buffer + len),
-				       copy_len);
-				ctx->partial_block_buffer_length = copy_len;
-			}
-
-			ctx->incoming_buffer_length = 0;
-
-			/* len should be a multiple of the block size now */
-			assert((len % SHA256_BLOCK_SIZE) == 0);
-
-			/* Set len to the number of blocks to be hashed */
-			len >>= SHA256_LOG2_BLOCK_SIZE;
-
-			if (len) {
-
-				ctx->job.buffer = (uint8_t *) buffer;
-				ctx->job.len = len;
-				ctx = (struct sha256_hash_ctx *)
-				sha256_job_mgr_submit(&mgr->mgr, &ctx->job);
-				continue;
-			}
-		}
-
-		/*
-		 * If the extra blocks are not empty, then we are
-		 * either on the last block(s) or we need more
-		 * user input before continuing.
-		 */
-		if (ctx->status & HASH_CTX_STS_LAST) {
-
-			uint8_t *buf = ctx->partial_block_buffer;
-			uint32_t n_extra_blocks =
-				sha256_pad(buf, ctx->total_length);
-
-			ctx->status = (HASH_CTX_STS_PROCESSING |
-				       HASH_CTX_STS_COMPLETE);
-			ctx->job.buffer = buf;
-			ctx->job.len = (uint32_t) n_extra_blocks;
-			ctx = (struct sha256_hash_ctx *)
-				sha256_job_mgr_submit(&mgr->mgr, &ctx->job);
-			continue;
-		}
-
-		ctx->status = HASH_CTX_STS_IDLE;
-		return ctx;
-	}
-
-	return NULL;
-}
-
-static struct sha256_hash_ctx
-		*sha256_ctx_mgr_get_comp_ctx(struct sha256_ctx_mgr *mgr)
-{
-	/*
-	 * If get_comp_job returns NULL, there are no jobs complete.
-	 * If get_comp_job returns a job, verify that it is safe to return to
-	 * the user. If it is not ready, resubmit the job to finish processing.
-	 * If sha256_ctx_mgr_resubmit returned a job, it is ready to be
-	 * returned. Otherwise, all jobs currently being managed by the
-	 * hash_ctx_mgr still need processing.
-	 */
-	struct sha256_hash_ctx *ctx;
-
-	ctx = (struct sha256_hash_ctx *) sha256_job_mgr_get_comp_job(&mgr->mgr);
-	return sha256_ctx_mgr_resubmit(mgr, ctx);
-}
-
-static void sha256_ctx_mgr_init(struct sha256_ctx_mgr *mgr)
-{
-	sha256_job_mgr_init(&mgr->mgr);
-}
-
-static struct sha256_hash_ctx *sha256_ctx_mgr_submit(struct sha256_ctx_mgr *mgr,
-					  struct sha256_hash_ctx *ctx,
-					  const void *buffer,
-					  uint32_t len,
-					  int flags)
-{
-	if (flags & ~(HASH_UPDATE | HASH_LAST)) {
-		/* User should not pass anything other than UPDATE or LAST */
-		ctx->error = HASH_CTX_ERROR_INVALID_FLAGS;
-		return ctx;
-	}
-
-	if (ctx->status & HASH_CTX_STS_PROCESSING) {
-		/* Cannot submit to a currently processing job. */
-		ctx->error = HASH_CTX_ERROR_ALREADY_PROCESSING;
-		return ctx;
-	}
-
-	if (ctx->status & HASH_CTX_STS_COMPLETE) {
-		/* Cannot update a finished job. */
-		ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED;
-		return ctx;
-	}
-
-	/* If we made it here, there was no error during this call to submit */
-	ctx->error = HASH_CTX_ERROR_NONE;
-
-	/* Store buffer ptr info from user */
-	ctx->incoming_buffer = buffer;
-	ctx->incoming_buffer_length = len;
-
-	/*
-	 * Store the user's request flags and mark this ctx as currently
-	 * being processed.
-	 */
-	ctx->status = (flags & HASH_LAST) ?
-			(HASH_CTX_STS_PROCESSING | HASH_CTX_STS_LAST) :
-			HASH_CTX_STS_PROCESSING;
-
-	/* Advance byte counter */
-	ctx->total_length += len;
-
-	/*
-	 * If there is anything currently buffered in the extra blocks,
-	 * append to it until it contains a whole block.
-	 * Or if the user's buffer contains less than a whole block,
-	 * append as much as possible to the extra block.
-	 */
-	if (ctx->partial_block_buffer_length || len < SHA256_BLOCK_SIZE) {
-		/*
-		 * Compute how many bytes to copy from user buffer into
-		 * extra block
-		 */
-		uint32_t copy_len = SHA256_BLOCK_SIZE -
-					ctx->partial_block_buffer_length;
-		if (len < copy_len)
-			copy_len = len;
-
-		if (copy_len) {
-			/* Copy and update relevant pointers and counters */
-			memcpy(
-		&ctx->partial_block_buffer[ctx->partial_block_buffer_length],
-				buffer, copy_len);
-
-			ctx->partial_block_buffer_length += copy_len;
-			ctx->incoming_buffer = (const void *)
-					((const char *)buffer + copy_len);
-			ctx->incoming_buffer_length = len - copy_len;
-		}
-
-		/* The extra block should never contain more than 1 block */
-		assert(ctx->partial_block_buffer_length <= SHA256_BLOCK_SIZE);
-
-		/*
-		 * If the extra block buffer contains exactly 1 block,
-		 * it can be hashed.
-		 */
-		if (ctx->partial_block_buffer_length >= SHA256_BLOCK_SIZE) {
-			ctx->partial_block_buffer_length = 0;
-
-			ctx->job.buffer = ctx->partial_block_buffer;
-			ctx->job.len = 1;
-			ctx = (struct sha256_hash_ctx *)
-				sha256_job_mgr_submit(&mgr->mgr, &ctx->job);
-		}
-	}
-
-	return sha256_ctx_mgr_resubmit(mgr, ctx);
-}
-
-static struct sha256_hash_ctx *sha256_ctx_mgr_flush(struct sha256_ctx_mgr *mgr)
-{
-	struct sha256_hash_ctx *ctx;
-
-	while (1) {
-		ctx = (struct sha256_hash_ctx *)
-					sha256_job_mgr_flush(&mgr->mgr);
-
-		/* If flush returned 0, there are no more jobs in flight. */
-		if (!ctx)
-			return NULL;
-
-		/*
-		 * If flush returned a job, resubmit the job to finish
-		 * processing.
-		 */
-		ctx = sha256_ctx_mgr_resubmit(mgr, ctx);
-
-		/*
-		 * If sha256_ctx_mgr_resubmit returned a job, it is ready to
-		 * be returned. Otherwise, all jobs currently being managed by
-		 * the sha256_ctx_mgr still need processing. Loop.
-		 */
-		if (ctx)
-			return ctx;
-	}
-}
-
-static int sha256_mb_init(struct ahash_request *areq)
-{
-	struct sha256_hash_ctx *sctx = ahash_request_ctx(areq);
-
-	hash_ctx_init(sctx);
-	sctx->job.result_digest[0] = SHA256_H0;
-	sctx->job.result_digest[1] = SHA256_H1;
-	sctx->job.result_digest[2] = SHA256_H2;
-	sctx->job.result_digest[3] = SHA256_H3;
-	sctx->job.result_digest[4] = SHA256_H4;
-	sctx->job.result_digest[5] = SHA256_H5;
-	sctx->job.result_digest[6] = SHA256_H6;
-	sctx->job.result_digest[7] = SHA256_H7;
-	sctx->total_length = 0;
-	sctx->partial_block_buffer_length = 0;
-	sctx->status = HASH_CTX_STS_IDLE;
-
-	return 0;
-}
-
-static int sha256_mb_set_results(struct mcryptd_hash_request_ctx *rctx)
-{
-	int	i;
-	struct	sha256_hash_ctx *sctx = ahash_request_ctx(&rctx->areq);
-	__be32	*dst = (__be32 *) rctx->out;
-
-	for (i = 0; i < 8; ++i)
-		dst[i] = cpu_to_be32(sctx->job.result_digest[i]);
-
-	return 0;
-}
-
-static int sha_finish_walk(struct mcryptd_hash_request_ctx **ret_rctx,
-			struct mcryptd_alg_cstate *cstate, bool flush)
-{
-	int	flag = HASH_UPDATE;
-	int	nbytes, err = 0;
-	struct mcryptd_hash_request_ctx *rctx = *ret_rctx;
-	struct sha256_hash_ctx *sha_ctx;
-
-	/* more work ? */
-	while (!(rctx->flag & HASH_DONE)) {
-		nbytes = crypto_ahash_walk_done(&rctx->walk, 0);
-		if (nbytes < 0) {
-			err = nbytes;
-			goto out;
-		}
-		/* check if the walk is done */
-		if (crypto_ahash_walk_last(&rctx->walk)) {
-			rctx->flag |= HASH_DONE;
-			if (rctx->flag & HASH_FINAL)
-				flag |= HASH_LAST;
-
-		}
-		sha_ctx = (struct sha256_hash_ctx *)
-						ahash_request_ctx(&rctx->areq);
-		kernel_fpu_begin();
-		sha_ctx = sha256_ctx_mgr_submit(cstate->mgr, sha_ctx,
-						rctx->walk.data, nbytes, flag);
-		if (!sha_ctx) {
-			if (flush)
-				sha_ctx = sha256_ctx_mgr_flush(cstate->mgr);
-		}
-		kernel_fpu_end();
-		if (sha_ctx)
-			rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-		else {
-			rctx = NULL;
-			goto out;
-		}
-	}
-
-	/* copy the results */
-	if (rctx->flag & HASH_FINAL)
-		sha256_mb_set_results(rctx);
-
-out:
-	*ret_rctx = rctx;
-	return err;
-}
-
-static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx,
-			    struct mcryptd_alg_cstate *cstate,
-			    int err)
-{
-	struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
-	struct sha256_hash_ctx *sha_ctx;
-	struct mcryptd_hash_request_ctx *req_ctx;
-	int ret;
-
-	/* remove from work list */
-	spin_lock(&cstate->work_lock);
-	list_del(&rctx->waiter);
-	spin_unlock(&cstate->work_lock);
-
-	if (irqs_disabled())
-		rctx->complete(&req->base, err);
-	else {
-		local_bh_disable();
-		rctx->complete(&req->base, err);
-		local_bh_enable();
-	}
-
-	/* check to see if there are other jobs that are done */
-	sha_ctx = sha256_ctx_mgr_get_comp_ctx(cstate->mgr);
-	while (sha_ctx) {
-		req_ctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-		ret = sha_finish_walk(&req_ctx, cstate, false);
-		if (req_ctx) {
-			spin_lock(&cstate->work_lock);
-			list_del(&req_ctx->waiter);
-			spin_unlock(&cstate->work_lock);
-
-			req = cast_mcryptd_ctx_to_req(req_ctx);
-			if (irqs_disabled())
-				req_ctx->complete(&req->base, ret);
-			else {
-				local_bh_disable();
-				req_ctx->complete(&req->base, ret);
-				local_bh_enable();
-			}
-		}
-		sha_ctx = sha256_ctx_mgr_get_comp_ctx(cstate->mgr);
-	}
-
-	return 0;
-}
-
-static void sha256_mb_add_list(struct mcryptd_hash_request_ctx *rctx,
-			     struct mcryptd_alg_cstate *cstate)
-{
-	unsigned long next_flush;
-	unsigned long delay = usecs_to_jiffies(FLUSH_INTERVAL);
-
-	/* initialize tag */
-	rctx->tag.arrival = jiffies;    /* tag the arrival time */
-	rctx->tag.seq_num = cstate->next_seq_num++;
-	next_flush = rctx->tag.arrival + delay;
-	rctx->tag.expire = next_flush;
-
-	spin_lock(&cstate->work_lock);
-	list_add_tail(&rctx->waiter, &cstate->work_list);
-	spin_unlock(&cstate->work_lock);
-
-	mcryptd_arm_flusher(cstate, delay);
-}
-
-static int sha256_mb_update(struct ahash_request *areq)
-{
-	struct mcryptd_hash_request_ctx *rctx =
-		container_of(areq, struct mcryptd_hash_request_ctx, areq);
-	struct mcryptd_alg_cstate *cstate =
-				this_cpu_ptr(sha256_mb_alg_state.alg_cstate);
-
-	struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
-	struct sha256_hash_ctx *sha_ctx;
-	int ret = 0, nbytes;
-
-	/* sanity check */
-	if (rctx->tag.cpu != smp_processor_id()) {
-		pr_err("mcryptd error: cpu clash\n");
-		goto done;
-	}
-
-	/* need to init context */
-	req_ctx_init(rctx, areq);
-
-	nbytes = crypto_ahash_walk_first(req, &rctx->walk);
-
-	if (nbytes < 0) {
-		ret = nbytes;
-		goto done;
-	}
-
-	if (crypto_ahash_walk_last(&rctx->walk))
-		rctx->flag |= HASH_DONE;
-
-	/* submit */
-	sha_ctx = (struct sha256_hash_ctx *) ahash_request_ctx(areq);
-	sha256_mb_add_list(rctx, cstate);
-	kernel_fpu_begin();
-	sha_ctx = sha256_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data,
-							nbytes, HASH_UPDATE);
-	kernel_fpu_end();
-
-	/* check if anything is returned */
-	if (!sha_ctx)
-		return -EINPROGRESS;
-
-	if (sha_ctx->error) {
-		ret = sha_ctx->error;
-		rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-		goto done;
-	}
-
-	rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-	ret = sha_finish_walk(&rctx, cstate, false);
-
-	if (!rctx)
-		return -EINPROGRESS;
-done:
-	sha_complete_job(rctx, cstate, ret);
-	return ret;
-}
-
-static int sha256_mb_finup(struct ahash_request *areq)
-{
-	struct mcryptd_hash_request_ctx *rctx =
-		container_of(areq, struct mcryptd_hash_request_ctx, areq);
-	struct mcryptd_alg_cstate *cstate =
-				this_cpu_ptr(sha256_mb_alg_state.alg_cstate);
-
-	struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
-	struct sha256_hash_ctx *sha_ctx;
-	int ret = 0, flag = HASH_UPDATE, nbytes;
-
-	/* sanity check */
-	if (rctx->tag.cpu != smp_processor_id()) {
-		pr_err("mcryptd error: cpu clash\n");
-		goto done;
-	}
-
-	/* need to init context */
-	req_ctx_init(rctx, areq);
-
-	nbytes = crypto_ahash_walk_first(req, &rctx->walk);
-
-	if (nbytes < 0) {
-		ret = nbytes;
-		goto done;
-	}
-
-	if (crypto_ahash_walk_last(&rctx->walk)) {
-		rctx->flag |= HASH_DONE;
-		flag = HASH_LAST;
-	}
-
-	/* submit */
-	rctx->flag |= HASH_FINAL;
-	sha_ctx = (struct sha256_hash_ctx *) ahash_request_ctx(areq);
-	sha256_mb_add_list(rctx, cstate);
-
-	kernel_fpu_begin();
-	sha_ctx = sha256_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data,
-								nbytes, flag);
-	kernel_fpu_end();
-
-	/* check if anything is returned */
-	if (!sha_ctx)
-		return -EINPROGRESS;
-
-	if (sha_ctx->error) {
-		ret = sha_ctx->error;
-		goto done;
-	}
-
-	rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-	ret = sha_finish_walk(&rctx, cstate, false);
-	if (!rctx)
-		return -EINPROGRESS;
-done:
-	sha_complete_job(rctx, cstate, ret);
-	return ret;
-}
-
-static int sha256_mb_final(struct ahash_request *areq)
-{
-	struct mcryptd_hash_request_ctx *rctx =
-			container_of(areq, struct mcryptd_hash_request_ctx,
-			areq);
-	struct mcryptd_alg_cstate *cstate =
-				this_cpu_ptr(sha256_mb_alg_state.alg_cstate);
-
-	struct sha256_hash_ctx *sha_ctx;
-	int ret = 0;
-	u8 data;
-
-	/* sanity check */
-	if (rctx->tag.cpu != smp_processor_id()) {
-		pr_err("mcryptd error: cpu clash\n");
-		goto done;
-	}
-
-	/* need to init context */
-	req_ctx_init(rctx, areq);
-
-	rctx->flag |= HASH_DONE | HASH_FINAL;
-
-	sha_ctx = (struct sha256_hash_ctx *) ahash_request_ctx(areq);
-	/* flag HASH_FINAL and 0 data size */
-	sha256_mb_add_list(rctx, cstate);
-	kernel_fpu_begin();
-	sha_ctx = sha256_ctx_mgr_submit(cstate->mgr, sha_ctx, &data, 0,
-								HASH_LAST);
-	kernel_fpu_end();
-
-	/* check if anything is returned */
-	if (!sha_ctx)
-		return -EINPROGRESS;
-
-	if (sha_ctx->error) {
-		ret = sha_ctx->error;
-		rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-		goto done;
-	}
-
-	rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-	ret = sha_finish_walk(&rctx, cstate, false);
-	if (!rctx)
-		return -EINPROGRESS;
-done:
-	sha_complete_job(rctx, cstate, ret);
-	return ret;
-}
-
-static int sha256_mb_export(struct ahash_request *areq, void *out)
-{
-	struct sha256_hash_ctx *sctx = ahash_request_ctx(areq);
-
-	memcpy(out, sctx, sizeof(*sctx));
-
-	return 0;
-}
-
-static int sha256_mb_import(struct ahash_request *areq, const void *in)
-{
-	struct sha256_hash_ctx *sctx = ahash_request_ctx(areq);
-
-	memcpy(sctx, in, sizeof(*sctx));
-
-	return 0;
-}
-
-static int sha256_mb_async_init_tfm(struct crypto_tfm *tfm)
-{
-	struct mcryptd_ahash *mcryptd_tfm;
-	struct sha256_mb_ctx *ctx = crypto_tfm_ctx(tfm);
-	struct mcryptd_hash_ctx *mctx;
-
-	mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha256-mb",
-						CRYPTO_ALG_INTERNAL,
-						CRYPTO_ALG_INTERNAL);
-	if (IS_ERR(mcryptd_tfm))
-		return PTR_ERR(mcryptd_tfm);
-	mctx = crypto_ahash_ctx(&mcryptd_tfm->base);
-	mctx->alg_state = &sha256_mb_alg_state;
-	ctx->mcryptd_tfm = mcryptd_tfm;
-	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
-				sizeof(struct ahash_request) +
-				crypto_ahash_reqsize(&mcryptd_tfm->base));
-
-	return 0;
-}
-
-static void sha256_mb_async_exit_tfm(struct crypto_tfm *tfm)
-{
-	struct sha256_mb_ctx *ctx = crypto_tfm_ctx(tfm);
-
-	mcryptd_free_ahash(ctx->mcryptd_tfm);
-}
-
-static int sha256_mb_areq_init_tfm(struct crypto_tfm *tfm)
-{
-	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
-				sizeof(struct ahash_request) +
-				sizeof(struct sha256_hash_ctx));
-
-	return 0;
-}
-
-static void sha256_mb_areq_exit_tfm(struct crypto_tfm *tfm)
-{
-	struct sha256_mb_ctx *ctx = crypto_tfm_ctx(tfm);
-
-	mcryptd_free_ahash(ctx->mcryptd_tfm);
-}
-
-static struct ahash_alg sha256_mb_areq_alg = {
-	.init		=	sha256_mb_init,
-	.update		=	sha256_mb_update,
-	.final		=	sha256_mb_final,
-	.finup		=	sha256_mb_finup,
-	.export		=	sha256_mb_export,
-	.import		=	sha256_mb_import,
-	.halg		=	{
-	.digestsize	=	SHA256_DIGEST_SIZE,
-	.statesize	=	sizeof(struct sha256_hash_ctx),
-		.base		=	{
-			.cra_name	 = "__sha256-mb",
-			.cra_driver_name = "__intel_sha256-mb",
-			.cra_priority	 = 100,
-			/*
-			 * use ASYNC flag as some buffers in multi-buffer
-			 * algo may not have completed before hashing thread
-			 * sleep
-			 */
-			.cra_flags	= CRYPTO_ALG_ASYNC |
-					  CRYPTO_ALG_INTERNAL,
-			.cra_blocksize	= SHA256_BLOCK_SIZE,
-			.cra_module	= THIS_MODULE,
-			.cra_list	= LIST_HEAD_INIT
-					(sha256_mb_areq_alg.halg.base.cra_list),
-			.cra_init	= sha256_mb_areq_init_tfm,
-			.cra_exit	= sha256_mb_areq_exit_tfm,
-			.cra_ctxsize	= sizeof(struct sha256_hash_ctx),
-		}
-	}
-};
-
-static int sha256_mb_async_init(struct ahash_request *req)
-{
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
-	memcpy(mcryptd_req, req, sizeof(*req));
-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
-	return crypto_ahash_init(mcryptd_req);
-}
-
-static int sha256_mb_async_update(struct ahash_request *req)
-{
-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
-	memcpy(mcryptd_req, req, sizeof(*req));
-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
-	return crypto_ahash_update(mcryptd_req);
-}
-
-static int sha256_mb_async_finup(struct ahash_request *req)
-{
-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
-	memcpy(mcryptd_req, req, sizeof(*req));
-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
-	return crypto_ahash_finup(mcryptd_req);
-}
-
-static int sha256_mb_async_final(struct ahash_request *req)
-{
-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
-	memcpy(mcryptd_req, req, sizeof(*req));
-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
-	return crypto_ahash_final(mcryptd_req);
-}
-
-static int sha256_mb_async_digest(struct ahash_request *req)
-{
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
-	memcpy(mcryptd_req, req, sizeof(*req));
-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
-	return crypto_ahash_digest(mcryptd_req);
-}
-
-static int sha256_mb_async_export(struct ahash_request *req, void *out)
-{
-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
-	memcpy(mcryptd_req, req, sizeof(*req));
-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
-	return crypto_ahash_export(mcryptd_req, out);
-}
-
-static int sha256_mb_async_import(struct ahash_request *req, const void *in)
-{
-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-	struct crypto_ahash *child = mcryptd_ahash_child(mcryptd_tfm);
-	struct mcryptd_hash_request_ctx *rctx;
-	struct ahash_request *areq;
-
-	memcpy(mcryptd_req, req, sizeof(*req));
-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
-	rctx = ahash_request_ctx(mcryptd_req);
-	areq = &rctx->areq;
-
-	ahash_request_set_tfm(areq, child);
-	ahash_request_set_callback(areq, CRYPTO_TFM_REQ_MAY_SLEEP,
-					rctx->complete, req);
-
-	return crypto_ahash_import(mcryptd_req, in);
-}
-
-static struct ahash_alg sha256_mb_async_alg = {
-	.init           = sha256_mb_async_init,
-	.update         = sha256_mb_async_update,
-	.final          = sha256_mb_async_final,
-	.finup          = sha256_mb_async_finup,
-	.export         = sha256_mb_async_export,
-	.import         = sha256_mb_async_import,
-	.digest         = sha256_mb_async_digest,
-	.halg = {
-		.digestsize     = SHA256_DIGEST_SIZE,
-		.statesize      = sizeof(struct sha256_hash_ctx),
-		.base = {
-			.cra_name               = "sha256",
-			.cra_driver_name        = "sha256_mb",
-			/*
-			 * Low priority, since with few concurrent hash requests
-			 * this is extremely slow due to the flush delay.  Users
-			 * whose workloads would benefit from this can request
-			 * it explicitly by driver name, or can increase its
-			 * priority at runtime using NETLINK_CRYPTO.
-			 */
-			.cra_priority           = 50,
-			.cra_flags              = CRYPTO_ALG_ASYNC,
-			.cra_blocksize          = SHA256_BLOCK_SIZE,
-			.cra_module             = THIS_MODULE,
-			.cra_list               = LIST_HEAD_INIT
-				(sha256_mb_async_alg.halg.base.cra_list),
-			.cra_init               = sha256_mb_async_init_tfm,
-			.cra_exit               = sha256_mb_async_exit_tfm,
-			.cra_ctxsize		= sizeof(struct sha256_mb_ctx),
-			.cra_alignmask		= 0,
-		},
-	},
-};
-
-static unsigned long sha256_mb_flusher(struct mcryptd_alg_cstate *cstate)
-{
-	struct mcryptd_hash_request_ctx *rctx;
-	unsigned long cur_time;
-	unsigned long next_flush = 0;
-	struct sha256_hash_ctx *sha_ctx;
-
-
-	cur_time = jiffies;
-
-	while (!list_empty(&cstate->work_list)) {
-		rctx = list_entry(cstate->work_list.next,
-				struct mcryptd_hash_request_ctx, waiter);
-		if (time_before(cur_time, rctx->tag.expire))
-			break;
-		kernel_fpu_begin();
-		sha_ctx = (struct sha256_hash_ctx *)
-					sha256_ctx_mgr_flush(cstate->mgr);
-		kernel_fpu_end();
-		if (!sha_ctx) {
-			pr_err("sha256_mb error: nothing got"
-					" flushed for non-empty list\n");
-			break;
-		}
-		rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-		sha_finish_walk(&rctx, cstate, true);
-		sha_complete_job(rctx, cstate, 0);
-	}
-
-	if (!list_empty(&cstate->work_list)) {
-		rctx = list_entry(cstate->work_list.next,
-				struct mcryptd_hash_request_ctx, waiter);
-		/* get the hash context and then flush time */
-		next_flush = rctx->tag.expire;
-		mcryptd_arm_flusher(cstate, get_delay(next_flush));
-	}
-	return next_flush;
-}
-
-static int __init sha256_mb_mod_init(void)
-{
-
-	int cpu;
-	int err;
-	struct mcryptd_alg_cstate *cpu_state;
-
-	/* check for dependent cpu features */
-	if (!boot_cpu_has(X86_FEATURE_AVX2) ||
-	    !boot_cpu_has(X86_FEATURE_BMI2))
-		return -ENODEV;
-
-	/* initialize multibuffer structures */
-	sha256_mb_alg_state.alg_cstate = alloc_percpu
-						(struct mcryptd_alg_cstate);
-
-	sha256_job_mgr_init = sha256_mb_mgr_init_avx2;
-	sha256_job_mgr_submit = sha256_mb_mgr_submit_avx2;
-	sha256_job_mgr_flush = sha256_mb_mgr_flush_avx2;
-	sha256_job_mgr_get_comp_job = sha256_mb_mgr_get_comp_job_avx2;
-
-	if (!sha256_mb_alg_state.alg_cstate)
-		return -ENOMEM;
-	for_each_possible_cpu(cpu) {
-		cpu_state = per_cpu_ptr(sha256_mb_alg_state.alg_cstate, cpu);
-		cpu_state->next_flush = 0;
-		cpu_state->next_seq_num = 0;
-		cpu_state->flusher_engaged = false;
-		INIT_DELAYED_WORK(&cpu_state->flush, mcryptd_flusher);
-		cpu_state->cpu = cpu;
-		cpu_state->alg_state = &sha256_mb_alg_state;
-		cpu_state->mgr = kzalloc(sizeof(struct sha256_ctx_mgr),
-					GFP_KERNEL);
-		if (!cpu_state->mgr)
-			goto err2;
-		sha256_ctx_mgr_init(cpu_state->mgr);
-		INIT_LIST_HEAD(&cpu_state->work_list);
-		spin_lock_init(&cpu_state->work_lock);
-	}
-	sha256_mb_alg_state.flusher = &sha256_mb_flusher;
-
-	err = crypto_register_ahash(&sha256_mb_areq_alg);
-	if (err)
-		goto err2;
-	err = crypto_register_ahash(&sha256_mb_async_alg);
-	if (err)
-		goto err1;
-
-
-	return 0;
-err1:
-	crypto_unregister_ahash(&sha256_mb_areq_alg);
-err2:
-	for_each_possible_cpu(cpu) {
-		cpu_state = per_cpu_ptr(sha256_mb_alg_state.alg_cstate, cpu);
-		kfree(cpu_state->mgr);
-	}
-	free_percpu(sha256_mb_alg_state.alg_cstate);
-	return -ENODEV;
-}
-
-static void __exit sha256_mb_mod_fini(void)
-{
-	int cpu;
-	struct mcryptd_alg_cstate *cpu_state;
-
-	crypto_unregister_ahash(&sha256_mb_async_alg);
-	crypto_unregister_ahash(&sha256_mb_areq_alg);
-	for_each_possible_cpu(cpu) {
-		cpu_state = per_cpu_ptr(sha256_mb_alg_state.alg_cstate, cpu);
-		kfree(cpu_state->mgr);
-	}
-	free_percpu(sha256_mb_alg_state.alg_cstate);
-}
-
-module_init(sha256_mb_mod_init);
-module_exit(sha256_mb_mod_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm, multi buffer accelerated");
-
-MODULE_ALIAS_CRYPTO("sha256");
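
The async "sha256_mb" instance registered above is reached through the normal ahash API, either as "sha256" when its priority wins or explicitly by driver name as the cra_priority comment suggests. A hypothetical caller-side sketch (error handling trimmed; not part of the removed module):

#include <crypto/hash.h>
#include <linux/scatterlist.h>
#include <linux/err.h>

static int sha256_mb_demo(const u8 *data, unsigned int len, u8 *out)
{
	struct crypto_ahash *tfm;
	struct ahash_request *req;
	struct scatterlist sg;
	DECLARE_CRYPTO_WAIT(wait);
	int err;

	tfm = crypto_alloc_ahash("sha256_mb", 0, 0);	/* or just "sha256" */
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	req = ahash_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		crypto_free_ahash(tfm);
		return -ENOMEM;
	}

	sg_init_one(&sg, data, len);
	ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
				   crypto_req_done, &wait);
	ahash_request_set_crypt(req, &sg, out, len);

	/* digest() may return -EINPROGRESS; crypto_wait_req() parks on it */
	err = crypto_wait_req(crypto_ahash_digest(req), &wait);

	ahash_request_free(req);
	crypto_free_ahash(tfm);
	return err;
}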

+ 0 - 134
arch/x86/crypto/sha256-mb/sha256_mb_ctx.h

@@ -1,134 +0,0 @@
-/*
- * Header file for multi buffer SHA256 context
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- *  Copyright(c) 2016 Intel Corporation.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of version 2 of the GNU General Public License as
- *  published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  General Public License for more details.
- *
- *  Contact Information:
- *	Megha Dey <megha.dey@linux.intel.com>
- *
- *  BSD LICENSE
- *
- *  Copyright(c) 2016 Intel Corporation.
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *    * Redistributions of source code must retain the above copyright
- *      notice, this list of conditions and the following disclaimer.
- *    * Redistributions in binary form must reproduce the above copyright
- *      notice, this list of conditions and the following disclaimer in
- *      the documentation and/or other materials provided with the
- *      distribution.
- *    * Neither the name of Intel Corporation nor the names of its
- *      contributors may be used to endorse or promote products derived
- *      from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _SHA_MB_CTX_INTERNAL_H
-#define _SHA_MB_CTX_INTERNAL_H
-
-#include "sha256_mb_mgr.h"
-
-#define HASH_UPDATE          0x00
-#define HASH_LAST            0x01
-#define HASH_DONE	     0x02
-#define HASH_FINAL	     0x04
-
-#define HASH_CTX_STS_IDLE       0x00
-#define HASH_CTX_STS_PROCESSING 0x01
-#define HASH_CTX_STS_LAST       0x02
-#define HASH_CTX_STS_COMPLETE   0x04
-
-enum hash_ctx_error {
-	HASH_CTX_ERROR_NONE               =  0,
-	HASH_CTX_ERROR_INVALID_FLAGS      = -1,
-	HASH_CTX_ERROR_ALREADY_PROCESSING = -2,
-	HASH_CTX_ERROR_ALREADY_COMPLETED  = -3,
-
-#ifdef HASH_CTX_DEBUG
-	HASH_CTX_ERROR_DEBUG_DIGEST_MISMATCH = -4,
-#endif
-};
-
-
-#define hash_ctx_user_data(ctx)  ((ctx)->user_data)
-#define hash_ctx_digest(ctx)     ((ctx)->job.result_digest)
-#define hash_ctx_processing(ctx) ((ctx)->status & HASH_CTX_STS_PROCESSING)
-#define hash_ctx_complete(ctx)   ((ctx)->status == HASH_CTX_STS_COMPLETE)
-#define hash_ctx_status(ctx)     ((ctx)->status)
-#define hash_ctx_error(ctx)      ((ctx)->error)
-#define hash_ctx_init(ctx) \
-	do { \
-		(ctx)->error = HASH_CTX_ERROR_NONE; \
-		(ctx)->status = HASH_CTX_STS_COMPLETE; \
-	} while (0)
-
-
-/* Hash Constants and Typedefs */
-#define SHA256_DIGEST_LENGTH        8
-#define SHA256_LOG2_BLOCK_SIZE        6
-
-#define SHA256_PADLENGTHFIELD_SIZE    8
-
-#ifdef SHA_MB_DEBUG
-#define assert(expr) \
-do { \
-	if (unlikely(!(expr))) { \
-		printk(KERN_ERR "Assertion failed! %s,%s,%s,line=%d\n", \
-		#expr, __FILE__, __func__, __LINE__); \
-	} \
-} while (0)
-#else
-#define assert(expr) do {} while (0)
-#endif
-
-struct sha256_ctx_mgr {
-	struct sha256_mb_mgr mgr;
-};
-
-/* typedef struct sha256_ctx_mgr sha256_ctx_mgr; */
-
-struct sha256_hash_ctx {
-	/* Must be at struct offset 0 */
-	struct job_sha256       job;
-	/* status flag */
-	int status;
-	/* error flag */
-	int error;
-
-	uint64_t	total_length;
-	const void	*incoming_buffer;
-	uint32_t	incoming_buffer_length;
-	uint8_t		partial_block_buffer[SHA256_BLOCK_SIZE * 2];
-	uint32_t	partial_block_buffer_length;
-	void		*user_data;
-};
-
-#endif
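
With SHA256_LOG2_BLOCK_SIZE = 6 (64-byte blocks) and an 8-byte length field, the padding arithmetic used by sha256_pad() in sha256_mb.c works out as in the plain C restatement below. Worked case: for total_len = 100, i starts at 36; the 0x80 byte plus 19 zero bytes plus the 8-byte bit count fill the block, and one extra block is hashed.

#include <stdint.h>

static uint32_t pad_blocks(uint64_t total_len)
{
	uint32_t i = total_len & 63;	/* bytes already sitting in the last block */

	/* room for 0x80, zero padding, and the 64-bit bit-length field */
	i += ((63 & (0 - (total_len + 8 + 1))) + 1) + 8;

	return i >> 6;			/* number of extra blocks: 1 or 2 */
}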

+ 0 - 108
arch/x86/crypto/sha256-mb/sha256_mb_mgr.h

@@ -1,108 +0,0 @@
-/*
- * Header file for multi buffer SHA256 algorithm manager
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- *  Copyright(c) 2016 Intel Corporation.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of version 2 of the GNU General Public License as
- *  published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  General Public License for more details.
- *
- *  Contact Information:
- *	Megha Dey <megha.dey@linux.intel.com>
- *
- *  BSD LICENSE
- *
- *  Copyright(c) 2016 Intel Corporation.
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *    * Redistributions of source code must retain the above copyright
- *      notice, this list of conditions and the following disclaimer.
- *    * Redistributions in binary form must reproduce the above copyright
- *      notice, this list of conditions and the following disclaimer in
- *      the documentation and/or other materials provided with the
- *      distribution.
- *    * Neither the name of Intel Corporation nor the names of its
- *      contributors may be used to endorse or promote products derived
- *      from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-#ifndef __SHA_MB_MGR_H
-#define __SHA_MB_MGR_H
-
-#include <linux/types.h>
-
-#define NUM_SHA256_DIGEST_WORDS 8
-
-enum job_sts {	STS_UNKNOWN = 0,
-		STS_BEING_PROCESSED = 1,
-		STS_COMPLETED = 2,
-		STS_INTERNAL_ERROR = 3,
-		STS_ERROR = 4
-};
-
-struct job_sha256 {
-	u8	*buffer;
-	u32	len;
-	u32	result_digest[NUM_SHA256_DIGEST_WORDS] __aligned(32);
-	enum	job_sts status;
-	void	*user_data;
-};
-
-/* SHA256 out-of-order scheduler */
-
-/* typedef uint32_t sha8_digest_array[8][8]; */
-
-struct sha256_args_x8 {
-	uint32_t	digest[8][8];
-	uint8_t		*data_ptr[8];
-};
-
-struct sha256_lane_data {
-	struct job_sha256 *job_in_lane;
-};
-
-struct sha256_mb_mgr {
-	struct sha256_args_x8 args;
-
-	uint32_t lens[8];
-
-	/* each nibble is the index (0...7) of an unused lane */
-	uint64_t unused_lanes;
-	/* nibble 8 is set to 0xF as a flag */
-	struct sha256_lane_data ldata[8];
-};
-
-
-#define SHA256_MB_MGR_NUM_LANES_AVX2 8
-
-void sha256_mb_mgr_init_avx2(struct sha256_mb_mgr *state);
-struct job_sha256 *sha256_mb_mgr_submit_avx2(struct sha256_mb_mgr *state,
-					 struct job_sha256 *job);
-struct job_sha256 *sha256_mb_mgr_flush_avx2(struct sha256_mb_mgr *state);
-struct job_sha256 *sha256_mb_mgr_get_comp_job_avx2(struct sha256_mb_mgr *state);
-
-#endif

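The central layout decision in the removed manager header above is that struct sha256_args_x8 keeps the digests transposed, digest[word][lane] rather than digest[lane][word], so one 256-bit register can hold the same state word for all eight jobs. A small stand-alone sketch of that indexing (plain C, no vectors; names are illustrative):

	#include <stdint.h>
	#include <stdio.h>

	#define NUM_LANES    8
	#define DIGEST_WORDS 8

	/* Transposed layout: row = digest word, column = lane (job). */
	static uint32_t args_digest[DIGEST_WORDS][NUM_LANES];

	/* Scatter one job's 8-word digest into its lane column. */
	static void load_lane(unsigned int lane, const uint32_t digest[DIGEST_WORDS])
	{
		for (unsigned int w = 0; w < DIGEST_WORDS; w++)
			args_digest[w][lane] = digest[w];
	}

	/* Gather a lane's column back into a contiguous digest. */
	static void store_lane(unsigned int lane, uint32_t digest[DIGEST_WORDS])
	{
		for (unsigned int w = 0; w < DIGEST_WORDS; w++)
			digest[w] = args_digest[w][lane];
	}

	int main(void)
	{
		uint32_t in[DIGEST_WORDS] = { 0x6a09e667, 0xbb67ae85, 0x3c6ef372,
					      0xa54ff53a, 0x510e527f, 0x9b05688c,
					      0x1f83d9ab, 0x5be0cd19 };	/* SHA-256 IV */
		uint32_t out[DIGEST_WORDS];

		load_lane(3, in);
		store_lane(3, out);
		printf("round trip ok: %d\n", out[0] == in[0] && out[7] == in[7]);
		return 0;
	}

The vpextrd/vpinsrd runs in the submit and flush routines below perform exactly this scatter and gather, one 32-bit word at a time.
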
+ 0 - 304
arch/x86/crypto/sha256-mb/sha256_mb_mgr_datastruct.S

@@ -1,304 +0,0 @@
-/*
- * Header file for multi buffer SHA256 algorithm data structure
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- *     Megha Dey <megha.dey@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *   * Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in
- *     the documentation and/or other materials provided with the
- *     distribution.
- *   * Neither the name of Intel Corporation nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-# Macros for defining data structures
-
-# Usage example
-
-#START_FIELDS	# JOB_AES
-###	name		size	align
-#FIELD	_plaintext,	8,	8	# pointer to plaintext
-#FIELD	_ciphertext,	8,	8	# pointer to ciphertext
-#FIELD	_IV,		16,	8	# IV
-#FIELD	_keys,		8,	8	# pointer to keys
-#FIELD	_len,		4,	4	# length in bytes
-#FIELD	_status,	4,	4	# status enumeration
-#FIELD	_user_data,	8,	8	# pointer to user data
-#UNION  _union,         size1,  align1, \
-#	                size2,  align2, \
-#	                size3,  align3, \
-#	                ...
-#END_FIELDS
-#%assign _JOB_AES_size	_FIELD_OFFSET
-#%assign _JOB_AES_align	_STRUCT_ALIGN
-
-#########################################################################
-
-# Alternate "struc-like" syntax:
-#	STRUCT job_aes2
-#	RES_Q	.plaintext,	1
-#	RES_Q	.ciphertext, 	1
-#	RES_DQ	.IV,		1
-#	RES_B	.nested,	_JOB_AES_SIZE, _JOB_AES_ALIGN
-#	RES_U	.union,		size1, align1, \
-#				size2, align2, \
-#				...
-#	ENDSTRUCT
-#	# Following only needed if nesting
-#	%assign job_aes2_size	_FIELD_OFFSET
-#	%assign job_aes2_align	_STRUCT_ALIGN
-#
-# RES_* macros take a name, a count and an optional alignment.
-# The count is in terms of the base size of the macro, and the
-# default alignment is the base size.
-# The macros are:
-# Macro    Base size
-# RES_B	    1
-# RES_W	    2
-# RES_D     4
-# RES_Q     8
-# RES_DQ   16
-# RES_Y    32
-# RES_Z    64
-#
-# RES_U defines a union. Its arguments are a name and two or more
-# pairs of "size, alignment"
-#
-# The two assigns are only needed if this structure is being nested
-# within another. Even if the assigns are not done, one can still use
-# STRUCT_NAME_size as the size of the structure.
-#
-# Note that for nesting, you still need to assign to STRUCT_NAME_size.
-#
-# The differences between this and using "struc" directly are that each
-# type is implicitly aligned to its natural length (although this can be
-# over-ridden with an explicit third parameter), and that the structure
-# is padded at the end to its overall alignment.
-#
-
-#########################################################################
-
-#ifndef _DATASTRUCT_ASM_
-#define _DATASTRUCT_ASM_
-
-#define SZ8			8*SHA256_DIGEST_WORD_SIZE
-#define ROUNDS			64*SZ8
-#define PTR_SZ                  8
-#define SHA256_DIGEST_WORD_SIZE 4
-#define MAX_SHA256_LANES        8
-#define SHA256_DIGEST_WORDS 8
-#define SHA256_DIGEST_ROW_SIZE  (MAX_SHA256_LANES * SHA256_DIGEST_WORD_SIZE)
-#define SHA256_DIGEST_SIZE      (SHA256_DIGEST_ROW_SIZE * SHA256_DIGEST_WORDS)
-#define SHA256_BLK_SZ           64
-
-# START_FIELDS
-.macro START_FIELDS
- _FIELD_OFFSET = 0
- _STRUCT_ALIGN = 0
-.endm
-
-# FIELD name size align
-.macro FIELD name size align
- _FIELD_OFFSET = (_FIELD_OFFSET + (\align) - 1) & (~ ((\align)-1))
- \name	= _FIELD_OFFSET
- _FIELD_OFFSET = _FIELD_OFFSET + (\size)
-.if (\align > _STRUCT_ALIGN)
- _STRUCT_ALIGN = \align
-.endif
-.endm
-
-# END_FIELDS
-.macro END_FIELDS
- _FIELD_OFFSET = (_FIELD_OFFSET + _STRUCT_ALIGN-1) & (~ (_STRUCT_ALIGN-1))
-.endm
-
-########################################################################
-
-.macro STRUCT p1
-START_FIELDS
-.struc \p1
-.endm
-
-.macro ENDSTRUCT
- tmp = _FIELD_OFFSET
- END_FIELDS
- tmp = (_FIELD_OFFSET - %%tmp)
-.if (tmp > 0)
-	.lcomm	tmp
-.endif
-.endstruc
-.endm
-
-## RES_int name size align
-.macro RES_int p1 p2 p3
- name = \p1
- size = \p2
- align = .\p3
-
- _FIELD_OFFSET = (_FIELD_OFFSET + (align) - 1) & (~ ((align)-1))
-.align align
-.lcomm name size
- _FIELD_OFFSET = _FIELD_OFFSET + (size)
-.if (align > _STRUCT_ALIGN)
- _STRUCT_ALIGN = align
-.endif
-.endm
-
-# macro RES_B name, size [, align]
-.macro RES_B _name, _size, _align=1
-RES_int _name _size _align
-.endm
-
-# macro RES_W name, size [, align]
-.macro RES_W _name, _size, _align=2
-RES_int _name 2*(_size) _align
-.endm
-
-# macro RES_D name, size [, align]
-.macro RES_D _name, _size, _align=4
-RES_int _name 4*(_size) _align
-.endm
-
-# macro RES_Q name, size [, align]
-.macro RES_Q _name, _size, _align=8
-RES_int _name 8*(_size) _align
-.endm
-
-# macro RES_DQ name, size [, align]
-.macro RES_DQ _name, _size, _align=16
-RES_int _name 16*(_size) _align
-.endm
-
-# macro RES_Y name, size [, align]
-.macro RES_Y _name, _size, _align=32
-RES_int _name 32*(_size) _align
-.endm
-
-# macro RES_Z name, size [, align]
-.macro RES_Z _name, _size, _align=64
-RES_int _name 64*(_size) _align
-.endm
-
-#endif
-
-
-########################################################################
-#### Define SHA256 Out Of Order Data Structures
-########################################################################
-
-START_FIELDS    # LANE_DATA
-###     name            size    align
-FIELD   _job_in_lane,   8,      8       # pointer to job object
-END_FIELDS
-
- _LANE_DATA_size = _FIELD_OFFSET
- _LANE_DATA_align = _STRUCT_ALIGN
-
-########################################################################
-
-START_FIELDS    # SHA256_ARGS_X4
-###     name            size    align
-FIELD   _digest,        4*8*8,  4       # transposed digest
-FIELD   _data_ptr,      8*8,    8       # array of pointers to data
-END_FIELDS
-
- _SHA256_ARGS_X4_size  =  _FIELD_OFFSET
- _SHA256_ARGS_X4_align = _STRUCT_ALIGN
- _SHA256_ARGS_X8_size  =	_FIELD_OFFSET
- _SHA256_ARGS_X8_align =	_STRUCT_ALIGN
-
-#######################################################################
-
-START_FIELDS    # MB_MGR
-###     name            size    align
-FIELD   _args,          _SHA256_ARGS_X4_size, _SHA256_ARGS_X4_align
-FIELD   _lens,          4*8,    8
-FIELD   _unused_lanes,  8,      8
-FIELD   _ldata,         _LANE_DATA_size*8, _LANE_DATA_align
-END_FIELDS
-
- _MB_MGR_size  =  _FIELD_OFFSET
- _MB_MGR_align =  _STRUCT_ALIGN
-
-_args_digest   =     _args + _digest
-_args_data_ptr =     _args + _data_ptr
-
-#######################################################################
-
-START_FIELDS    #STACK_FRAME
-###     name            size    align
-FIELD   _data,		16*SZ8,   1       # transposed digest
-FIELD   _digest,         8*SZ8,   1       # array of pointers to data
-FIELD   _ytmp,           4*SZ8,   1
-FIELD   _rsp,            8,       1
-END_FIELDS
-
- _STACK_FRAME_size  =  _FIELD_OFFSET
- _STACK_FRAME_align =  _STRUCT_ALIGN
-
-#######################################################################
-
-########################################################################
-#### Define constants
-########################################################################
-
-#define STS_UNKNOWN             0
-#define STS_BEING_PROCESSED     1
-#define STS_COMPLETED           2
-
-########################################################################
-#### Define JOB_SHA256 structure
-########################################################################
-
-START_FIELDS    # JOB_SHA256
-
-###     name                            size    align
-FIELD   _buffer,                        8,      8       # pointer to buffer
-FIELD   _len,                           8,      8       # length in bytes
-FIELD   _result_digest,                 8*4,    32      # Digest (output)
-FIELD   _status,                        4,      4
-FIELD   _user_data,                     8,      8
-END_FIELDS
-
- _JOB_SHA256_size = _FIELD_OFFSET
- _JOB_SHA256_align = _STRUCT_ALIGN

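The FIELD macro in the file above is just an align-up accumulator over _FIELD_OFFSET and _STRUCT_ALIGN. A short sketch (hypothetical helper names) that reproduces the arithmetic for the JOB_SHA256 layout declared at the end of the file, so the resulting offsets can be checked by eye:

	#include <stdio.h>

	static unsigned long off;	/* running _FIELD_OFFSET */
	static unsigned long salign;	/* running _STRUCT_ALIGN */

	static unsigned long align_up(unsigned long x, unsigned long a)
	{
		return (x + a - 1) & ~(a - 1);	/* same formula as FIELD */
	}

	static void field(const char *name, unsigned long size, unsigned long align)
	{
		unsigned long o = align_up(off, align);

		off = o + size;
		if (align > salign)
			salign = align;
		printf("%-16s offset %3lu\n", name, o);
	}

	int main(void)
	{
		/* JOB_SHA256, as declared at the end of the file above. */
		field("_buffer",        8,     8);	/* offset  0 */
		field("_len",           8,     8);	/* offset  8 */
		field("_result_digest", 8 * 4, 32);	/* offset 32 (16 rounded up) */
		field("_status",        4,     4);	/* offset 64 */
		field("_user_data",     8,     8);	/* offset 72 */
		printf("size %lu, align %lu\n", align_up(off, salign), salign);
		return 0;
	}
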
+ 0 - 307
arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S

@@ -1,307 +0,0 @@
-/*
- * Flush routine for SHA256 multibuffer
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- *  Copyright(c) 2016 Intel Corporation.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of version 2 of the GNU General Public License as
- *  published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  General Public License for more details.
- *
- *  Contact Information:
- *      Megha Dey <megha.dey@linux.intel.com>
- *
- *  BSD LICENSE
- *
- *  Copyright(c) 2016 Intel Corporation.
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *    * Redistributions of source code must retain the above copyright
- *      notice, this list of conditions and the following disclaimer.
- *    * Redistributions in binary form must reproduce the above copyright
- *      notice, this list of conditions and the following disclaimer in
- *      the documentation and/or other materials provided with the
- *      distribution.
- *    * Neither the name of Intel Corporation nor the names of its
- *      contributors may be used to endorse or promote products derived
- *      from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-#include <linux/linkage.h>
-#include <asm/frame.h>
-#include "sha256_mb_mgr_datastruct.S"
-
-.extern sha256_x8_avx2
-
-#LINUX register definitions
-#define arg1	%rdi
-#define arg2	%rsi
-
-# Common register definitions
-#define state	arg1
-#define job	arg2
-#define len2	arg2
-
-# idx must be a register not clobbered by sha256_x8_avx2
-#define idx		%r8
-#define DWORD_idx	%r8d
-
-#define unused_lanes	%rbx
-#define lane_data	%rbx
-#define tmp2		%rbx
-#define tmp2_w		%ebx
-
-#define job_rax		%rax
-#define tmp1		%rax
-#define size_offset	%rax
-#define tmp		%rax
-#define start_offset	%rax
-
-#define tmp3		%arg1
-
-#define extra_blocks	%arg2
-#define p		%arg2
-
-.macro LABEL prefix n
-\prefix\n\():
-.endm
-
-.macro JNE_SKIP i
-jne     skip_\i
-.endm
-
-.altmacro
-.macro SET_OFFSET _offset
-offset = \_offset
-.endm
-.noaltmacro
-
-# JOB_SHA256* sha256_mb_mgr_flush_avx2(MB_MGR *state)
-# arg 1 : rcx : state
-ENTRY(sha256_mb_mgr_flush_avx2)
-	FRAME_BEGIN
-        push    %rbx
-
-	# If bit (32+3) is set, then all lanes are empty
-	mov	_unused_lanes(state), unused_lanes
-	bt	$32+3, unused_lanes
-	jc	return_null
-
-	# find a lane with a non-null job
-	xor	idx, idx
-	offset = (_ldata + 1 * _LANE_DATA_size + _job_in_lane)
-	cmpq	$0, offset(state)
-	cmovne	one(%rip), idx
-	offset = (_ldata + 2 * _LANE_DATA_size + _job_in_lane)
-	cmpq	$0, offset(state)
-	cmovne	two(%rip), idx
-	offset = (_ldata + 3 * _LANE_DATA_size + _job_in_lane)
-	cmpq	$0, offset(state)
-	cmovne	three(%rip), idx
-	offset = (_ldata + 4 * _LANE_DATA_size + _job_in_lane)
-	cmpq	$0, offset(state)
-	cmovne	four(%rip), idx
-	offset = (_ldata + 5 * _LANE_DATA_size + _job_in_lane)
-	cmpq	$0, offset(state)
-	cmovne	five(%rip), idx
-	offset = (_ldata + 6 * _LANE_DATA_size + _job_in_lane)
-	cmpq	$0, offset(state)
-	cmovne	six(%rip), idx
-	offset = (_ldata + 7 * _LANE_DATA_size + _job_in_lane)
-	cmpq	$0, offset(state)
-	cmovne	seven(%rip), idx
-
-	# copy idx to empty lanes
-copy_lane_data:
-	offset =  (_args + _data_ptr)
-	mov	offset(state,idx,8), tmp
-
-	I = 0
-.rep 8
-	offset = (_ldata + I * _LANE_DATA_size + _job_in_lane)
-	cmpq	$0, offset(state)
-.altmacro
-	JNE_SKIP %I
-	offset =  (_args + _data_ptr + 8*I)
-	mov	tmp, offset(state)
-	offset =  (_lens + 4*I)
-	movl	$0xFFFFFFFF, offset(state)
-LABEL skip_ %I
-	I = (I+1)
-.noaltmacro
-.endr
-
-	# Find min length
-	vmovdqu _lens+0*16(state), %xmm0
-	vmovdqu _lens+1*16(state), %xmm1
-
-	vpminud %xmm1, %xmm0, %xmm2		# xmm2 has {D,C,B,A}
-	vpalignr $8, %xmm2, %xmm3, %xmm3	# xmm3 has {x,x,D,C}
-	vpminud %xmm3, %xmm2, %xmm2		# xmm2 has {x,x,E,F}
-	vpalignr $4, %xmm2, %xmm3, %xmm3	# xmm3 has {x,x,x,E}
-	vpminud %xmm3, %xmm2, %xmm2		# xmm2 has min val in low dword
-
-	vmovd	%xmm2, DWORD_idx
-	mov	idx, len2
-	and	$0xF, idx
-	shr	$4, len2
-	jz	len_is_0
-
-	vpand	clear_low_nibble(%rip), %xmm2, %xmm2
-	vpshufd	$0, %xmm2, %xmm2
-
-	vpsubd	%xmm2, %xmm0, %xmm0
-	vpsubd	%xmm2, %xmm1, %xmm1
-
-	vmovdqu	%xmm0, _lens+0*16(state)
-	vmovdqu	%xmm1, _lens+1*16(state)
-
-	# "state" and "args" are the same address, arg1
-	# len is arg2
-	call	sha256_x8_avx2
-	# state and idx are intact
-
-len_is_0:
-	# process completed job "idx"
-	imul	$_LANE_DATA_size, idx, lane_data
-	lea	_ldata(state, lane_data), lane_data
-
-	mov	_job_in_lane(lane_data), job_rax
-	movq	$0, _job_in_lane(lane_data)
-	movl	$STS_COMPLETED, _status(job_rax)
-	mov	_unused_lanes(state), unused_lanes
-	shl	$4, unused_lanes
-	or	idx, unused_lanes
-
-	mov	unused_lanes, _unused_lanes(state)
-	movl	$0xFFFFFFFF, _lens(state,idx,4)
-
-	vmovd	_args_digest(state , idx, 4) , %xmm0
-	vpinsrd	$1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0
-	vpinsrd	$2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0
-	vpinsrd	$3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0
-	vmovd	_args_digest+4*32(state, idx, 4), %xmm1
-	vpinsrd	$1, _args_digest+5*32(state, idx, 4), %xmm1, %xmm1
-	vpinsrd	$2, _args_digest+6*32(state, idx, 4), %xmm1, %xmm1
-	vpinsrd	$3, _args_digest+7*32(state, idx, 4), %xmm1, %xmm1
-
-	vmovdqu	%xmm0, _result_digest(job_rax)
-	offset =  (_result_digest + 1*16)
-	vmovdqu	%xmm1, offset(job_rax)
-
-return:
-	pop     %rbx
-	FRAME_END
-	ret
-
-return_null:
-	xor	job_rax, job_rax
-	jmp	return
-ENDPROC(sha256_mb_mgr_flush_avx2)
-
-##############################################################################
-
-.align 16
-ENTRY(sha256_mb_mgr_get_comp_job_avx2)
-	push	%rbx
-
-	## if bit 32+3 is set, then all lanes are empty
-	mov	_unused_lanes(state), unused_lanes
-	bt	$(32+3), unused_lanes
-	jc	.return_null
-
-	# Find min length
-	vmovdqu	_lens(state), %xmm0
-	vmovdqu	_lens+1*16(state), %xmm1
-
-	vpminud	%xmm1, %xmm0, %xmm2		# xmm2 has {D,C,B,A}
-	vpalignr $8, %xmm2, %xmm3, %xmm3	# xmm3 has {x,x,D,C}
-	vpminud	%xmm3, %xmm2, %xmm2		# xmm2 has {x,x,E,F}
-	vpalignr $4, %xmm2, %xmm3, %xmm3	# xmm3 has {x,x,x,E}
-	vpminud	%xmm3, %xmm2, %xmm2		# xmm2 has min val in low dword
-
-	vmovd	%xmm2, DWORD_idx
-	test	$~0xF, idx
-	jnz	.return_null
-
-	# process completed job "idx"
-	imul	$_LANE_DATA_size, idx, lane_data
-	lea	_ldata(state, lane_data), lane_data
-
-	mov	_job_in_lane(lane_data), job_rax
-	movq	$0,  _job_in_lane(lane_data)
-	movl	$STS_COMPLETED, _status(job_rax)
-	mov	_unused_lanes(state), unused_lanes
-	shl	$4, unused_lanes
-	or	idx, unused_lanes
-	mov	unused_lanes, _unused_lanes(state)
-
-	movl	$0xFFFFFFFF, _lens(state,  idx, 4)
-
-	vmovd	_args_digest(state, idx, 4), %xmm0
-	vpinsrd	$1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0
-	vpinsrd	$2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0
-	vpinsrd	$3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0
-	vmovd	_args_digest+4*32(state, idx, 4), %xmm1
-	vpinsrd	$1, _args_digest+5*32(state, idx, 4), %xmm1, %xmm1
-	vpinsrd	$2, _args_digest+6*32(state, idx, 4), %xmm1, %xmm1
-	vpinsrd	$3, _args_digest+7*32(state, idx, 4), %xmm1, %xmm1
-
-        vmovdqu %xmm0, _result_digest(job_rax)
-        offset =  (_result_digest + 1*16)
-        vmovdqu %xmm1, offset(job_rax)
-
-	pop	%rbx
-
-	ret
-
-.return_null:
-	xor	job_rax, job_rax
-	pop	%rbx
-	ret
-ENDPROC(sha256_mb_mgr_get_comp_job_avx2)
-
-.section	.rodata.cst16.clear_low_nibble, "aM", @progbits, 16
-.align 16
-clear_low_nibble:
-.octa	0x000000000000000000000000FFFFFFF0
-
-.section	.rodata.cst8, "aM", @progbits, 8
-.align 8
-one:
-.quad	1
-two:
-.quad	2
-three:
-.quad	3
-four:
-.quad	4
-five:
-.quad	5
-six:
-.quad	6
-seven:
-.quad  7

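The lane-selection trick in the flush routine above is worth spelling out: every lens[] entry packs (blocks << 4) | lane, idle lanes are parked at 0xFFFFFFFF, and the vpminud/vpalignr ladder simply computes the minimum of the eight packed words, so the shortest outstanding job wins. A scalar sketch of the same selection (names are illustrative):

	#include <stdint.h>
	#include <stdio.h>

	#define NUM_LANES 8
	#define LANE_IDLE 0xFFFFFFFFu	/* parked value, never the minimum */

	/* Pack a job length (in blocks) with its lane index, as the submit path does. */
	static uint32_t pack_len(uint32_t blocks, uint32_t lane)
	{
		return (blocks << 4) | lane;
	}

	/* Scalar equivalent of the vpminud/vpalignr reduction: pick the shortest job. */
	static uint32_t min_len(const uint32_t lens[NUM_LANES])
	{
		uint32_t m = lens[0];

		for (unsigned int i = 1; i < NUM_LANES; i++)
			if (lens[i] < m)
				m = lens[i];
		return m;
	}

	int main(void)
	{
		uint32_t lens[NUM_LANES] = {
			pack_len(4, 0), pack_len(2, 1), LANE_IDLE, pack_len(7, 3),
			LANE_IDLE, pack_len(2, 5), LANE_IDLE, LANE_IDLE,
		};
		uint32_t m = min_len(lens);

		/* Same split as "and $0xF, idx" / "shr $4, len2" above. */
		printf("lane %u, blocks %u\n", m & 0xF, m >> 4);	/* lane 1, blocks 2 */
		return 0;
	}

Once the winner is known, its block count is subtracted from every remaining lane (the vpsubd pair above), which is what lets all eight lanes share a single call into sha256_x8_avx2.
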
+ 0 - 65
arch/x86/crypto/sha256-mb/sha256_mb_mgr_init_avx2.c

@@ -1,65 +0,0 @@
-/*
- * Initialization code for multi buffer SHA256 algorithm for AVX2
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- *  Copyright(c) 2016 Intel Corporation.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of version 2 of the GNU General Public License as
- *  published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  General Public License for more details.
- *
- *  Contact Information:
- *      Megha Dey <megha.dey@linux.intel.com>
- *
- *  BSD LICENSE
- *
- *  Copyright(c) 2016 Intel Corporation.
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *    * Redistributions of source code must retain the above copyright
- *      notice, this list of conditions and the following disclaimer.
- *    * Redistributions in binary form must reproduce the above copyright
- *      notice, this list of conditions and the following disclaimer in
- *      the documentation and/or other materials provided with the
- *      distribution.
- *    * Neither the name of Intel Corporation nor the names of its
- *      contributors may be used to endorse or promote products derived
- *      from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "sha256_mb_mgr.h"
-
-void sha256_mb_mgr_init_avx2(struct sha256_mb_mgr *state)
-{
-	unsigned int j;
-
-	state->unused_lanes = 0xF76543210ULL;
-	for (j = 0; j < 8; j++) {
-		state->lens[j] = 0xFFFFFFFF;
-		state->ldata[j].job_in_lane = NULL;
-	}
-}

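The 0xF76543210 constant above is the entire free-lane bookkeeping: a stack of 4-bit nibbles holding the lane indices 0..7 with an 0xF sentinel on top. Submitting a job pops the low nibble, completing one pushes the lane back, and the "bt $32+3" test in the flush routine asks whether the sentinel has climbed back to the top, i.e. whether every lane is idle. A user-space sketch of that encoding (helper names are made up for illustration):

	#include <stdint.h>
	#include <stdio.h>

	static uint64_t pop_lane(uint64_t *unused_lanes)
	{
		uint64_t lane = *unused_lanes & 0xF;	/* take the lowest nibble */

		*unused_lanes >>= 4;			/* like "shr $4, unused_lanes" */
		return lane;
	}

	static void push_lane(uint64_t *unused_lanes, uint64_t lane)
	{
		*unused_lanes = (*unused_lanes << 4) | lane;	/* "shl $4" + "or idx" */
	}

	static int all_lanes_free(uint64_t unused_lanes)
	{
		return (unused_lanes >> (32 + 3)) & 1;	/* same test as "bt $32+3" */
	}

	int main(void)
	{
		uint64_t unused_lanes = 0xF76543210ULL;	/* value set by the init above */
		uint64_t lane;

		printf("all free: %d\n", all_lanes_free(unused_lanes));		/* 1 */
		lane = pop_lane(&unused_lanes);					/* lane 0 */
		printf("got lane %llu, all free: %d\n",
		       (unsigned long long)lane, all_lanes_free(unused_lanes));	/* 0 */
		push_lane(&unused_lanes, lane);
		printf("all free again: %d\n", all_lanes_free(unused_lanes));	/* 1 */
		return 0;
	}
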
+ 0 - 214
arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S

@@ -1,214 +0,0 @@
-/*
- * Buffer submit code for multi buffer SHA256 algorithm
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- *  Copyright(c) 2016 Intel Corporation.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of version 2 of the GNU General Public License as
- *  published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  General Public License for more details.
- *
- *  Contact Information:
- *      Megha Dey <megha.dey@linux.intel.com>
- *
- *  BSD LICENSE
- *
- *  Copyright(c) 2016 Intel Corporation.
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *    * Redistributions of source code must retain the above copyright
- *      notice, this list of conditions and the following disclaimer.
- *    * Redistributions in binary form must reproduce the above copyright
- *      notice, this list of conditions and the following disclaimer in
- *      the documentation and/or other materials provided with the
- *      distribution.
- *    * Neither the name of Intel Corporation nor the names of its
- *      contributors may be used to endorse or promote products derived
- *      from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/linkage.h>
-#include <asm/frame.h>
-#include "sha256_mb_mgr_datastruct.S"
-
-.extern sha256_x8_avx2
-
-# LINUX register definitions
-arg1		= %rdi
-arg2		= %rsi
-size_offset	= %rcx
-tmp2		= %rcx
-extra_blocks	= %rdx
-
-# Common definitions
-#define state	arg1
-#define job	%rsi
-#define len2	arg2
-#define p2	arg2
-
-# idx must be a register not clobbered by sha256_x8_avx2
-idx		= %r8
-DWORD_idx	= %r8d
-last_len	= %r8
-
-p		= %r11
-start_offset	= %r11
-
-unused_lanes	= %rbx
-BYTE_unused_lanes = %bl
-
-job_rax		= %rax
-len		= %rax
-DWORD_len	= %eax
-
-lane		= %r12
-tmp3		= %r12
-
-tmp		= %r9
-DWORD_tmp	= %r9d
-
-lane_data	= %r10
-
-# JOB* sha256_mb_mgr_submit_avx2(MB_MGR *state, JOB_SHA256 *job)
-# arg 1 : rcx : state
-# arg 2 : rdx : job
-ENTRY(sha256_mb_mgr_submit_avx2)
-	FRAME_BEGIN
-	push	%rbx
-	push	%r12
-
-	mov	_unused_lanes(state), unused_lanes
-	mov	unused_lanes, lane
-	and	$0xF, lane
-	shr	$4, unused_lanes
-	imul	$_LANE_DATA_size, lane, lane_data
-	movl	$STS_BEING_PROCESSED, _status(job)
-	lea	_ldata(state, lane_data), lane_data
-	mov	unused_lanes, _unused_lanes(state)
-	movl	_len(job),  DWORD_len
-
-	mov	job, _job_in_lane(lane_data)
-	shl	$4, len
-	or	lane, len
-
-	movl	DWORD_len,  _lens(state , lane, 4)
-
-	# Load digest words from result_digest
-	vmovdqu	_result_digest(job), %xmm0
-	vmovdqu	_result_digest+1*16(job), %xmm1
-	vmovd	%xmm0, _args_digest(state, lane, 4)
-	vpextrd	$1, %xmm0, _args_digest+1*32(state , lane, 4)
-	vpextrd	$2, %xmm0, _args_digest+2*32(state , lane, 4)
-	vpextrd	$3, %xmm0, _args_digest+3*32(state , lane, 4)
-	vmovd	%xmm1, _args_digest+4*32(state , lane, 4)
-
-	vpextrd	$1, %xmm1, _args_digest+5*32(state , lane, 4)
-	vpextrd	$2, %xmm1, _args_digest+6*32(state , lane, 4)
-	vpextrd	$3, %xmm1, _args_digest+7*32(state , lane, 4)
-
-	mov	_buffer(job), p
-	mov	p, _args_data_ptr(state, lane, 8)
-
-	cmp	$0xF, unused_lanes
-	jne	return_null
-
-start_loop:
-	# Find min length
-	vmovdqa	_lens(state), %xmm0
-	vmovdqa	_lens+1*16(state), %xmm1
-
-	vpminud	%xmm1, %xmm0, %xmm2		# xmm2 has {D,C,B,A}
-	vpalignr $8, %xmm2, %xmm3, %xmm3	# xmm3 has {x,x,D,C}
-	vpminud	%xmm3, %xmm2, %xmm2		# xmm2 has {x,x,E,F}
-	vpalignr $4, %xmm2, %xmm3, %xmm3	# xmm3 has {x,x,x,E}
-	vpminud	%xmm3, %xmm2, %xmm2		# xmm2 has min val in low dword
-
-	vmovd	%xmm2, DWORD_idx
-	mov	idx, len2
-	and	$0xF, idx
-	shr	$4, len2
-	jz	len_is_0
-
-	vpand	clear_low_nibble(%rip), %xmm2, %xmm2
-	vpshufd	$0, %xmm2, %xmm2
-
-	vpsubd	%xmm2, %xmm0, %xmm0
-	vpsubd	%xmm2, %xmm1, %xmm1
-
-	vmovdqa	%xmm0, _lens + 0*16(state)
-	vmovdqa	%xmm1, _lens + 1*16(state)
-
-	# "state" and "args" are the same address, arg1
-	# len is arg2
-	call	sha256_x8_avx2
-
-	# state and idx are intact
-
-len_is_0:
-	# process completed job "idx"
-	imul	$_LANE_DATA_size, idx, lane_data
-	lea	_ldata(state, lane_data), lane_data
-
-	mov	_job_in_lane(lane_data), job_rax
-	mov	_unused_lanes(state), unused_lanes
-	movq	$0, _job_in_lane(lane_data)
-	movl	$STS_COMPLETED, _status(job_rax)
-	shl	$4, unused_lanes
-	or	idx, unused_lanes
-	mov	unused_lanes, _unused_lanes(state)
-
-	movl	$0xFFFFFFFF, _lens(state,idx,4)
-
-	vmovd	_args_digest(state, idx, 4), %xmm0
-	vpinsrd	$1, _args_digest+1*32(state , idx, 4), %xmm0, %xmm0
-	vpinsrd	$2, _args_digest+2*32(state , idx, 4), %xmm0, %xmm0
-	vpinsrd	$3, _args_digest+3*32(state , idx, 4), %xmm0, %xmm0
-	vmovd	_args_digest+4*32(state, idx, 4), %xmm1
-
-	vpinsrd	$1, _args_digest+5*32(state , idx, 4), %xmm1, %xmm1
-	vpinsrd	$2, _args_digest+6*32(state , idx, 4), %xmm1, %xmm1
-	vpinsrd	$3, _args_digest+7*32(state , idx, 4), %xmm1, %xmm1
-
-	vmovdqu	%xmm0, _result_digest(job_rax)
-	vmovdqu	%xmm1, _result_digest+1*16(job_rax)
-
-return:
-	pop     %r12
-        pop     %rbx
-        FRAME_END
-	ret
-
-return_null:
-	xor	job_rax, job_rax
-	jmp	return
-
-ENDPROC(sha256_mb_mgr_submit_avx2)
-
-.section	.rodata.cst16.clear_low_nibble, "aM", @progbits, 16
-.align 16
-clear_low_nibble:
-	.octa	0x000000000000000000000000FFFFFFF0

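One behavioural detail of the submit path above explains why a separate flush routine exists at all: after parking a job in a free lane, hashing is only kicked off when the free list has shrunk to the bare 0xF sentinel (the "cmp $0xF, unused_lanes" / "jne return_null" pair), i.e. when all eight lanes are occupied; otherwise the job simply waits for further submissions or a flush. A compact sketch of that decision, reusing the nibble-stack encoding shown earlier (names are illustrative):

	#include <stdint.h>
	#include <stdio.h>

	/* Free-lane nibble stack as initialised by sha256_mb_mgr_init_avx2(). */
	static uint64_t unused_lanes = 0xF76543210ULL;

	static void submit_job(unsigned int *lane, int *start_hashing)
	{
		*lane = unused_lanes & 0xF;	/* pop a free lane */
		unused_lanes >>= 4;

		/* Only the 0xF sentinel left => all eight lanes now hold a job. */
		*start_hashing = (unused_lanes == 0xF);
	}

	int main(void)
	{
		unsigned int lane;
		int start;

		for (int i = 0; i < 8; i++) {
			submit_job(&lane, &start);
			printf("job %d -> lane %u, start=%d\n", i, lane, start);
		}
		/* Only the eighth submission reports start=1; earlier jobs sit in
		 * their lanes until more work arrives or the flush path runs. */
		return 0;
	}
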
+ 0 - 598
arch/x86/crypto/sha256-mb/sha256_x8_avx2.S

@@ -1,598 +0,0 @@
-/*
- * Multi-buffer SHA256 algorithm hash compute routine
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- *  Copyright(c) 2016 Intel Corporation.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of version 2 of the GNU General Public License as
- *  published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  General Public License for more details.
- *
- *  Contact Information:
- *	Megha Dey <megha.dey@linux.intel.com>
- *
- *  BSD LICENSE
- *
- *  Copyright(c) 2016 Intel Corporation.
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *    * Redistributions of source code must retain the above copyright
- *      notice, this list of conditions and the following disclaimer.
- *    * Redistributions in binary form must reproduce the above copyright
- *      notice, this list of conditions and the following disclaimer in
- *      the documentation and/or other materials provided with the
- *      distribution.
- *    * Neither the name of Intel Corporation nor the names of its
- *      contributors may be used to endorse or promote products derived
- *      from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/linkage.h>
-#include "sha256_mb_mgr_datastruct.S"
-
-## code to compute eight-lane SHA256 using 256-bit AVX2 vectors
-## outer calling routine takes care of save and restore of XMM registers
-## Logic designed/laid out by JDG
-
-## Function clobbers: rax, rcx, rdx,   rbx, rsi, rdi, r9-r15; %ymm0-15
-## Linux clobbers:    rax rbx rcx rdx rsi            r9 r10 r11 r12 r13 r14 r15
-## Linux preserves:                       rdi rbp r8
-##
-## clobbers %ymm0-15
-
-arg1 = %rdi
-arg2 = %rsi
-reg3 = %rcx
-reg4 = %rdx
-
-# Common definitions
-STATE = arg1
-INP_SIZE = arg2
-
-IDX = %rax
-ROUND = %rbx
-TBL = reg3
-
-inp0 = %r9
-inp1 = %r10
-inp2 = %r11
-inp3 = %r12
-inp4 = %r13
-inp5 = %r14
-inp6 = %r15
-inp7 = reg4
-
-a = %ymm0
-b = %ymm1
-c = %ymm2
-d = %ymm3
-e = %ymm4
-f = %ymm5
-g = %ymm6
-h = %ymm7
-
-T1 = %ymm8
-
-a0 = %ymm12
-a1 = %ymm13
-a2 = %ymm14
-TMP = %ymm15
-TMP0 = %ymm6
-TMP1 = %ymm7
-
-TT0 = %ymm8
-TT1 = %ymm9
-TT2 = %ymm10
-TT3 = %ymm11
-TT4 = %ymm12
-TT5 = %ymm13
-TT6 = %ymm14
-TT7 = %ymm15
-
-# Define stack usage
-
-# Assume stack aligned to 32 bytes before call
-# Therefore FRAMESZ mod 32 must be 32-8 = 24
-
-#define FRAMESZ	0x388
-
-#define VMOVPS	vmovups
-
-# TRANSPOSE8 r0, r1, r2, r3, r4, r5, r6, r7, t0, t1
-# "transpose" data in {r0...r7} using temps {t0...t1}
-# Input looks like: {r0 r1 r2 r3 r4 r5 r6 r7}
-# r0 = {a7 a6 a5 a4   a3 a2 a1 a0}
-# r1 = {b7 b6 b5 b4   b3 b2 b1 b0}
-# r2 = {c7 c6 c5 c4   c3 c2 c1 c0}
-# r3 = {d7 d6 d5 d4   d3 d2 d1 d0}
-# r4 = {e7 e6 e5 e4   e3 e2 e1 e0}
-# r5 = {f7 f6 f5 f4   f3 f2 f1 f0}
-# r6 = {g7 g6 g5 g4   g3 g2 g1 g0}
-# r7 = {h7 h6 h5 h4   h3 h2 h1 h0}
-#
-# Output looks like: {r0 r1 r2 r3 r4 r5 r6 r7}
-# r0 = {h0 g0 f0 e0   d0 c0 b0 a0}
-# r1 = {h1 g1 f1 e1   d1 c1 b1 a1}
-# r2 = {h2 g2 f2 e2   d2 c2 b2 a2}
-# r3 = {h3 g3 f3 e3   d3 c3 b3 a3}
-# r4 = {h4 g4 f4 e4   d4 c4 b4 a4}
-# r5 = {h5 g5 f5 e5   d5 c5 b5 a5}
-# r6 = {h6 g6 f6 e6   d6 c6 b6 a6}
-# r7 = {h7 g7 f7 e7   d7 c7 b7 a7}
-#
-
-.macro TRANSPOSE8 r0 r1 r2 r3 r4 r5 r6 r7 t0 t1
-	# process top half (r0..r3) {a...d}
-	vshufps	$0x44, \r1, \r0, \t0 # t0 = {b5 b4 a5 a4   b1 b0 a1 a0}
-	vshufps	$0xEE, \r1, \r0, \r0 # r0 = {b7 b6 a7 a6   b3 b2 a3 a2}
-	vshufps	$0x44, \r3, \r2, \t1 # t1 = {d5 d4 c5 c4   d1 d0 c1 c0}
-	vshufps	$0xEE, \r3, \r2, \r2 # r2 = {d7 d6 c7 c6   d3 d2 c3 c2}
-	vshufps	$0xDD, \t1, \t0, \r3 # r3 = {d5 c5 b5 a5   d1 c1 b1 a1}
-	vshufps	$0x88, \r2, \r0, \r1 # r1 = {d6 c6 b6 a6   d2 c2 b2 a2}
-	vshufps	$0xDD, \r2, \r0, \r0 # r0 = {d7 c7 b7 a7   d3 c3 b3 a3}
-	vshufps	$0x88, \t1, \t0, \t0 # t0 = {d4 c4 b4 a4   d0 c0 b0 a0}
-
-	# use r2 in place of t0
-	# process bottom half (r4..r7) {e...h}
-	vshufps	$0x44, \r5, \r4, \r2 # r2 = {f5 f4 e5 e4   f1 f0 e1 e0}
-	vshufps	$0xEE, \r5, \r4, \r4 # r4 = {f7 f6 e7 e6   f3 f2 e3 e2}
-	vshufps	$0x44, \r7, \r6, \t1 # t1 = {h5 h4 g5 g4   h1 h0 g1 g0}
-	vshufps	$0xEE, \r7, \r6, \r6 # r6 = {h7 h6 g7 g6   h3 h2 g3 g2}
-	vshufps	$0xDD, \t1, \r2, \r7 # r7 = {h5 g5 f5 e5   h1 g1 f1 e1}
-	vshufps	$0x88, \r6, \r4, \r5 # r5 = {h6 g6 f6 e6   h2 g2 f2 e2}
-	vshufps	$0xDD, \r6, \r4, \r4 # r4 = {h7 g7 f7 e7   h3 g3 f3 e3}
-	vshufps	$0x88, \t1, \r2, \t1 # t1 = {h4 g4 f4 e4   h0 g0 f0 e0}
-
-	vperm2f128	$0x13, \r1, \r5, \r6  # h6...a6
-	vperm2f128	$0x02, \r1, \r5, \r2  # h2...a2
-	vperm2f128	$0x13, \r3, \r7, \r5  # h5...a5
-	vperm2f128	$0x02, \r3, \r7, \r1  # h1...a1
-	vperm2f128	$0x13, \r0, \r4, \r7  # h7...a7
-	vperm2f128	$0x02, \r0, \r4, \r3  # h3...a3
-	vperm2f128	$0x13, \t0, \t1, \r4  # h4...a4
-	vperm2f128	$0x02, \t0, \t1, \r0  # h0...a0
-
-.endm
-
-.macro ROTATE_ARGS
-TMP_ = h
-h = g
-g = f
-f = e
-e = d
-d = c
-c = b
-b = a
-a = TMP_
-.endm
-
-.macro _PRORD reg imm tmp
-	vpslld	$(32-\imm),\reg,\tmp
-	vpsrld	$\imm,\reg, \reg
-	vpor	\tmp,\reg, \reg
-.endm
-
-# PRORD_nd reg, imm, tmp, src
-.macro _PRORD_nd reg imm tmp src
-	vpslld	$(32-\imm), \src, \tmp
-	vpsrld	$\imm, \src, \reg
-	vpor	\tmp, \reg, \reg
-.endm
-
-# PRORD dst/src, amt
-.macro PRORD reg imm
-	_PRORD	\reg,\imm,TMP
-.endm
-
-# PRORD_nd dst, src, amt
-.macro PRORD_nd reg tmp imm
-	_PRORD_nd	\reg, \imm, TMP, \tmp
-.endm
-
-# arguments passed implicitly in preprocessor symbols i, a...h
-.macro ROUND_00_15 _T1 i
-	PRORD_nd	a0,e,5	# sig1: a0 = (e >> 5)
-
-	vpxor	g, f, a2	# ch: a2 = f^g
-	vpand	e,a2, a2	# ch: a2 = (f^g)&e
-	vpxor	g, a2, a2	# a2 = ch
-
-	PRORD_nd	a1,e,25	# sig1: a1 = (e >> 25)
-
-	vmovdqu	\_T1,(SZ8*(\i & 0xf))(%rsp)
-	vpaddd	(TBL,ROUND,1), \_T1, \_T1	# T1 = W + K
-	vpxor	e,a0, a0	# sig1: a0 = e ^ (e >> 5)
-	PRORD	a0, 6		# sig1: a0 = (e >> 6) ^ (e >> 11)
-	vpaddd	a2, h, h	# h = h + ch
-	PRORD_nd	a2,a,11	# sig0: a2 = (a >> 11)
-	vpaddd	\_T1,h, h 	# h = h + ch + W + K
-	vpxor	a1, a0, a0	# a0 = sigma1
-	PRORD_nd	a1,a,22	# sig0: a1 = (a >> 22)
-	vpxor	c, a, \_T1	# maj: T1 = a^c
-	add	$SZ8, ROUND	# ROUND++
-	vpand	b, \_T1, \_T1	# maj: T1 = (a^c)&b
-	vpaddd	a0, h, h
-	vpaddd	h, d, d
-	vpxor	a, a2, a2	# sig0: a2 = a ^ (a >> 11)
-	PRORD	a2,2		# sig0: a2 = (a >> 2) ^ (a >> 13)
-	vpxor	a1, a2, a2	# a2 = sig0
-	vpand	c, a, a1	# maj: a1 = a&c
-	vpor	\_T1, a1, a1 	# a1 = maj
-	vpaddd	a1, h, h	# h = h + ch + W + K + maj
-	vpaddd	a2, h, h	# h = h + ch + W + K + maj + sigma0
-	ROTATE_ARGS
-.endm
-
-# arguments passed implicitly in preprocessor symbols i, a...h
-.macro ROUND_16_XX _T1 i
-	vmovdqu	(SZ8*((\i-15)&0xf))(%rsp), \_T1
-	vmovdqu	(SZ8*((\i-2)&0xf))(%rsp), a1
-	vmovdqu	\_T1, a0
-	PRORD	\_T1,11
-	vmovdqu	a1, a2
-	PRORD	a1,2
-	vpxor	a0, \_T1, \_T1
-	PRORD	\_T1, 7
-	vpxor	a2, a1, a1
-	PRORD	a1, 17
-	vpsrld	$3, a0, a0
-	vpxor	a0, \_T1, \_T1
-	vpsrld	$10, a2, a2
-	vpxor	a2, a1, a1
-	vpaddd	(SZ8*((\i-16)&0xf))(%rsp), \_T1, \_T1
-	vpaddd	(SZ8*((\i-7)&0xf))(%rsp), a1, a1
-	vpaddd	a1, \_T1, \_T1
-
-	ROUND_00_15 \_T1,\i
-.endm
-
-# SHA256_ARGS:
-#   UINT128 digest[8];  // transposed digests
-#   UINT8  *data_ptr[4];
-
-# void sha256_x8_avx2(SHA256_ARGS *args, UINT64 bytes);
-# arg 1 : STATE : pointer to array of pointers to input data
-# arg 2 : INP_SIZE  : size of input in blocks
-	# general registers preserved in outer calling routine
-	# outer calling routine saves all the XMM registers
-	# save rsp, allocate 32-byte aligned for local variables
-ENTRY(sha256_x8_avx2)
-
-	# save callee-saved clobbered registers to comply with C function ABI
-	push    %r12
-	push    %r13
-	push    %r14
-	push    %r15
-
-	mov	%rsp, IDX
-	sub	$FRAMESZ, %rsp
-	and	$~0x1F, %rsp
-	mov	IDX, _rsp(%rsp)
-
-	# Load the pre-transposed incoming digest.
-	vmovdqu	0*SHA256_DIGEST_ROW_SIZE(STATE),a
-	vmovdqu	1*SHA256_DIGEST_ROW_SIZE(STATE),b
-	vmovdqu	2*SHA256_DIGEST_ROW_SIZE(STATE),c
-	vmovdqu	3*SHA256_DIGEST_ROW_SIZE(STATE),d
-	vmovdqu	4*SHA256_DIGEST_ROW_SIZE(STATE),e
-	vmovdqu	5*SHA256_DIGEST_ROW_SIZE(STATE),f
-	vmovdqu	6*SHA256_DIGEST_ROW_SIZE(STATE),g
-	vmovdqu	7*SHA256_DIGEST_ROW_SIZE(STATE),h
-
-	lea	K256_8(%rip),TBL
-
-	# load the address of each of the 4 message lanes
-	# getting ready to transpose input onto stack
-	mov	_args_data_ptr+0*PTR_SZ(STATE),inp0
-	mov	_args_data_ptr+1*PTR_SZ(STATE),inp1
-	mov	_args_data_ptr+2*PTR_SZ(STATE),inp2
-	mov	_args_data_ptr+3*PTR_SZ(STATE),inp3
-	mov	_args_data_ptr+4*PTR_SZ(STATE),inp4
-	mov	_args_data_ptr+5*PTR_SZ(STATE),inp5
-	mov	_args_data_ptr+6*PTR_SZ(STATE),inp6
-	mov	_args_data_ptr+7*PTR_SZ(STATE),inp7
-
-	xor	IDX, IDX
-lloop:
-	xor	ROUND, ROUND
-
-	# save old digest
-	vmovdqu	a, _digest(%rsp)
-	vmovdqu	b, _digest+1*SZ8(%rsp)
-	vmovdqu	c, _digest+2*SZ8(%rsp)
-	vmovdqu	d, _digest+3*SZ8(%rsp)
-	vmovdqu	e, _digest+4*SZ8(%rsp)
-	vmovdqu	f, _digest+5*SZ8(%rsp)
-	vmovdqu	g, _digest+6*SZ8(%rsp)
-	vmovdqu	h, _digest+7*SZ8(%rsp)
-	i = 0
-.rep 2
-	VMOVPS	i*32(inp0, IDX), TT0
-	VMOVPS	i*32(inp1, IDX), TT1
-	VMOVPS	i*32(inp2, IDX), TT2
-	VMOVPS	i*32(inp3, IDX), TT3
-	VMOVPS	i*32(inp4, IDX), TT4
-	VMOVPS	i*32(inp5, IDX), TT5
-	VMOVPS	i*32(inp6, IDX), TT6
-	VMOVPS	i*32(inp7, IDX), TT7
-	vmovdqu	g, _ytmp(%rsp)
-	vmovdqu	h, _ytmp+1*SZ8(%rsp)
-	TRANSPOSE8	TT0, TT1, TT2, TT3, TT4, TT5, TT6, TT7,   TMP0, TMP1
-	vmovdqu	PSHUFFLE_BYTE_FLIP_MASK(%rip), TMP1
-	vmovdqu	_ytmp(%rsp), g
-	vpshufb	TMP1, TT0, TT0
-	vpshufb	TMP1, TT1, TT1
-	vpshufb	TMP1, TT2, TT2
-	vpshufb	TMP1, TT3, TT3
-	vpshufb	TMP1, TT4, TT4
-	vpshufb	TMP1, TT5, TT5
-	vpshufb	TMP1, TT6, TT6
-	vpshufb	TMP1, TT7, TT7
-	vmovdqu	_ytmp+1*SZ8(%rsp), h
-	vmovdqu	TT4, _ytmp(%rsp)
-	vmovdqu	TT5, _ytmp+1*SZ8(%rsp)
-	vmovdqu	TT6, _ytmp+2*SZ8(%rsp)
-	vmovdqu	TT7, _ytmp+3*SZ8(%rsp)
-	ROUND_00_15	TT0,(i*8+0)
-	vmovdqu	_ytmp(%rsp), TT0
-	ROUND_00_15	TT1,(i*8+1)
-	vmovdqu	_ytmp+1*SZ8(%rsp), TT1
-	ROUND_00_15	TT2,(i*8+2)
-	vmovdqu	_ytmp+2*SZ8(%rsp), TT2
-	ROUND_00_15	TT3,(i*8+3)
-	vmovdqu	_ytmp+3*SZ8(%rsp), TT3
-	ROUND_00_15	TT0,(i*8+4)
-	ROUND_00_15	TT1,(i*8+5)
-	ROUND_00_15	TT2,(i*8+6)
-	ROUND_00_15	TT3,(i*8+7)
-	i = (i+1)
-.endr
-	add	$64, IDX
-	i = (i*8)
-
-	jmp	Lrounds_16_xx
-.align 16
-Lrounds_16_xx:
-.rep 16
-	ROUND_16_XX	T1, i
-	i = (i+1)
-.endr
-
-	cmp	$ROUNDS,ROUND
-	jb	Lrounds_16_xx
-
-	# add old digest
-	vpaddd	_digest+0*SZ8(%rsp), a, a
-	vpaddd	_digest+1*SZ8(%rsp), b, b
-	vpaddd	_digest+2*SZ8(%rsp), c, c
-	vpaddd	_digest+3*SZ8(%rsp), d, d
-	vpaddd	_digest+4*SZ8(%rsp), e, e
-	vpaddd	_digest+5*SZ8(%rsp), f, f
-	vpaddd	_digest+6*SZ8(%rsp), g, g
-	vpaddd	_digest+7*SZ8(%rsp), h, h
-
-	sub	$1, INP_SIZE  # unit is blocks
-	jne	lloop
-
-	# write back to memory (state object) the transposed digest
-	vmovdqu	a, 0*SHA256_DIGEST_ROW_SIZE(STATE)
-	vmovdqu	b, 1*SHA256_DIGEST_ROW_SIZE(STATE)
-	vmovdqu	c, 2*SHA256_DIGEST_ROW_SIZE(STATE)
-	vmovdqu	d, 3*SHA256_DIGEST_ROW_SIZE(STATE)
-	vmovdqu	e, 4*SHA256_DIGEST_ROW_SIZE(STATE)
-	vmovdqu	f, 5*SHA256_DIGEST_ROW_SIZE(STATE)
-	vmovdqu	g, 6*SHA256_DIGEST_ROW_SIZE(STATE)
-	vmovdqu	h, 7*SHA256_DIGEST_ROW_SIZE(STATE)
-
-	# update input pointers
-	add	IDX, inp0
-	mov	inp0, _args_data_ptr+0*8(STATE)
-	add	IDX, inp1
-	mov	inp1, _args_data_ptr+1*8(STATE)
-	add	IDX, inp2
-	mov	inp2, _args_data_ptr+2*8(STATE)
-	add	IDX, inp3
-	mov	inp3, _args_data_ptr+3*8(STATE)
-	add	IDX, inp4
-	mov	inp4, _args_data_ptr+4*8(STATE)
-	add	IDX, inp5
-	mov	inp5, _args_data_ptr+5*8(STATE)
-	add	IDX, inp6
-	mov	inp6, _args_data_ptr+6*8(STATE)
-	add	IDX, inp7
-	mov	inp7, _args_data_ptr+7*8(STATE)
-
-	# Postamble
-	mov	_rsp(%rsp), %rsp
-
-	# restore callee-saved clobbered registers
-	pop     %r15
-	pop     %r14
-	pop     %r13
-	pop     %r12
-
-	ret
-ENDPROC(sha256_x8_avx2)
-
-.section	.rodata.K256_8, "a", @progbits
-.align 64
-K256_8:
-	.octa	0x428a2f98428a2f98428a2f98428a2f98
-	.octa	0x428a2f98428a2f98428a2f98428a2f98
-	.octa	0x71374491713744917137449171374491
-	.octa	0x71374491713744917137449171374491
-	.octa	0xb5c0fbcfb5c0fbcfb5c0fbcfb5c0fbcf
-	.octa	0xb5c0fbcfb5c0fbcfb5c0fbcfb5c0fbcf
-	.octa	0xe9b5dba5e9b5dba5e9b5dba5e9b5dba5
-	.octa	0xe9b5dba5e9b5dba5e9b5dba5e9b5dba5
-	.octa	0x3956c25b3956c25b3956c25b3956c25b
-	.octa	0x3956c25b3956c25b3956c25b3956c25b
-	.octa	0x59f111f159f111f159f111f159f111f1
-	.octa	0x59f111f159f111f159f111f159f111f1
-	.octa	0x923f82a4923f82a4923f82a4923f82a4
-	.octa	0x923f82a4923f82a4923f82a4923f82a4
-	.octa	0xab1c5ed5ab1c5ed5ab1c5ed5ab1c5ed5
-	.octa	0xab1c5ed5ab1c5ed5ab1c5ed5ab1c5ed5
-	.octa	0xd807aa98d807aa98d807aa98d807aa98
-	.octa	0xd807aa98d807aa98d807aa98d807aa98
-	.octa	0x12835b0112835b0112835b0112835b01
-	.octa	0x12835b0112835b0112835b0112835b01
-	.octa	0x243185be243185be243185be243185be
-	.octa	0x243185be243185be243185be243185be
-	.octa	0x550c7dc3550c7dc3550c7dc3550c7dc3
-	.octa	0x550c7dc3550c7dc3550c7dc3550c7dc3
-	.octa	0x72be5d7472be5d7472be5d7472be5d74
-	.octa	0x72be5d7472be5d7472be5d7472be5d74
-	.octa	0x80deb1fe80deb1fe80deb1fe80deb1fe
-	.octa	0x80deb1fe80deb1fe80deb1fe80deb1fe
-	.octa	0x9bdc06a79bdc06a79bdc06a79bdc06a7
-	.octa	0x9bdc06a79bdc06a79bdc06a79bdc06a7
-	.octa	0xc19bf174c19bf174c19bf174c19bf174
-	.octa	0xc19bf174c19bf174c19bf174c19bf174
-	.octa	0xe49b69c1e49b69c1e49b69c1e49b69c1
-	.octa	0xe49b69c1e49b69c1e49b69c1e49b69c1
-	.octa	0xefbe4786efbe4786efbe4786efbe4786
-	.octa	0xefbe4786efbe4786efbe4786efbe4786
-	.octa	0x0fc19dc60fc19dc60fc19dc60fc19dc6
-	.octa	0x0fc19dc60fc19dc60fc19dc60fc19dc6
-	.octa	0x240ca1cc240ca1cc240ca1cc240ca1cc
-	.octa	0x240ca1cc240ca1cc240ca1cc240ca1cc
-	.octa	0x2de92c6f2de92c6f2de92c6f2de92c6f
-	.octa	0x2de92c6f2de92c6f2de92c6f2de92c6f
-	.octa	0x4a7484aa4a7484aa4a7484aa4a7484aa
-	.octa	0x4a7484aa4a7484aa4a7484aa4a7484aa
-	.octa	0x5cb0a9dc5cb0a9dc5cb0a9dc5cb0a9dc
-	.octa	0x5cb0a9dc5cb0a9dc5cb0a9dc5cb0a9dc
-	.octa	0x76f988da76f988da76f988da76f988da
-	.octa	0x76f988da76f988da76f988da76f988da
-	.octa	0x983e5152983e5152983e5152983e5152
-	.octa	0x983e5152983e5152983e5152983e5152
-	.octa	0xa831c66da831c66da831c66da831c66d
-	.octa	0xa831c66da831c66da831c66da831c66d
-	.octa	0xb00327c8b00327c8b00327c8b00327c8
-	.octa	0xb00327c8b00327c8b00327c8b00327c8
-	.octa	0xbf597fc7bf597fc7bf597fc7bf597fc7
-	.octa	0xbf597fc7bf597fc7bf597fc7bf597fc7
-	.octa	0xc6e00bf3c6e00bf3c6e00bf3c6e00bf3
-	.octa	0xc6e00bf3c6e00bf3c6e00bf3c6e00bf3
-	.octa	0xd5a79147d5a79147d5a79147d5a79147
-	.octa	0xd5a79147d5a79147d5a79147d5a79147
-	.octa	0x06ca635106ca635106ca635106ca6351
-	.octa	0x06ca635106ca635106ca635106ca6351
-	.octa	0x14292967142929671429296714292967
-	.octa	0x14292967142929671429296714292967
-	.octa	0x27b70a8527b70a8527b70a8527b70a85
-	.octa	0x27b70a8527b70a8527b70a8527b70a85
-	.octa	0x2e1b21382e1b21382e1b21382e1b2138
-	.octa	0x2e1b21382e1b21382e1b21382e1b2138
-	.octa	0x4d2c6dfc4d2c6dfc4d2c6dfc4d2c6dfc
-	.octa	0x4d2c6dfc4d2c6dfc4d2c6dfc4d2c6dfc
-	.octa	0x53380d1353380d1353380d1353380d13
-	.octa	0x53380d1353380d1353380d1353380d13
-	.octa	0x650a7354650a7354650a7354650a7354
-	.octa	0x650a7354650a7354650a7354650a7354
-	.octa	0x766a0abb766a0abb766a0abb766a0abb
-	.octa	0x766a0abb766a0abb766a0abb766a0abb
-	.octa	0x81c2c92e81c2c92e81c2c92e81c2c92e
-	.octa	0x81c2c92e81c2c92e81c2c92e81c2c92e
-	.octa	0x92722c8592722c8592722c8592722c85
-	.octa	0x92722c8592722c8592722c8592722c85
-	.octa	0xa2bfe8a1a2bfe8a1a2bfe8a1a2bfe8a1
-	.octa	0xa2bfe8a1a2bfe8a1a2bfe8a1a2bfe8a1
-	.octa	0xa81a664ba81a664ba81a664ba81a664b
-	.octa	0xa81a664ba81a664ba81a664ba81a664b
-	.octa	0xc24b8b70c24b8b70c24b8b70c24b8b70
-	.octa	0xc24b8b70c24b8b70c24b8b70c24b8b70
-	.octa	0xc76c51a3c76c51a3c76c51a3c76c51a3
-	.octa	0xc76c51a3c76c51a3c76c51a3c76c51a3
-	.octa	0xd192e819d192e819d192e819d192e819
-	.octa	0xd192e819d192e819d192e819d192e819
-	.octa	0xd6990624d6990624d6990624d6990624
-	.octa	0xd6990624d6990624d6990624d6990624
-	.octa	0xf40e3585f40e3585f40e3585f40e3585
-	.octa	0xf40e3585f40e3585f40e3585f40e3585
-	.octa	0x106aa070106aa070106aa070106aa070
-	.octa	0x106aa070106aa070106aa070106aa070
-	.octa	0x19a4c11619a4c11619a4c11619a4c116
-	.octa	0x19a4c11619a4c11619a4c11619a4c116
-	.octa	0x1e376c081e376c081e376c081e376c08
-	.octa	0x1e376c081e376c081e376c081e376c08
-	.octa	0x2748774c2748774c2748774c2748774c
-	.octa	0x2748774c2748774c2748774c2748774c
-	.octa	0x34b0bcb534b0bcb534b0bcb534b0bcb5
-	.octa	0x34b0bcb534b0bcb534b0bcb534b0bcb5
-	.octa	0x391c0cb3391c0cb3391c0cb3391c0cb3
-	.octa	0x391c0cb3391c0cb3391c0cb3391c0cb3
-	.octa	0x4ed8aa4a4ed8aa4a4ed8aa4a4ed8aa4a
-	.octa	0x4ed8aa4a4ed8aa4a4ed8aa4a4ed8aa4a
-	.octa	0x5b9cca4f5b9cca4f5b9cca4f5b9cca4f
-	.octa	0x5b9cca4f5b9cca4f5b9cca4f5b9cca4f
-	.octa	0x682e6ff3682e6ff3682e6ff3682e6ff3
-	.octa	0x682e6ff3682e6ff3682e6ff3682e6ff3
-	.octa	0x748f82ee748f82ee748f82ee748f82ee
-	.octa	0x748f82ee748f82ee748f82ee748f82ee
-	.octa	0x78a5636f78a5636f78a5636f78a5636f
-	.octa	0x78a5636f78a5636f78a5636f78a5636f
-	.octa	0x84c8781484c8781484c8781484c87814
-	.octa	0x84c8781484c8781484c8781484c87814
-	.octa	0x8cc702088cc702088cc702088cc70208
-	.octa	0x8cc702088cc702088cc702088cc70208
-	.octa	0x90befffa90befffa90befffa90befffa
-	.octa	0x90befffa90befffa90befffa90befffa
-	.octa	0xa4506ceba4506ceba4506ceba4506ceb
-	.octa	0xa4506ceba4506ceba4506ceba4506ceb
-	.octa	0xbef9a3f7bef9a3f7bef9a3f7bef9a3f7
-	.octa	0xbef9a3f7bef9a3f7bef9a3f7bef9a3f7
-	.octa	0xc67178f2c67178f2c67178f2c67178f2
-	.octa	0xc67178f2c67178f2c67178f2c67178f2
-
-.section	.rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32
-.align 32
-PSHUFFLE_BYTE_FLIP_MASK:
-.octa 0x0c0d0e0f08090a0b0405060700010203
-.octa 0x0c0d0e0f08090a0b0405060700010203
-
-.section	.rodata.cst256.K256, "aM", @progbits, 256
-.align 64
-.global K256
-K256:
-	.int	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
-	.int	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
-	.int	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
-	.int	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
-	.int	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
-	.int	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
-	.int	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
-	.int	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
-	.int	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
-	.int	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
-	.int	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
-	.int	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
-	.int	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
-	.int	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
-	.int	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
-	.int	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2

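The 2 KB K256_8 table above is simply the standard 64-entry K256 table with every round constant repeated eight times, so the single "vpaddd (TBL,ROUND,1)" in ROUND_00_15 adds the same constant to all eight lanes at once. A sketch of how such a table relates to K256, using only the first four constants for brevity:

	#include <stdint.h>
	#include <stdio.h>

	#define LANES 8

	/* First four entries of the standard SHA-256 K table (the full table has 64). */
	static const uint32_t K256[4] = {
		0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
	};

	int main(void)
	{
		/* K256_8[round][lane]: each constant broadcast across the 8 lanes,
		 * i.e. one 32-byte row per round, matching the .octa pairs above. */
		uint32_t K256_8[4][LANES];

		for (int r = 0; r < 4; r++)
			for (int l = 0; l < LANES; l++)
				K256_8[r][l] = K256[r];

		printf("round 0, lane 7: %08x\n", K256_8[0][7]);	/* 428a2f98 */
		return 0;
	}
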
+ 0 - 12
arch/x86/crypto/sha512-mb/Makefile

@@ -1,12 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# Arch-specific CryptoAPI modules.
-#
-
-avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
-                                $(comma)4)$(comma)%ymm2,yes,no)
-ifeq ($(avx2_supported),yes)
-	obj-$(CONFIG_CRYPTO_SHA512_MB) += sha512-mb.o
-	sha512-mb-y := sha512_mb.o sha512_mb_mgr_flush_avx2.o \
-	     sha512_mb_mgr_init_avx2.o sha512_mb_mgr_submit_avx2.o sha512_x4_avx2.o
-endif

+ 0 - 1047
arch/x86/crypto/sha512-mb/sha512_mb.c

@@ -1,1047 +0,0 @@
-/*
- * Multi buffer SHA512 algorithm Glue Code
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- *	Megha Dey <megha.dey@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *   * Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in
- *     the documentation and/or other materials provided with the
- *     distribution.
- *   * Neither the name of Intel Corporation nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
-
-#include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/cryptohash.h>
-#include <linux/types.h>
-#include <linux/list.h>
-#include <crypto/scatterwalk.h>
-#include <crypto/sha.h>
-#include <crypto/mcryptd.h>
-#include <crypto/crypto_wq.h>
-#include <asm/byteorder.h>
-#include <linux/hardirq.h>
-#include <asm/fpu/api.h>
-#include "sha512_mb_ctx.h"
-
-#define FLUSH_INTERVAL 1000 /* in usec */
-
-static struct mcryptd_alg_state sha512_mb_alg_state;
-
-struct sha512_mb_ctx {
-	struct mcryptd_ahash *mcryptd_tfm;
-};
-
-static inline struct mcryptd_hash_request_ctx
-		*cast_hash_to_mcryptd_ctx(struct sha512_hash_ctx *hash_ctx)
-{
-	struct ahash_request *areq;
-
-	areq = container_of((void *) hash_ctx, struct ahash_request, __ctx);
-	return container_of(areq, struct mcryptd_hash_request_ctx, areq);
-}
-
-static inline struct ahash_request
-		*cast_mcryptd_ctx_to_req(struct mcryptd_hash_request_ctx *ctx)
-{
-	return container_of((void *) ctx, struct ahash_request, __ctx);
-}
-
-static void req_ctx_init(struct mcryptd_hash_request_ctx *rctx,
-				struct ahash_request *areq)
-{
-	rctx->flag = HASH_UPDATE;
-}
-
-static asmlinkage void (*sha512_job_mgr_init)(struct sha512_mb_mgr *state);
-static asmlinkage struct job_sha512* (*sha512_job_mgr_submit)
-						(struct sha512_mb_mgr *state,
-						struct job_sha512 *job);
-static asmlinkage struct job_sha512* (*sha512_job_mgr_flush)
-						(struct sha512_mb_mgr *state);
-static asmlinkage struct job_sha512* (*sha512_job_mgr_get_comp_job)
-						(struct sha512_mb_mgr *state);
-
-inline uint32_t sha512_pad(uint8_t padblock[SHA512_BLOCK_SIZE * 2],
-			 uint64_t total_len)
-{
-	uint32_t i = total_len & (SHA512_BLOCK_SIZE - 1);
-
-	memset(&padblock[i], 0, SHA512_BLOCK_SIZE);
-	padblock[i] = 0x80;
-
-	i += ((SHA512_BLOCK_SIZE - 1) &
-	      (0 - (total_len + SHA512_PADLENGTHFIELD_SIZE + 1)))
-	     + 1 + SHA512_PADLENGTHFIELD_SIZE;
-
-#if SHA512_PADLENGTHFIELD_SIZE == 16
-	*((uint64_t *) &padblock[i - 16]) = 0;
-#endif
-
-	*((uint64_t *) &padblock[i - 8]) = cpu_to_be64(total_len << 3);
-
-	/* Number of extra blocks to hash */
-	return i >> SHA512_LOG2_BLOCK_SIZE;
-}
-
-static struct sha512_hash_ctx *sha512_ctx_mgr_resubmit
-		(struct sha512_ctx_mgr *mgr, struct sha512_hash_ctx *ctx)
-{
-	while (ctx) {
-		if (ctx->status & HASH_CTX_STS_COMPLETE) {
-			/* Clear PROCESSING bit */
-			ctx->status = HASH_CTX_STS_COMPLETE;
-			return ctx;
-		}
-
-		/*
-		 * If the extra blocks are empty, begin hashing what remains
-		 * in the user's buffer.
-		 */
-		if (ctx->partial_block_buffer_length == 0 &&
-		    ctx->incoming_buffer_length) {
-
-			const void *buffer = ctx->incoming_buffer;
-			uint32_t len = ctx->incoming_buffer_length;
-			uint32_t copy_len;
-
-			/*
-			 * Only entire blocks can be hashed.
-			 * Copy remainder to extra blocks buffer.
-			 */
-			copy_len = len & (SHA512_BLOCK_SIZE-1);
-
-			if (copy_len) {
-				len -= copy_len;
-				memcpy(ctx->partial_block_buffer,
-				       ((const char *) buffer + len),
-				       copy_len);
-				ctx->partial_block_buffer_length = copy_len;
-			}
-
-			ctx->incoming_buffer_length = 0;
-
-			/* len should be a multiple of the block size now */
-			assert((len % SHA512_BLOCK_SIZE) == 0);
-
-			/* Set len to the number of blocks to be hashed */
-			len >>= SHA512_LOG2_BLOCK_SIZE;
-
-			if (len) {
-
-				ctx->job.buffer = (uint8_t *) buffer;
-				ctx->job.len = len;
-				ctx = (struct sha512_hash_ctx *)
-					sha512_job_mgr_submit(&mgr->mgr,
-					&ctx->job);
-				continue;
-			}
-		}
-
-		/*
-		 * If the extra blocks are not empty, then we are
-		 * either on the last block(s) or we need more
-		 * user input before continuing.
-		 */
-		if (ctx->status & HASH_CTX_STS_LAST) {
-
-			uint8_t *buf = ctx->partial_block_buffer;
-			uint32_t n_extra_blocks =
-					sha512_pad(buf, ctx->total_length);
-
-			ctx->status = (HASH_CTX_STS_PROCESSING |
-				       HASH_CTX_STS_COMPLETE);
-			ctx->job.buffer = buf;
-			ctx->job.len = (uint32_t) n_extra_blocks;
-			ctx = (struct sha512_hash_ctx *)
-				sha512_job_mgr_submit(&mgr->mgr, &ctx->job);
-			continue;
-		}
-
-		if (ctx)
-			ctx->status = HASH_CTX_STS_IDLE;
-		return ctx;
-	}
-
-	return NULL;
-}
-
-static struct sha512_hash_ctx
-		*sha512_ctx_mgr_get_comp_ctx(struct mcryptd_alg_cstate *cstate)
-{
-	/*
-	 * If get_comp_job returns NULL, there are no jobs complete.
-	 * If get_comp_job returns a job, verify that it is safe to return to
-	 * the user.
-	 * If it is not ready, resubmit the job to finish processing.
-	 * If sha512_ctx_mgr_resubmit returned a job, it is ready to be
-	 * returned.
-	 * Otherwise, all jobs currently being managed by the hash_ctx_mgr
-	 * still need processing.
-	 */
-	struct sha512_ctx_mgr *mgr;
-	struct sha512_hash_ctx *ctx;
-	unsigned long flags;
-
-	mgr = cstate->mgr;
-	spin_lock_irqsave(&cstate->work_lock, flags);
-	ctx = (struct sha512_hash_ctx *)
-				sha512_job_mgr_get_comp_job(&mgr->mgr);
-	ctx = sha512_ctx_mgr_resubmit(mgr, ctx);
-	spin_unlock_irqrestore(&cstate->work_lock, flags);
-	return ctx;
-}
-
-static void sha512_ctx_mgr_init(struct sha512_ctx_mgr *mgr)
-{
-	sha512_job_mgr_init(&mgr->mgr);
-}
-
-static struct sha512_hash_ctx
-			*sha512_ctx_mgr_submit(struct mcryptd_alg_cstate *cstate,
-					  struct sha512_hash_ctx *ctx,
-					  const void *buffer,
-					  uint32_t len,
-					  int flags)
-{
-	struct sha512_ctx_mgr *mgr;
-	unsigned long irqflags;
-
-	mgr = cstate->mgr;
-	spin_lock_irqsave(&cstate->work_lock, irqflags);
-	if (flags & ~(HASH_UPDATE | HASH_LAST)) {
-		/* User should not pass anything other than UPDATE or LAST */
-		ctx->error = HASH_CTX_ERROR_INVALID_FLAGS;
-		goto unlock;
-	}
-
-	if (ctx->status & HASH_CTX_STS_PROCESSING) {
-		/* Cannot submit to a currently processing job. */
-		ctx->error = HASH_CTX_ERROR_ALREADY_PROCESSING;
-		goto unlock;
-	}
-
-	if (ctx->status & HASH_CTX_STS_COMPLETE) {
-		/* Cannot update a finished job. */
-		ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED;
-		goto unlock;
-	}
-
-	/*
-	 * If we made it here, there were no errors during this call to
-	 * submit
-	 */
-	ctx->error = HASH_CTX_ERROR_NONE;
-
-	/* Store buffer ptr info from user */
-	ctx->incoming_buffer = buffer;
-	ctx->incoming_buffer_length = len;
-
-	/*
-	 * Store the user's request flags and mark this ctx as currently being
-	 * processed.
-	 */
-	ctx->status = (flags & HASH_LAST) ?
-			(HASH_CTX_STS_PROCESSING | HASH_CTX_STS_LAST) :
-			HASH_CTX_STS_PROCESSING;
-
-	/* Advance byte counter */
-	ctx->total_length += len;
-
-	/*
-	 * If there is anything currently buffered in the extra blocks,
-	 * append to it until it contains a whole block.
-	 * Or if the user's buffer contains less than a whole block,
-	 * append as much as possible to the extra block.
-	 */
-	if (ctx->partial_block_buffer_length || len < SHA512_BLOCK_SIZE) {
-		/* Compute how many bytes to copy from user buffer into extra
-		 * block
-		 */
-		uint32_t copy_len = SHA512_BLOCK_SIZE -
-					ctx->partial_block_buffer_length;
-		if (len < copy_len)
-			copy_len = len;
-
-		if (copy_len) {
-			/* Copy and update relevant pointers and counters */
-			memcpy(&ctx->partial_block_buffer[
-					ctx->partial_block_buffer_length],
-			       buffer, copy_len);
-
-			ctx->partial_block_buffer_length += copy_len;
-			ctx->incoming_buffer = (const void *)
-					((const char *)buffer + copy_len);
-			ctx->incoming_buffer_length = len - copy_len;
-		}
-
-		/* The extra block buffer should never hold more than one
-		 * block's worth of data here
-		 */
-		assert(ctx->partial_block_buffer_length <= SHA512_BLOCK_SIZE);
-
-		/* If the extra block buffer contains exactly 1 block, it can
-		 * be hashed.
-		 */
-		if (ctx->partial_block_buffer_length >= SHA512_BLOCK_SIZE) {
-			ctx->partial_block_buffer_length = 0;
-
-			ctx->job.buffer = ctx->partial_block_buffer;
-			ctx->job.len = 1;
-			ctx = (struct sha512_hash_ctx *)
-				sha512_job_mgr_submit(&mgr->mgr, &ctx->job);
-		}
-	}
-
-	ctx = sha512_ctx_mgr_resubmit(mgr, ctx);
-unlock:
-	spin_unlock_irqrestore(&cstate->work_lock, irqflags);
-	return ctx;
-}
-
-static struct sha512_hash_ctx *sha512_ctx_mgr_flush(struct mcryptd_alg_cstate *cstate)
-{
-	struct sha512_ctx_mgr *mgr;
-	struct sha512_hash_ctx *ctx;
-	unsigned long flags;
-
-	mgr = cstate->mgr;
-	spin_lock_irqsave(&cstate->work_lock, flags);
-	while (1) {
-		ctx = (struct sha512_hash_ctx *)
-					sha512_job_mgr_flush(&mgr->mgr);
-
-		/* If flush returned 0, there are no more jobs in flight. */
-		if (!ctx)
-			break;
-
-		/*
-		 * If flush returned a job, resubmit the job to finish
-		 * processing.
-		 */
-		ctx = sha512_ctx_mgr_resubmit(mgr, ctx);
-
-		/*
-		 * If sha512_ctx_mgr_resubmit returned a job, it is ready to
-		 * be returned. Otherwise, all jobs currently being managed by
-		 * the sha512_ctx_mgr still need processing. Loop.
-		 */
-		if (ctx)
-			break;
-	}
-	spin_unlock_irqrestore(&cstate->work_lock, flags);
-	return ctx;
-}
-
-static int sha512_mb_init(struct ahash_request *areq)
-{
-	struct sha512_hash_ctx *sctx = ahash_request_ctx(areq);
-
-	hash_ctx_init(sctx);
-	sctx->job.result_digest[0] = SHA512_H0;
-	sctx->job.result_digest[1] = SHA512_H1;
-	sctx->job.result_digest[2] = SHA512_H2;
-	sctx->job.result_digest[3] = SHA512_H3;
-	sctx->job.result_digest[4] = SHA512_H4;
-	sctx->job.result_digest[5] = SHA512_H5;
-	sctx->job.result_digest[6] = SHA512_H6;
-	sctx->job.result_digest[7] = SHA512_H7;
-	sctx->total_length = 0;
-	sctx->partial_block_buffer_length = 0;
-	sctx->status = HASH_CTX_STS_IDLE;
-
-	return 0;
-}
-
-static int sha512_mb_set_results(struct mcryptd_hash_request_ctx *rctx)
-{
-	int	i;
-	struct	sha512_hash_ctx *sctx = ahash_request_ctx(&rctx->areq);
-	__be64	*dst = (__be64 *) rctx->out;
-
-	for (i = 0; i < 8; ++i)
-		dst[i] = cpu_to_be64(sctx->job.result_digest[i]);
-
-	return 0;
-}
-
-static int sha_finish_walk(struct mcryptd_hash_request_ctx **ret_rctx,
-			struct mcryptd_alg_cstate *cstate, bool flush)
-{
-	int	flag = HASH_UPDATE;
-	int	nbytes, err = 0;
-	struct mcryptd_hash_request_ctx *rctx = *ret_rctx;
-	struct sha512_hash_ctx *sha_ctx;
-
-	/* more work ? */
-	while (!(rctx->flag & HASH_DONE)) {
-		nbytes = crypto_ahash_walk_done(&rctx->walk, 0);
-		if (nbytes < 0) {
-			err = nbytes;
-			goto out;
-		}
-		/* check if the walk is done */
-		if (crypto_ahash_walk_last(&rctx->walk)) {
-			rctx->flag |= HASH_DONE;
-			if (rctx->flag & HASH_FINAL)
-				flag |= HASH_LAST;
-
-		}
-		sha_ctx = (struct sha512_hash_ctx *)
-						ahash_request_ctx(&rctx->areq);
-		kernel_fpu_begin();
-		sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx,
-						rctx->walk.data, nbytes, flag);
-		if (!sha_ctx) {
-			if (flush)
-				sha_ctx = sha512_ctx_mgr_flush(cstate);
-		}
-		kernel_fpu_end();
-		if (sha_ctx)
-			rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-		else {
-			rctx = NULL;
-			goto out;
-		}
-	}
-
-	/* copy the results */
-	if (rctx->flag & HASH_FINAL)
-		sha512_mb_set_results(rctx);
-
-out:
-	*ret_rctx = rctx;
-	return err;
-}
-
-static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx,
-			    struct mcryptd_alg_cstate *cstate,
-			    int err)
-{
-	struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
-	struct sha512_hash_ctx *sha_ctx;
-	struct mcryptd_hash_request_ctx *req_ctx;
-	int ret;
-	unsigned long flags;
-
-	/* remove from work list */
-	spin_lock_irqsave(&cstate->work_lock, flags);
-	list_del(&rctx->waiter);
-	spin_unlock_irqrestore(&cstate->work_lock, flags);
-
-	if (irqs_disabled())
-		rctx->complete(&req->base, err);
-	else {
-		local_bh_disable();
-		rctx->complete(&req->base, err);
-		local_bh_enable();
-	}
-
-	/* check to see if there are other jobs that are done */
-	sha_ctx = sha512_ctx_mgr_get_comp_ctx(cstate);
-	while (sha_ctx) {
-		req_ctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-		ret = sha_finish_walk(&req_ctx, cstate, false);
-		if (req_ctx) {
-			spin_lock_irqsave(&cstate->work_lock, flags);
-			list_del(&req_ctx->waiter);
-			spin_unlock_irqrestore(&cstate->work_lock, flags);
-
-			req = cast_mcryptd_ctx_to_req(req_ctx);
-			if (irqs_disabled())
-				req_ctx->complete(&req->base, ret);
-			else {
-				local_bh_disable();
-				req_ctx->complete(&req->base, ret);
-				local_bh_enable();
-			}
-		}
-		sha_ctx = sha512_ctx_mgr_get_comp_ctx(cstate);
-	}
-
-	return 0;
-}
-
-static void sha512_mb_add_list(struct mcryptd_hash_request_ctx *rctx,
-			     struct mcryptd_alg_cstate *cstate)
-{
-	unsigned long next_flush;
-	unsigned long delay = usecs_to_jiffies(FLUSH_INTERVAL);
-	unsigned long flags;
-
-	/* initialize tag */
-	rctx->tag.arrival = jiffies;    /* tag the arrival time */
-	rctx->tag.seq_num = cstate->next_seq_num++;
-	next_flush = rctx->tag.arrival + delay;
-	rctx->tag.expire = next_flush;
-
-	spin_lock_irqsave(&cstate->work_lock, flags);
-	list_add_tail(&rctx->waiter, &cstate->work_list);
-	spin_unlock_irqrestore(&cstate->work_lock, flags);
-
-	mcryptd_arm_flusher(cstate, delay);
-}
-
-static int sha512_mb_update(struct ahash_request *areq)
-{
-	struct mcryptd_hash_request_ctx *rctx =
-			container_of(areq, struct mcryptd_hash_request_ctx,
-									areq);
-	struct mcryptd_alg_cstate *cstate =
-				this_cpu_ptr(sha512_mb_alg_state.alg_cstate);
-
-	struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
-	struct sha512_hash_ctx *sha_ctx;
-	int ret = 0, nbytes;
-
-
-	/* sanity check */
-	if (rctx->tag.cpu != smp_processor_id()) {
-		pr_err("mcryptd error: cpu clash\n");
-		goto done;
-	}
-
-	/* need to init context */
-	req_ctx_init(rctx, areq);
-
-	nbytes = crypto_ahash_walk_first(req, &rctx->walk);
-
-	if (nbytes < 0) {
-		ret = nbytes;
-		goto done;
-	}
-
-	if (crypto_ahash_walk_last(&rctx->walk))
-		rctx->flag |= HASH_DONE;
-
-	/* submit */
-	sha_ctx = (struct sha512_hash_ctx *) ahash_request_ctx(areq);
-	sha512_mb_add_list(rctx, cstate);
-	kernel_fpu_begin();
-	sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, rctx->walk.data,
-							nbytes, HASH_UPDATE);
-	kernel_fpu_end();
-
-	/* check if anything is returned */
-	if (!sha_ctx)
-		return -EINPROGRESS;
-
-	if (sha_ctx->error) {
-		ret = sha_ctx->error;
-		rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-		goto done;
-	}
-
-	rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-	ret = sha_finish_walk(&rctx, cstate, false);
-
-	if (!rctx)
-		return -EINPROGRESS;
-done:
-	sha_complete_job(rctx, cstate, ret);
-	return ret;
-}
-
-static int sha512_mb_finup(struct ahash_request *areq)
-{
-	struct mcryptd_hash_request_ctx *rctx =
-			container_of(areq, struct mcryptd_hash_request_ctx,
-									areq);
-	struct mcryptd_alg_cstate *cstate =
-				this_cpu_ptr(sha512_mb_alg_state.alg_cstate);
-
-	struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
-	struct sha512_hash_ctx *sha_ctx;
-	int ret = 0, flag = HASH_UPDATE, nbytes;
-
-	/* sanity check */
-	if (rctx->tag.cpu != smp_processor_id()) {
-		pr_err("mcryptd error: cpu clash\n");
-		goto done;
-	}
-
-	/* need to init context */
-	req_ctx_init(rctx, areq);
-
-	nbytes = crypto_ahash_walk_first(req, &rctx->walk);
-
-	if (nbytes < 0) {
-		ret = nbytes;
-		goto done;
-	}
-
-	if (crypto_ahash_walk_last(&rctx->walk)) {
-		rctx->flag |= HASH_DONE;
-		flag = HASH_LAST;
-	}
-
-	/* submit */
-	rctx->flag |= HASH_FINAL;
-	sha_ctx = (struct sha512_hash_ctx *) ahash_request_ctx(areq);
-	sha512_mb_add_list(rctx, cstate);
-
-	kernel_fpu_begin();
-	sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, rctx->walk.data,
-								nbytes, flag);
-	kernel_fpu_end();
-
-	/* check if anything is returned */
-	if (!sha_ctx)
-		return -EINPROGRESS;
-
-	if (sha_ctx->error) {
-		ret = sha_ctx->error;
-		goto done;
-	}
-
-	rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-	ret = sha_finish_walk(&rctx, cstate, false);
-	if (!rctx)
-		return -EINPROGRESS;
-done:
-	sha_complete_job(rctx, cstate, ret);
-	return ret;
-}
-
-static int sha512_mb_final(struct ahash_request *areq)
-{
-	struct mcryptd_hash_request_ctx *rctx =
-			container_of(areq, struct mcryptd_hash_request_ctx,
-									areq);
-	struct mcryptd_alg_cstate *cstate =
-				this_cpu_ptr(sha512_mb_alg_state.alg_cstate);
-
-	struct sha512_hash_ctx *sha_ctx;
-	int ret = 0;
-	u8 data;
-
-	/* sanity check */
-	if (rctx->tag.cpu != smp_processor_id()) {
-		pr_err("mcryptd error: cpu clash\n");
-		goto done;
-	}
-
-	/* need to init context */
-	req_ctx_init(rctx, areq);
-
-	rctx->flag |= HASH_DONE | HASH_FINAL;
-
-	sha_ctx = (struct sha512_hash_ctx *) ahash_request_ctx(areq);
-	/* flag HASH_FINAL and 0 data size */
-	sha512_mb_add_list(rctx, cstate);
-	kernel_fpu_begin();
-	sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, &data, 0, HASH_LAST);
-	kernel_fpu_end();
-
-	/* check if anything is returned */
-	if (!sha_ctx)
-		return -EINPROGRESS;
-
-	if (sha_ctx->error) {
-		ret = sha_ctx->error;
-		rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-		goto done;
-	}
-
-	rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-	ret = sha_finish_walk(&rctx, cstate, false);
-	if (!rctx)
-		return -EINPROGRESS;
-done:
-	sha_complete_job(rctx, cstate, ret);
-	return ret;
-}
-
-static int sha512_mb_export(struct ahash_request *areq, void *out)
-{
-	struct sha512_hash_ctx *sctx = ahash_request_ctx(areq);
-
-	memcpy(out, sctx, sizeof(*sctx));
-
-	return 0;
-}
-
-static int sha512_mb_import(struct ahash_request *areq, const void *in)
-{
-	struct sha512_hash_ctx *sctx = ahash_request_ctx(areq);
-
-	memcpy(sctx, in, sizeof(*sctx));
-
-	return 0;
-}
-
-static int sha512_mb_async_init_tfm(struct crypto_tfm *tfm)
-{
-	struct mcryptd_ahash *mcryptd_tfm;
-	struct sha512_mb_ctx *ctx = crypto_tfm_ctx(tfm);
-	struct mcryptd_hash_ctx *mctx;
-
-	mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha512-mb",
-						CRYPTO_ALG_INTERNAL,
-						CRYPTO_ALG_INTERNAL);
-	if (IS_ERR(mcryptd_tfm))
-		return PTR_ERR(mcryptd_tfm);
-	mctx = crypto_ahash_ctx(&mcryptd_tfm->base);
-	mctx->alg_state = &sha512_mb_alg_state;
-	ctx->mcryptd_tfm = mcryptd_tfm;
-	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
-				sizeof(struct ahash_request) +
-				crypto_ahash_reqsize(&mcryptd_tfm->base));
-
-	return 0;
-}
-
-static void sha512_mb_async_exit_tfm(struct crypto_tfm *tfm)
-{
-	struct sha512_mb_ctx *ctx = crypto_tfm_ctx(tfm);
-
-	mcryptd_free_ahash(ctx->mcryptd_tfm);
-}
-
-static int sha512_mb_areq_init_tfm(struct crypto_tfm *tfm)
-{
-	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
-				sizeof(struct ahash_request) +
-				sizeof(struct sha512_hash_ctx));
-
-	return 0;
-}
-
-static void sha512_mb_areq_exit_tfm(struct crypto_tfm *tfm)
-{
-	struct sha512_mb_ctx *ctx = crypto_tfm_ctx(tfm);
-
-	mcryptd_free_ahash(ctx->mcryptd_tfm);
-}
-
-static struct ahash_alg sha512_mb_areq_alg = {
-	.init		=	sha512_mb_init,
-	.update		=	sha512_mb_update,
-	.final		=	sha512_mb_final,
-	.finup		=	sha512_mb_finup,
-	.export		=	sha512_mb_export,
-	.import		=	sha512_mb_import,
-	.halg		=	{
-	.digestsize	=	SHA512_DIGEST_SIZE,
-	.statesize	=	sizeof(struct sha512_hash_ctx),
-	.base		=	{
-			.cra_name	 = "__sha512-mb",
-			.cra_driver_name = "__intel_sha512-mb",
-			.cra_priority	 = 100,
-			/*
-			 * Use the ASYNC flag, as some buffers in the
-			 * multi-buffer algo may not have completed before
-			 * the hashing thread goes to sleep.
-			 */
-			.cra_flags	= CRYPTO_ALG_ASYNC |
-					  CRYPTO_ALG_INTERNAL,
-			.cra_blocksize	= SHA512_BLOCK_SIZE,
-			.cra_module	= THIS_MODULE,
-			.cra_list	= LIST_HEAD_INIT
-					(sha512_mb_areq_alg.halg.base.cra_list),
-			.cra_init	= sha512_mb_areq_init_tfm,
-			.cra_exit	= sha512_mb_areq_exit_tfm,
-			.cra_ctxsize	= sizeof(struct sha512_hash_ctx),
-		}
-	}
-};
-
-static int sha512_mb_async_init(struct ahash_request *req)
-{
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
-	memcpy(mcryptd_req, req, sizeof(*req));
-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
-	return crypto_ahash_init(mcryptd_req);
-}
-
-static int sha512_mb_async_update(struct ahash_request *req)
-{
-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
-	memcpy(mcryptd_req, req, sizeof(*req));
-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
-	return crypto_ahash_update(mcryptd_req);
-}
-
-static int sha512_mb_async_finup(struct ahash_request *req)
-{
-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
-	memcpy(mcryptd_req, req, sizeof(*req));
-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
-	return crypto_ahash_finup(mcryptd_req);
-}
-
-static int sha512_mb_async_final(struct ahash_request *req)
-{
-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
-	memcpy(mcryptd_req, req, sizeof(*req));
-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
-	return crypto_ahash_final(mcryptd_req);
-}
-
-static int sha512_mb_async_digest(struct ahash_request *req)
-{
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
-	memcpy(mcryptd_req, req, sizeof(*req));
-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
-	return crypto_ahash_digest(mcryptd_req);
-}
-
-static int sha512_mb_async_export(struct ahash_request *req, void *out)
-{
-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
-	memcpy(mcryptd_req, req, sizeof(*req));
-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
-	return crypto_ahash_export(mcryptd_req, out);
-}
-
-static int sha512_mb_async_import(struct ahash_request *req, const void *in)
-{
-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-	struct crypto_ahash *child = mcryptd_ahash_child(mcryptd_tfm);
-	struct mcryptd_hash_request_ctx *rctx;
-	struct ahash_request *areq;
-
-	memcpy(mcryptd_req, req, sizeof(*req));
-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
-	rctx = ahash_request_ctx(mcryptd_req);
-
-	areq = &rctx->areq;
-
-	ahash_request_set_tfm(areq, child);
-	ahash_request_set_callback(areq, CRYPTO_TFM_REQ_MAY_SLEEP,
-					rctx->complete, req);
-
-	return crypto_ahash_import(mcryptd_req, in);
-}
-
-static struct ahash_alg sha512_mb_async_alg = {
-	.init           = sha512_mb_async_init,
-	.update         = sha512_mb_async_update,
-	.final          = sha512_mb_async_final,
-	.finup          = sha512_mb_async_finup,
-	.digest         = sha512_mb_async_digest,
-	.export		= sha512_mb_async_export,
-	.import		= sha512_mb_async_import,
-	.halg = {
-		.digestsize     = SHA512_DIGEST_SIZE,
-		.statesize      = sizeof(struct sha512_hash_ctx),
-		.base = {
-			.cra_name               = "sha512",
-			.cra_driver_name        = "sha512_mb",
-			/*
-			 * Low priority, since with few concurrent hash requests
-			 * this is extremely slow due to the flush delay.  Users
-			 * whose workloads would benefit from this can request
-			 * it explicitly by driver name, or can increase its
-			 * priority at runtime using NETLINK_CRYPTO.
-			 */
-			.cra_priority           = 50,
-			.cra_flags              = CRYPTO_ALG_ASYNC,
-			.cra_blocksize          = SHA512_BLOCK_SIZE,
-			.cra_module             = THIS_MODULE,
-			.cra_list               = LIST_HEAD_INIT
-				(sha512_mb_async_alg.halg.base.cra_list),
-			.cra_init               = sha512_mb_async_init_tfm,
-			.cra_exit               = sha512_mb_async_exit_tfm,
-			.cra_ctxsize		= sizeof(struct sha512_mb_ctx),
-			.cra_alignmask		= 0,
-		},
-	},
-};
-
-static unsigned long sha512_mb_flusher(struct mcryptd_alg_cstate *cstate)
-{
-	struct mcryptd_hash_request_ctx *rctx;
-	unsigned long cur_time;
-	unsigned long next_flush = 0;
-	struct sha512_hash_ctx *sha_ctx;
-
-
-	cur_time = jiffies;
-
-	while (!list_empty(&cstate->work_list)) {
-		rctx = list_entry(cstate->work_list.next,
-				struct mcryptd_hash_request_ctx, waiter);
-		if (time_before(cur_time, rctx->tag.expire))
-			break;
-		kernel_fpu_begin();
-		sha_ctx = (struct sha512_hash_ctx *)
-					sha512_ctx_mgr_flush(cstate);
-		kernel_fpu_end();
-		if (!sha_ctx) {
-			pr_err("sha512_mb error: nothing got flushed for non-empty list\n");
-			break;
-		}
-		rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-		sha_finish_walk(&rctx, cstate, true);
-		sha_complete_job(rctx, cstate, 0);
-	}
-
-	if (!list_empty(&cstate->work_list)) {
-		rctx = list_entry(cstate->work_list.next,
-				struct mcryptd_hash_request_ctx, waiter);
-		/* get the hash context and then flush time */
-		next_flush = rctx->tag.expire;
-		mcryptd_arm_flusher(cstate, get_delay(next_flush));
-	}
-	return next_flush;
-}
-
-static int __init sha512_mb_mod_init(void)
-{
-
-	int cpu;
-	int err;
-	struct mcryptd_alg_cstate *cpu_state;
-
-	/* check for dependent cpu features */
-	if (!boot_cpu_has(X86_FEATURE_AVX2) ||
-	    !boot_cpu_has(X86_FEATURE_BMI2))
-		return -ENODEV;
-
-	/* initialize multibuffer structures */
-	sha512_mb_alg_state.alg_cstate =
-				alloc_percpu(struct mcryptd_alg_cstate);
-
-	sha512_job_mgr_init = sha512_mb_mgr_init_avx2;
-	sha512_job_mgr_submit = sha512_mb_mgr_submit_avx2;
-	sha512_job_mgr_flush = sha512_mb_mgr_flush_avx2;
-	sha512_job_mgr_get_comp_job = sha512_mb_mgr_get_comp_job_avx2;
-
-	if (!sha512_mb_alg_state.alg_cstate)
-		return -ENOMEM;
-	for_each_possible_cpu(cpu) {
-		cpu_state = per_cpu_ptr(sha512_mb_alg_state.alg_cstate, cpu);
-		cpu_state->next_flush = 0;
-		cpu_state->next_seq_num = 0;
-		cpu_state->flusher_engaged = false;
-		INIT_DELAYED_WORK(&cpu_state->flush, mcryptd_flusher);
-		cpu_state->cpu = cpu;
-		cpu_state->alg_state = &sha512_mb_alg_state;
-		cpu_state->mgr = kzalloc(sizeof(struct sha512_ctx_mgr),
-								GFP_KERNEL);
-		if (!cpu_state->mgr)
-			goto err2;
-		sha512_ctx_mgr_init(cpu_state->mgr);
-		INIT_LIST_HEAD(&cpu_state->work_list);
-		spin_lock_init(&cpu_state->work_lock);
-	}
-	sha512_mb_alg_state.flusher = &sha512_mb_flusher;
-
-	err = crypto_register_ahash(&sha512_mb_areq_alg);
-	if (err)
-		goto err2;
-	err = crypto_register_ahash(&sha512_mb_async_alg);
-	if (err)
-		goto err1;
-
-
-	return 0;
-err1:
-	crypto_unregister_ahash(&sha512_mb_areq_alg);
-err2:
-	for_each_possible_cpu(cpu) {
-		cpu_state = per_cpu_ptr(sha512_mb_alg_state.alg_cstate, cpu);
-		kfree(cpu_state->mgr);
-	}
-	free_percpu(sha512_mb_alg_state.alg_cstate);
-	return -ENODEV;
-}
-
-static void __exit sha512_mb_mod_fini(void)
-{
-	int cpu;
-	struct mcryptd_alg_cstate *cpu_state;
-
-	crypto_unregister_ahash(&sha512_mb_async_alg);
-	crypto_unregister_ahash(&sha512_mb_areq_alg);
-	for_each_possible_cpu(cpu) {
-		cpu_state = per_cpu_ptr(sha512_mb_alg_state.alg_cstate, cpu);
-		kfree(cpu_state->mgr);
-	}
-	free_percpu(sha512_mb_alg_state.alg_cstate);
-}
-
-module_init(sha512_mb_mod_init);
-module_exit(sha512_mb_mod_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("SHA512 Secure Hash Algorithm, multi buffer accelerated");
-
-MODULE_ALIAS("sha512");
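
The sha512_pad() routine above implements the usual SHA-512 finalisation: append 0x80, zero-fill, and store the message length in bits in a 16-byte trailer, which always yields one or two extra blocks. A minimal standalone C sketch of that length calculation, assuming the same 128-byte block and 16-byte length field (the helper name is illustrative, not part of the removed driver):

	#include <stdint.h>

	/* How many extra blocks sha512_pad() produces for total_len bytes. */
	static uint32_t sha512_extra_blocks(uint64_t total_len)
	{
		uint32_t i = total_len & (128 - 1);	/* bytes in the last partial block */

		i += 1;						/* the 0x80 terminator */
		i += (128 - 1) & (uint32_t)(0 - (total_len + 16 + 1));	/* zero padding */
		i += 16;					/* big-endian bit-length field */

		return i >> 7;					/* 1 or 2 extra blocks */
	}

For example, total_len == 111 gives exactly one extra block (111 + 1 + 16 == 128), while total_len == 112 spills into two.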

+ 0 - 128
arch/x86/crypto/sha512-mb/sha512_mb_ctx.h

@@ -1,128 +0,0 @@
-/*
- * Header file for multi buffer SHA512 context
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- *  Copyright(c) 2016 Intel Corporation.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of version 2 of the GNU General Public License as
- *  published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  General Public License for more details.
- *
- *  Contact Information:
- *      Megha Dey <megha.dey@linux.intel.com>
- *
- *  BSD LICENSE
- *
- *  Copyright(c) 2016 Intel Corporation.
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *    * Redistributions of source code must retain the above copyright
- *      notice, this list of conditions and the following disclaimer.
- *    * Redistributions in binary form must reproduce the above copyright
- *      notice, this list of conditions and the following disclaimer in
- *      the documentation and/or other materials provided with the
- *      distribution.
- *    * Neither the name of Intel Corporation nor the names of its
- *      contributors may be used to endorse or promote products derived
- *      from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _SHA_MB_CTX_INTERNAL_H
-#define _SHA_MB_CTX_INTERNAL_H
-
-#include "sha512_mb_mgr.h"
-
-#define HASH_UPDATE          0x00
-#define HASH_LAST            0x01
-#define HASH_DONE            0x02
-#define HASH_FINAL           0x04
-
-#define HASH_CTX_STS_IDLE       0x00
-#define HASH_CTX_STS_PROCESSING 0x01
-#define HASH_CTX_STS_LAST       0x02
-#define HASH_CTX_STS_COMPLETE   0x04
-
-enum hash_ctx_error {
-	HASH_CTX_ERROR_NONE               =  0,
-	HASH_CTX_ERROR_INVALID_FLAGS      = -1,
-	HASH_CTX_ERROR_ALREADY_PROCESSING = -2,
-	HASH_CTX_ERROR_ALREADY_COMPLETED  = -3,
-};
-
-#define hash_ctx_user_data(ctx)  ((ctx)->user_data)
-#define hash_ctx_digest(ctx)     ((ctx)->job.result_digest)
-#define hash_ctx_processing(ctx) ((ctx)->status & HASH_CTX_STS_PROCESSING)
-#define hash_ctx_complete(ctx)   ((ctx)->status == HASH_CTX_STS_COMPLETE)
-#define hash_ctx_status(ctx)     ((ctx)->status)
-#define hash_ctx_error(ctx)      ((ctx)->error)
-#define hash_ctx_init(ctx) \
-	do { \
-		(ctx)->error = HASH_CTX_ERROR_NONE; \
-		(ctx)->status = HASH_CTX_STS_COMPLETE; \
-	} while (0)
-
-/* Hash Constants and Typedefs */
-#define SHA512_DIGEST_LENGTH          8
-#define SHA512_LOG2_BLOCK_SIZE        7
-
-#define SHA512_PADLENGTHFIELD_SIZE    16
-
-#ifdef SHA_MB_DEBUG
-#define assert(expr) \
-do { \
-	if (unlikely(!(expr))) { \
-		printk(KERN_ERR "Assertion failed! %s,%s,%s,line=%d\n", \
-		#expr, __FILE__, __func__, __LINE__); \
-	} \
-} while (0)
-#else
-#define assert(expr) do {} while (0)
-#endif
-
-struct sha512_ctx_mgr {
-	struct sha512_mb_mgr mgr;
-};
-
-/* typedef struct sha512_ctx_mgr sha512_ctx_mgr; */
-
-struct sha512_hash_ctx {
-	/* Must be at struct offset 0 */
-	struct job_sha512       job;
-	/* status flag */
-	int status;
-	/* error flag */
-	int error;
-
-	uint64_t        total_length;
-	const void      *incoming_buffer;
-	uint32_t        incoming_buffer_length;
-	uint8_t         partial_block_buffer[SHA512_BLOCK_SIZE * 2];
-	uint32_t        partial_block_buffer_length;
-	void            *user_data;
-};
-
-#endif
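
The status and error values in this header drive a small per-request state machine: a context sits in IDLE, moves to PROCESSING (optionally with LAST set) when work is submitted, and ends in COMPLETE once the padded final blocks have run. A condensed sketch of the submit-time checks that sha512_ctx_mgr_submit() in the glue code performs with these definitions (hash_ctx_check_submit is a hypothetical helper shown only to make the rules explicit; it assumes the macros and enum above):

	/* Returns the error the context would be marked with, or NONE. */
	static int hash_ctx_check_submit(int status, int flags)
	{
		if (flags & ~(HASH_UPDATE | HASH_LAST))
			return HASH_CTX_ERROR_INVALID_FLAGS;	/* only UPDATE/LAST allowed */
		if (status & HASH_CTX_STS_PROCESSING)
			return HASH_CTX_ERROR_ALREADY_PROCESSING; /* job still in flight */
		if (status & HASH_CTX_STS_COMPLETE)
			return HASH_CTX_ERROR_ALREADY_COMPLETED; /* needs re-init first */
		return HASH_CTX_ERROR_NONE;
	}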

+ 0 - 104
arch/x86/crypto/sha512-mb/sha512_mb_mgr.h

@@ -1,104 +0,0 @@
-/*
- * Header file for multi buffer SHA512 algorithm manager
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- *  Copyright(c) 2016 Intel Corporation.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of version 2 of the GNU General Public License as
- *  published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  General Public License for more details.
- *
- *  Contact Information:
- *      Megha Dey <megha.dey@linux.intel.com>
- *
- *  BSD LICENSE
- *
- *  Copyright(c) 2016 Intel Corporation.
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *    * Redistributions of source code must retain the above copyright
- *      notice, this list of conditions and the following disclaimer.
- *    * Redistributions in binary form must reproduce the above copyright
- *      notice, this list of conditions and the following disclaimer in
- *      the documentation and/or other materials provided with the
- *      distribution.
- *    * Neither the name of Intel Corporation nor the names of its
- *      contributors may be used to endorse or promote products derived
- *      from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __SHA_MB_MGR_H
-#define __SHA_MB_MGR_H
-
-#include <linux/types.h>
-
-#define NUM_SHA512_DIGEST_WORDS 8
-
-enum job_sts {
-	STS_UNKNOWN = 0,
-	STS_BEING_PROCESSED = 1,
-	STS_COMPLETED = 2,
-	STS_INTERNAL_ERROR = 3,
-	STS_ERROR = 4
-};
-
-struct job_sha512 {
-	u8  *buffer;
-	u64  len;
-	u64  result_digest[NUM_SHA512_DIGEST_WORDS] __aligned(32);
-	enum job_sts status;
-	void   *user_data;
-};
-
-struct sha512_args_x4 {
-	uint64_t        digest[8][4];
-	uint8_t         *data_ptr[4];
-};
-
-struct sha512_lane_data {
-	struct job_sha512 *job_in_lane;
-};
-
-struct sha512_mb_mgr {
-	struct sha512_args_x4 args;
-
-	uint64_t lens[4];
-
-	/*
-	 * each byte is the index (0...3) of an unused lane;
-	 * byte 4 is set to 0xFF as the "all lanes unused" flag
-	 */
-	uint64_t unused_lanes;
-	struct sha512_lane_data ldata[4];
-};
-
-#define SHA512_MB_MGR_NUM_LANES_AVX2 4
-
-void sha512_mb_mgr_init_avx2(struct sha512_mb_mgr *state);
-struct job_sha512 *sha512_mb_mgr_submit_avx2(struct sha512_mb_mgr *state,
-						struct job_sha512 *job);
-struct job_sha512 *sha512_mb_mgr_flush_avx2(struct sha512_mb_mgr *state);
-struct job_sha512 *sha512_mb_mgr_get_comp_job_avx2(struct sha512_mb_mgr *state);
-
-#endif
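
unused_lanes above is effectively a byte-wide stack of free lane indices: the initial value 0xFF03020100 holds lanes 0-3 with 0xFF in byte 4 as the "all lanes unused" sentinel, which is what the flush path's bit-(32+7) test looks for. A C sketch of the pop/push that the AVX2 submit and flush routines perform on it (lane_pop/lane_push are illustrative names, not kernel APIs):

	#include <stdint.h>

	/* Take the next free lane: the low byte is the lane index. */
	static unsigned int lane_pop(uint64_t *unused_lanes)
	{
		unsigned int lane = *unused_lanes & 0xff;

		*unused_lanes >>= 8;		/* after four pops only 0xFF remains */
		return lane;
	}

	/* Return a completed lane to the stack. */
	static void lane_push(uint64_t *unused_lanes, unsigned int lane)
	{
		*unused_lanes = (*unused_lanes << 8) | lane;
	}

The submit routine only kicks off hashing once the stack reads 0xFF, i.e. all four lanes are occupied; until then jobs are merely queued into their lanes.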

+ 0 - 281
arch/x86/crypto/sha512-mb/sha512_mb_mgr_datastruct.S

@@ -1,281 +0,0 @@
-/*
- * Header file for multi buffer SHA512 algorithm data structure
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- *  Copyright(c) 2016 Intel Corporation.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of version 2 of the GNU General Public License as
- *  published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  General Public License for more details.
- *
- *  Contact Information:
- *      Megha Dey <megha.dey@linux.intel.com>
- *
- *  BSD LICENSE
- *
- *  Copyright(c) 2016 Intel Corporation.
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *    * Redistributions of source code must retain the above copyright
- *      notice, this list of conditions and the following disclaimer.
- *    * Redistributions in binary form must reproduce the above copyright
- *      notice, this list of conditions and the following disclaimer in
- *      the documentation and/or other materials provided with the
- *      distribution.
- *    * Neither the name of Intel Corporation nor the names of its
- *      contributors may be used to endorse or promote products derived
- *      from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-# Macros for defining data structures
-
-# Usage example
-
-#START_FIELDS   # JOB_AES
-###     name            size    align
-#FIELD  _plaintext,     8,      8       # pointer to plaintext
-#FIELD  _ciphertext,    8,      8       # pointer to ciphertext
-#FIELD  _IV,            16,     8       # IV
-#FIELD  _keys,          8,      8       # pointer to keys
-#FIELD  _len,           4,      4       # length in bytes
-#FIELD  _status,        4,      4       # status enumeration
-#FIELD  _user_data,     8,      8       # pointer to user data
-#UNION  _union,         size1,  align1, \
-#                       size2,  align2, \
-#                       size3,  align3, \
-#                       ...
-#END_FIELDS
-#%assign _JOB_AES_size  _FIELD_OFFSET
-#%assign _JOB_AES_align _STRUCT_ALIGN
-
-#########################################################################
-
-# Alternate "struc-like" syntax:
-#       STRUCT job_aes2
-#       RES_Q   .plaintext,     1
-#       RES_Q   .ciphertext,    1
-#       RES_DQ  .IV,            1
-#       RES_B   .nested,        _JOB_AES_SIZE, _JOB_AES_ALIGN
-#       RES_U   .union,         size1, align1, \
-#                               size2, align2, \
-#                               ...
-#       ENDSTRUCT
-#       # Following only needed if nesting
-#       %assign job_aes2_size   _FIELD_OFFSET
-#       %assign job_aes2_align  _STRUCT_ALIGN
-#
-# RES_* macros take a name, a count and an optional alignment.
-# The count is in terms of the base size of the macro, and the
-# default alignment is the base size.
-# The macros are:
-# Macro    Base size
-# RES_B     1
-# RES_W     2
-# RES_D     4
-# RES_Q     8
-# RES_DQ   16
-# RES_Y    32
-# RES_Z    64
-#
-# RES_U defines a union. Its arguments are a name and two or more
-# pairs of "size, alignment"
-#
-# The two assigns are only needed if this structure is being nested
-# within another. Even if the assigns are not done, one can still use
-# STRUCT_NAME_size as the size of the structure.
-#
-# Note that for nesting, you still need to assign to STRUCT_NAME_size.
-#
-# The differences between this and using "struc" directly are that each
-# type is implicitly aligned to its natural length (although this can be
-# over-ridden with an explicit third parameter), and that the structure
-# is padded at the end to its overall alignment.
-#
-
-#########################################################################
-
-#ifndef _DATASTRUCT_ASM_
-#define _DATASTRUCT_ASM_
-
-#define PTR_SZ                  8
-#define SHA512_DIGEST_WORD_SIZE 8
-#define SHA512_MB_MGR_NUM_LANES_AVX2 4
-#define NUM_SHA512_DIGEST_WORDS 8
-#define SZ4                     4*SHA512_DIGEST_WORD_SIZE
-#define ROUNDS                  80*SZ4
-#define SHA512_DIGEST_ROW_SIZE  (SHA512_MB_MGR_NUM_LANES_AVX2 * 8)
-
-# START_FIELDS
-.macro START_FIELDS
- _FIELD_OFFSET = 0
- _STRUCT_ALIGN = 0
-.endm
-
-# FIELD name size align
-.macro FIELD name size align
- _FIELD_OFFSET = (_FIELD_OFFSET + (\align) - 1) & (~ ((\align)-1))
- \name  = _FIELD_OFFSET
- _FIELD_OFFSET = _FIELD_OFFSET + (\size)
-.if (\align > _STRUCT_ALIGN)
- _STRUCT_ALIGN = \align
-.endif
-.endm
-
-# END_FIELDS
-.macro END_FIELDS
- _FIELD_OFFSET = (_FIELD_OFFSET + _STRUCT_ALIGN-1) & (~ (_STRUCT_ALIGN-1))
-.endm
-
-.macro STRUCT p1
-START_FIELDS
-.struc \p1
-.endm
-
-.macro ENDSTRUCT
- tmp = _FIELD_OFFSET
- END_FIELDS
- tmp = (_FIELD_OFFSET - ##tmp)
-.if (tmp > 0)
-        .lcomm  tmp
-.endif
-.endm
-
-## RES_int name size align
-.macro RES_int p1 p2 p3
- name = \p1
- size = \p2
- align = .\p3
-
- _FIELD_OFFSET = (_FIELD_OFFSET + (align) - 1) & (~ ((align)-1))
-.align align
-.lcomm name size
- _FIELD_OFFSET = _FIELD_OFFSET + (size)
-.if (align > _STRUCT_ALIGN)
- _STRUCT_ALIGN = align
-.endif
-.endm
-
-# macro RES_B name, size [, align]
-.macro RES_B _name, _size, _align=1
-RES_int _name _size _align
-.endm
-
-# macro RES_W name, size [, align]
-.macro RES_W _name, _size, _align=2
-RES_int _name 2*(_size) _align
-.endm
-
-# macro RES_D name, size [, align]
-.macro RES_D _name, _size, _align=4
-RES_int _name 4*(_size) _align
-.endm
-
-# macro RES_Q name, size [, align]
-.macro RES_Q _name, _size, _align=8
-RES_int _name 8*(_size) _align
-.endm
-
-# macro RES_DQ name, size [, align]
-.macro RES_DQ _name, _size, _align=16
-RES_int _name 16*(_size) _align
-.endm
-
-# macro RES_Y name, size [, align]
-.macro RES_Y _name, _size, _align=32
-RES_int _name 32*(_size) _align
-.endm
-
-# macro RES_Z name, size [, align]
-.macro RES_Z _name, _size, _align=64
-RES_int _name 64*(_size) _align
-.endm
-
-#endif
-
-###################################################################
-### Define SHA512 Out Of Order Data Structures
-###################################################################
-
-START_FIELDS    # LANE_DATA
-###     name            size    align
-FIELD   _job_in_lane,   8,      8       # pointer to job object
-END_FIELDS
-
- _LANE_DATA_size = _FIELD_OFFSET
- _LANE_DATA_align = _STRUCT_ALIGN
-
-####################################################################
-
-START_FIELDS    # SHA512_ARGS_X4
-###     name            size    align
-FIELD   _digest,        8*8*4,  4      # transposed digest
-FIELD   _data_ptr,      8*4,    8       # array of pointers to data
-END_FIELDS
-
- _SHA512_ARGS_X4_size  =  _FIELD_OFFSET
- _SHA512_ARGS_X4_align =  _STRUCT_ALIGN
-
-#####################################################################
-
-START_FIELDS    # MB_MGR
-###     name            size    align
-FIELD   _args,          _SHA512_ARGS_X4_size, _SHA512_ARGS_X4_align
-FIELD   _lens,          8*4,    8
-FIELD   _unused_lanes,  8,      8
-FIELD   _ldata,         _LANE_DATA_size*4, _LANE_DATA_align
-END_FIELDS
-
- _MB_MGR_size  =  _FIELD_OFFSET
- _MB_MGR_align =  _STRUCT_ALIGN
-
-_args_digest = _args + _digest
-_args_data_ptr = _args + _data_ptr
-
-#######################################################################
-
-#######################################################################
-#### Define constants
-#######################################################################
-
-#define STS_UNKNOWN             0
-#define STS_BEING_PROCESSED     1
-#define STS_COMPLETED           2
-
-#######################################################################
-#### Define JOB_SHA512 structure
-#######################################################################
-
-START_FIELDS    # JOB_SHA512
-###     name                            size    align
-FIELD   _buffer,                        8,      8       # pointer to buffer
-FIELD   _len,                           8,      8       # length in bytes
-FIELD   _result_digest,                 8*8,    32      # Digest (output)
-FIELD   _status,                        4,      4
-FIELD   _user_data,                     8,      8
-END_FIELDS
-
- _JOB_SHA512_size = _FIELD_OFFSET
- _JOB_SHA512_align = _STRUCT_ALIGN
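
The FIELD/END_FIELDS machinery above is just the usual align-then-advance offset computation, written as assembler macros. The same arithmetic in C, for reference (a sketch; the function name is hypothetical):

	#include <stddef.h>

	/* Mirror of FIELD: align the running offset, record it, advance by size,
	 * and track the largest alignment so END_FIELDS can pad the struct. */
	static size_t field(size_t *offset, size_t *struct_align,
			    size_t size, size_t align)
	{
		size_t field_offset;

		*offset = (*offset + align - 1) & ~(align - 1);	/* round up */
		field_offset = *offset;
		*offset += size;
		if (align > *struct_align)
			*struct_align = align;
		return field_offset;
	}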

+ 0 - 297
arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S

@@ -1,297 +0,0 @@
-/*
- * Flush routine for SHA512 multibuffer
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- *     Megha Dey <megha.dey@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *   * Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in
- *     the documentation and/or other materials provided with the
- *     distribution.
- *   * Neither the name of Intel Corporation nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/linkage.h>
-#include <asm/frame.h>
-#include "sha512_mb_mgr_datastruct.S"
-
-.extern sha512_x4_avx2
-
-# LINUX register definitions
-#define arg1    %rdi
-#define arg2    %rsi
-
-# idx needs to be other than arg1, arg2, rbx, r12
-#define idx     %rdx
-
-# Common definitions
-#define state   arg1
-#define job     arg2
-#define len2    arg2
-
-#define unused_lanes    %rbx
-#define lane_data       %rbx
-#define tmp2            %rbx
-
-#define job_rax         %rax
-#define tmp1            %rax
-#define size_offset     %rax
-#define tmp             %rax
-#define start_offset    %rax
-
-#define tmp3            arg1
-
-#define extra_blocks    arg2
-#define p               arg2
-
-#define tmp4            %r8
-#define lens0           %r8
-
-#define lens1           %r9
-#define lens2           %r10
-#define lens3           %r11
-
-.macro LABEL prefix n
-\prefix\n\():
-.endm
-
-.macro JNE_SKIP i
-jne     skip_\i
-.endm
-
-.altmacro
-.macro SET_OFFSET _offset
-offset = \_offset
-.endm
-.noaltmacro
-
-# JOB* sha512_mb_mgr_flush_avx2(MB_MGR *state)
-# arg 1 : rdi : state
-ENTRY(sha512_mb_mgr_flush_avx2)
-	FRAME_BEGIN
-	push	%rbx
-
-	# If bit (32+7) is set, then all lanes are empty
-	mov     _unused_lanes(state), unused_lanes
-        bt      $32+7, unused_lanes
-        jc      return_null
-
-        # find a lane with a non-null job
-	xor     idx, idx
-        offset = (_ldata + 1*_LANE_DATA_size + _job_in_lane)
-        cmpq    $0, offset(state)
-        cmovne  one(%rip), idx
-        offset = (_ldata + 2*_LANE_DATA_size + _job_in_lane)
-        cmpq    $0, offset(state)
-        cmovne  two(%rip), idx
-        offset = (_ldata + 3*_LANE_DATA_size + _job_in_lane)
-        cmpq    $0, offset(state)
-        cmovne  three(%rip), idx
-
-        # copy idx to empty lanes
-copy_lane_data:
-	offset =  (_args + _data_ptr)
-        mov     offset(state,idx,8), tmp
-
-        I = 0
-.rep 4
-	offset =  (_ldata + I * _LANE_DATA_size + _job_in_lane)
-        cmpq    $0, offset(state)
-.altmacro
-        JNE_SKIP %I
-        offset =  (_args + _data_ptr + 8*I)
-        mov     tmp, offset(state)
-        offset =  (_lens + 8*I +4)
-        movl    $0xFFFFFFFF, offset(state)
-LABEL skip_ %I
-        I = (I+1)
-.noaltmacro
-.endr
-
-        # Find min length
-        mov     _lens + 0*8(state),lens0
-        mov     lens0,idx
-        mov     _lens + 1*8(state),lens1
-        cmp     idx,lens1
-        cmovb   lens1,idx
-        mov     _lens + 2*8(state),lens2
-        cmp     idx,lens2
-        cmovb   lens2,idx
-        mov     _lens + 3*8(state),lens3
-        cmp     idx,lens3
-        cmovb   lens3,idx
-        mov     idx,len2
-        and     $0xF,idx
-        and     $~0xFF,len2
-	jz      len_is_0
-
-        sub     len2, lens0
-        sub     len2, lens1
-        sub     len2, lens2
-        sub     len2, lens3
-        shr     $32,len2
-        mov     lens0, _lens + 0*8(state)
-        mov     lens1, _lens + 1*8(state)
-        mov     lens2, _lens + 2*8(state)
-        mov     lens3, _lens + 3*8(state)
-
-        # "state" and "args" are the same address, arg1
-        # len is arg2
-        call    sha512_x4_avx2
-        # state and idx are intact
-
-len_is_0:
-        # process completed job "idx"
-	imul    $_LANE_DATA_size, idx, lane_data
-        lea     _ldata(state, lane_data), lane_data
-
-        mov     _job_in_lane(lane_data), job_rax
-        movq    $0,  _job_in_lane(lane_data)
-        movl    $STS_COMPLETED, _status(job_rax)
-        mov     _unused_lanes(state), unused_lanes
-        shl     $8, unused_lanes
-        or      idx, unused_lanes
-        mov     unused_lanes, _unused_lanes(state)
-
-	movl    $0xFFFFFFFF, _lens+4(state,  idx, 8)
-
-	vmovq _args_digest+0*32(state, idx, 8), %xmm0
-        vpinsrq $1, _args_digest+1*32(state, idx, 8), %xmm0, %xmm0
-	vmovq _args_digest+2*32(state, idx, 8), %xmm1
-        vpinsrq $1, _args_digest+3*32(state, idx, 8), %xmm1, %xmm1
-	vmovq _args_digest+4*32(state, idx, 8), %xmm2
-        vpinsrq $1, _args_digest+5*32(state, idx, 8), %xmm2, %xmm2
-	vmovq _args_digest+6*32(state, idx, 8), %xmm3
-	vpinsrq $1, _args_digest+7*32(state, idx, 8), %xmm3, %xmm3
-
-	vmovdqu %xmm0, _result_digest(job_rax)
-	vmovdqu %xmm1, _result_digest+1*16(job_rax)
-	vmovdqu %xmm2, _result_digest+2*16(job_rax)
-	vmovdqu %xmm3, _result_digest+3*16(job_rax)
-
-return:
-	pop	%rbx
-	FRAME_END
-        ret
-
-return_null:
-        xor     job_rax, job_rax
-        jmp     return
-ENDPROC(sha512_mb_mgr_flush_avx2)
-.align 16
-
-ENTRY(sha512_mb_mgr_get_comp_job_avx2)
-        push    %rbx
-
-	mov     _unused_lanes(state), unused_lanes
-        bt      $(32+7), unused_lanes
-        jc      .return_null
-
-        # Find min length
-        mov     _lens(state),lens0
-        mov     lens0,idx
-        mov     _lens+1*8(state),lens1
-        cmp     idx,lens1
-        cmovb   lens1,idx
-        mov     _lens+2*8(state),lens2
-        cmp     idx,lens2
-        cmovb   lens2,idx
-        mov     _lens+3*8(state),lens3
-        cmp     idx,lens3
-        cmovb   lens3,idx
-        test    $~0xF,idx
-        jnz     .return_null
-        and     $0xF,idx
-
-        #process completed job "idx"
-	imul    $_LANE_DATA_size, idx, lane_data
-        lea     _ldata(state, lane_data), lane_data
-
-        mov     _job_in_lane(lane_data), job_rax
-        movq    $0,  _job_in_lane(lane_data)
-        movl    $STS_COMPLETED, _status(job_rax)
-        mov     _unused_lanes(state), unused_lanes
-        shl     $8, unused_lanes
-        or      idx, unused_lanes
-        mov     unused_lanes, _unused_lanes(state)
-
-        movl    $0xFFFFFFFF, _lens+4(state,  idx, 8)
-
-	vmovq   _args_digest(state, idx, 8), %xmm0
-        vpinsrq $1, _args_digest+1*32(state, idx, 8), %xmm0, %xmm0
-	vmovq    _args_digest+2*32(state, idx, 8), %xmm1
-        vpinsrq $1, _args_digest+3*32(state, idx, 8), %xmm1, %xmm1
-	vmovq    _args_digest+4*32(state, idx, 8), %xmm2
-        vpinsrq $1, _args_digest+5*32(state, idx, 8), %xmm2, %xmm2
-        vmovq    _args_digest+6*32(state, idx, 8), %xmm3
-        vpinsrq $1, _args_digest+7*32(state, idx, 8), %xmm3, %xmm3
-
-	vmovdqu %xmm0, _result_digest+0*16(job_rax)
-	vmovdqu %xmm1, _result_digest+1*16(job_rax)
-	vmovdqu %xmm2, _result_digest+2*16(job_rax)
-	vmovdqu %xmm3, _result_digest+3*16(job_rax)
-
-	pop     %rbx
-
-        ret
-
-.return_null:
-        xor     job_rax, job_rax
-	pop     %rbx
-        ret
-ENDPROC(sha512_mb_mgr_get_comp_job_avx2)
-
-.section	.rodata.cst8.one, "aM", @progbits, 8
-.align 8
-one:
-.quad  1
-
-.section	.rodata.cst8.two, "aM", @progbits, 8
-.align 8
-two:
-.quad  2
-
-.section	.rodata.cst8.three, "aM", @progbits, 8
-.align 8
-three:
-.quad  3
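
A detail worth calling out in the flush routine above: before the final sha512_x4_avx2 call it points every idle lane at a live lane's buffer and forces that lane's length dword to 0xFFFFFFFF, so the four-lane kernel always sees valid pointers while the dummy lanes never win the minimum-length selection. Roughly, in C (fill_empty_lanes is a hypothetical name; the array layout is simplified):

	#include <stdint.h>

	static void fill_empty_lanes(const void *data_ptr[4], uint64_t lens[4],
				     void * const job_in_lane[4], int live)
	{
		int i;

		for (i = 0; i < 4; i++) {
			if (job_in_lane[i])		/* lane has a real job */
				continue;
			data_ptr[i] = data_ptr[live];	/* harmless valid pointer */
			lens[i] = (0xffffffffULL << 32) | (uint32_t)lens[i];
		}
	}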

+ 0 - 69
arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c

@@ -1,69 +0,0 @@
-/*
- * Initialization code for multi buffer SHA512 algorithm for AVX2
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- *     Megha Dey <megha.dey@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *   * Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in
- *     the documentation and/or other materials provided with the
- *     distribution.
- *   * Neither the name of Intel Corporation nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "sha512_mb_mgr.h"
-
-void sha512_mb_mgr_init_avx2(struct sha512_mb_mgr *state)
-{
-	unsigned int j;
-
-	/* initially all lanes are unused */
-	state->lens[0] = 0xFFFFFFFF00000000;
-	state->lens[1] = 0xFFFFFFFF00000001;
-	state->lens[2] = 0xFFFFFFFF00000002;
-	state->lens[3] = 0xFFFFFFFF00000003;
-
-	state->unused_lanes = 0xFF03020100;
-	for (j = 0; j < 4; j++)
-		state->ldata[j].job_in_lane = NULL;
-}
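
The lens[] initialisation above also documents the scheduling encoding: each 64-bit entry is (length-in-blocks << 32) | lane_index, with 0xFFFFFFFF in the high half marking a lane with no real work so it effectively never wins the unsigned minimum against a real job. Both the submit and flush routines then pick the lane that will finish first and hash that many blocks across all four lanes; a C sketch of that step (pick_min_lane is an illustrative name):

	#include <stdint.h>

	static uint32_t pick_min_lane(uint64_t lens[4], unsigned int *lane)
	{
		uint64_t min = lens[0];
		uint32_t blocks;
		int i;

		for (i = 1; i < 4; i++)
			if (lens[i] < min)
				min = lens[i];

		*lane = min & 0xf;		/* lane index lives in the low bits */
		blocks = min >> 32;		/* blocks every lane can safely run */

		for (i = 0; i < 4; i++)		/* credit that work to all lanes */
			lens[i] -= (uint64_t)blocks << 32;

		return blocks;			/* 0: lane *lane is already finished */
	}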

+ 0 - 224
arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S

@@ -1,224 +0,0 @@
-/*
- * Buffer submit code for multi buffer SHA512 algorithm
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- *     Megha Dey <megha.dey@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *   * Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in
- *     the documentation and/or other materials provided with the
- *     distribution.
- *   * Neither the name of Intel Corporation nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/linkage.h>
-#include <asm/frame.h>
-#include "sha512_mb_mgr_datastruct.S"
-
-.extern sha512_x4_avx2
-
-#define arg1    %rdi
-#define arg2    %rsi
-
-#define idx             %rdx
-#define last_len        %rdx
-
-#define size_offset     %rcx
-#define tmp2            %rcx
-
-# Common definitions
-#define state   arg1
-#define job     arg2
-#define len2    arg2
-#define p2      arg2
-
-#define p               %r11
-#define start_offset    %r11
-
-#define unused_lanes    %rbx
-
-#define job_rax         %rax
-#define len             %rax
-
-#define lane            %r12
-#define tmp3            %r12
-#define lens3           %r12
-
-#define extra_blocks    %r8
-#define lens0           %r8
-
-#define tmp             %r9
-#define lens1           %r9
-
-#define lane_data       %r10
-#define lens2           %r10
-
-#define DWORD_len %eax
-
-# JOB* sha512_mb_mgr_submit_avx2(MB_MGR *state, JOB *job)
-# arg 1 : rdi : state
-# arg 2 : rsi : job
-ENTRY(sha512_mb_mgr_submit_avx2)
-	FRAME_BEGIN
-	push	%rbx
-	push	%r12
-
-        mov     _unused_lanes(state), unused_lanes
-        movzb     %bl,lane
-        shr     $8, unused_lanes
-        imul    $_LANE_DATA_size, lane,lane_data
-        movl    $STS_BEING_PROCESSED, _status(job)
-	lea     _ldata(state, lane_data), lane_data
-        mov     unused_lanes, _unused_lanes(state)
-        movl    _len(job),  DWORD_len
-
-	mov     job, _job_in_lane(lane_data)
-        movl    DWORD_len,_lens+4(state , lane, 8)
-
-	# Load digest words from result_digest
-	vmovdqu	_result_digest+0*16(job), %xmm0
-	vmovdqu _result_digest+1*16(job), %xmm1
-	vmovdqu	_result_digest+2*16(job), %xmm2
-        vmovdqu	_result_digest+3*16(job), %xmm3
-
-	vmovq    %xmm0, _args_digest(state, lane, 8)
-	vpextrq  $1, %xmm0, _args_digest+1*32(state , lane, 8)
-	vmovq    %xmm1, _args_digest+2*32(state , lane, 8)
-	vpextrq  $1, %xmm1, _args_digest+3*32(state , lane, 8)
-	vmovq    %xmm2, _args_digest+4*32(state , lane, 8)
-	vpextrq  $1, %xmm2, _args_digest+5*32(state , lane, 8)
-	vmovq    %xmm3, _args_digest+6*32(state , lane, 8)
-	vpextrq  $1, %xmm3, _args_digest+7*32(state , lane, 8)
-
-	mov     _buffer(job), p
-	mov     p, _args_data_ptr(state, lane, 8)
-
-	cmp     $0xFF, unused_lanes
-	jne     return_null
-
-start_loop:
-
-	# Find min length
-	mov     _lens+0*8(state),lens0
-	mov     lens0,idx
-	mov     _lens+1*8(state),lens1
-	cmp     idx,lens1
-	cmovb   lens1, idx
-	mov     _lens+2*8(state),lens2
-	cmp     idx,lens2
-	cmovb   lens2,idx
-	mov     _lens+3*8(state),lens3
-	cmp     idx,lens3
-	cmovb   lens3,idx
-	mov     idx,len2
-	and     $0xF,idx
-	and     $~0xFF,len2
-	jz      len_is_0
-
-	sub     len2,lens0
-	sub     len2,lens1
-	sub     len2,lens2
-	sub     len2,lens3
-	shr     $32,len2
-	mov     lens0, _lens + 0*8(state)
-	mov     lens1, _lens + 1*8(state)
-	mov     lens2, _lens + 2*8(state)
-	mov     lens3, _lens + 3*8(state)
-
-	# "state" and "args" are the same address, arg1
-	# len is arg2
-	call    sha512_x4_avx2
-	# state and idx are intact
-
-len_is_0:
-
-	# process completed job "idx"
-	imul    $_LANE_DATA_size, idx, lane_data
-	lea     _ldata(state, lane_data), lane_data
-
-	mov     _job_in_lane(lane_data), job_rax
-	mov     _unused_lanes(state), unused_lanes
-	movq    $0, _job_in_lane(lane_data)
-	movl    $STS_COMPLETED, _status(job_rax)
-	shl     $8, unused_lanes
-	or      idx, unused_lanes
-	mov     unused_lanes, _unused_lanes(state)
-
-	movl	$0xFFFFFFFF,_lens+4(state,idx,8)
-	vmovq    _args_digest+0*32(state , idx, 8), %xmm0
-	vpinsrq  $1, _args_digest+1*32(state , idx, 8), %xmm0, %xmm0
-	vmovq    _args_digest+2*32(state , idx, 8), %xmm1
-	vpinsrq  $1, _args_digest+3*32(state , idx, 8), %xmm1, %xmm1
-	vmovq    _args_digest+4*32(state , idx, 8), %xmm2
-	vpinsrq  $1, _args_digest+5*32(state , idx, 8), %xmm2, %xmm2
-	vmovq    _args_digest+6*32(state , idx, 8), %xmm3
-	vpinsrq  $1, _args_digest+7*32(state , idx, 8), %xmm3, %xmm3
-
-	vmovdqu  %xmm0, _result_digest + 0*16(job_rax)
-	vmovdqu  %xmm1, _result_digest + 1*16(job_rax)
-	vmovdqu  %xmm2, _result_digest + 2*16(job_rax)
-	vmovdqu  %xmm3, _result_digest + 3*16(job_rax)
-
-return:
-	pop	%r12
-	pop	%rbx
-	FRAME_END
-	ret
-
-return_null:
-	xor     job_rax, job_rax
-	jmp     return
-ENDPROC(sha512_mb_mgr_submit_avx2)
-
-/* UNUSED?
-.section	.rodata.cst16, "aM", @progbits, 16
-.align 16
-H0:     .int  0x6a09e667
-H1:     .int  0xbb67ae85
-H2:     .int  0x3c6ef372
-H3:     .int  0xa54ff53a
-H4:     .int  0x510e527f
-H5:     .int  0x9b05688c
-H6:     .int  0x1f83d9ab
-H7:     .int  0x5be0cd19
-*/

+ 0 - 531
arch/x86/crypto/sha512-mb/sha512_x4_avx2.S

@@ -1,531 +0,0 @@
-/*
- * Multi-buffer SHA512 algorithm hash compute routine
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- *     Megha Dey <megha.dey@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *   * Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in
- *     the documentation and/or other materials provided with the
- *     distribution.
- *   * Neither the name of Intel Corporation nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-# code to compute quad SHA512 using AVX2
-# use YMMs to tackle the larger digest size
-# outer calling routine takes care of save and restore of XMM registers
-# Logic designed/laid out by JDG
-
-# Function clobbers: rax, rcx, rdx, rbx, rsi, rdi, r9-r15; ymm0-15
-# Stack must be aligned to 32 bytes before call
-# Linux clobbers: rax rbx rcx rsi r8 r9 r10 r11 r12
-# Linux preserves: rcx rdx rdi rbp r13 r14 r15
-# clobbers ymm0-15
-
-#include <linux/linkage.h>
-#include "sha512_mb_mgr_datastruct.S"
-
-arg1 = %rdi
-arg2 = %rsi
-
-# Common definitions
-STATE = arg1
-INP_SIZE = arg2
-
-IDX = %rax
-ROUND = %rbx
-TBL = %r8
-
-inp0 = %r9
-inp1 = %r10
-inp2 = %r11
-inp3 = %r12
-
-a = %ymm0
-b = %ymm1
-c = %ymm2
-d = %ymm3
-e = %ymm4
-f = %ymm5
-g = %ymm6
-h = %ymm7
-
-a0 = %ymm8
-a1 = %ymm9
-a2 = %ymm10
-
-TT0 = %ymm14
-TT1 = %ymm13
-TT2 = %ymm12
-TT3 = %ymm11
-TT4 = %ymm10
-TT5 = %ymm9
-
-T1 = %ymm14
-TMP = %ymm15
-
-# Define stack usage
-STACK_SPACE1 = SZ4*16 + NUM_SHA512_DIGEST_WORDS*SZ4 + 24
-
-#define VMOVPD	vmovupd
-_digest = SZ4*16
-
-# transpose r0, r1, r2, r3, t0, t1
-# "transpose" data in {r0..r3} using temps {t0..t3}
-# Input looks like: {r0 r1 r2 r3}
-# r0 = {a7 a6 a5 a4 a3 a2 a1 a0}
-# r1 = {b7 b6 b5 b4 b3 b2 b1 b0}
-# r2 = {c7 c6 c5 c4 c3 c2 c1 c0}
-# r3 = {d7 d6 d5 d4 d3 d2 d1 d0}
-#
-# output looks like: {t0 r1 r0 r3}
-# t0 = {d1 d0 c1 c0 b1 b0 a1 a0}
-# r1 = {d3 d2 c3 c2 b3 b2 a3 a2}
-# r0 = {d5 d4 c5 c4 b5 b4 a5 a4}
-# r3 = {d7 d6 c7 c6 b7 b6 a7 a6}
-
-.macro TRANSPOSE r0 r1 r2 r3 t0 t1
-	vshufps  $0x44, \r1, \r0, \t0 # t0 = {b5 b4 a5 a4   b1 b0 a1 a0}
-        vshufps  $0xEE, \r1, \r0, \r0 # r0 = {b7 b6 a7 a6   b3 b2 a3 a2}
-        vshufps  $0x44, \r3, \r2, \t1 # t1 = {d5 d4 c5 c4   d1 d0 c1 c0}
-        vshufps  $0xEE, \r3, \r2, \r2 # r2 = {d7 d6 c7 c6   d3 d2 c3 c2}
-
-	vperm2f128      $0x20, \r2, \r0, \r1  # h6...a6
-        vperm2f128      $0x31, \r2, \r0, \r3  # h2...a2
-        vperm2f128      $0x31, \t1, \t0, \r0  # h5...a5
-        vperm2f128      $0x20, \t1, \t0, \t0  # h1...a1
-.endm
-
-.macro ROTATE_ARGS
-TMP_ = h
-h = g
-g = f
-f = e
-e = d
-d = c
-c = b
-b = a
-a = TMP_
-.endm
-
-# PRORQ reg, imm, tmp
-# packed-rotate-right-double
-# does a rotate by doing two shifts and an or
-.macro _PRORQ reg imm tmp
-	vpsllq	$(64-\imm),\reg,\tmp
-	vpsrlq	$\imm,\reg, \reg
-	vpor	\tmp,\reg, \reg
-.endm
-
-# non-destructive
-# PRORQ_nd reg, imm, tmp, src
-.macro _PRORQ_nd reg imm tmp src
-	vpsllq	$(64-\imm), \src, \tmp
-	vpsrlq	$\imm, \src, \reg
-	vpor	\tmp, \reg, \reg
-.endm
-
-# PRORQ dst/src, amt
-.macro PRORQ reg imm
-	_PRORQ	\reg, \imm, TMP
-.endm
-
-# PRORQ_nd dst, src, amt
-.macro PRORQ_nd reg tmp imm
-	_PRORQ_nd	\reg, \imm, TMP, \tmp
-.endm
-
-#; arguments passed implicitly in preprocessor symbols i, a...h
-.macro ROUND_00_15 _T1 i
-	PRORQ_nd a0, e, (18-14)	# sig1: a0 = (e >> 4)
-
-	vpxor   g, f, a2        # ch: a2 = f^g
-        vpand   e,a2, a2                # ch: a2 = (f^g)&e
-        vpxor   g, a2, a2               # a2 = ch
-
-        PRORQ_nd        a1,e,41         # sig1: a1 = (e >> 25)
-
-        offset = SZ4*(\i & 0xf)
-        vmovdqu \_T1,offset(%rsp)
-        vpaddq  (TBL,ROUND,1), \_T1, \_T1       # T1 = W + K
-        vpxor   e,a0, a0        # sig1: a0 = e ^ (e >> 5)
-        PRORQ   a0, 14           # sig1: a0 = (e >> 6) ^ (e >> 11)
-        vpaddq  a2, h, h        # h = h + ch
-        PRORQ_nd        a2,a,6  # sig0: a2 = (a >> 11)
-        vpaddq  \_T1,h, h       # h = h + ch + W + K
-        vpxor   a1, a0, a0      # a0 = sigma1
-	vmovdqu a,\_T1
-        PRORQ_nd        a1,a,39 # sig0: a1 = (a >> 22)
-        vpxor   c, \_T1, \_T1      # maj: T1 = a^c
-        add     $SZ4, ROUND     # ROUND++
-        vpand   b, \_T1, \_T1   # maj: T1 = (a^c)&b
-        vpaddq  a0, h, h
-        vpaddq  h, d, d
-        vpxor   a, a2, a2       # sig0: a2 = a ^ (a >> 11)
-        PRORQ   a2,28            # sig0: a2 = (a >> 2) ^ (a >> 13)
-        vpxor   a1, a2, a2      # a2 = sig0
-        vpand   c, a, a1        # maj: a1 = a&c
-        vpor    \_T1, a1, a1    # a1 = maj
-        vpaddq  a1, h, h        # h = h + ch + W + K + maj
-        vpaddq  a2, h, h        # h = h + ch + W + K + maj + sigma0
-        ROTATE_ARGS
-.endm
-
-
-#; arguments passed implicitly in preprocessor symbols i, a...h
-.macro ROUND_16_XX _T1 i
-	vmovdqu SZ4*((\i-15)&0xf)(%rsp), \_T1
-        vmovdqu SZ4*((\i-2)&0xf)(%rsp), a1
-        vmovdqu \_T1, a0
-        PRORQ   \_T1,7
-        vmovdqu a1, a2
-        PRORQ   a1,42
-        vpxor   a0, \_T1, \_T1
-        PRORQ   \_T1, 1
-        vpxor   a2, a1, a1
-        PRORQ   a1, 19
-        vpsrlq  $7, a0, a0
-        vpxor   a0, \_T1, \_T1
-        vpsrlq  $6, a2, a2
-        vpxor   a2, a1, a1
-        vpaddq  SZ4*((\i-16)&0xf)(%rsp), \_T1, \_T1
-        vpaddq  SZ4*((\i-7)&0xf)(%rsp), a1, a1
-        vpaddq  a1, \_T1, \_T1
-
-        ROUND_00_15 \_T1,\i
-.endm
-
-
-# void sha512_x4_avx2(void *STATE, const int INP_SIZE)
-# arg 1 : STATE    : pointer to input data
-# arg 2 : INP_SIZE : size of data in blocks (assumed >= 1)
-ENTRY(sha512_x4_avx2)
-	# general registers preserved in outer calling routine
-	# outer calling routine saves all the XMM registers
-	# save callee-saved clobbered registers to comply with C function ABI
-	push    %r12
-	push    %r13
-	push    %r14
-	push    %r15
-
-	sub     $STACK_SPACE1, %rsp
-
-        # Load the pre-transposed incoming digest.
-        vmovdqu 0*SHA512_DIGEST_ROW_SIZE(STATE),a
-        vmovdqu 1*SHA512_DIGEST_ROW_SIZE(STATE),b
-        vmovdqu 2*SHA512_DIGEST_ROW_SIZE(STATE),c
-        vmovdqu 3*SHA512_DIGEST_ROW_SIZE(STATE),d
-        vmovdqu 4*SHA512_DIGEST_ROW_SIZE(STATE),e
-        vmovdqu 5*SHA512_DIGEST_ROW_SIZE(STATE),f
-        vmovdqu 6*SHA512_DIGEST_ROW_SIZE(STATE),g
-        vmovdqu 7*SHA512_DIGEST_ROW_SIZE(STATE),h
-
-        lea     K512_4(%rip),TBL
-
-        # load the address of each of the 4 message lanes
-        # getting ready to transpose input onto stack
-        mov     _data_ptr+0*PTR_SZ(STATE),inp0
-        mov     _data_ptr+1*PTR_SZ(STATE),inp1
-        mov     _data_ptr+2*PTR_SZ(STATE),inp2
-        mov     _data_ptr+3*PTR_SZ(STATE),inp3
-
-        xor     IDX, IDX
-lloop:
-        xor     ROUND, ROUND
-
-	# save old digest
-        vmovdqu a, _digest(%rsp)
-        vmovdqu b, _digest+1*SZ4(%rsp)
-        vmovdqu c, _digest+2*SZ4(%rsp)
-        vmovdqu d, _digest+3*SZ4(%rsp)
-        vmovdqu e, _digest+4*SZ4(%rsp)
-        vmovdqu f, _digest+5*SZ4(%rsp)
-        vmovdqu g, _digest+6*SZ4(%rsp)
-        vmovdqu h, _digest+7*SZ4(%rsp)
-        i = 0
-.rep 4
-	vmovdqu PSHUFFLE_BYTE_FLIP_MASK(%rip), TMP
-        VMOVPD  i*32(inp0, IDX), TT2
-        VMOVPD  i*32(inp1, IDX), TT1
-        VMOVPD  i*32(inp2, IDX), TT4
-        VMOVPD  i*32(inp3, IDX), TT3
-	TRANSPOSE	TT2, TT1, TT4, TT3, TT0, TT5
-	vpshufb	TMP, TT0, TT0
-	vpshufb	TMP, TT1, TT1
-	vpshufb	TMP, TT2, TT2
-	vpshufb	TMP, TT3, TT3
-	ROUND_00_15	TT0,(i*4+0)
-	ROUND_00_15	TT1,(i*4+1)
-	ROUND_00_15	TT2,(i*4+2)
-	ROUND_00_15	TT3,(i*4+3)
-	i = (i+1)
-.endr
-        add     $128, IDX
-
-        i = (i*4)
-
-        jmp     Lrounds_16_xx
-.align 16
-Lrounds_16_xx:
-.rep 16
-        ROUND_16_XX     T1, i
-        i = (i+1)
-.endr
-        cmp     $0xa00,ROUND
-        jb      Lrounds_16_xx
-
-	# add old digest
-        vpaddq  _digest(%rsp), a, a
-        vpaddq  _digest+1*SZ4(%rsp), b, b
-        vpaddq  _digest+2*SZ4(%rsp), c, c
-        vpaddq  _digest+3*SZ4(%rsp), d, d
-        vpaddq  _digest+4*SZ4(%rsp), e, e
-        vpaddq  _digest+5*SZ4(%rsp), f, f
-        vpaddq  _digest+6*SZ4(%rsp), g, g
-        vpaddq  _digest+7*SZ4(%rsp), h, h
-
-        sub     $1, INP_SIZE  # unit is blocks
-        jne     lloop
-
-        # write back to memory (state object) the transposed digest
-        vmovdqu a, 0*SHA512_DIGEST_ROW_SIZE(STATE)
-        vmovdqu b, 1*SHA512_DIGEST_ROW_SIZE(STATE)
-        vmovdqu c, 2*SHA512_DIGEST_ROW_SIZE(STATE)
-        vmovdqu d, 3*SHA512_DIGEST_ROW_SIZE(STATE)
-        vmovdqu e, 4*SHA512_DIGEST_ROW_SIZE(STATE)
-        vmovdqu f, 5*SHA512_DIGEST_ROW_SIZE(STATE)
-        vmovdqu g, 6*SHA512_DIGEST_ROW_SIZE(STATE)
-        vmovdqu h, 7*SHA512_DIGEST_ROW_SIZE(STATE)
-
-	# update input data pointers
-	add     IDX, inp0
-        mov     inp0, _data_ptr+0*PTR_SZ(STATE)
-        add     IDX, inp1
-        mov     inp1, _data_ptr+1*PTR_SZ(STATE)
-        add     IDX, inp2
-        mov     inp2, _data_ptr+2*PTR_SZ(STATE)
-        add     IDX, inp3
-        mov     inp3, _data_ptr+3*PTR_SZ(STATE)
-
-	#;;;;;;;;;;;;;;;
-	#; Postamble
-	add $STACK_SPACE1, %rsp
-	# restore callee-saved clobbered registers
-
-	pop     %r15
-	pop     %r14
-	pop     %r13
-	pop     %r12
-
-	# outer calling routine restores XMM and other GP registers
-	ret
-ENDPROC(sha512_x4_avx2)
-
-.section	.rodata.K512_4, "a", @progbits
-.align 64
-K512_4:
-	.octa 0x428a2f98d728ae22428a2f98d728ae22,\
-		0x428a2f98d728ae22428a2f98d728ae22
-	.octa 0x7137449123ef65cd7137449123ef65cd,\
-		0x7137449123ef65cd7137449123ef65cd
-	.octa 0xb5c0fbcfec4d3b2fb5c0fbcfec4d3b2f,\
-		0xb5c0fbcfec4d3b2fb5c0fbcfec4d3b2f
-	.octa 0xe9b5dba58189dbbce9b5dba58189dbbc,\
-		0xe9b5dba58189dbbce9b5dba58189dbbc
-	.octa 0x3956c25bf348b5383956c25bf348b538,\
-		0x3956c25bf348b5383956c25bf348b538
-	.octa 0x59f111f1b605d01959f111f1b605d019,\
-		0x59f111f1b605d01959f111f1b605d019
-	.octa 0x923f82a4af194f9b923f82a4af194f9b,\
-		0x923f82a4af194f9b923f82a4af194f9b
-	.octa 0xab1c5ed5da6d8118ab1c5ed5da6d8118,\
-		0xab1c5ed5da6d8118ab1c5ed5da6d8118
-	.octa 0xd807aa98a3030242d807aa98a3030242,\
-		0xd807aa98a3030242d807aa98a3030242
-	.octa 0x12835b0145706fbe12835b0145706fbe,\
-		0x12835b0145706fbe12835b0145706fbe
-	.octa 0x243185be4ee4b28c243185be4ee4b28c,\
-		0x243185be4ee4b28c243185be4ee4b28c
-	.octa 0x550c7dc3d5ffb4e2550c7dc3d5ffb4e2,\
-		0x550c7dc3d5ffb4e2550c7dc3d5ffb4e2
-	.octa 0x72be5d74f27b896f72be5d74f27b896f,\
-		0x72be5d74f27b896f72be5d74f27b896f
-	.octa 0x80deb1fe3b1696b180deb1fe3b1696b1,\
-		0x80deb1fe3b1696b180deb1fe3b1696b1
-	.octa 0x9bdc06a725c712359bdc06a725c71235,\
-		0x9bdc06a725c712359bdc06a725c71235
-	.octa 0xc19bf174cf692694c19bf174cf692694,\
-		0xc19bf174cf692694c19bf174cf692694
-	.octa 0xe49b69c19ef14ad2e49b69c19ef14ad2,\
-		0xe49b69c19ef14ad2e49b69c19ef14ad2
-	.octa 0xefbe4786384f25e3efbe4786384f25e3,\
-		0xefbe4786384f25e3efbe4786384f25e3
-	.octa 0x0fc19dc68b8cd5b50fc19dc68b8cd5b5,\
-		0x0fc19dc68b8cd5b50fc19dc68b8cd5b5
-	.octa 0x240ca1cc77ac9c65240ca1cc77ac9c65,\
-		0x240ca1cc77ac9c65240ca1cc77ac9c65
-	.octa 0x2de92c6f592b02752de92c6f592b0275,\
-		0x2de92c6f592b02752de92c6f592b0275
-	.octa 0x4a7484aa6ea6e4834a7484aa6ea6e483,\
-		0x4a7484aa6ea6e4834a7484aa6ea6e483
-	.octa 0x5cb0a9dcbd41fbd45cb0a9dcbd41fbd4,\
-		0x5cb0a9dcbd41fbd45cb0a9dcbd41fbd4
-	.octa 0x76f988da831153b576f988da831153b5,\
-		0x76f988da831153b576f988da831153b5
-	.octa 0x983e5152ee66dfab983e5152ee66dfab,\
-		0x983e5152ee66dfab983e5152ee66dfab
-	.octa 0xa831c66d2db43210a831c66d2db43210,\
-		0xa831c66d2db43210a831c66d2db43210
-	.octa 0xb00327c898fb213fb00327c898fb213f,\
-		0xb00327c898fb213fb00327c898fb213f
-	.octa 0xbf597fc7beef0ee4bf597fc7beef0ee4,\
-		0xbf597fc7beef0ee4bf597fc7beef0ee4
-	.octa 0xc6e00bf33da88fc2c6e00bf33da88fc2,\
-		0xc6e00bf33da88fc2c6e00bf33da88fc2
-	.octa 0xd5a79147930aa725d5a79147930aa725,\
-		0xd5a79147930aa725d5a79147930aa725
-	.octa 0x06ca6351e003826f06ca6351e003826f,\
-		0x06ca6351e003826f06ca6351e003826f
-	.octa 0x142929670a0e6e70142929670a0e6e70,\
-		0x142929670a0e6e70142929670a0e6e70
-	.octa 0x27b70a8546d22ffc27b70a8546d22ffc,\
-		0x27b70a8546d22ffc27b70a8546d22ffc
-	.octa 0x2e1b21385c26c9262e1b21385c26c926,\
-		0x2e1b21385c26c9262e1b21385c26c926
-	.octa 0x4d2c6dfc5ac42aed4d2c6dfc5ac42aed,\
-		0x4d2c6dfc5ac42aed4d2c6dfc5ac42aed
-	.octa 0x53380d139d95b3df53380d139d95b3df,\
-		0x53380d139d95b3df53380d139d95b3df
-	.octa 0x650a73548baf63de650a73548baf63de,\
-		0x650a73548baf63de650a73548baf63de
-	.octa 0x766a0abb3c77b2a8766a0abb3c77b2a8,\
-		0x766a0abb3c77b2a8766a0abb3c77b2a8
-	.octa 0x81c2c92e47edaee681c2c92e47edaee6,\
-		0x81c2c92e47edaee681c2c92e47edaee6
-	.octa 0x92722c851482353b92722c851482353b,\
-		0x92722c851482353b92722c851482353b
-	.octa 0xa2bfe8a14cf10364a2bfe8a14cf10364,\
-		0xa2bfe8a14cf10364a2bfe8a14cf10364
-	.octa 0xa81a664bbc423001a81a664bbc423001,\
-		0xa81a664bbc423001a81a664bbc423001
-	.octa 0xc24b8b70d0f89791c24b8b70d0f89791,\
-		0xc24b8b70d0f89791c24b8b70d0f89791
-	.octa 0xc76c51a30654be30c76c51a30654be30,\
-		0xc76c51a30654be30c76c51a30654be30
-	.octa 0xd192e819d6ef5218d192e819d6ef5218,\
-		0xd192e819d6ef5218d192e819d6ef5218
-	.octa 0xd69906245565a910d69906245565a910,\
-		0xd69906245565a910d69906245565a910
-	.octa 0xf40e35855771202af40e35855771202a,\
-		0xf40e35855771202af40e35855771202a
-	.octa 0x106aa07032bbd1b8106aa07032bbd1b8,\
-		0x106aa07032bbd1b8106aa07032bbd1b8
-	.octa 0x19a4c116b8d2d0c819a4c116b8d2d0c8,\
-		0x19a4c116b8d2d0c819a4c116b8d2d0c8
-	.octa 0x1e376c085141ab531e376c085141ab53,\
-		0x1e376c085141ab531e376c085141ab53
-	.octa 0x2748774cdf8eeb992748774cdf8eeb99,\
-		0x2748774cdf8eeb992748774cdf8eeb99
-	.octa 0x34b0bcb5e19b48a834b0bcb5e19b48a8,\
-		0x34b0bcb5e19b48a834b0bcb5e19b48a8
-	.octa 0x391c0cb3c5c95a63391c0cb3c5c95a63,\
-		0x391c0cb3c5c95a63391c0cb3c5c95a63
-	.octa 0x4ed8aa4ae3418acb4ed8aa4ae3418acb,\
-		0x4ed8aa4ae3418acb4ed8aa4ae3418acb
-	.octa 0x5b9cca4f7763e3735b9cca4f7763e373,\
-		0x5b9cca4f7763e3735b9cca4f7763e373
-	.octa 0x682e6ff3d6b2b8a3682e6ff3d6b2b8a3,\
-		0x682e6ff3d6b2b8a3682e6ff3d6b2b8a3
-	.octa 0x748f82ee5defb2fc748f82ee5defb2fc,\
-		0x748f82ee5defb2fc748f82ee5defb2fc
-	.octa 0x78a5636f43172f6078a5636f43172f60,\
-		0x78a5636f43172f6078a5636f43172f60
-	.octa 0x84c87814a1f0ab7284c87814a1f0ab72,\
-		0x84c87814a1f0ab7284c87814a1f0ab72
-	.octa 0x8cc702081a6439ec8cc702081a6439ec,\
-		0x8cc702081a6439ec8cc702081a6439ec
-	.octa 0x90befffa23631e2890befffa23631e28,\
-		0x90befffa23631e2890befffa23631e28
-	.octa 0xa4506cebde82bde9a4506cebde82bde9,\
-		0xa4506cebde82bde9a4506cebde82bde9
-	.octa 0xbef9a3f7b2c67915bef9a3f7b2c67915,\
-		0xbef9a3f7b2c67915bef9a3f7b2c67915
-	.octa 0xc67178f2e372532bc67178f2e372532b,\
-		0xc67178f2e372532bc67178f2e372532b
-	.octa 0xca273eceea26619cca273eceea26619c,\
-		0xca273eceea26619cca273eceea26619c
-	.octa 0xd186b8c721c0c207d186b8c721c0c207,\
-		0xd186b8c721c0c207d186b8c721c0c207
-	.octa 0xeada7dd6cde0eb1eeada7dd6cde0eb1e,\
-		0xeada7dd6cde0eb1eeada7dd6cde0eb1e
-	.octa 0xf57d4f7fee6ed178f57d4f7fee6ed178,\
-		0xf57d4f7fee6ed178f57d4f7fee6ed178
-	.octa 0x06f067aa72176fba06f067aa72176fba,\
-		0x06f067aa72176fba06f067aa72176fba
-	.octa 0x0a637dc5a2c898a60a637dc5a2c898a6,\
-		0x0a637dc5a2c898a60a637dc5a2c898a6
-	.octa 0x113f9804bef90dae113f9804bef90dae,\
-		0x113f9804bef90dae113f9804bef90dae
-	.octa 0x1b710b35131c471b1b710b35131c471b,\
-		0x1b710b35131c471b1b710b35131c471b
-	.octa 0x28db77f523047d8428db77f523047d84,\
-		0x28db77f523047d8428db77f523047d84
-	.octa 0x32caab7b40c7249332caab7b40c72493,\
-		0x32caab7b40c7249332caab7b40c72493
-	.octa 0x3c9ebe0a15c9bebc3c9ebe0a15c9bebc,\
-		0x3c9ebe0a15c9bebc3c9ebe0a15c9bebc
-	.octa 0x431d67c49c100d4c431d67c49c100d4c,\
-		0x431d67c49c100d4c431d67c49c100d4c
-	.octa 0x4cc5d4becb3e42b64cc5d4becb3e42b6,\
-		0x4cc5d4becb3e42b64cc5d4becb3e42b6
-	.octa 0x597f299cfc657e2a597f299cfc657e2a,\
-		0x597f299cfc657e2a597f299cfc657e2a
-	.octa 0x5fcb6fab3ad6faec5fcb6fab3ad6faec,\
-		0x5fcb6fab3ad6faec5fcb6fab3ad6faec
-	.octa 0x6c44198c4a4758176c44198c4a475817,\
-		0x6c44198c4a4758176c44198c4a475817
-
-.section	.rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32
-.align 32
-PSHUFFLE_BYTE_FLIP_MASK: .octa 0x08090a0b0c0d0e0f0001020304050607
-                         .octa 0x18191a1b1c1d1e1f1011121314151617

+ 24 - 77
crypto/Kconfig

@@ -213,20 +213,6 @@ config CRYPTO_CRYPTD
 	  converts an arbitrary synchronous software crypto algorithm
 	  into an asynchronous algorithm that executes in a kernel thread.
 
-config CRYPTO_MCRYPTD
-	tristate "Software async multi-buffer crypto daemon"
-	select CRYPTO_BLKCIPHER
-	select CRYPTO_HASH
-	select CRYPTO_MANAGER
-	select CRYPTO_WORKQUEUE
-	help
-	  This is a generic software asynchronous crypto daemon that
-	  provides the kernel thread to assist multi-buffer crypto
-	  algorithms for submitting jobs and flushing jobs in multi-buffer
-	  crypto algorithms.  Multi-buffer crypto algorithms are executed
-	  in the context of this kernel thread and drivers can post
-	  their crypto request asynchronously to be processed by this daemon.
-
 config CRYPTO_AUTHENC
 	tristate "Authenc support"
 	select CRYPTO_AEAD
@@ -470,6 +456,18 @@ config CRYPTO_LRW
 	  The first 128, 192 or 256 bits in the key are used for AES and the
 	  rest is used to tie each cipher block to its logical position.
 
+config CRYPTO_OFB
+	tristate "OFB support"
+	select CRYPTO_BLKCIPHER
+	select CRYPTO_MANAGER
+	help
+	  OFB: the Output Feedback mode makes a block cipher into a synchronous
+	  stream cipher. It generates keystream blocks, which are then XORed
+	  with the plaintext blocks to get the ciphertext. Flipping a bit in the
+	  ciphertext produces a flipped bit in the plaintext at the same
+	  location. This property allows many error correcting codes to function
+	  normally even when applied before encryption.
+
 config CRYPTO_PCBC
 	tristate "PCBC support"
 	select CRYPTO_BLKCIPHER
@@ -848,54 +846,6 @@ config CRYPTO_SHA1_PPC_SPE
 	  SHA-1 secure hash standard (DFIPS 180-4) implemented
 	  using powerpc SPE SIMD instruction set.
 
-config CRYPTO_SHA1_MB
-	tristate "SHA1 digest algorithm (x86_64 Multi-Buffer, Experimental)"
-	depends on X86 && 64BIT
-	select CRYPTO_SHA1
-	select CRYPTO_HASH
-	select CRYPTO_MCRYPTD
-	help
-	  SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
-	  using multi-buffer technique.  This algorithm computes on
-	  multiple data lanes concurrently with SIMD instructions for
-	  better throughput.  It should not be enabled by default but
-	  used when there is significant amount of work to keep the keep
-	  the data lanes filled to get performance benefit.  If the data
-	  lanes remain unfilled, a flush operation will be initiated to
-	  process the crypto jobs, adding a slight latency.
-
-config CRYPTO_SHA256_MB
-	tristate "SHA256 digest algorithm (x86_64 Multi-Buffer, Experimental)"
-	depends on X86 && 64BIT
-	select CRYPTO_SHA256
-	select CRYPTO_HASH
-	select CRYPTO_MCRYPTD
-	help
-	  SHA-256 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
-	  using multi-buffer technique.  This algorithm computes on
-	  multiple data lanes concurrently with SIMD instructions for
-	  better throughput.  It should not be enabled by default but
-	  used when there is significant amount of work to keep the keep
-	  the data lanes filled to get performance benefit.  If the data
-	  lanes remain unfilled, a flush operation will be initiated to
-	  process the crypto jobs, adding a slight latency.
-
-config CRYPTO_SHA512_MB
-        tristate "SHA512 digest algorithm (x86_64 Multi-Buffer, Experimental)"
-        depends on X86 && 64BIT
-        select CRYPTO_SHA512
-        select CRYPTO_HASH
-        select CRYPTO_MCRYPTD
-        help
-          SHA-512 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
-          using multi-buffer technique.  This algorithm computes on
-          multiple data lanes concurrently with SIMD instructions for
-          better throughput.  It should not be enabled by default but
-          used when there is significant amount of work to keep the keep
-          the data lanes filled to get performance benefit.  If the data
-          lanes remain unfilled, a flush operation will be initiated to
-          process the crypto jobs, adding a slight latency.
-
 config CRYPTO_SHA256
 	tristate "SHA224 and SHA256 digest algorithm"
 	select CRYPTO_HASH
@@ -1133,7 +1083,7 @@ config CRYPTO_AES_NI_INTEL
 
 	  In addition to AES cipher algorithm support, the acceleration
 	  for some popular block cipher mode is supported too, including
-	  ECB, CBC, LRW, PCBC, XTS. The 64 bit version has additional
+	  ECB, CBC, LRW, XTS. The 64 bit version has additional
 	  acceleration for CTR.
 
 config CRYPTO_AES_SPARC64
@@ -1590,20 +1540,6 @@ config CRYPTO_SM4
 
 	  If unsure, say N.
 
-config CRYPTO_SPECK
-	tristate "Speck cipher algorithm"
-	select CRYPTO_ALGAPI
-	help
-	  Speck is a lightweight block cipher that is tuned for optimal
-	  performance in software (rather than hardware).
-
-	  Speck may not be as secure as AES, and should only be used on systems
-	  where AES is not fast enough.
-
-	  See also: <https://eprint.iacr.org/2013/404.pdf>
-
-	  If unsure, say N.
-
 config CRYPTO_TEA
 	tristate "TEA, XTEA and XETA cipher algorithms"
 	select CRYPTO_ALGAPI
@@ -1875,6 +1811,17 @@ config CRYPTO_USER_API_AEAD
 	  This option enables the user-spaces interface for AEAD
 	  cipher algorithms.
 
+config CRYPTO_STATS
+	bool "Crypto usage statistics for User-space"
+	help
+	  This option enables the gathering of crypto stats.
+	  This will collect:
+	  - encrypt/decrypt size and numbers of symmeric operations
+	  - compress/decompress size and numbers of compress operations
+	  - size and numbers of hash operations
+	  - encrypt/decrypt/sign/verify numbers for asymmetric operations
+	  - generate/seed numbers for rng operations
+
 config CRYPTO_HASH_INFO
 	bool
 

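The CRYPTO_OFB help text above describes Output Feedback mode in prose. Below is a minimal user-space sketch of that structure, assuming a toy block_encrypt() stand-in (a plain XOR with the key) rather than a real block cipher; it only illustrates the data flow the mode follows: the cipher repeatedly encrypts its own previous output to produce a keystream, and the keystream is XORed into the data, so the same routine both encrypts and decrypts.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define BLK 16

/* Toy stand-in for a real block cipher such as AES; illustration only. */
static void block_encrypt(uint8_t blk[BLK], const uint8_t key[BLK])
{
	for (int i = 0; i < BLK; i++)
		blk[i] ^= key[i];
}

/* OFB: keystream[n] = E_key(keystream[n-1]), with keystream[-1] = IV. */
static void ofb_crypt(uint8_t *buf, size_t len,
		      const uint8_t key[BLK], const uint8_t iv[BLK])
{
	uint8_t feedback[BLK];

	memcpy(feedback, iv, BLK);
	while (len) {
		size_t n = len < BLK ? len : BLK;

		block_encrypt(feedback, key);	/* next keystream block */
		for (size_t i = 0; i < n; i++)
			buf[i] ^= feedback[i];	/* XOR keystream into data */
		buf += n;
		len -= n;
	}
}

int main(void)
{
	uint8_t key[BLK] = { 1 }, iv[BLK] = { 2 };
	uint8_t msg[20] = "attack at dawn";

	ofb_crypt(msg, sizeof(msg), key, iv);	/* encrypt */
	ofb_crypt(msg, sizeof(msg), key, iv);	/* decrypt: same operation */
	printf("%s\n", (char *)msg);		/* prints the original text */
	return 0;
}

Because encryption and decryption are the same keystream XOR, flipping a ciphertext bit flips only the corresponding plaintext bit, which is the error-propagation property the help text mentions.
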
+ 2 - 2
crypto/Makefile

@@ -54,6 +54,7 @@ cryptomgr-y := algboss.o testmgr.o
 
 obj-$(CONFIG_CRYPTO_MANAGER2) += cryptomgr.o
 obj-$(CONFIG_CRYPTO_USER) += crypto_user.o
+crypto_user-y := crypto_user_base.o crypto_user_stat.o
 obj-$(CONFIG_CRYPTO_CMAC) += cmac.o
 obj-$(CONFIG_CRYPTO_HMAC) += hmac.o
 obj-$(CONFIG_CRYPTO_VMAC) += vmac.o
@@ -93,7 +94,6 @@ obj-$(CONFIG_CRYPTO_MORUS640) += morus640.o
 obj-$(CONFIG_CRYPTO_MORUS1280) += morus1280.o
 obj-$(CONFIG_CRYPTO_PCRYPT) += pcrypt.o
 obj-$(CONFIG_CRYPTO_CRYPTD) += cryptd.o
-obj-$(CONFIG_CRYPTO_MCRYPTD) += mcryptd.o
 obj-$(CONFIG_CRYPTO_DES) += des_generic.o
 obj-$(CONFIG_CRYPTO_FCRYPT) += fcrypt.o
 obj-$(CONFIG_CRYPTO_BLOWFISH) += blowfish_generic.o
@@ -115,7 +115,6 @@ obj-$(CONFIG_CRYPTO_TEA) += tea.o
 obj-$(CONFIG_CRYPTO_KHAZAD) += khazad.o
 obj-$(CONFIG_CRYPTO_ANUBIS) += anubis.o
 obj-$(CONFIG_CRYPTO_SEED) += seed.o
-obj-$(CONFIG_CRYPTO_SPECK) += speck.o
 obj-$(CONFIG_CRYPTO_SALSA20) += salsa20_generic.o
 obj-$(CONFIG_CRYPTO_CHACHA20) += chacha20_generic.o
 obj-$(CONFIG_CRYPTO_POLY1305) += poly1305_generic.o
@@ -143,6 +142,7 @@ obj-$(CONFIG_CRYPTO_USER_API_SKCIPHER) += algif_skcipher.o
 obj-$(CONFIG_CRYPTO_USER_API_RNG) += algif_rng.o
 obj-$(CONFIG_CRYPTO_USER_API_AEAD) += algif_aead.o
 obj-$(CONFIG_CRYPTO_ZSTD) += zstd.o
+obj-$(CONFIG_CRYPTO_OFB) += ofb.o
 
 ecdh_generic-y := ecc.o
 ecdh_generic-y += ecdh.o

+ 9 - 11
crypto/aegis.h

@@ -21,7 +21,7 @@
 
 union aegis_block {
 	__le64 words64[AEGIS_BLOCK_SIZE / sizeof(__le64)];
-	u32 words32[AEGIS_BLOCK_SIZE / sizeof(u32)];
+	__le32 words32[AEGIS_BLOCK_SIZE / sizeof(__le32)];
 	u8 bytes[AEGIS_BLOCK_SIZE];
 };
 
@@ -57,24 +57,22 @@ static void crypto_aegis_aesenc(union aegis_block *dst,
 				const union aegis_block *src,
 				const union aegis_block *key)
 {
-	u32 *d = dst->words32;
 	const u8  *s  = src->bytes;
-	const u32 *k  = key->words32;
 	const u32 *t0 = crypto_ft_tab[0];
 	const u32 *t1 = crypto_ft_tab[1];
 	const u32 *t2 = crypto_ft_tab[2];
 	const u32 *t3 = crypto_ft_tab[3];
 	u32 d0, d1, d2, d3;
 
-	d0 = t0[s[ 0]] ^ t1[s[ 5]] ^ t2[s[10]] ^ t3[s[15]] ^ k[0];
-	d1 = t0[s[ 4]] ^ t1[s[ 9]] ^ t2[s[14]] ^ t3[s[ 3]] ^ k[1];
-	d2 = t0[s[ 8]] ^ t1[s[13]] ^ t2[s[ 2]] ^ t3[s[ 7]] ^ k[2];
-	d3 = t0[s[12]] ^ t1[s[ 1]] ^ t2[s[ 6]] ^ t3[s[11]] ^ k[3];
+	d0 = t0[s[ 0]] ^ t1[s[ 5]] ^ t2[s[10]] ^ t3[s[15]];
+	d1 = t0[s[ 4]] ^ t1[s[ 9]] ^ t2[s[14]] ^ t3[s[ 3]];
+	d2 = t0[s[ 8]] ^ t1[s[13]] ^ t2[s[ 2]] ^ t3[s[ 7]];
+	d3 = t0[s[12]] ^ t1[s[ 1]] ^ t2[s[ 6]] ^ t3[s[11]];
 
-	d[0] = d0;
-	d[1] = d1;
-	d[2] = d2;
-	d[3] = d3;
+	dst->words32[0] = cpu_to_le32(d0) ^ key->words32[0];
+	dst->words32[1] = cpu_to_le32(d1) ^ key->words32[1];
+	dst->words32[2] = cpu_to_le32(d2) ^ key->words32[2];
+	dst->words32[3] = cpu_to_le32(d3) ^ key->words32[3];
 }
 
 #endif /* _CRYPTO_AEGIS_H */

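The aegis.h hunk above is a big-endian fix: the forward-table lookups produce native-endian u32 values, while union aegis_block now stores its 32-bit words as __le32, so the result has to be put into little-endian layout before the byte-wise XOR with the key words. A short user-space sketch of the portable pattern follows, with put_le32() as a hypothetical stand-in for cpu_to_le32() plus the store into a __le32 field; it is an illustration, not the kernel helper.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for cpu_to_le32() + store: writes v in
 * little-endian byte order on any host. */
static void put_le32(uint8_t out[4], uint32_t v)
{
	out[0] = (uint8_t)v;
	out[1] = (uint8_t)(v >> 8);
	out[2] = (uint8_t)(v >> 16);
	out[3] = (uint8_t)(v >> 24);
}

int main(void)
{
	uint32_t d0 = 0x11223344;                    /* native-endian table result */
	uint8_t key[4] = { 0xaa, 0xbb, 0xcc, 0xdd }; /* little-endian key bytes */
	uint8_t dst[4];

	/* Convert to the key's layout first, then XOR byte-wise: the stored
	 * bytes are ee 88 ee cc on every host.  The removed code XORed a
	 * native u32 load of the key instead, which only produced this
	 * layout on little-endian machines. */
	put_le32(dst, d0);
	for (int i = 0; i < 4; i++)
		dst[i] ^= key[i];

	printf("%02x %02x %02x %02x\n", dst[0], dst[1], dst[2], dst[3]);
	return 0;
}
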
+ 18 - 7
crypto/ahash.c

@@ -364,24 +364,35 @@ static int crypto_ahash_op(struct ahash_request *req,
 
 int crypto_ahash_final(struct ahash_request *req)
 {
-	return crypto_ahash_op(req, crypto_ahash_reqtfm(req)->final);
+	int ret;
+
+	ret = crypto_ahash_op(req, crypto_ahash_reqtfm(req)->final);
+	crypto_stat_ahash_final(req, ret);
+	return ret;
 }
 EXPORT_SYMBOL_GPL(crypto_ahash_final);
 
 int crypto_ahash_finup(struct ahash_request *req)
 {
-	return crypto_ahash_op(req, crypto_ahash_reqtfm(req)->finup);
+	int ret;
+
+	ret = crypto_ahash_op(req, crypto_ahash_reqtfm(req)->finup);
+	crypto_stat_ahash_final(req, ret);
+	return ret;
 }
 EXPORT_SYMBOL_GPL(crypto_ahash_finup);
 
 int crypto_ahash_digest(struct ahash_request *req)
 {
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	int ret;
 
 	if (crypto_ahash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY)
-		return -ENOKEY;
-
-	return crypto_ahash_op(req, tfm->digest);
+		ret = -ENOKEY;
+	else
+		ret = crypto_ahash_op(req, tfm->digest);
+	crypto_stat_ahash_final(req, ret);
+	return ret;
 }
 EXPORT_SYMBOL_GPL(crypto_ahash_digest);
 
@@ -550,8 +561,8 @@ static int ahash_prepare_alg(struct ahash_alg *alg)
 {
 	struct crypto_alg *base = &alg->halg.base;
 
-	if (alg->halg.digestsize > PAGE_SIZE / 8 ||
-	    alg->halg.statesize > PAGE_SIZE / 8 ||
+	if (alg->halg.digestsize > HASH_MAX_DIGESTSIZE ||
+	    alg->halg.statesize > HASH_MAX_STATESIZE ||
 	    alg->halg.statesize == 0)
 		return -EINVAL;
 

+ 16 - 1
crypto/algapi.c

@@ -57,9 +57,14 @@ static int crypto_check_alg(struct crypto_alg *alg)
 	if (alg->cra_alignmask & (alg->cra_alignmask + 1))
 		return -EINVAL;
 
-	if (alg->cra_blocksize > PAGE_SIZE / 8)
+	/* General maximums for all algs. */
+	if (alg->cra_alignmask > MAX_ALGAPI_ALIGNMASK)
 		return -EINVAL;
 
+	if (alg->cra_blocksize > MAX_ALGAPI_BLOCKSIZE)
+		return -EINVAL;
+
+	/* Lower maximums for specific alg types. */
 	if (!alg->cra_type && (alg->cra_flags & CRYPTO_ALG_TYPE_MASK) ==
 			       CRYPTO_ALG_TYPE_CIPHER) {
 		if (alg->cra_alignmask > MAX_CIPHER_ALIGNMASK)
@@ -253,6 +258,14 @@ static struct crypto_larval *__crypto_register_alg(struct crypto_alg *alg)
 	list_add(&alg->cra_list, &crypto_alg_list);
 	list_add(&larval->alg.cra_list, &crypto_alg_list);
 
+	atomic_set(&alg->encrypt_cnt, 0);
+	atomic_set(&alg->decrypt_cnt, 0);
+	atomic64_set(&alg->encrypt_tlen, 0);
+	atomic64_set(&alg->decrypt_tlen, 0);
+	atomic_set(&alg->verify_cnt, 0);
+	atomic_set(&alg->cipher_err_cnt, 0);
+	atomic_set(&alg->sign_cnt, 0);
+
 out:
 	return larval;
 
@@ -367,6 +380,8 @@ static void crypto_wait_for_test(struct crypto_larval *larval)
 
 	err = wait_for_completion_killable(&larval->completion);
 	WARN_ON(err);
+	if (!err)
+		crypto_probing_notify(CRYPTO_MSG_ALG_LOADED, larval);
 
 out:
 	crypto_larval_kill(&larval->alg);

+ 2 - 0
crypto/algboss.c

@@ -274,6 +274,8 @@ static int cryptomgr_notify(struct notifier_block *this, unsigned long msg,
 		return cryptomgr_schedule_probe(data);
 	case CRYPTO_MSG_ALG_REGISTER:
 		return cryptomgr_schedule_test(data);
+	case CRYPTO_MSG_ALG_LOADED:
+		break;
 	}
 
 	return NOTIFY_DONE;

+ 6 - 6
crypto/algif_aead.c

@@ -42,7 +42,7 @@
 
 struct aead_tfm {
 	struct crypto_aead *aead;
-	struct crypto_skcipher *null_tfm;
+	struct crypto_sync_skcipher *null_tfm;
 };
 
 static inline bool aead_sufficient_data(struct sock *sk)
@@ -75,13 +75,13 @@ static int aead_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
 	return af_alg_sendmsg(sock, msg, size, ivsize);
 }
 
-static int crypto_aead_copy_sgl(struct crypto_skcipher *null_tfm,
+static int crypto_aead_copy_sgl(struct crypto_sync_skcipher *null_tfm,
 				struct scatterlist *src,
 				struct scatterlist *dst, unsigned int len)
 {
-	SKCIPHER_REQUEST_ON_STACK(skreq, null_tfm);
+	SYNC_SKCIPHER_REQUEST_ON_STACK(skreq, null_tfm);
 
-	skcipher_request_set_tfm(skreq, null_tfm);
+	skcipher_request_set_sync_tfm(skreq, null_tfm);
 	skcipher_request_set_callback(skreq, CRYPTO_TFM_REQ_MAY_BACKLOG,
 				      NULL, NULL);
 	skcipher_request_set_crypt(skreq, src, dst, len, NULL);
@@ -99,7 +99,7 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
 	struct af_alg_ctx *ctx = ask->private;
 	struct aead_tfm *aeadc = pask->private;
 	struct crypto_aead *tfm = aeadc->aead;
-	struct crypto_skcipher *null_tfm = aeadc->null_tfm;
+	struct crypto_sync_skcipher *null_tfm = aeadc->null_tfm;
 	unsigned int i, as = crypto_aead_authsize(tfm);
 	struct af_alg_async_req *areq;
 	struct af_alg_tsgl *tsgl, *tmp;
@@ -478,7 +478,7 @@ static void *aead_bind(const char *name, u32 type, u32 mask)
 {
 	struct aead_tfm *tfm;
 	struct crypto_aead *aead;
-	struct crypto_skcipher *null_tfm;
+	struct crypto_sync_skcipher *null_tfm;
 
 	tfm = kzalloc(sizeof(*tfm), GFP_KERNEL);
 	if (!tfm)

+ 1 - 1
crypto/algif_hash.c

@@ -239,7 +239,7 @@ static int hash_accept(struct socket *sock, struct socket *newsock, int flags,
 	struct alg_sock *ask = alg_sk(sk);
 	struct hash_ctx *ctx = ask->private;
 	struct ahash_request *req = &ctx->req;
-	char state[crypto_ahash_statesize(crypto_ahash_reqtfm(req)) ? : 1];
+	char state[HASH_MAX_STATESIZE];
 	struct sock *sk2;
 	struct alg_sock *ask2;
 	struct hash_ctx *ctx2;

+ 4 - 4
crypto/authenc.c

@@ -33,7 +33,7 @@ struct authenc_instance_ctx {
 struct crypto_authenc_ctx {
 	struct crypto_ahash *auth;
 	struct crypto_skcipher *enc;
-	struct crypto_skcipher *null;
+	struct crypto_sync_skcipher *null;
 };
 
 struct authenc_request_ctx {
@@ -185,9 +185,9 @@ static int crypto_authenc_copy_assoc(struct aead_request *req)
 {
 	struct crypto_aead *authenc = crypto_aead_reqtfm(req);
 	struct crypto_authenc_ctx *ctx = crypto_aead_ctx(authenc);
-	SKCIPHER_REQUEST_ON_STACK(skreq, ctx->null);
+	SYNC_SKCIPHER_REQUEST_ON_STACK(skreq, ctx->null);
 
-	skcipher_request_set_tfm(skreq, ctx->null);
+	skcipher_request_set_sync_tfm(skreq, ctx->null);
 	skcipher_request_set_callback(skreq, aead_request_flags(req),
 				      NULL, NULL);
 	skcipher_request_set_crypt(skreq, req->src, req->dst, req->assoclen,
@@ -318,7 +318,7 @@ static int crypto_authenc_init_tfm(struct crypto_aead *tfm)
 	struct crypto_authenc_ctx *ctx = crypto_aead_ctx(tfm);
 	struct crypto_ahash *auth;
 	struct crypto_skcipher *enc;
-	struct crypto_skcipher *null;
+	struct crypto_sync_skcipher *null;
 	int err;
 
 	auth = crypto_spawn_ahash(&ictx->auth);

+ 4 - 4
crypto/authencesn.c

@@ -36,7 +36,7 @@ struct crypto_authenc_esn_ctx {
 	unsigned int reqoff;
 	struct crypto_ahash *auth;
 	struct crypto_skcipher *enc;
-	struct crypto_skcipher *null;
+	struct crypto_sync_skcipher *null;
 };
 
 struct authenc_esn_request_ctx {
@@ -183,9 +183,9 @@ static int crypto_authenc_esn_copy(struct aead_request *req, unsigned int len)
 {
 	struct crypto_aead *authenc_esn = crypto_aead_reqtfm(req);
 	struct crypto_authenc_esn_ctx *ctx = crypto_aead_ctx(authenc_esn);
-	SKCIPHER_REQUEST_ON_STACK(skreq, ctx->null);
+	SYNC_SKCIPHER_REQUEST_ON_STACK(skreq, ctx->null);
 
-	skcipher_request_set_tfm(skreq, ctx->null);
+	skcipher_request_set_sync_tfm(skreq, ctx->null);
 	skcipher_request_set_callback(skreq, aead_request_flags(req),
 				      NULL, NULL);
 	skcipher_request_set_crypt(skreq, req->src, req->dst, len, NULL);
@@ -341,7 +341,7 @@ static int crypto_authenc_esn_init_tfm(struct crypto_aead *tfm)
 	struct crypto_authenc_esn_ctx *ctx = crypto_aead_ctx(tfm);
 	struct crypto_ahash *auth;
 	struct crypto_skcipher *enc;
-	struct crypto_skcipher *null;
+	struct crypto_sync_skcipher *null;
 	int err;
 
 	auth = crypto_spawn_ahash(&ictx->auth);

+ 6 - 3
crypto/ccm.c

@@ -50,7 +50,10 @@ struct crypto_ccm_req_priv_ctx {
 	u32 flags;
 	struct scatterlist src[3];
 	struct scatterlist dst[3];
-	struct skcipher_request skreq;
+	union {
+		struct ahash_request ahreq;
+		struct skcipher_request skreq;
+	};
 };
 
 struct cbcmac_tfm_ctx {
@@ -181,7 +184,7 @@ static int crypto_ccm_auth(struct aead_request *req, struct scatterlist *plain,
 	struct crypto_ccm_req_priv_ctx *pctx = crypto_ccm_reqctx(req);
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
 	struct crypto_ccm_ctx *ctx = crypto_aead_ctx(aead);
-	AHASH_REQUEST_ON_STACK(ahreq, ctx->mac);
+	struct ahash_request *ahreq = &pctx->ahreq;
 	unsigned int assoclen = req->assoclen;
 	struct scatterlist sg[3];
 	u8 *odata = pctx->odata;
@@ -427,7 +430,7 @@ static int crypto_ccm_init_tfm(struct crypto_aead *tfm)
 	crypto_aead_set_reqsize(
 		tfm,
 		align + sizeof(struct crypto_ccm_req_priv_ctx) +
-		crypto_skcipher_reqsize(ctr));
+		max(crypto_ahash_reqsize(mac), crypto_skcipher_reqsize(ctr)));
 
 	return 0;
 

+ 4 - 3
crypto/chacha20_generic.c

@@ -18,20 +18,21 @@
 static void chacha20_docrypt(u32 *state, u8 *dst, const u8 *src,
 			     unsigned int bytes)
 {
-	u32 stream[CHACHA20_BLOCK_WORDS];
+	/* aligned to potentially speed up crypto_xor() */
+	u8 stream[CHACHA20_BLOCK_SIZE] __aligned(sizeof(long));
 
 	if (dst != src)
 		memcpy(dst, src, bytes);
 
 	while (bytes >= CHACHA20_BLOCK_SIZE) {
 		chacha20_block(state, stream);
-		crypto_xor(dst, (const u8 *)stream, CHACHA20_BLOCK_SIZE);
+		crypto_xor(dst, stream, CHACHA20_BLOCK_SIZE);
 		bytes -= CHACHA20_BLOCK_SIZE;
 		dst += CHACHA20_BLOCK_SIZE;
 	}
 	if (bytes) {
 		chacha20_block(state, stream);
-		crypto_xor(dst, (const u8 *)stream, bytes);
+		crypto_xor(dst, stream, bytes);
 	}
 }
 

+ 17 - 15
crypto/cryptd.c

@@ -76,7 +76,7 @@ struct cryptd_blkcipher_request_ctx {
 
 struct cryptd_skcipher_ctx {
 	atomic_t refcnt;
-	struct crypto_skcipher *child;
+	struct crypto_sync_skcipher *child;
 };
 
 struct cryptd_skcipher_request_ctx {
@@ -449,14 +449,16 @@ static int cryptd_skcipher_setkey(struct crypto_skcipher *parent,
 				  const u8 *key, unsigned int keylen)
 {
 	struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(parent);
-	struct crypto_skcipher *child = ctx->child;
+	struct crypto_sync_skcipher *child = ctx->child;
 	int err;
 
-	crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
-	crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(parent) &
+	crypto_sync_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
+	crypto_sync_skcipher_set_flags(child,
+				       crypto_skcipher_get_flags(parent) &
 					 CRYPTO_TFM_REQ_MASK);
-	err = crypto_skcipher_setkey(child, key, keylen);
-	crypto_skcipher_set_flags(parent, crypto_skcipher_get_flags(child) &
+	err = crypto_sync_skcipher_setkey(child, key, keylen);
+	crypto_skcipher_set_flags(parent,
+				  crypto_sync_skcipher_get_flags(child) &
 					  CRYPTO_TFM_RES_MASK);
 	return err;
 }
@@ -483,13 +485,13 @@ static void cryptd_skcipher_encrypt(struct crypto_async_request *base,
 	struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req);
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
-	struct crypto_skcipher *child = ctx->child;
-	SKCIPHER_REQUEST_ON_STACK(subreq, child);
+	struct crypto_sync_skcipher *child = ctx->child;
+	SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, child);
 
 	if (unlikely(err == -EINPROGRESS))
 		goto out;
 
-	skcipher_request_set_tfm(subreq, child);
+	skcipher_request_set_sync_tfm(subreq, child);
 	skcipher_request_set_callback(subreq, CRYPTO_TFM_REQ_MAY_SLEEP,
 				      NULL, NULL);
 	skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen,
@@ -511,13 +513,13 @@ static void cryptd_skcipher_decrypt(struct crypto_async_request *base,
 	struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req);
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
-	struct crypto_skcipher *child = ctx->child;
-	SKCIPHER_REQUEST_ON_STACK(subreq, child);
+	struct crypto_sync_skcipher *child = ctx->child;
+	SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, child);
 
 	if (unlikely(err == -EINPROGRESS))
 		goto out;
 
-	skcipher_request_set_tfm(subreq, child);
+	skcipher_request_set_sync_tfm(subreq, child);
 	skcipher_request_set_callback(subreq, CRYPTO_TFM_REQ_MAY_SLEEP,
 				      NULL, NULL);
 	skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen,
@@ -568,7 +570,7 @@ static int cryptd_skcipher_init_tfm(struct crypto_skcipher *tfm)
 	if (IS_ERR(cipher))
 		return PTR_ERR(cipher);
 
-	ctx->child = cipher;
+	ctx->child = (struct crypto_sync_skcipher *)cipher;
 	crypto_skcipher_set_reqsize(
 		tfm, sizeof(struct cryptd_skcipher_request_ctx));
 	return 0;
@@ -578,7 +580,7 @@ static void cryptd_skcipher_exit_tfm(struct crypto_skcipher *tfm)
 {
 	struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	crypto_free_skcipher(ctx->child);
+	crypto_free_sync_skcipher(ctx->child);
 }
 
 static void cryptd_skcipher_free(struct skcipher_instance *inst)
@@ -1243,7 +1245,7 @@ struct crypto_skcipher *cryptd_skcipher_child(struct cryptd_skcipher *tfm)
 {
 	struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(&tfm->base);
 
-	return ctx->child;
+	return &ctx->child->base;
 }
 EXPORT_SYMBOL_GPL(cryptd_skcipher_child);
 

+ 5 - 6
crypto/crypto_null.c

@@ -26,7 +26,7 @@
 #include <linux/string.h>
 
 static DEFINE_MUTEX(crypto_default_null_skcipher_lock);
-static struct crypto_skcipher *crypto_default_null_skcipher;
+static struct crypto_sync_skcipher *crypto_default_null_skcipher;
 static int crypto_default_null_skcipher_refcnt;
 
 static int null_compress(struct crypto_tfm *tfm, const u8 *src,
@@ -152,16 +152,15 @@ MODULE_ALIAS_CRYPTO("compress_null");
 MODULE_ALIAS_CRYPTO("digest_null");
 MODULE_ALIAS_CRYPTO("cipher_null");
 
-struct crypto_skcipher *crypto_get_default_null_skcipher(void)
+struct crypto_sync_skcipher *crypto_get_default_null_skcipher(void)
 {
-	struct crypto_skcipher *tfm;
+	struct crypto_sync_skcipher *tfm;
 
 	mutex_lock(&crypto_default_null_skcipher_lock);
 	tfm = crypto_default_null_skcipher;
 
 	if (!tfm) {
-		tfm = crypto_alloc_skcipher("ecb(cipher_null)",
-					    0, CRYPTO_ALG_ASYNC);
+		tfm = crypto_alloc_sync_skcipher("ecb(cipher_null)", 0, 0);
 		if (IS_ERR(tfm))
 			goto unlock;
 
@@ -181,7 +180,7 @@ void crypto_put_default_null_skcipher(void)
 {
 	mutex_lock(&crypto_default_null_skcipher_lock);
 	if (!--crypto_default_null_skcipher_refcnt) {
-		crypto_free_skcipher(crypto_default_null_skcipher);
+		crypto_free_sync_skcipher(crypto_default_null_skcipher);
 		crypto_default_null_skcipher = NULL;
 	}
 	mutex_unlock(&crypto_default_null_skcipher_lock);

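The skcipher conversions above (algif_aead.c, authenc.c, authencesn.c, cryptd.c, crypto_null.c) all follow the same new calling pattern: a crypto_sync_skcipher is allocated, and its request lives on the stack via SYNC_SKCIPHER_REQUEST_ON_STACK, whose size is bounded, instead of the old variable-length SKCIPHER_REQUEST_ON_STACK. What follows is a condensed sketch of that pattern, pieced together from the hunks above rather than lifted from any single file; the function names null_copy() and null_alloc() are illustrative, and error handling is trimmed.

#include <crypto/skcipher.h>
#include <linux/err.h>
#include <linux/scatterlist.h>

static int null_copy(struct crypto_sync_skcipher *null_tfm,
		     struct scatterlist *src, struct scatterlist *dst,
		     unsigned int len)
{
	/* Request size of a sync tfm is bounded, so it may live on the stack. */
	SYNC_SKCIPHER_REQUEST_ON_STACK(req, null_tfm);
	int err;

	skcipher_request_set_sync_tfm(req, null_tfm);
	skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP,
				      NULL, NULL);
	skcipher_request_set_crypt(req, src, dst, len, NULL);
	err = crypto_skcipher_encrypt(req);
	skcipher_request_zero(req);	/* wipe the on-stack request */
	return err;
}

static struct crypto_sync_skcipher *null_alloc(void)
{
	/* Synchronous-only allocation, as in crypto_null.c above. */
	return crypto_alloc_sync_skcipher("ecb(cipher_null)", 0, 0);
}

A caller would allocate with null_alloc(), check the result with IS_ERR(), pass it to null_copy() as needed, and release it with crypto_free_sync_skcipher(); keeping the request on the stack is what the VLA removal in this series is about.
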
+ 7 - 2
crypto/crypto_user.c → crypto/crypto_user_base.c

@@ -29,6 +29,7 @@
 #include <crypto/internal/rng.h>
 #include <crypto/akcipher.h>
 #include <crypto/kpp.h>
+#include <crypto/internal/cryptouser.h>
 
 #include "internal.h"
 
@@ -37,7 +38,7 @@
 static DEFINE_MUTEX(crypto_cfg_mutex);
 
 /* The crypto netlink socket */
-static struct sock *crypto_nlsk;
+struct sock *crypto_nlsk;
 
 struct crypto_dump_info {
 	struct sk_buff *in_skb;
@@ -46,7 +47,7 @@ struct crypto_dump_info {
 	u16 nlmsg_flags;
 };
 
-static struct crypto_alg *crypto_alg_match(struct crypto_user_alg *p, int exact)
+struct crypto_alg *crypto_alg_match(struct crypto_user_alg *p, int exact)
 {
 	struct crypto_alg *q, *alg = NULL;
 
@@ -461,6 +462,7 @@ static const int crypto_msg_min[CRYPTO_NR_MSGTYPES] = {
 	[CRYPTO_MSG_UPDATEALG	- CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg),
 	[CRYPTO_MSG_GETALG	- CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg),
 	[CRYPTO_MSG_DELRNG	- CRYPTO_MSG_BASE] = 0,
+	[CRYPTO_MSG_GETSTAT	- CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg),
 };
 
 static const struct nla_policy crypto_policy[CRYPTOCFGA_MAX+1] = {
@@ -481,6 +483,9 @@ static const struct crypto_link {
 						       .dump = crypto_dump_report,
 						       .done = crypto_dump_report_done},
 	[CRYPTO_MSG_DELRNG	- CRYPTO_MSG_BASE] = { .doit = crypto_del_rng },
+	[CRYPTO_MSG_GETSTAT	- CRYPTO_MSG_BASE] = { .doit = crypto_reportstat,
+						       .dump = crypto_dump_reportstat,
+						       .done = crypto_dump_reportstat_done},
 };
 
 static int crypto_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,

+ 463 - 0
crypto/crypto_user_stat.c

@@ -0,0 +1,463 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Crypto user configuration API.
+ *
+ * Copyright (C) 2017-2018 Corentin Labbe <clabbe@baylibre.com>
+ *
+ */
+
+#include <linux/crypto.h>
+#include <linux/cryptouser.h>
+#include <linux/sched.h>
+#include <net/netlink.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/internal/rng.h>
+#include <crypto/akcipher.h>
+#include <crypto/kpp.h>
+#include <crypto/internal/cryptouser.h>
+
+#include "internal.h"
+
+#define null_terminated(x)	(strnlen(x, sizeof(x)) < sizeof(x))
+
+static DEFINE_MUTEX(crypto_cfg_mutex);
+
+extern struct sock *crypto_nlsk;
+
+struct crypto_dump_info {
+	struct sk_buff *in_skb;
+	struct sk_buff *out_skb;
+	u32 nlmsg_seq;
+	u16 nlmsg_flags;
+};
+
+static int crypto_report_aead(struct sk_buff *skb, struct crypto_alg *alg)
+{
+	struct crypto_stat raead;
+	u64 v64;
+	u32 v32;
+
+	strncpy(raead.type, "aead", sizeof(raead.type));
+
+	v32 = atomic_read(&alg->encrypt_cnt);
+	raead.stat_encrypt_cnt = v32;
+	v64 = atomic64_read(&alg->encrypt_tlen);
+	raead.stat_encrypt_tlen = v64;
+	v32 = atomic_read(&alg->decrypt_cnt);
+	raead.stat_decrypt_cnt = v32;
+	v64 = atomic64_read(&alg->decrypt_tlen);
+	raead.stat_decrypt_tlen = v64;
+	v32 = atomic_read(&alg->aead_err_cnt);
+	raead.stat_aead_err_cnt = v32;
+
+	if (nla_put(skb, CRYPTOCFGA_STAT_AEAD,
+		    sizeof(struct crypto_stat), &raead))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static int crypto_report_cipher(struct sk_buff *skb, struct crypto_alg *alg)
+{
+	struct crypto_stat rcipher;
+	u64 v64;
+	u32 v32;
+
+	strlcpy(rcipher.type, "cipher", sizeof(rcipher.type));
+
+	v32 = atomic_read(&alg->encrypt_cnt);
+	rcipher.stat_encrypt_cnt = v32;
+	v64 = atomic64_read(&alg->encrypt_tlen);
+	rcipher.stat_encrypt_tlen = v64;
+	v32 = atomic_read(&alg->decrypt_cnt);
+	rcipher.stat_decrypt_cnt = v32;
+	v64 = atomic64_read(&alg->decrypt_tlen);
+	rcipher.stat_decrypt_tlen = v64;
+	v32 = atomic_read(&alg->cipher_err_cnt);
+	rcipher.stat_cipher_err_cnt = v32;
+
+	if (nla_put(skb, CRYPTOCFGA_STAT_CIPHER,
+		    sizeof(struct crypto_stat), &rcipher))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static int crypto_report_comp(struct sk_buff *skb, struct crypto_alg *alg)
+{
+	struct crypto_stat rcomp;
+	u64 v64;
+	u32 v32;
+
+	strlcpy(rcomp.type, "compression", sizeof(rcomp.type));
+	v32 = atomic_read(&alg->compress_cnt);
+	rcomp.stat_compress_cnt = v32;
+	v64 = atomic64_read(&alg->compress_tlen);
+	rcomp.stat_compress_tlen = v64;
+	v32 = atomic_read(&alg->decompress_cnt);
+	rcomp.stat_decompress_cnt = v32;
+	v64 = atomic64_read(&alg->decompress_tlen);
+	rcomp.stat_decompress_tlen = v64;
+	v32 = atomic_read(&alg->cipher_err_cnt);
+	rcomp.stat_compress_err_cnt = v32;
+
+	if (nla_put(skb, CRYPTOCFGA_STAT_COMPRESS,
+		    sizeof(struct crypto_stat), &rcomp))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static int crypto_report_acomp(struct sk_buff *skb, struct crypto_alg *alg)
+{
+	struct crypto_stat racomp;
+	u64 v64;
+	u32 v32;
+
+	strlcpy(racomp.type, "acomp", sizeof(racomp.type));
+	v32 = atomic_read(&alg->compress_cnt);
+	racomp.stat_compress_cnt = v32;
+	v64 = atomic64_read(&alg->compress_tlen);
+	racomp.stat_compress_tlen = v64;
+	v32 = atomic_read(&alg->decompress_cnt);
+	racomp.stat_decompress_cnt = v32;
+	v64 = atomic64_read(&alg->decompress_tlen);
+	racomp.stat_decompress_tlen = v64;
+	v32 = atomic_read(&alg->cipher_err_cnt);
+	racomp.stat_compress_err_cnt = v32;
+
+	if (nla_put(skb, CRYPTOCFGA_STAT_ACOMP,
+		    sizeof(struct crypto_stat), &racomp))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static int crypto_report_akcipher(struct sk_buff *skb, struct crypto_alg *alg)
+{
+	struct crypto_stat rakcipher;
+	u64 v64;
+	u32 v32;
+
+	strncpy(rakcipher.type, "akcipher", sizeof(rakcipher.type));
+	v32 = atomic_read(&alg->encrypt_cnt);
+	rakcipher.stat_encrypt_cnt = v32;
+	v64 = atomic64_read(&alg->encrypt_tlen);
+	rakcipher.stat_encrypt_tlen = v64;
+	v32 = atomic_read(&alg->decrypt_cnt);
+	rakcipher.stat_decrypt_cnt = v32;
+	v64 = atomic64_read(&alg->decrypt_tlen);
+	rakcipher.stat_decrypt_tlen = v64;
+	v32 = atomic_read(&alg->sign_cnt);
+	rakcipher.stat_sign_cnt = v32;
+	v32 = atomic_read(&alg->verify_cnt);
+	rakcipher.stat_verify_cnt = v32;
+	v32 = atomic_read(&alg->akcipher_err_cnt);
+	rakcipher.stat_akcipher_err_cnt = v32;
+
+	if (nla_put(skb, CRYPTOCFGA_STAT_AKCIPHER,
+		    sizeof(struct crypto_stat), &rakcipher))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static int crypto_report_kpp(struct sk_buff *skb, struct crypto_alg *alg)
+{
+	struct crypto_stat rkpp;
+	u32 v;
+
+	strlcpy(rkpp.type, "kpp", sizeof(rkpp.type));
+
+	v = atomic_read(&alg->setsecret_cnt);
+	rkpp.stat_setsecret_cnt = v;
+	v = atomic_read(&alg->generate_public_key_cnt);
+	rkpp.stat_generate_public_key_cnt = v;
+	v = atomic_read(&alg->compute_shared_secret_cnt);
+	rkpp.stat_compute_shared_secret_cnt = v;
+	v = atomic_read(&alg->kpp_err_cnt);
+	rkpp.stat_kpp_err_cnt = v;
+
+	if (nla_put(skb, CRYPTOCFGA_STAT_KPP,
+		    sizeof(struct crypto_stat), &rkpp))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static int crypto_report_ahash(struct sk_buff *skb, struct crypto_alg *alg)
+{
+	struct crypto_stat rhash;
+	u64 v64;
+	u32 v32;
+
+	strncpy(rhash.type, "ahash", sizeof(rhash.type));
+
+	v32 = atomic_read(&alg->hash_cnt);
+	rhash.stat_hash_cnt = v32;
+	v64 = atomic64_read(&alg->hash_tlen);
+	rhash.stat_hash_tlen = v64;
+	v32 = atomic_read(&alg->hash_err_cnt);
+	rhash.stat_hash_err_cnt = v32;
+
+	if (nla_put(skb, CRYPTOCFGA_STAT_HASH,
+		    sizeof(struct crypto_stat), &rhash))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static int crypto_report_shash(struct sk_buff *skb, struct crypto_alg *alg)
+{
+	struct crypto_stat rhash;
+	u64 v64;
+	u32 v32;
+
+	strncpy(rhash.type, "shash", sizeof(rhash.type));
+
+	v32 = atomic_read(&alg->hash_cnt);
+	rhash.stat_hash_cnt = v32;
+	v64 = atomic64_read(&alg->hash_tlen);
+	rhash.stat_hash_tlen = v64;
+	v32 = atomic_read(&alg->hash_err_cnt);
+	rhash.stat_hash_err_cnt = v32;
+
+	if (nla_put(skb, CRYPTOCFGA_STAT_HASH,
+		    sizeof(struct crypto_stat), &rhash))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static int crypto_report_rng(struct sk_buff *skb, struct crypto_alg *alg)
+{
+	struct crypto_stat rrng;
+	u64 v64;
+	u32 v32;
+
+	strncpy(rrng.type, "rng", sizeof(rrng.type));
+
+	v32 = atomic_read(&alg->generate_cnt);
+	rrng.stat_generate_cnt = v32;
+	v64 = atomic64_read(&alg->generate_tlen);
+	rrng.stat_generate_tlen = v64;
+	v32 = atomic_read(&alg->seed_cnt);
+	rrng.stat_seed_cnt = v32;
+	v32 = atomic_read(&alg->hash_err_cnt);
+	rrng.stat_rng_err_cnt = v32;
+
+	if (nla_put(skb, CRYPTOCFGA_STAT_RNG,
+		    sizeof(struct crypto_stat), &rrng))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static int crypto_reportstat_one(struct crypto_alg *alg,
+				 struct crypto_user_alg *ualg,
+				 struct sk_buff *skb)
+{
+	strlcpy(ualg->cru_name, alg->cra_name, sizeof(ualg->cru_name));
+	strlcpy(ualg->cru_driver_name, alg->cra_driver_name,
+		sizeof(ualg->cru_driver_name));
+	strlcpy(ualg->cru_module_name, module_name(alg->cra_module),
+		sizeof(ualg->cru_module_name));
+
+	ualg->cru_type = 0;
+	ualg->cru_mask = 0;
+	ualg->cru_flags = alg->cra_flags;
+	ualg->cru_refcnt = refcount_read(&alg->cra_refcnt);
+
+	if (nla_put_u32(skb, CRYPTOCFGA_PRIORITY_VAL, alg->cra_priority))
+		goto nla_put_failure;
+	if (alg->cra_flags & CRYPTO_ALG_LARVAL) {
+		struct crypto_stat rl;
+
+		strlcpy(rl.type, "larval", sizeof(rl.type));
+		if (nla_put(skb, CRYPTOCFGA_STAT_LARVAL,
+			    sizeof(struct crypto_stat), &rl))
+			goto nla_put_failure;
+		goto out;
+	}
+
+	switch (alg->cra_flags & (CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_LARVAL)) {
+	case CRYPTO_ALG_TYPE_AEAD:
+		if (crypto_report_aead(skb, alg))
+			goto nla_put_failure;
+		break;
+	case CRYPTO_ALG_TYPE_SKCIPHER:
+		if (crypto_report_cipher(skb, alg))
+			goto nla_put_failure;
+		break;
+	case CRYPTO_ALG_TYPE_BLKCIPHER:
+		if (crypto_report_cipher(skb, alg))
+			goto nla_put_failure;
+		break;
+	case CRYPTO_ALG_TYPE_CIPHER:
+		if (crypto_report_cipher(skb, alg))
+			goto nla_put_failure;
+		break;
+	case CRYPTO_ALG_TYPE_COMPRESS:
+		if (crypto_report_comp(skb, alg))
+			goto nla_put_failure;
+		break;
+	case CRYPTO_ALG_TYPE_ACOMPRESS:
+		if (crypto_report_acomp(skb, alg))
+			goto nla_put_failure;
+		break;
+	case CRYPTO_ALG_TYPE_SCOMPRESS:
+		if (crypto_report_acomp(skb, alg))
+			goto nla_put_failure;
+		break;
+	case CRYPTO_ALG_TYPE_AKCIPHER:
+		if (crypto_report_akcipher(skb, alg))
+			goto nla_put_failure;
+		break;
+	case CRYPTO_ALG_TYPE_KPP:
+		if (crypto_report_kpp(skb, alg))
+			goto nla_put_failure;
+		break;
+	case CRYPTO_ALG_TYPE_AHASH:
+		if (crypto_report_ahash(skb, alg))
+			goto nla_put_failure;
+		break;
+	case CRYPTO_ALG_TYPE_HASH:
+		if (crypto_report_shash(skb, alg))
+			goto nla_put_failure;
+		break;
+	case CRYPTO_ALG_TYPE_RNG:
+		if (crypto_report_rng(skb, alg))
+			goto nla_put_failure;
+		break;
+	default:
+		pr_err("ERROR: Unhandled alg %d in %s\n",
+		       alg->cra_flags & (CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_LARVAL),
+		       __func__);
+	}
+
+out:
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static int crypto_reportstat_alg(struct crypto_alg *alg,
+				 struct crypto_dump_info *info)
+{
+	struct sk_buff *in_skb = info->in_skb;
+	struct sk_buff *skb = info->out_skb;
+	struct nlmsghdr *nlh;
+	struct crypto_user_alg *ualg;
+	int err = 0;
+
+	nlh = nlmsg_put(skb, NETLINK_CB(in_skb).portid, info->nlmsg_seq,
+			CRYPTO_MSG_GETSTAT, sizeof(*ualg), info->nlmsg_flags);
+	if (!nlh) {
+		err = -EMSGSIZE;
+		goto out;
+	}
+
+	ualg = nlmsg_data(nlh);
+
+	err = crypto_reportstat_one(alg, ualg, skb);
+	if (err) {
+		nlmsg_cancel(skb, nlh);
+		goto out;
+	}
+
+	nlmsg_end(skb, nlh);
+
+out:
+	return err;
+}
+
+int crypto_reportstat(struct sk_buff *in_skb, struct nlmsghdr *in_nlh,
+		      struct nlattr **attrs)
+{
+	struct crypto_user_alg *p = nlmsg_data(in_nlh);
+	struct crypto_alg *alg;
+	struct sk_buff *skb;
+	struct crypto_dump_info info;
+	int err;
+
+	if (!null_terminated(p->cru_name) || !null_terminated(p->cru_driver_name))
+		return -EINVAL;
+
+	alg = crypto_alg_match(p, 0);
+	if (!alg)
+		return -ENOENT;
+
+	err = -ENOMEM;
+	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
+	if (!skb)
+		goto drop_alg;
+
+	info.in_skb = in_skb;
+	info.out_skb = skb;
+	info.nlmsg_seq = in_nlh->nlmsg_seq;
+	info.nlmsg_flags = 0;
+
+	err = crypto_reportstat_alg(alg, &info);
+
+drop_alg:
+	crypto_mod_put(alg);
+
+	if (err)
+		return err;
+
+	return nlmsg_unicast(crypto_nlsk, skb, NETLINK_CB(in_skb).portid);
+}
+
+int crypto_dump_reportstat(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct crypto_alg *alg;
+	struct crypto_dump_info info;
+	int err;
+
+	if (cb->args[0])
+		goto out;
+
+	cb->args[0] = 1;
+
+	info.in_skb = cb->skb;
+	info.out_skb = skb;
+	info.nlmsg_seq = cb->nlh->nlmsg_seq;
+	info.nlmsg_flags = NLM_F_MULTI;
+
+	list_for_each_entry(alg, &crypto_alg_list, cra_list) {
+		err = crypto_reportstat_alg(alg, &info);
+		if (err)
+			goto out_err;
+	}
+
+out:
+	return skb->len;
+out_err:
+	return err;
+}
+
+int crypto_dump_reportstat_done(struct netlink_callback *cb)
+{
+	return 0;
+}
+
+MODULE_LICENSE("GPL");

+ 2 - 2
crypto/echainiv.c

@@ -47,9 +47,9 @@ static int echainiv_encrypt(struct aead_request *req)
 	info = req->iv;

 	if (req->src != req->dst) {
-		SKCIPHER_REQUEST_ON_STACK(nreq, ctx->sknull);
+		SYNC_SKCIPHER_REQUEST_ON_STACK(nreq, ctx->sknull);

-		skcipher_request_set_tfm(nreq, ctx->sknull);
+		skcipher_request_set_sync_tfm(nreq, ctx->sknull);
 		skcipher_request_set_callback(nreq, req->base.flags,
 					      NULL, NULL);
 		skcipher_request_set_crypt(nreq, req->src, req->dst,

+ 4 - 4
crypto/gcm.c

@@ -50,7 +50,7 @@ struct crypto_rfc4543_instance_ctx {

 struct crypto_rfc4543_ctx {
 	struct crypto_aead *child;
-	struct crypto_skcipher *null;
+	struct crypto_sync_skcipher *null;
 	u8 nonce[4];
 };

@@ -1067,9 +1067,9 @@ static int crypto_rfc4543_copy_src_to_dst(struct aead_request *req, bool enc)
 	unsigned int authsize = crypto_aead_authsize(aead);
 	unsigned int nbytes = req->assoclen + req->cryptlen -
 			      (enc ? 0 : authsize);
-	SKCIPHER_REQUEST_ON_STACK(nreq, ctx->null);
+	SYNC_SKCIPHER_REQUEST_ON_STACK(nreq, ctx->null);

-	skcipher_request_set_tfm(nreq, ctx->null);
+	skcipher_request_set_sync_tfm(nreq, ctx->null);
 	skcipher_request_set_callback(nreq, req->base.flags, NULL, NULL);
 	skcipher_request_set_crypt(nreq, req->src, req->dst, nbytes, NULL);

@@ -1093,7 +1093,7 @@ static int crypto_rfc4543_init_tfm(struct crypto_aead *tfm)
 	struct crypto_aead_spawn *spawn = &ictx->aead;
 	struct crypto_rfc4543_ctx *ctx = crypto_aead_ctx(tfm);
 	struct crypto_aead *aead;
-	struct crypto_skcipher *null;
+	struct crypto_sync_skcipher *null;
 	unsigned long align;
 	int err = 0;


+ 0 - 8
crypto/internal.h

@@ -26,12 +26,6 @@
 #include <linux/rwsem.h>
 #include <linux/slab.h>

-/* Crypto notification events. */
-enum {
-	CRYPTO_MSG_ALG_REQUEST,
-	CRYPTO_MSG_ALG_REGISTER,
-};
-
 struct crypto_instance;
 struct crypto_template;

@@ -90,8 +84,6 @@ struct crypto_alg *crypto_find_alg(const char *alg_name,
 void *crypto_alloc_tfm(const char *alg_name,
 		       const struct crypto_type *frontend, u32 type, u32 mask);

-int crypto_register_notifier(struct notifier_block *nb);
-int crypto_unregister_notifier(struct notifier_block *nb);
 int crypto_probing_notify(unsigned long val, void *v);

 unsigned int crypto_alg_extsize(struct crypto_alg *alg);

+ 89 - 250
crypto/lrw.c

@@ -29,8 +29,6 @@
 #include <crypto/b128ops.h>
 #include <crypto/gf128mul.h>

-#define LRW_BUFFER_SIZE 128u
-
 #define LRW_BLOCK_SIZE 16

 struct priv {
@@ -56,19 +54,7 @@ struct priv {
 };

 struct rctx {
-	be128 buf[LRW_BUFFER_SIZE / sizeof(be128)];
-
 	be128 t;
-
-	be128 *ext;
-
-	struct scatterlist srcbuf[2];
-	struct scatterlist dstbuf[2];
-	struct scatterlist *src;
-	struct scatterlist *dst;
-
-	unsigned int left;
-
 	struct skcipher_request subreq;
 };

@@ -120,112 +106,68 @@ static int setkey(struct crypto_skcipher *parent, const u8 *key,
 	return 0;
 }

-static inline void inc(be128 *iv)
-{
-	be64_add_cpu(&iv->b, 1);
-	if (!iv->b)
-		be64_add_cpu(&iv->a, 1);
-}
-
-/* this returns the number of consequative 1 bits starting
- * from the right, get_index128(00 00 00 00 00 00 ... 00 00 10 FB) = 2 */
-static inline int get_index128(be128 *block)
+/*
+ * Returns the number of trailing '1' bits in the words of the counter, which is
+ * represented by 4 32-bit words, arranged from least to most significant.
+ * At the same time, increments the counter by one.
+ *
+ * For example:
+ *
+ * u32 counter[4] = { 0xFFFFFFFF, 0x1, 0x0, 0x0 };
+ * int i = next_index(&counter);
+ * // i == 33, counter == { 0x0, 0x2, 0x0, 0x0 }
+ */
+static int next_index(u32 *counter)
 {
-	int x;
-	__be32 *p = (__be32 *) block;
+	int i, res = 0;

-	for (p += 3, x = 0; x < 128; p--, x += 32) {
-		u32 val = be32_to_cpup(p);
+	for (i = 0; i < 4; i++) {
+		if (counter[i] + 1 != 0)
+			return res + ffz(counter[i]++);

-		if (!~val)
-			continue;
-
-		return x + ffz(val);
+		counter[i] = 0;
+		res += 32;
 	}

-	return x;
+	/*
+	 * If we get here, then x == 128 and we are incrementing the counter
+	 * from all ones to all zeros. This means we must return index 127, i.e.
+	 * the one corresponding to key2*{ 1,...,1 }.
+	 */
+	return 127;
 }

-static int post_crypt(struct skcipher_request *req)
+/*
+ * We compute the tweak masks twice (both before and after the ECB encryption or
+ * decryption) to avoid having to allocate a temporary buffer and/or make
+ * mutliple calls to the 'ecb(..)' instance, which usually would be slower than
+ * just doing the next_index() calls again.
+ */
+static int xor_tweak(struct skcipher_request *req, bool second_pass)
 {
-	struct rctx *rctx = skcipher_request_ctx(req);
-	be128 *buf = rctx->ext ?: rctx->buf;
-	struct skcipher_request *subreq;
 	const int bs = LRW_BLOCK_SIZE;
-	struct skcipher_walk w;
-	struct scatterlist *sg;
-	unsigned offset;
-	int err;
-
-	subreq = &rctx->subreq;
-	err = skcipher_walk_virt(&w, subreq, false);
-
-	while (w.nbytes) {
-		unsigned int avail = w.nbytes;
-		be128 *wdst;
-
-		wdst = w.dst.virt.addr;
-
-		do {
-			be128_xor(wdst, buf++, wdst);
-			wdst++;
-		} while ((avail -= bs) >= bs);
-
-		err = skcipher_walk_done(&w, avail);
-	}
-
-	rctx->left -= subreq->cryptlen;
-
-	if (err || !rctx->left)
-		goto out;
-
-	rctx->dst = rctx->dstbuf;
-
-	scatterwalk_done(&w.out, 0, 1);
-	sg = w.out.sg;
-	offset = w.out.offset;
-
-	if (rctx->dst != sg) {
-		rctx->dst[0] = *sg;
-		sg_unmark_end(rctx->dst);
-		scatterwalk_crypto_chain(rctx->dst, sg_next(sg), 2);
-	}
-	rctx->dst[0].length -= offset - sg->offset;
-	rctx->dst[0].offset = offset;
-
-out:
-	return err;
-}
-
-static int pre_crypt(struct skcipher_request *req)
-{
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	struct rctx *rctx = skcipher_request_ctx(req);
 	struct priv *ctx = crypto_skcipher_ctx(tfm);
-	be128 *buf = rctx->ext ?: rctx->buf;
-	struct skcipher_request *subreq;
-	const int bs = LRW_BLOCK_SIZE;
+	struct rctx *rctx = skcipher_request_ctx(req);
+	be128 t = rctx->t;
 	struct skcipher_walk w;
-	struct scatterlist *sg;
-	unsigned cryptlen;
-	unsigned offset;
-	be128 *iv;
-	bool more;
+	__be32 *iv;
+	u32 counter[4];
 	int err;

-	subreq = &rctx->subreq;
-	skcipher_request_set_tfm(subreq, tfm);
-
-	cryptlen = subreq->cryptlen;
-	more = rctx->left > cryptlen;
-	if (!more)
-		cryptlen = rctx->left;
+	if (second_pass) {
+		req = &rctx->subreq;
+		/* set to our TFM to enforce correct alignment: */
+		skcipher_request_set_tfm(req, tfm);
+	}

-	skcipher_request_set_crypt(subreq, rctx->src, rctx->dst,
-				   cryptlen, req->iv);
+	err = skcipher_walk_virt(&w, req, false);
+	iv = (__be32 *)w.iv;

-	err = skcipher_walk_virt(&w, subreq, false);
-	iv = w.iv;
+	counter[0] = be32_to_cpu(iv[3]);
+	counter[1] = be32_to_cpu(iv[2]);
+	counter[2] = be32_to_cpu(iv[1]);
+	counter[3] = be32_to_cpu(iv[0]);

 	while (w.nbytes) {
 		unsigned int avail = w.nbytes;
@@ -236,188 +178,85 @@ static int pre_crypt(struct skcipher_request *req)
 		wdst = w.dst.virt.addr;

 		do {
-			*buf++ = rctx->t;
-			be128_xor(wdst++, &rctx->t, wsrc++);
+			be128_xor(wdst++, &t, wsrc++);

 			/* T <- I*Key2, using the optimization
 			 * discussed in the specification */
-			be128_xor(&rctx->t, &rctx->t,
-				  &ctx->mulinc[get_index128(iv)]);
-			inc(iv);
+			be128_xor(&t, &t, &ctx->mulinc[next_index(counter)]);
 		} while ((avail -= bs) >= bs);

-		err = skcipher_walk_done(&w, avail);
-	}
-
-	skcipher_request_set_tfm(subreq, ctx->child);
-	skcipher_request_set_crypt(subreq, rctx->dst, rctx->dst,
-				   cryptlen, NULL);
-
-	if (err || !more)
-		goto out;
-
-	rctx->src = rctx->srcbuf;
-
-	scatterwalk_done(&w.in, 0, 1);
-	sg = w.in.sg;
-	offset = w.in.offset;
+		if (second_pass && w.nbytes == w.total) {
+			iv[0] = cpu_to_be32(counter[3]);
+			iv[1] = cpu_to_be32(counter[2]);
+			iv[2] = cpu_to_be32(counter[1]);
+			iv[3] = cpu_to_be32(counter[0]);
+		}

-	if (rctx->src != sg) {
-		rctx->src[0] = *sg;
-		sg_unmark_end(rctx->src);
-		scatterwalk_crypto_chain(rctx->src, sg_next(sg), 2);
+		err = skcipher_walk_done(&w, avail);
 	}
-	rctx->src[0].length -= offset - sg->offset;
-	rctx->src[0].offset = offset;

-out:
 	return err;
 }

-static int init_crypt(struct skcipher_request *req, crypto_completion_t done)
+static int xor_tweak_pre(struct skcipher_request *req)
 {
-	struct priv *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
-	struct rctx *rctx = skcipher_request_ctx(req);
-	struct skcipher_request *subreq;
-	gfp_t gfp;
-
-	subreq = &rctx->subreq;
-	skcipher_request_set_callback(subreq, req->base.flags, done, req);
-
-	gfp = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL :
-							   GFP_ATOMIC;
-	rctx->ext = NULL;
-
-	subreq->cryptlen = LRW_BUFFER_SIZE;
-	if (req->cryptlen > LRW_BUFFER_SIZE) {
-		unsigned int n = min(req->cryptlen, (unsigned int)PAGE_SIZE);
-
-		rctx->ext = kmalloc(n, gfp);
-		if (rctx->ext)
-			subreq->cryptlen = n;
-	}
-
-	rctx->src = req->src;
-	rctx->dst = req->dst;
-	rctx->left = req->cryptlen;
-
-	/* calculate first value of T */
-	memcpy(&rctx->t, req->iv, sizeof(rctx->t));
-
-	/* T <- I*Key2 */
-	gf128mul_64k_bbe(&rctx->t, ctx->table);
-
-	return 0;
+	return xor_tweak(req, false);
 }

-static void exit_crypt(struct skcipher_request *req)
+static int xor_tweak_post(struct skcipher_request *req)
 {
-	struct rctx *rctx = skcipher_request_ctx(req);
-
-	rctx->left = 0;
-
-	if (rctx->ext)
-		kzfree(rctx->ext);
+	return xor_tweak(req, true);
 }

-static int do_encrypt(struct skcipher_request *req, int err)
-{
-	struct rctx *rctx = skcipher_request_ctx(req);
-	struct skcipher_request *subreq;
-
-	subreq = &rctx->subreq;
-
-	while (!err && rctx->left) {
-		err = pre_crypt(req) ?:
-		      crypto_skcipher_encrypt(subreq) ?:
-		      post_crypt(req);
-
-		if (err == -EINPROGRESS || err == -EBUSY)
-			return err;
-	}
-
-	exit_crypt(req);
-	return err;
-}
-
-static void encrypt_done(struct crypto_async_request *areq, int err)
+static void crypt_done(struct crypto_async_request *areq, int err)
 {
 	struct skcipher_request *req = areq->data;
-	struct skcipher_request *subreq;
-	struct rctx *rctx;

-	rctx = skcipher_request_ctx(req);
+	if (!err)
+		err = xor_tweak_post(req);

-	if (err == -EINPROGRESS) {
-		if (rctx->left != req->cryptlen)
-			return;
-		goto out;
-	}
-
-	subreq = &rctx->subreq;
-	subreq->base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG;
-
-	err = do_encrypt(req, err ?: post_crypt(req));
-	if (rctx->left)
-		return;
-
-out:
 	skcipher_request_complete(req, err);
 }

-static int encrypt(struct skcipher_request *req)
-{
-	return do_encrypt(req, init_crypt(req, encrypt_done));
-}
-
-static int do_decrypt(struct skcipher_request *req, int err)
+static void init_crypt(struct skcipher_request *req)
 {
+	struct priv *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
 	struct rctx *rctx = skcipher_request_ctx(req);
-	struct skcipher_request *subreq;
-
-	subreq = &rctx->subreq;
+	struct skcipher_request *subreq = &rctx->subreq;

-	while (!err && rctx->left) {
-		err = pre_crypt(req) ?:
-		      crypto_skcipher_decrypt(subreq) ?:
-		      post_crypt(req);
+	skcipher_request_set_tfm(subreq, ctx->child);
+	skcipher_request_set_callback(subreq, req->base.flags, crypt_done, req);
+	/* pass req->iv as IV (will be used by xor_tweak, ECB will ignore it) */
+	skcipher_request_set_crypt(subreq, req->dst, req->dst,
+				   req->cryptlen, req->iv);

-		if (err == -EINPROGRESS || err == -EBUSY)
-			return err;
-	}
+	/* calculate first value of T */
+	memcpy(&rctx->t, req->iv, sizeof(rctx->t));

-	exit_crypt(req);
-	return err;
+	/* T <- I*Key2 */
+	gf128mul_64k_bbe(&rctx->t, ctx->table);
 }

-static void decrypt_done(struct crypto_async_request *areq, int err)
+static int encrypt(struct skcipher_request *req)
 {
-	struct skcipher_request *req = areq->data;
-	struct skcipher_request *subreq;
-	struct rctx *rctx;
-
-	rctx = skcipher_request_ctx(req);
-
-	if (err == -EINPROGRESS) {
-		if (rctx->left != req->cryptlen)
-			return;
-		goto out;
-	}
-
-	subreq = &rctx->subreq;
-	subreq->base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG;
-
-	err = do_decrypt(req, err ?: post_crypt(req));
-	if (rctx->left)
-		return;
+	struct rctx *rctx = skcipher_request_ctx(req);
+	struct skcipher_request *subreq = &rctx->subreq;

-out:
-	skcipher_request_complete(req, err);
+	init_crypt(req);
+	return xor_tweak_pre(req) ?:
+		crypto_skcipher_encrypt(subreq) ?:
+		xor_tweak_post(req);
 }

 static int decrypt(struct skcipher_request *req)
 {
-	return do_decrypt(req, init_crypt(req, decrypt_done));
+	struct rctx *rctx = skcipher_request_ctx(req);
+	struct skcipher_request *subreq = &rctx->subreq;
+
+	init_crypt(req);
+	return xor_tweak_pre(req) ?:
+		crypto_skcipher_decrypt(subreq) ?:
+		xor_tweak_post(req);
 }

 static int init_tfm(struct crypto_skcipher *tfm)
@@ -543,7 +382,7 @@ static int create(struct crypto_template *tmpl, struct rtattr **tb)
 	inst->alg.base.cra_priority = alg->base.cra_priority;
 	inst->alg.base.cra_blocksize = LRW_BLOCK_SIZE;
 	inst->alg.base.cra_alignmask = alg->base.cra_alignmask |
-				       (__alignof__(u64) - 1);
+				       (__alignof__(__be32) - 1);

 	inst->alg.ivsize = LRW_BLOCK_SIZE;
 	inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(alg) +

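The new next_index() both reports the number of trailing one bits and advances the counter, replacing the old get_index128()/inc() pair. A stand-alone sketch of the same logic, an assumption for illustration only (plain user-space C with a __builtin_ctz()-based stand-in for the kernel's ffz()), makes the behaviour documented in the new comment easy to check:

/*
 * Stand-alone illustration of the next_index() logic added in lrw.c above.
 * Assumption: user-space C with GCC/Clang builtins instead of kernel helpers.
 */
#include <stdint.h>
#include <stdio.h>

static int ffz32(uint32_t x)		/* index of the lowest zero bit */
{
	return __builtin_ctz(~x);	/* only called with x != 0xFFFFFFFF */
}

static int next_index(uint32_t *counter)
{
	int i, res = 0;

	for (i = 0; i < 4; i++) {
		if (counter[i] + 1 != 0)
			return res + ffz32(counter[i]++);
		counter[i] = 0;
		res += 32;
	}
	return 127;	/* counter rolled over from all ones to all zeros */
}

int main(void)
{
	uint32_t counter[4] = { 0xFFFFFFFF, 0x1, 0x0, 0x0 };

	/* Matches the comment's example: prints index=33, counter[1]=2. */
	printf("index=%d counter[1]=%u\n", next_index(counter), counter[1]);
	return 0;
}
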
+ 0 - 675
crypto/mcryptd.c

@@ -1,675 +0,0 @@
-/*
- * Software multibuffer async crypto daemon.
- *
- * Copyright (c) 2014 Tim Chen <tim.c.chen@linux.intel.com>
- *
- * Adapted from crypto daemon.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- */
-
-#include <crypto/algapi.h>
-#include <crypto/internal/hash.h>
-#include <crypto/internal/aead.h>
-#include <crypto/mcryptd.h>
-#include <crypto/crypto_wq.h>
-#include <linux/err.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/list.h>
-#include <linux/module.h>
-#include <linux/scatterlist.h>
-#include <linux/sched.h>
-#include <linux/sched/stat.h>
-#include <linux/slab.h>
-
-#define MCRYPTD_MAX_CPU_QLEN 100
-#define MCRYPTD_BATCH 9
-
-static void *mcryptd_alloc_instance(struct crypto_alg *alg, unsigned int head,
-				   unsigned int tail);
-
-struct mcryptd_flush_list {
-	struct list_head list;
-	struct mutex lock;
-};
-
-static struct mcryptd_flush_list __percpu *mcryptd_flist;
-
-struct hashd_instance_ctx {
-	struct crypto_ahash_spawn spawn;
-	struct mcryptd_queue *queue;
-};
-
-static void mcryptd_queue_worker(struct work_struct *work);
-
-void mcryptd_arm_flusher(struct mcryptd_alg_cstate *cstate, unsigned long delay)
-{
-	struct mcryptd_flush_list *flist;
-
-	if (!cstate->flusher_engaged) {
-		/* put the flusher on the flush list */
-		flist = per_cpu_ptr(mcryptd_flist, smp_processor_id());
-		mutex_lock(&flist->lock);
-		list_add_tail(&cstate->flush_list, &flist->list);
-		cstate->flusher_engaged = true;
-		cstate->next_flush = jiffies + delay;
-		queue_delayed_work_on(smp_processor_id(), kcrypto_wq,
-			&cstate->flush, delay);
-		mutex_unlock(&flist->lock);
-	}
-}
-EXPORT_SYMBOL(mcryptd_arm_flusher);
-
-static int mcryptd_init_queue(struct mcryptd_queue *queue,
-			     unsigned int max_cpu_qlen)
-{
-	int cpu;
-	struct mcryptd_cpu_queue *cpu_queue;
-
-	queue->cpu_queue = alloc_percpu(struct mcryptd_cpu_queue);
-	pr_debug("mqueue:%p mcryptd_cpu_queue %p\n", queue, queue->cpu_queue);
-	if (!queue->cpu_queue)
-		return -ENOMEM;
-	for_each_possible_cpu(cpu) {
-		cpu_queue = per_cpu_ptr(queue->cpu_queue, cpu);
-		pr_debug("cpu_queue #%d %p\n", cpu, queue->cpu_queue);
-		crypto_init_queue(&cpu_queue->queue, max_cpu_qlen);
-		INIT_WORK(&cpu_queue->work, mcryptd_queue_worker);
-		spin_lock_init(&cpu_queue->q_lock);
-	}
-	return 0;
-}
-
-static void mcryptd_fini_queue(struct mcryptd_queue *queue)
-{
-	int cpu;
-	struct mcryptd_cpu_queue *cpu_queue;
-
-	for_each_possible_cpu(cpu) {
-		cpu_queue = per_cpu_ptr(queue->cpu_queue, cpu);
-		BUG_ON(cpu_queue->queue.qlen);
-	}
-	free_percpu(queue->cpu_queue);
-}
-
-static int mcryptd_enqueue_request(struct mcryptd_queue *queue,
-				  struct crypto_async_request *request,
-				  struct mcryptd_hash_request_ctx *rctx)
-{
-	int cpu, err;
-	struct mcryptd_cpu_queue *cpu_queue;
-
-	cpu_queue = raw_cpu_ptr(queue->cpu_queue);
-	spin_lock(&cpu_queue->q_lock);
-	cpu = smp_processor_id();
-	rctx->tag.cpu = smp_processor_id();
-
-	err = crypto_enqueue_request(&cpu_queue->queue, request);
-	pr_debug("enqueue request: cpu %d cpu_queue %p request %p\n",
-		 cpu, cpu_queue, request);
-	spin_unlock(&cpu_queue->q_lock);
-	queue_work_on(cpu, kcrypto_wq, &cpu_queue->work);
-
-	return err;
-}
-
-/*
- * Try to opportunisticlly flush the partially completed jobs if
- * crypto daemon is the only task running.
- */
-static void mcryptd_opportunistic_flush(void)
-{
-	struct mcryptd_flush_list *flist;
-	struct mcryptd_alg_cstate *cstate;
-
-	flist = per_cpu_ptr(mcryptd_flist, smp_processor_id());
-	while (single_task_running()) {
-		mutex_lock(&flist->lock);
-		cstate = list_first_entry_or_null(&flist->list,
-				struct mcryptd_alg_cstate, flush_list);
-		if (!cstate || !cstate->flusher_engaged) {
-			mutex_unlock(&flist->lock);
-			return;
-		}
-		list_del(&cstate->flush_list);
-		cstate->flusher_engaged = false;
-		mutex_unlock(&flist->lock);
-		cstate->alg_state->flusher(cstate);
-	}
-}
-
-/*
- * Called in workqueue context, do one real cryption work (via
- * req->complete) and reschedule itself if there are more work to
- * do.
- */
-static void mcryptd_queue_worker(struct work_struct *work)
-{
-	struct mcryptd_cpu_queue *cpu_queue;
-	struct crypto_async_request *req, *backlog;
-	int i;
-
-	/*
-	 * Need to loop through more than once for multi-buffer to
-	 * be effective.
-	 */
-
-	cpu_queue = container_of(work, struct mcryptd_cpu_queue, work);
-	i = 0;
-	while (i < MCRYPTD_BATCH || single_task_running()) {
-
-		spin_lock_bh(&cpu_queue->q_lock);
-		backlog = crypto_get_backlog(&cpu_queue->queue);
-		req = crypto_dequeue_request(&cpu_queue->queue);
-		spin_unlock_bh(&cpu_queue->q_lock);
-
-		if (!req) {
-			mcryptd_opportunistic_flush();
-			return;
-		}
-
-		if (backlog)
-			backlog->complete(backlog, -EINPROGRESS);
-		req->complete(req, 0);
-		if (!cpu_queue->queue.qlen)
-			return;
-		++i;
-	}
-	if (cpu_queue->queue.qlen)
-		queue_work_on(smp_processor_id(), kcrypto_wq, &cpu_queue->work);
-}
-
-void mcryptd_flusher(struct work_struct *__work)
-{
-	struct	mcryptd_alg_cstate	*alg_cpu_state;
-	struct	mcryptd_alg_state	*alg_state;
-	struct	mcryptd_flush_list	*flist;
-	int	cpu;
-
-	cpu = smp_processor_id();
-	alg_cpu_state = container_of(to_delayed_work(__work),
-				     struct mcryptd_alg_cstate, flush);
-	alg_state = alg_cpu_state->alg_state;
-	if (alg_cpu_state->cpu != cpu)
-		pr_debug("mcryptd error: work on cpu %d, should be cpu %d\n",
-				cpu, alg_cpu_state->cpu);
-
-	if (alg_cpu_state->flusher_engaged) {
-		flist = per_cpu_ptr(mcryptd_flist, cpu);
-		mutex_lock(&flist->lock);
-		list_del(&alg_cpu_state->flush_list);
-		alg_cpu_state->flusher_engaged = false;
-		mutex_unlock(&flist->lock);
-		alg_state->flusher(alg_cpu_state);
-	}
-}
-EXPORT_SYMBOL_GPL(mcryptd_flusher);
-
-static inline struct mcryptd_queue *mcryptd_get_queue(struct crypto_tfm *tfm)
-{
-	struct crypto_instance *inst = crypto_tfm_alg_instance(tfm);
-	struct mcryptd_instance_ctx *ictx = crypto_instance_ctx(inst);
-
-	return ictx->queue;
-}
-
-static void *mcryptd_alloc_instance(struct crypto_alg *alg, unsigned int head,
-				   unsigned int tail)
-{
-	char *p;
-	struct crypto_instance *inst;
-	int err;
-
-	p = kzalloc(head + sizeof(*inst) + tail, GFP_KERNEL);
-	if (!p)
-		return ERR_PTR(-ENOMEM);
-
-	inst = (void *)(p + head);
-
-	err = -ENAMETOOLONG;
-	if (snprintf(inst->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME,
-		    "mcryptd(%s)", alg->cra_driver_name) >= CRYPTO_MAX_ALG_NAME)
-		goto out_free_inst;
-
-	memcpy(inst->alg.cra_name, alg->cra_name, CRYPTO_MAX_ALG_NAME);
-
-	inst->alg.cra_priority = alg->cra_priority + 50;
-	inst->alg.cra_blocksize = alg->cra_blocksize;
-	inst->alg.cra_alignmask = alg->cra_alignmask;
-
-out:
-	return p;
-
-out_free_inst:
-	kfree(p);
-	p = ERR_PTR(err);
-	goto out;
-}
-
-static inline bool mcryptd_check_internal(struct rtattr **tb, u32 *type,
-					  u32 *mask)
-{
-	struct crypto_attr_type *algt;
-
-	algt = crypto_get_attr_type(tb);
-	if (IS_ERR(algt))
-		return false;
-
-	*type |= algt->type & CRYPTO_ALG_INTERNAL;
-	*mask |= algt->mask & CRYPTO_ALG_INTERNAL;
-
-	if (*type & *mask & CRYPTO_ALG_INTERNAL)
-		return true;
-	else
-		return false;
-}
-
-static int mcryptd_hash_init_tfm(struct crypto_tfm *tfm)
-{
-	struct crypto_instance *inst = crypto_tfm_alg_instance(tfm);
-	struct hashd_instance_ctx *ictx = crypto_instance_ctx(inst);
-	struct crypto_ahash_spawn *spawn = &ictx->spawn;
-	struct mcryptd_hash_ctx *ctx = crypto_tfm_ctx(tfm);
-	struct crypto_ahash *hash;
-
-	hash = crypto_spawn_ahash(spawn);
-	if (IS_ERR(hash))
-		return PTR_ERR(hash);
-
-	ctx->child = hash;
-	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
-				 sizeof(struct mcryptd_hash_request_ctx) +
-				 crypto_ahash_reqsize(hash));
-	return 0;
-}
-
-static void mcryptd_hash_exit_tfm(struct crypto_tfm *tfm)
-{
-	struct mcryptd_hash_ctx *ctx = crypto_tfm_ctx(tfm);
-
-	crypto_free_ahash(ctx->child);
-}
-
-static int mcryptd_hash_setkey(struct crypto_ahash *parent,
-				   const u8 *key, unsigned int keylen)
-{
-	struct mcryptd_hash_ctx *ctx   = crypto_ahash_ctx(parent);
-	struct crypto_ahash *child = ctx->child;
-	int err;
-
-	crypto_ahash_clear_flags(child, CRYPTO_TFM_REQ_MASK);
-	crypto_ahash_set_flags(child, crypto_ahash_get_flags(parent) &
-				      CRYPTO_TFM_REQ_MASK);
-	err = crypto_ahash_setkey(child, key, keylen);
-	crypto_ahash_set_flags(parent, crypto_ahash_get_flags(child) &
-				       CRYPTO_TFM_RES_MASK);
-	return err;
-}
-
-static int mcryptd_hash_enqueue(struct ahash_request *req,
-				crypto_completion_t complete)
-{
-	int ret;
-
-	struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct mcryptd_queue *queue =
-		mcryptd_get_queue(crypto_ahash_tfm(tfm));
-
-	rctx->complete = req->base.complete;
-	req->base.complete = complete;
-
-	ret = mcryptd_enqueue_request(queue, &req->base, rctx);
-
-	return ret;
-}
-
-static void mcryptd_hash_init(struct crypto_async_request *req_async, int err)
-{
-	struct mcryptd_hash_ctx *ctx = crypto_tfm_ctx(req_async->tfm);
-	struct crypto_ahash *child = ctx->child;
-	struct ahash_request *req = ahash_request_cast(req_async);
-	struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
-	struct ahash_request *desc = &rctx->areq;
-
-	if (unlikely(err == -EINPROGRESS))
-		goto out;
-
-	ahash_request_set_tfm(desc, child);
-	ahash_request_set_callback(desc, CRYPTO_TFM_REQ_MAY_SLEEP,
-						rctx->complete, req_async);
-
-	rctx->out = req->result;
-	err = crypto_ahash_init(desc);
-
-out:
-	local_bh_disable();
-	rctx->complete(&req->base, err);
-	local_bh_enable();
-}
-
-static int mcryptd_hash_init_enqueue(struct ahash_request *req)
-{
-	return mcryptd_hash_enqueue(req, mcryptd_hash_init);
-}
-
-static void mcryptd_hash_update(struct crypto_async_request *req_async, int err)
-{
-	struct ahash_request *req = ahash_request_cast(req_async);
-	struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
-
-	if (unlikely(err == -EINPROGRESS))
-		goto out;
-
-	rctx->out = req->result;
-	err = crypto_ahash_update(&rctx->areq);
-	if (err) {
-		req->base.complete = rctx->complete;
-		goto out;
-	}
-
-	return;
-out:
-	local_bh_disable();
-	rctx->complete(&req->base, err);
-	local_bh_enable();
-}
-
-static int mcryptd_hash_update_enqueue(struct ahash_request *req)
-{
-	return mcryptd_hash_enqueue(req, mcryptd_hash_update);
-}
-
-static void mcryptd_hash_final(struct crypto_async_request *req_async, int err)
-{
-	struct ahash_request *req = ahash_request_cast(req_async);
-	struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
-
-	if (unlikely(err == -EINPROGRESS))
-		goto out;
-
-	rctx->out = req->result;
-	err = crypto_ahash_final(&rctx->areq);
-	if (err) {
-		req->base.complete = rctx->complete;
-		goto out;
-	}
-
-	return;
-out:
-	local_bh_disable();
-	rctx->complete(&req->base, err);
-	local_bh_enable();
-}
-
-static int mcryptd_hash_final_enqueue(struct ahash_request *req)
-{
-	return mcryptd_hash_enqueue(req, mcryptd_hash_final);
-}
-
-static void mcryptd_hash_finup(struct crypto_async_request *req_async, int err)
-{
-	struct ahash_request *req = ahash_request_cast(req_async);
-	struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
-
-	if (unlikely(err == -EINPROGRESS))
-		goto out;
-	rctx->out = req->result;
-	err = crypto_ahash_finup(&rctx->areq);
-
-	if (err) {
-		req->base.complete = rctx->complete;
-		goto out;
-	}
-
-	return;
-out:
-	local_bh_disable();
-	rctx->complete(&req->base, err);
-	local_bh_enable();
-}
-
-static int mcryptd_hash_finup_enqueue(struct ahash_request *req)
-{
-	return mcryptd_hash_enqueue(req, mcryptd_hash_finup);
-}
-
-static void mcryptd_hash_digest(struct crypto_async_request *req_async, int err)
-{
-	struct mcryptd_hash_ctx *ctx = crypto_tfm_ctx(req_async->tfm);
-	struct crypto_ahash *child = ctx->child;
-	struct ahash_request *req = ahash_request_cast(req_async);
-	struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
-	struct ahash_request *desc = &rctx->areq;
-
-	if (unlikely(err == -EINPROGRESS))
-		goto out;
-
-	ahash_request_set_tfm(desc, child);
-	ahash_request_set_callback(desc, CRYPTO_TFM_REQ_MAY_SLEEP,
-						rctx->complete, req_async);
-
-	rctx->out = req->result;
-	err = crypto_ahash_init(desc) ?: crypto_ahash_finup(desc);
-
-out:
-	local_bh_disable();
-	rctx->complete(&req->base, err);
-	local_bh_enable();
-}
-
-static int mcryptd_hash_digest_enqueue(struct ahash_request *req)
-{
-	return mcryptd_hash_enqueue(req, mcryptd_hash_digest);
-}
-
-static int mcryptd_hash_export(struct ahash_request *req, void *out)
-{
-	struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
-
-	return crypto_ahash_export(&rctx->areq, out);
-}
-
-static int mcryptd_hash_import(struct ahash_request *req, const void *in)
-{
-	struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
-
-	return crypto_ahash_import(&rctx->areq, in);
-}
-
-static int mcryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb,
-			      struct mcryptd_queue *queue)
-{
-	struct hashd_instance_ctx *ctx;
-	struct ahash_instance *inst;
-	struct hash_alg_common *halg;
-	struct crypto_alg *alg;
-	u32 type = 0;
-	u32 mask = 0;
-	int err;
-
-	if (!mcryptd_check_internal(tb, &type, &mask))
-		return -EINVAL;
-
-	halg = ahash_attr_alg(tb[1], type, mask);
-	if (IS_ERR(halg))
-		return PTR_ERR(halg);
-
-	alg = &halg->base;
-	pr_debug("crypto: mcryptd hash alg: %s\n", alg->cra_name);
-	inst = mcryptd_alloc_instance(alg, ahash_instance_headroom(),
-					sizeof(*ctx));
-	err = PTR_ERR(inst);
-	if (IS_ERR(inst))
-		goto out_put_alg;
-
-	ctx = ahash_instance_ctx(inst);
-	ctx->queue = queue;
-
-	err = crypto_init_ahash_spawn(&ctx->spawn, halg,
-				      ahash_crypto_instance(inst));
-	if (err)
-		goto out_free_inst;
-
-	inst->alg.halg.base.cra_flags = CRYPTO_ALG_ASYNC |
-		(alg->cra_flags & (CRYPTO_ALG_INTERNAL |
-				   CRYPTO_ALG_OPTIONAL_KEY));
-
-	inst->alg.halg.digestsize = halg->digestsize;
-	inst->alg.halg.statesize = halg->statesize;
-	inst->alg.halg.base.cra_ctxsize = sizeof(struct mcryptd_hash_ctx);
-
-	inst->alg.halg.base.cra_init = mcryptd_hash_init_tfm;
-	inst->alg.halg.base.cra_exit = mcryptd_hash_exit_tfm;
-
-	inst->alg.init   = mcryptd_hash_init_enqueue;
-	inst->alg.update = mcryptd_hash_update_enqueue;
-	inst->alg.final  = mcryptd_hash_final_enqueue;
-	inst->alg.finup  = mcryptd_hash_finup_enqueue;
-	inst->alg.export = mcryptd_hash_export;
-	inst->alg.import = mcryptd_hash_import;
-	if (crypto_hash_alg_has_setkey(halg))
-		inst->alg.setkey = mcryptd_hash_setkey;
-	inst->alg.digest = mcryptd_hash_digest_enqueue;
-
-	err = ahash_register_instance(tmpl, inst);
-	if (err) {
-		crypto_drop_ahash(&ctx->spawn);
-out_free_inst:
-		kfree(inst);
-	}
-
-out_put_alg:
-	crypto_mod_put(alg);
-	return err;
-}
-
-static struct mcryptd_queue mqueue;
-
-static int mcryptd_create(struct crypto_template *tmpl, struct rtattr **tb)
-{
-	struct crypto_attr_type *algt;
-
-	algt = crypto_get_attr_type(tb);
-	if (IS_ERR(algt))
-		return PTR_ERR(algt);
-
-	switch (algt->type & algt->mask & CRYPTO_ALG_TYPE_MASK) {
-	case CRYPTO_ALG_TYPE_DIGEST:
-		return mcryptd_create_hash(tmpl, tb, &mqueue);
-	break;
-	}
-
-	return -EINVAL;
-}
-
-static void mcryptd_free(struct crypto_instance *inst)
-{
-	struct mcryptd_instance_ctx *ctx = crypto_instance_ctx(inst);
-	struct hashd_instance_ctx *hctx = crypto_instance_ctx(inst);
-
-	switch (inst->alg.cra_flags & CRYPTO_ALG_TYPE_MASK) {
-	case CRYPTO_ALG_TYPE_AHASH:
-		crypto_drop_ahash(&hctx->spawn);
-		kfree(ahash_instance(inst));
-		return;
-	default:
-		crypto_drop_spawn(&ctx->spawn);
-		kfree(inst);
-	}
-}
-
-static struct crypto_template mcryptd_tmpl = {
-	.name = "mcryptd",
-	.create = mcryptd_create,
-	.free = mcryptd_free,
-	.module = THIS_MODULE,
-};
-
-struct mcryptd_ahash *mcryptd_alloc_ahash(const char *alg_name,
-					u32 type, u32 mask)
-{
-	char mcryptd_alg_name[CRYPTO_MAX_ALG_NAME];
-	struct crypto_ahash *tfm;
-
-	if (snprintf(mcryptd_alg_name, CRYPTO_MAX_ALG_NAME,
-		     "mcryptd(%s)", alg_name) >= CRYPTO_MAX_ALG_NAME)
-		return ERR_PTR(-EINVAL);
-	tfm = crypto_alloc_ahash(mcryptd_alg_name, type, mask);
-	if (IS_ERR(tfm))
-		return ERR_CAST(tfm);
-	if (tfm->base.__crt_alg->cra_module != THIS_MODULE) {
-		crypto_free_ahash(tfm);
-		return ERR_PTR(-EINVAL);
-	}
-
-	return __mcryptd_ahash_cast(tfm);
-}
-EXPORT_SYMBOL_GPL(mcryptd_alloc_ahash);
-
-struct crypto_ahash *mcryptd_ahash_child(struct mcryptd_ahash *tfm)
-{
-	struct mcryptd_hash_ctx *ctx = crypto_ahash_ctx(&tfm->base);
-
-	return ctx->child;
-}
-EXPORT_SYMBOL_GPL(mcryptd_ahash_child);
-
-struct ahash_request *mcryptd_ahash_desc(struct ahash_request *req)
-{
-	struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
-	return &rctx->areq;
-}
-EXPORT_SYMBOL_GPL(mcryptd_ahash_desc);
-
-void mcryptd_free_ahash(struct mcryptd_ahash *tfm)
-{
-	crypto_free_ahash(&tfm->base);
-}
-EXPORT_SYMBOL_GPL(mcryptd_free_ahash);
-
-static int __init mcryptd_init(void)
-{
-	int err, cpu;
-	struct mcryptd_flush_list *flist;
-
-	mcryptd_flist = alloc_percpu(struct mcryptd_flush_list);
-	for_each_possible_cpu(cpu) {
-		flist = per_cpu_ptr(mcryptd_flist, cpu);
-		INIT_LIST_HEAD(&flist->list);
-		mutex_init(&flist->lock);
-	}
-
-	err = mcryptd_init_queue(&mqueue, MCRYPTD_MAX_CPU_QLEN);
-	if (err) {
-		free_percpu(mcryptd_flist);
-		return err;
-	}
-
-	err = crypto_register_template(&mcryptd_tmpl);
-	if (err) {
-		mcryptd_fini_queue(&mqueue);
-		free_percpu(mcryptd_flist);
-	}
-
-	return err;
-}
-
-static void __exit mcryptd_exit(void)
-{
-	mcryptd_fini_queue(&mqueue);
-	crypto_unregister_template(&mcryptd_tmpl);
-	free_percpu(mcryptd_flist);
-}
-
-subsys_initcall(mcryptd_init);
-module_exit(mcryptd_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("Software async multibuffer crypto daemon");
-MODULE_ALIAS_CRYPTO("mcryptd");

+ 2 - 5
crypto/morus1280.c

@@ -385,14 +385,11 @@ static void crypto_morus1280_final(struct morus1280_state *state,
 				   struct morus1280_block *tag_xor,
 				   u64 assoclen, u64 cryptlen)
 {
-	u64 assocbits = assoclen * 8;
-	u64 cryptbits = cryptlen * 8;
-
 	struct morus1280_block tmp;
 	unsigned int i;

-	tmp.words[0] = cpu_to_le64(assocbits);
-	tmp.words[1] = cpu_to_le64(cryptbits);
+	tmp.words[0] = assoclen * 8;
+	tmp.words[1] = cryptlen * 8;
 	tmp.words[2] = 0;
 	tmp.words[3] = 0;


+ 4 - 12
crypto/morus640.c

@@ -384,21 +384,13 @@ static void crypto_morus640_final(struct morus640_state *state,
 				  struct morus640_block *tag_xor,
 				  u64 assoclen, u64 cryptlen)
 {
-	u64 assocbits = assoclen * 8;
-	u64 cryptbits = cryptlen * 8;
-
-	u32 assocbits_lo = (u32)assocbits;
-	u32 assocbits_hi = (u32)(assocbits >> 32);
-	u32 cryptbits_lo = (u32)cryptbits;
-	u32 cryptbits_hi = (u32)(cryptbits >> 32);
-
 	struct morus640_block tmp;
 	unsigned int i;

-	tmp.words[0] = cpu_to_le32(assocbits_lo);
-	tmp.words[1] = cpu_to_le32(assocbits_hi);
-	tmp.words[2] = cpu_to_le32(cryptbits_lo);
-	tmp.words[3] = cpu_to_le32(cryptbits_hi);
+	tmp.words[0] = lower_32_bits(assoclen * 8);
+	tmp.words[1] = upper_32_bits(assoclen * 8);
+	tmp.words[2] = lower_32_bits(cryptlen * 8);
+	tmp.words[3] = upper_32_bits(cryptlen * 8);

 	for (i = 0; i < MORUS_BLOCK_WORDS; i++)
 		state->s[4].words[i] ^= state->s[0].words[i];

+ 225 - 0
crypto/ofb.c

@@ -0,0 +1,225 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * OFB: Output FeedBack mode
+ *
+ * Copyright (C) 2018 ARM Limited or its affiliates.
+ * All rights reserved.
+ *
+ * Based loosely on public domain code gleaned from libtomcrypt
+ * (https://github.com/libtom/libtomcrypt).
+ */
+
+#include <crypto/algapi.h>
+#include <crypto/internal/skcipher.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+
+struct crypto_ofb_ctx {
+	struct crypto_cipher *child;
+	int cnt;
+};
+
+
+static int crypto_ofb_setkey(struct crypto_skcipher *parent, const u8 *key,
+			     unsigned int keylen)
+{
+	struct crypto_ofb_ctx *ctx = crypto_skcipher_ctx(parent);
+	struct crypto_cipher *child = ctx->child;
+	int err;
+
+	crypto_cipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
+	crypto_cipher_set_flags(child, crypto_skcipher_get_flags(parent) &
+				       CRYPTO_TFM_REQ_MASK);
+	err = crypto_cipher_setkey(child, key, keylen);
+	crypto_skcipher_set_flags(parent, crypto_cipher_get_flags(child) &
+				  CRYPTO_TFM_RES_MASK);
+	return err;
+}
+
+static int crypto_ofb_encrypt_segment(struct crypto_ofb_ctx *ctx,
+				      struct skcipher_walk *walk,
+				      struct crypto_cipher *tfm)
+{
+	int bsize = crypto_cipher_blocksize(tfm);
+	int nbytes = walk->nbytes;
+
+	u8 *src = walk->src.virt.addr;
+	u8 *dst = walk->dst.virt.addr;
+	u8 *iv = walk->iv;
+
+	do {
+		if (ctx->cnt == bsize) {
+			if (nbytes < bsize)
+				break;
+			crypto_cipher_encrypt_one(tfm, iv, iv);
+			ctx->cnt = 0;
+		}
+		*dst = *src ^ iv[ctx->cnt];
+		src++;
+		dst++;
+		ctx->cnt++;
+	} while (--nbytes);
+	return nbytes;
+}
+
+static int crypto_ofb_encrypt(struct skcipher_request *req)
+{
+	struct skcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	unsigned int bsize;
+	struct crypto_ofb_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct crypto_cipher *child = ctx->child;
+	int ret = 0;
+
+	bsize =  crypto_cipher_blocksize(child);
+	ctx->cnt = bsize;
+
+	ret = skcipher_walk_virt(&walk, req, false);
+
+	while (walk.nbytes) {
+		ret = crypto_ofb_encrypt_segment(ctx, &walk, child);
+		ret = skcipher_walk_done(&walk, ret);
+	}
+
+	return ret;
+}
+
+/* OFB encrypt and decrypt are identical */
+static int crypto_ofb_decrypt(struct skcipher_request *req)
+{
+	return crypto_ofb_encrypt(req);
+}
+
+static int crypto_ofb_init_tfm(struct crypto_skcipher *tfm)
+{
+	struct skcipher_instance *inst = skcipher_alg_instance(tfm);
+	struct crypto_spawn *spawn = skcipher_instance_ctx(inst);
+	struct crypto_ofb_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct crypto_cipher *cipher;
+
+	cipher = crypto_spawn_cipher(spawn);
+	if (IS_ERR(cipher))
+		return PTR_ERR(cipher);
+
+	ctx->child = cipher;
+	return 0;
+}
+
+static void crypto_ofb_exit_tfm(struct crypto_skcipher *tfm)
+{
+	struct crypto_ofb_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	crypto_free_cipher(ctx->child);
+}
+
+static void crypto_ofb_free(struct skcipher_instance *inst)
+{
+	crypto_drop_skcipher(skcipher_instance_ctx(inst));
+	kfree(inst);
+}
+
+static int crypto_ofb_create(struct crypto_template *tmpl, struct rtattr **tb)
+{
+	struct skcipher_instance *inst;
+	struct crypto_attr_type *algt;
+	struct crypto_spawn *spawn;
+	struct crypto_alg *alg;
+	u32 mask;
+	int err;
+
+	err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_SKCIPHER);
+	if (err)
+		return err;
+
+	inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
+	if (!inst)
+		return -ENOMEM;
+
+	algt = crypto_get_attr_type(tb);
+	err = PTR_ERR(algt);
+	if (IS_ERR(algt))
+		goto err_free_inst;
+
+	mask = CRYPTO_ALG_TYPE_MASK |
+		crypto_requires_off(algt->type, algt->mask,
+				    CRYPTO_ALG_NEED_FALLBACK);
+
+	alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_CIPHER, mask);
+	err = PTR_ERR(alg);
+	if (IS_ERR(alg))
+		goto err_free_inst;
+
+	spawn = skcipher_instance_ctx(inst);
+	err = crypto_init_spawn(spawn, alg, skcipher_crypto_instance(inst),
+				CRYPTO_ALG_TYPE_MASK);
+	crypto_mod_put(alg);
+	if (err)
+		goto err_free_inst;
+
+	err = crypto_inst_setname(skcipher_crypto_instance(inst), "ofb", alg);
+	if (err)
+		goto err_drop_spawn;
+
+	inst->alg.base.cra_priority = alg->cra_priority;
+	inst->alg.base.cra_blocksize = alg->cra_blocksize;
+	inst->alg.base.cra_alignmask = alg->cra_alignmask;
+
+	/* We access the data as u32s when xoring. */
+	inst->alg.base.cra_alignmask |= __alignof__(u32) - 1;
+
+	inst->alg.ivsize = alg->cra_blocksize;
+	inst->alg.min_keysize = alg->cra_cipher.cia_min_keysize;
+	inst->alg.max_keysize = alg->cra_cipher.cia_max_keysize;
+
+	inst->alg.base.cra_ctxsize = sizeof(struct crypto_ofb_ctx);
+
+	inst->alg.init = crypto_ofb_init_tfm;
+	inst->alg.exit = crypto_ofb_exit_tfm;
+
+	inst->alg.setkey = crypto_ofb_setkey;
+	inst->alg.encrypt = crypto_ofb_encrypt;
+	inst->alg.decrypt = crypto_ofb_decrypt;
+
+	inst->free = crypto_ofb_free;
+
+	err = skcipher_register_instance(tmpl, inst);
+	if (err)
+		goto err_drop_spawn;
+
+out:
+	return err;
+
+err_drop_spawn:
+	crypto_drop_spawn(spawn);
+err_free_inst:
+	kfree(inst);
+	goto out;
+}
+
+static struct crypto_template crypto_ofb_tmpl = {
+	.name = "ofb",
+	.create = crypto_ofb_create,
+	.module = THIS_MODULE,
+};
+
+static int __init crypto_ofb_module_init(void)
+{
+	return crypto_register_template(&crypto_ofb_tmpl);
+}
+
+static void __exit crypto_ofb_module_exit(void)
+{
+	crypto_unregister_template(&crypto_ofb_tmpl);
+}
+
+module_init(crypto_ofb_module_init);
+module_exit(crypto_ofb_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("OFB block cipher algorithm");
+MODULE_ALIAS_CRYPTO("ofb");

+ 1 - 0
crypto/rng.c

@@ -50,6 +50,7 @@ int crypto_rng_reset(struct crypto_rng *tfm, const u8 *seed, unsigned int slen)
 	}

 	err = crypto_rng_alg(tfm)->seed(tfm, seed, slen);
+	crypto_stat_rng_seed(tfm, err);
 out:
 	kzfree(buf);
 	return err;

+ 0 - 9
crypto/rsa-pkcs1pad.c

@@ -261,15 +261,6 @@ static int pkcs1pad_encrypt(struct akcipher_request *req)
 	pkcs1pad_sg_set_buf(req_ctx->in_sg, req_ctx->in_buf,
 			ctx->key_size - 1 - req->src_len, req->src);

-	req_ctx->out_buf = kmalloc(ctx->key_size, GFP_KERNEL);
-	if (!req_ctx->out_buf) {
-		kfree(req_ctx->in_buf);
-		return -ENOMEM;
-	}
-
-	pkcs1pad_sg_set_buf(req_ctx->out_sg, req_ctx->out_buf,
-			ctx->key_size, NULL);
-
 	akcipher_request_set_tfm(&req_ctx->child_req, ctx->child);
 	akcipher_request_set_callback(&req_ctx->child_req, req->base.flags,
 			pkcs1pad_encrypt_sign_complete_cb, req);

+ 2 - 2
crypto/seqiv.c

@@ -73,9 +73,9 @@ static int seqiv_aead_encrypt(struct aead_request *req)
 	info = req->iv;

 	if (req->src != req->dst) {
-		SKCIPHER_REQUEST_ON_STACK(nreq, ctx->sknull);
+		SYNC_SKCIPHER_REQUEST_ON_STACK(nreq, ctx->sknull);

-		skcipher_request_set_tfm(nreq, ctx->sknull);
+		skcipher_request_set_sync_tfm(nreq, ctx->sknull);
 		skcipher_request_set_callback(nreq, req->base.flags,
 					      NULL, NULL);
 		skcipher_request_set_crypt(nreq, req->src, req->dst,

+ 19 - 14
crypto/shash.c

@@ -73,13 +73,6 @@ int crypto_shash_setkey(struct crypto_shash *tfm, const u8 *key,
 }
 EXPORT_SYMBOL_GPL(crypto_shash_setkey);

-static inline unsigned int shash_align_buffer_size(unsigned len,
-						   unsigned long mask)
-{
-	typedef u8 __aligned_largest u8_aligned;
-	return len + (mask & ~(__alignof__(u8_aligned) - 1));
-}
-
 static int shash_update_unaligned(struct shash_desc *desc, const u8 *data,
 				  unsigned int len)
 {
@@ -88,11 +81,17 @@ static int shash_update_unaligned(struct shash_desc *desc, const u8 *data,
 	unsigned long alignmask = crypto_shash_alignmask(tfm);
 	unsigned int unaligned_len = alignmask + 1 -
 				     ((unsigned long)data & alignmask);
-	u8 ubuf[shash_align_buffer_size(unaligned_len, alignmask)]
-		__aligned_largest;
+	/*
+	 * We cannot count on __aligned() working for large values:
+	 * https://patchwork.kernel.org/patch/9507697/
+	 */
+	u8 ubuf[MAX_ALGAPI_ALIGNMASK * 2];
 	u8 *buf = PTR_ALIGN(&ubuf[0], alignmask + 1);
 	int err;

+	if (WARN_ON(buf + unaligned_len > ubuf + sizeof(ubuf)))
+		return -EINVAL;
+
 	if (unaligned_len > len)
 		unaligned_len = len;

@@ -124,11 +123,17 @@ static int shash_final_unaligned(struct shash_desc *desc, u8 *out)
 	unsigned long alignmask = crypto_shash_alignmask(tfm);
 	struct shash_alg *shash = crypto_shash_alg(tfm);
 	unsigned int ds = crypto_shash_digestsize(tfm);
-	u8 ubuf[shash_align_buffer_size(ds, alignmask)]
-		__aligned_largest;
+	/*
+	 * We cannot count on __aligned() working for large values:
+	 * https://patchwork.kernel.org/patch/9507697/
+	 */
+	u8 ubuf[MAX_ALGAPI_ALIGNMASK + HASH_MAX_DIGESTSIZE];
 	u8 *buf = PTR_ALIGN(&ubuf[0], alignmask + 1);
 	int err;

+	if (WARN_ON(buf + ds > ubuf + sizeof(ubuf)))
+		return -EINVAL;
+
 	err = shash->final(desc, buf);
 	if (err)
 		goto out;
@@ -458,9 +463,9 @@ static int shash_prepare_alg(struct shash_alg *alg)
 {
 	struct crypto_alg *base = &alg->base;

-	if (alg->digestsize > PAGE_SIZE / 8 ||
-	    alg->descsize > PAGE_SIZE / 8 ||
-	    alg->statesize > PAGE_SIZE / 8)
+	if (alg->digestsize > HASH_MAX_DIGESTSIZE ||
+	    alg->descsize > HASH_MAX_DESCSIZE ||
+	    alg->statesize > HASH_MAX_STATESIZE)
 		return -EINVAL;

 	base->cra_type = &crypto_shash_type;

+ 24 - 0
crypto/skcipher.c

@@ -949,6 +949,30 @@ struct crypto_skcipher *crypto_alloc_skcipher(const char *alg_name,
 }
 EXPORT_SYMBOL_GPL(crypto_alloc_skcipher);

+struct crypto_sync_skcipher *crypto_alloc_sync_skcipher(
+				const char *alg_name, u32 type, u32 mask)
+{
+	struct crypto_skcipher *tfm;
+
+	/* Only sync algorithms allowed. */
+	mask |= CRYPTO_ALG_ASYNC;
+
+	tfm = crypto_alloc_tfm(alg_name, &crypto_skcipher_type2, type, mask);
+
+	/*
+	 * Make sure we do not allocate something that might get used with
+	 * an on-stack request: check the request size.
+	 */
+	if (!IS_ERR(tfm) && WARN_ON(crypto_skcipher_reqsize(tfm) >
+				    MAX_SYNC_SKCIPHER_REQSIZE)) {
+		crypto_free_skcipher(tfm);
+		return ERR_PTR(-EINVAL);
+	}
+
+	return (struct crypto_sync_skcipher *)tfm;
+}
+EXPORT_SYMBOL_GPL(crypto_alloc_sync_skcipher);
+
 int crypto_has_skcipher2(const char *alg_name, u32 type, u32 mask)
 {
 	return crypto_type_has_alg(alg_name, &crypto_skcipher_type2,

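crypto_alloc_sync_skcipher() above folds CRYPTO_ALG_ASYNC into the mask, so the lookup only matches implementations with the ASYNC flag clear (i.e. synchronous ones), and the reqsize check guarantees the transform fits the fixed MAX_SYNC_SKCIPHER_REQSIZE reserved for on-stack requests. A hedged usage sketch follows; it assumes the companion helpers added alongside this hunk in the same series (SYNC_SKCIPHER_REQUEST_ON_STACK(), skcipher_request_set_sync_tfm(), crypto_sync_skcipher_setkey(), crypto_free_sync_skcipher()), and encrypt_one_block() plus the "cbc(aes)" choice are illustrative only.

/*
 * Minimal usage sketch (not from this patch) of the sync skcipher API.
 * @buf must not live on the stack, since it is mapped via a scatterlist.
 */
#include <crypto/skcipher.h>
#include <linux/err.h>
#include <linux/scatterlist.h>

static int encrypt_one_block(const u8 *key, unsigned int keylen,
			     u8 *buf, unsigned int len, u8 *iv)
{
	struct crypto_sync_skcipher *tfm;
	struct scatterlist sg;
	int err;

	/* Only synchronous implementations can be returned here. */
	tfm = crypto_alloc_sync_skcipher("cbc(aes)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_sync_skcipher_setkey(tfm, key, keylen);
	if (err)
		goto out;

	{
		/* Fits on the stack: reqsize <= MAX_SYNC_SKCIPHER_REQSIZE. */
		SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm);

		skcipher_request_set_sync_tfm(req, tfm);
		/* No callback needed: a sync tfm completes inline. */
		skcipher_request_set_callback(req, 0, NULL, NULL);
		sg_init_one(&sg, buf, len);
		skcipher_request_set_crypt(req, &sg, &sg, len, iv);

		err = crypto_skcipher_encrypt(req);
		skcipher_request_zero(req);
	}
out:
	crypto_free_sync_skcipher(tfm);
	return err;
}
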
+ 0 - 307
crypto/speck.c

@@ -1,307 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Speck: a lightweight block cipher
- *
- * Copyright (c) 2018 Google, Inc
- *
- * Speck has 10 variants, including 5 block sizes.  For now we only implement
- * the variants Speck128/128, Speck128/192, Speck128/256, Speck64/96, and
- * Speck64/128.   Speck${B}/${K} denotes the variant with a block size of B bits
- * and a key size of K bits.  The Speck128 variants are believed to be the most
- * secure variants, and they use the same block size and key sizes as AES.  The
- * Speck64 variants are less secure, but on 32-bit processors are usually
- * faster.  The remaining variants (Speck32, Speck48, and Speck96) are even less
- * secure and/or not as well suited for implementation on either 32-bit or
- * 64-bit processors, so are omitted.
- *
- * Reference: "The Simon and Speck Families of Lightweight Block Ciphers"
- * https://eprint.iacr.org/2013/404.pdf
- *
- * In a correspondence, the Speck designers have also clarified that the words
- * should be interpreted in little-endian format, and the words should be
- * ordered such that the first word of each block is 'y' rather than 'x', and
- * the first key word (rather than the last) becomes the first round key.
- */
-
-#include <asm/unaligned.h>
-#include <crypto/speck.h>
-#include <linux/bitops.h>
-#include <linux/crypto.h>
-#include <linux/init.h>
-#include <linux/module.h>
-
-/* Speck128 */
-
-static __always_inline void speck128_round(u64 *x, u64 *y, u64 k)
-{
-	*x = ror64(*x, 8);
-	*x += *y;
-	*x ^= k;
-	*y = rol64(*y, 3);
-	*y ^= *x;
-}
-
-static __always_inline void speck128_unround(u64 *x, u64 *y, u64 k)
-{
-	*y ^= *x;
-	*y = ror64(*y, 3);
-	*x ^= k;
-	*x -= *y;
-	*x = rol64(*x, 8);
-}
-
-void crypto_speck128_encrypt(const struct speck128_tfm_ctx *ctx,
-			     u8 *out, const u8 *in)
-{
-	u64 y = get_unaligned_le64(in);
-	u64 x = get_unaligned_le64(in + 8);
-	int i;
-
-	for (i = 0; i < ctx->nrounds; i++)
-		speck128_round(&x, &y, ctx->round_keys[i]);
-
-	put_unaligned_le64(y, out);
-	put_unaligned_le64(x, out + 8);
-}
-EXPORT_SYMBOL_GPL(crypto_speck128_encrypt);
-
-static void speck128_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
-{
-	crypto_speck128_encrypt(crypto_tfm_ctx(tfm), out, in);
-}
-
-void crypto_speck128_decrypt(const struct speck128_tfm_ctx *ctx,
-			     u8 *out, const u8 *in)
-{
-	u64 y = get_unaligned_le64(in);
-	u64 x = get_unaligned_le64(in + 8);
-	int i;
-
-	for (i = ctx->nrounds - 1; i >= 0; i--)
-		speck128_unround(&x, &y, ctx->round_keys[i]);
-
-	put_unaligned_le64(y, out);
-	put_unaligned_le64(x, out + 8);
-}
-EXPORT_SYMBOL_GPL(crypto_speck128_decrypt);
-
-static void speck128_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
-{
-	crypto_speck128_decrypt(crypto_tfm_ctx(tfm), out, in);
-}
-
-int crypto_speck128_setkey(struct speck128_tfm_ctx *ctx, const u8 *key,
-			   unsigned int keylen)
-{
-	u64 l[3];
-	u64 k;
-	int i;
-
-	switch (keylen) {
-	case SPECK128_128_KEY_SIZE:
-		k = get_unaligned_le64(key);
-		l[0] = get_unaligned_le64(key + 8);
-		ctx->nrounds = SPECK128_128_NROUNDS;
-		for (i = 0; i < ctx->nrounds; i++) {
-			ctx->round_keys[i] = k;
-			speck128_round(&l[0], &k, i);
-		}
-		break;
-	case SPECK128_192_KEY_SIZE:
-		k = get_unaligned_le64(key);
-		l[0] = get_unaligned_le64(key + 8);
-		l[1] = get_unaligned_le64(key + 16);
-		ctx->nrounds = SPECK128_192_NROUNDS;
-		for (i = 0; i < ctx->nrounds; i++) {
-			ctx->round_keys[i] = k;
-			speck128_round(&l[i % 2], &k, i);
-		}
-		break;
-	case SPECK128_256_KEY_SIZE:
-		k = get_unaligned_le64(key);
-		l[0] = get_unaligned_le64(key + 8);
-		l[1] = get_unaligned_le64(key + 16);
-		l[2] = get_unaligned_le64(key + 24);
-		ctx->nrounds = SPECK128_256_NROUNDS;
-		for (i = 0; i < ctx->nrounds; i++) {
-			ctx->round_keys[i] = k;
-			speck128_round(&l[i % 3], &k, i);
-		}
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(crypto_speck128_setkey);
-
-static int speck128_setkey(struct crypto_tfm *tfm, const u8 *key,
-			   unsigned int keylen)
-{
-	return crypto_speck128_setkey(crypto_tfm_ctx(tfm), key, keylen);
-}
-
-/* Speck64 */
-
-static __always_inline void speck64_round(u32 *x, u32 *y, u32 k)
-{
-	*x = ror32(*x, 8);
-	*x += *y;
-	*x ^= k;
-	*y = rol32(*y, 3);
-	*y ^= *x;
-}
-
-static __always_inline void speck64_unround(u32 *x, u32 *y, u32 k)
-{
-	*y ^= *x;
-	*y = ror32(*y, 3);
-	*x ^= k;
-	*x -= *y;
-	*x = rol32(*x, 8);
-}
-
-void crypto_speck64_encrypt(const struct speck64_tfm_ctx *ctx,
-			    u8 *out, const u8 *in)
-{
-	u32 y = get_unaligned_le32(in);
-	u32 x = get_unaligned_le32(in + 4);
-	int i;
-
-	for (i = 0; i < ctx->nrounds; i++)
-		speck64_round(&x, &y, ctx->round_keys[i]);
-
-	put_unaligned_le32(y, out);
-	put_unaligned_le32(x, out + 4);
-}
-EXPORT_SYMBOL_GPL(crypto_speck64_encrypt);
-
-static void speck64_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
-{
-	crypto_speck64_encrypt(crypto_tfm_ctx(tfm), out, in);
-}
-
-void crypto_speck64_decrypt(const struct speck64_tfm_ctx *ctx,
-			    u8 *out, const u8 *in)
-{
-	u32 y = get_unaligned_le32(in);
-	u32 x = get_unaligned_le32(in + 4);
-	int i;
-
-	for (i = ctx->nrounds - 1; i >= 0; i--)
-		speck64_unround(&x, &y, ctx->round_keys[i]);
-
-	put_unaligned_le32(y, out);
-	put_unaligned_le32(x, out + 4);
-}
-EXPORT_SYMBOL_GPL(crypto_speck64_decrypt);
-
-static void speck64_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
-{
-	crypto_speck64_decrypt(crypto_tfm_ctx(tfm), out, in);
-}
-
-int crypto_speck64_setkey(struct speck64_tfm_ctx *ctx, const u8 *key,
-			  unsigned int keylen)
-{
-	u32 l[3];
-	u32 k;
-	int i;
-
-	switch (keylen) {
-	case SPECK64_96_KEY_SIZE:
-		k = get_unaligned_le32(key);
-		l[0] = get_unaligned_le32(key + 4);
-		l[1] = get_unaligned_le32(key + 8);
-		ctx->nrounds = SPECK64_96_NROUNDS;
-		for (i = 0; i < ctx->nrounds; i++) {
-			ctx->round_keys[i] = k;
-			speck64_round(&l[i % 2], &k, i);
-		}
-		break;
-	case SPECK64_128_KEY_SIZE:
-		k = get_unaligned_le32(key);
-		l[0] = get_unaligned_le32(key + 4);
-		l[1] = get_unaligned_le32(key + 8);
-		l[2] = get_unaligned_le32(key + 12);
-		ctx->nrounds = SPECK64_128_NROUNDS;
-		for (i = 0; i < ctx->nrounds; i++) {
-			ctx->round_keys[i] = k;
-			speck64_round(&l[i % 3], &k, i);
-		}
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(crypto_speck64_setkey);
-
-static int speck64_setkey(struct crypto_tfm *tfm, const u8 *key,
-			  unsigned int keylen)
-{
-	return crypto_speck64_setkey(crypto_tfm_ctx(tfm), key, keylen);
-}
-
-/* Algorithm definitions */
-
-static struct crypto_alg speck_algs[] = {
-	{
-		.cra_name		= "speck128",
-		.cra_driver_name	= "speck128-generic",
-		.cra_priority		= 100,
-		.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
-		.cra_blocksize		= SPECK128_BLOCK_SIZE,
-		.cra_ctxsize		= sizeof(struct speck128_tfm_ctx),
-		.cra_module		= THIS_MODULE,
-		.cra_u			= {
-			.cipher = {
-				.cia_min_keysize	= SPECK128_128_KEY_SIZE,
-				.cia_max_keysize	= SPECK128_256_KEY_SIZE,
-				.cia_setkey		= speck128_setkey,
-				.cia_encrypt		= speck128_encrypt,
-				.cia_decrypt		= speck128_decrypt
-			}
-		}
-	}, {
-		.cra_name		= "speck64",
-		.cra_driver_name	= "speck64-generic",
-		.cra_priority		= 100,
-		.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
-		.cra_blocksize		= SPECK64_BLOCK_SIZE,
-		.cra_ctxsize		= sizeof(struct speck64_tfm_ctx),
-		.cra_module		= THIS_MODULE,
-		.cra_u			= {
-			.cipher = {
-				.cia_min_keysize	= SPECK64_96_KEY_SIZE,
-				.cia_max_keysize	= SPECK64_128_KEY_SIZE,
-				.cia_setkey		= speck64_setkey,
-				.cia_encrypt		= speck64_encrypt,
-				.cia_decrypt		= speck64_decrypt
-			}
-		}
-	}
-};
-
-static int __init speck_module_init(void)
-{
-	return crypto_register_algs(speck_algs, ARRAY_SIZE(speck_algs));
-}
-
-static void __exit speck_module_exit(void)
-{
-	crypto_unregister_algs(speck_algs, ARRAY_SIZE(speck_algs));
-}
-
-module_init(speck_module_init);
-module_exit(speck_module_exit);
-
-MODULE_DESCRIPTION("Speck block cipher (generic)");
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>");
-MODULE_ALIAS_CRYPTO("speck128");
-MODULE_ALIAS_CRYPTO("speck128-generic");
-MODULE_ALIAS_CRYPTO("speck64");
-MODULE_ALIAS_CRYPTO("speck64-generic");

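The generic Speck implementation removed above is self-contained enough that its round structure can be sanity-checked in isolation: speck128_round() and speck128_unround() are exact inverses, so decryption simply replays the round keys in reverse order, as the deleted header comment describes. Below is a small standalone sketch of that property (not kernel code), using local rotl64()/rotr64() helpers in place of the kernel's rol64()/ror64() and arbitrary round keys chosen only for the check.

#include <stdint.h>
#include <stdio.h>

static uint64_t rotl64(uint64_t v, unsigned s) { return (v << s) | (v >> (64 - s)); }
static uint64_t rotr64(uint64_t v, unsigned s) { return (v >> s) | (v << (64 - s)); }

/* Same round/unround structure as the removed crypto/speck.c. */
static void speck128_round(uint64_t *x, uint64_t *y, uint64_t k)
{
	*x = rotr64(*x, 8);
	*x += *y;
	*x ^= k;
	*y = rotl64(*y, 3);
	*y ^= *x;
}

static void speck128_unround(uint64_t *x, uint64_t *y, uint64_t k)
{
	*y ^= *x;
	*y = rotr64(*y, 3);
	*x ^= k;
	*x -= *y;
	*x = rotl64(*x, 8);
}

int main(void)
{
	uint64_t x = 0x0123456789abcdefULL, y = 0xfedcba9876543210ULL;
	const uint64_t x0 = x, y0 = y;
	uint64_t keys[32];
	int i;

	for (i = 0; i < 32; i++)		/* arbitrary keys for the check */
		keys[i] = 0x9e3779b97f4a7c15ULL * (i + 1);

	for (i = 0; i < 32; i++)
		speck128_round(&x, &y, keys[i]);
	for (i = 31; i >= 0; i--)
		speck128_unround(&x, &y, keys[i]);

	printf("round/unround %s\n",
	       (x == x0 && y == y0) ? "invert cleanly" : "mismatch");
	return !(x == x0 && y == y0);
}
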
+ 21 - 6
crypto/tcrypt.c

@@ -76,8 +76,7 @@ static char *check[] = {
 	"cast6", "arc4", "michael_mic", "deflate", "crc32c", "tea", "xtea",
 	"khazad", "wp512", "wp384", "wp256", "tnepres", "xeta",  "fcrypt",
 	"camellia", "seed", "salsa20", "rmd128", "rmd160", "rmd256", "rmd320",
-	"lzo", "cts", "zlib", "sha3-224", "sha3-256", "sha3-384", "sha3-512",
-	NULL
+	"lzo", "cts", "sha3-224", "sha3-256", "sha3-384", "sha3-512", NULL
 };
 
 static u32 block_sizes[] = { 16, 64, 256, 1024, 8192, 0 };
@@ -1103,6 +1102,9 @@ static void test_ahash_speed_common(const char *algo, unsigned int secs,
 			break;
 		}
 
+		if (speed[i].klen)
+			crypto_ahash_setkey(tfm, tvmem[0], speed[i].klen);
+
 		pr_info("test%3u "
 			"(%5u byte blocks,%5u bytes per update,%4u updates): ",
 			i, speed[i].blen, speed[i].plen, speed[i].blen / speed[i].plen);
@@ -1733,6 +1735,7 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
 		ret += tcrypt_test("xts(aes)");
 		ret += tcrypt_test("ctr(aes)");
 		ret += tcrypt_test("rfc3686(ctr(aes))");
+		ret += tcrypt_test("ofb(aes)");
 		break;
 
 	case 11:
@@ -1878,10 +1881,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
 		ret += tcrypt_test("ecb(seed)");
 		break;
 
-	case 44:
-		ret += tcrypt_test("zlib");
-		break;
-
 	case 45:
 		ret += tcrypt_test("rfc4309(ccm(aes))");
 		break;
@@ -2033,6 +2032,8 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
 		break;
 	case 191:
 		ret += tcrypt_test("ecb(sm4)");
+		ret += tcrypt_test("cbc(sm4)");
+		ret += tcrypt_test("ctr(sm4)");
 		break;
 	case 200:
 		test_cipher_speed("ecb(aes)", ENCRYPT, sec, NULL, 0,
@@ -2282,6 +2283,20 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
 				   num_mb);
 		break;
 
+	case 218:
+		test_cipher_speed("ecb(sm4)", ENCRYPT, sec, NULL, 0,
+				speed_template_16);
+		test_cipher_speed("ecb(sm4)", DECRYPT, sec, NULL, 0,
+				speed_template_16);
+		test_cipher_speed("cbc(sm4)", ENCRYPT, sec, NULL, 0,
+				speed_template_16);
+		test_cipher_speed("cbc(sm4)", DECRYPT, sec, NULL, 0,
+				speed_template_16);
+		test_cipher_speed("ctr(sm4)", ENCRYPT, sec, NULL, 0,
+				speed_template_16);
+		test_cipher_speed("ctr(sm4)", DECRYPT, sec, NULL, 0,
+				speed_template_16);
+		break;
 	case 300:
 		if (alg) {
 			test_hash_speed(alg, sec, generic_hash_speed_template);

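The first tcrypt hunk above sets a key before the timing loop whenever the template specifies a key length (speed[i].klen), since keyed hashes cannot produce a digest until a key has been set. Below is a rough sketch of that requirement outside tcrypt; keyed_ahash_digest() and the "hmac(sha256)" choice are illustrative, and the error value noted in the comment describes the usual API behaviour rather than something this patch changes.

/*
 * Illustrative only (not part of this patch): a one-shot keyed ahash digest.
 * Keyed algorithms such as hmac(sha256) return an error (typically -ENOKEY)
 * from crypto_ahash_digest() until crypto_ahash_setkey() has been called.
 * @data must not live on the stack (it is mapped through a scatterlist) and
 * @out must hold crypto_ahash_digestsize(tfm) bytes.
 */
#include <crypto/hash.h>
#include <linux/err.h>
#include <linux/scatterlist.h>

static int keyed_ahash_digest(const u8 *key, unsigned int klen,
			      const u8 *data, unsigned int len, u8 *out)
{
	struct crypto_ahash *tfm;
	struct ahash_request *req;
	struct scatterlist sg;
	DECLARE_CRYPTO_WAIT(wait);
	int err;

	tfm = crypto_alloc_ahash("hmac(sha256)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_ahash_setkey(tfm, key, klen);
	if (err)
		goto out_free_tfm;

	req = ahash_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		err = -ENOMEM;
		goto out_free_tfm;
	}

	sg_init_one(&sg, data, len);
	ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
				   crypto_req_done, &wait);
	ahash_request_set_crypt(req, &sg, out, len);

	/* Wait for completion whether the implementation is sync or async. */
	err = crypto_wait_req(crypto_ahash_digest(req), &wait);

	ahash_request_free(req);
out_free_tfm:
	crypto_free_ahash(tfm);
	return err;
}
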
Some files were not shown because too many files changed in this diff