7 years ago · 62606c224d
--- a/Documentation/filesystems/fscrypt.rst
+++ b/Documentation/filesystems/fscrypt.rst
@@ -191,21 +191,11 @@ Currently, the following pairs of encryption modes are supported:
 
				 
			
 
				 - AES-256-XTS for contents and AES-256-CTS-CBC for filenames
			
 
				 - AES-128-CBC for contents and AES-128-CTS-CBC for filenames
			
 
				-- Speck128/256-XTS for contents and Speck128/256-CTS-CBC for filenames
			
 
				 
			
 
				 It is strongly recommended to use AES-256-XTS for contents encryption.
			
 
				 AES-128-CBC was added only for low-powered embedded devices with
			
 
				 crypto accelerators such as CAAM or CESA that do not support XTS.
			
 
				 
			
 
				-Similarly, Speck128/256 support was only added for older or low-end
			
 
				-CPUs which cannot do AES fast enough -- especially ARM CPUs which have
			
 
				-NEON instructions but not the Cryptography Extensions -- and for which
			
 
				-it would not otherwise be feasible to use encryption at all.  It is
			
 
				-not recommended to use Speck on CPUs that have AES instructions.
			
 
				-Speck support is only available if it has been enabled in the crypto
			
 
				-API via CONFIG_CRYPTO_SPECK.  Also, on ARM platforms, to get
			
 
				-acceptable performance CONFIG_CRYPTO_SPECK_NEON must be enabled.
			
 
				-
			
 
				 New encryption modes can be added relatively easily, without changes
			
 
				 to individual filesystems.  However, authenticated encryption (AE)
			
 
				 modes are not currently supported because of the difficulty of dealing
			
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7578,14 +7578,6 @@ S:	Supported
 
				 F:	drivers/infiniband/hw/i40iw/
			
 
				 F:	include/uapi/rdma/i40iw-abi.h
			
 
				 
			
 
				-INTEL SHA MULTIBUFFER DRIVER
			
 
				-M:	Megha Dey <megha.dey@linux.intel.com>
			
 
				-R:	Tim Chen <tim.c.chen@linux.intel.com>
			
 
				-L:	linux-crypto@vger.kernel.org
			
 
				-S:	Supported
			
 
				-F:	arch/x86/crypto/sha*-mb/
			
 
				-F:	crypto/mcryptd.c
			
 
				-
			
 
				 INTEL TELEMETRY DRIVER
			
 
				 M:	Souvik Kumar Chakravarty <souvik.k.chakravarty@intel.com>
			
 
				 L:	platform-driver-x86@vger.kernel.org
			
--- a/arch/arm/crypto/Kconfig
+++ b/arch/arm/crypto/Kconfig
@@ -99,6 +99,7 @@ config CRYPTO_GHASH_ARM_CE
 
				 	depends on KERNEL_MODE_NEON
			
 
				 	select CRYPTO_HASH
			
 
				 	select CRYPTO_CRYPTD
			
 
				+	select CRYPTO_GF128MUL
			
 
				 	help
			
 
				 	  Use an implementation of GHASH (used by the GCM AEAD chaining mode)
			
 
				 	  that uses the 64x64 to 128 bit polynomial multiplication (vmull.p64)
			
@@ -121,10 +122,4 @@ config CRYPTO_CHACHA20_NEON
 
				 	select CRYPTO_BLKCIPHER
			
 
				 	select CRYPTO_CHACHA20
			
 
				 
			
 
				-config CRYPTO_SPECK_NEON
			
 
				-	tristate "NEON accelerated Speck cipher algorithms"
			
 
				-	depends on KERNEL_MODE_NEON
			
 
				-	select CRYPTO_BLKCIPHER
			
 
				-	select CRYPTO_SPECK
			
 
				-
			
 
				 endif
			
--- a/arch/arm/crypto/Makefile
+++ b/arch/arm/crypto/Makefile
@@ -10,7 +10,6 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o
 
				 obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o
			
 
				 obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o
			
 
				 obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o
			
 
				-obj-$(CONFIG_CRYPTO_SPECK_NEON) += speck-neon.o
			
 
				 
			
 
				 ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o
			
 
				 ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o
			
@@ -54,7 +53,6 @@ ghash-arm-ce-y	:= ghash-ce-core.o ghash-ce-glue.o
 
				 crct10dif-arm-ce-y	:= crct10dif-ce-core.o crct10dif-ce-glue.o
			
 
				 crc32-arm-ce-y:= crc32-ce-core.o crc32-ce-glue.o
			
 
				 chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o
			
 
				-speck-neon-y := speck-neon-core.o speck-neon-glue.o
			
 
				 
			
 
				 ifdef REGENERATE_ARM_CRYPTO
			
 
				 quiet_cmd_perl = PERL    $@
			
--- a/arch/arm/crypto/chacha20-neon-core.S
+++ b/arch/arm/crypto/chacha20-neon-core.S
@@ -18,6 +18,34 @@
 
				  * (at your option) any later version.
			
 
				  */
			
 
				 
			
 
				+ /*
			
 
				+  * NEON doesn't have a rotate instruction.  The alternatives are, more or less:
			
 
				+  *
			
 
				+  * (a)  vshl.u32 + vsri.u32		(needs temporary register)
			
 
				+  * (b)  vshl.u32 + vshr.u32 + vorr	(needs temporary register)
			
 
				+  * (c)  vrev32.16			(16-bit rotations only)
			
 
				+  * (d)  vtbl.8 + vtbl.8		(multiple of 8 bits rotations only,
			
 
				+  *					 needs index vector)
			
 
				+  *
			
 
				+  * ChaCha20 has 16, 12, 8, and 7-bit rotations.  For the 12 and 7-bit
			
 
				+  * rotations, the only choices are (a) and (b).  We use (a) since it takes
			
 
				+  * two-thirds the cycles of (b) on both Cortex-A7 and Cortex-A53.
			
 
				+  *
			
 
				+  * For the 16-bit rotation, we use vrev32.16 since it's consistently fastest
			
 
				+  * and doesn't need a temporary register.
			
 
				+  *
			
 
				+  * For the 8-bit rotation, we use vtbl.8 + vtbl.8.  On Cortex-A7, this sequence
			
 
				+  * is twice as fast as (a), even when doing (a) on multiple registers
			
 
				+  * simultaneously to eliminate the stall between vshl and vsri.  Also, it
			
 
				+  * parallelizes better when temporary registers are scarce.
			
 
				+  *
			
 
				+  * A disadvantage is that on Cortex-A53, the vtbl sequence is the same speed as
			
 
				+  * (a), so the need to load the rotation table actually makes the vtbl method
			
 
				+  * slightly slower overall on that CPU (~1.3% slower ChaCha20).  Still, it
			
 
				+  * seems to be a good compromise to get a more significant speed boost on some
			
 
				+  * CPUs, e.g. ~4.8% faster ChaCha20 on Cortex-A7.
			
 
				+  */
			
 
				+
			
 
				 #include <linux/linkage.h>
			
 
				 
			
 
				 	.text
			
@@ -46,7 +74,9 @@ ENTRY(chacha20_block_xor_neon)
 
				 	vmov		q10, q2
			
 
				 	vmov		q11, q3
			
 
				 
			
 
				+	adr		ip, .Lrol8_table
			
 
				 	mov		r3, #10
			
 
				+	vld1.8		{d10}, [ip, :64]
			
 
				 
			
 
				 .Ldoubleround:
			
 
				 	// x0 += x1, x3 = rotl32(x3 ^ x0, 16)
			
@@ -62,9 +92,9 @@ ENTRY(chacha20_block_xor_neon)
 
				 
			
 
				 	// x0 += x1, x3 = rotl32(x3 ^ x0, 8)
			
 
				 	vadd.i32	q0, q0, q1
			
 
				-	veor		q4, q3, q0
			
 
				-	vshl.u32	q3, q4, #8
			
 
				-	vsri.u32	q3, q4, #24
			
 
				+	veor		q3, q3, q0
			
 
				+	vtbl.8		d6, {d6}, d10
			
 
				+	vtbl.8		d7, {d7}, d10
			
 
				 
			
 
				 	// x2 += x3, x1 = rotl32(x1 ^ x2, 7)
			
 
				 	vadd.i32	q2, q2, q3
			
@@ -92,9 +122,9 @@ ENTRY(chacha20_block_xor_neon)
 
				 
			
 
				 	// x0 += x1, x3 = rotl32(x3 ^ x0, 8)
			
 
				 	vadd.i32	q0, q0, q1
			
 
				-	veor		q4, q3, q0
			
 
				-	vshl.u32	q3, q4, #8
			
 
				-	vsri.u32	q3, q4, #24
			
 
				+	veor		q3, q3, q0
			
 
				+	vtbl.8		d6, {d6}, d10
			
 
				+	vtbl.8		d7, {d7}, d10
			
 
				 
			
 
				 	// x2 += x3, x1 = rotl32(x1 ^ x2, 7)
			
 
				 	vadd.i32	q2, q2, q3
			
@@ -139,13 +169,17 @@ ENTRY(chacha20_block_xor_neon)
 
				 	bx		lr
			
 
				 ENDPROC(chacha20_block_xor_neon)
			
 
				 
			
 
				+	.align		4
			
 
				+.Lctrinc:	.word	0, 1, 2, 3
			
 
				+.Lrol8_table:	.byte	3, 0, 1, 2, 7, 4, 5, 6
			
 
				+
			
 
				 	.align		5
			
 
				 ENTRY(chacha20_4block_xor_neon)
			
 
				-	push		{r4-r6, lr}
			
 
				-	mov		ip, sp			// preserve the stack pointer
			
 
				-	sub		r3, sp, #0x20		// allocate a 32 byte buffer
			
 
				-	bic		r3, r3, #0x1f		// aligned to 32 bytes
			
 
				-	mov		sp, r3
			
 
				+	push		{r4-r5}
			
 
				+	mov		r4, sp			// preserve the stack pointer
			
 
				+	sub		ip, sp, #0x20		// allocate a 32 byte buffer
			
 
				+	bic		ip, ip, #0x1f		// aligned to 32 bytes
			
 
				+	mov		sp, ip
			
 
				 
			
 
				 	// r0: Input state matrix, s
			
 
				 	// r1: 4 data blocks output, o
			
@@ -155,25 +189,24 @@ ENTRY(chacha20_4block_xor_neon)
 
				 	// This function encrypts four consecutive ChaCha20 blocks by loading
			
 
				 	// the state matrix in NEON registers four times. The algorithm performs
			
 
				 	// each operation on the corresponding word of each state matrix, hence
			
 
				-	// requires no word shuffling. For final XORing step we transpose the
			
 
				-	// matrix by interleaving 32- and then 64-bit words, which allows us to
			
 
				-	// do XOR in NEON registers.
			
 
				+	// requires no word shuffling. The words are re-interleaved before the
			
 
				+	// final addition of the original state and the XORing step.
			
 
				 	//
			
 
				 
			
 
				-	// x0..15[0-3] = s0..3[0..3]
			
 
				-	add		r3, r0, #0x20
			
 
				+	// x0..15[0-3] = s0..15[0-3]
			
 
				+	add		ip, r0, #0x20
			
 
				 	vld1.32		{q0-q1}, [r0]
			
 
				-	vld1.32		{q2-q3}, [r3]
			
 
				+	vld1.32		{q2-q3}, [ip]
			
 
				 
			
 
				-	adr		r3, CTRINC
			
 
				+	adr		r5, .Lctrinc
			
 
				 	vdup.32		q15, d7[1]
			
 
				 	vdup.32		q14, d7[0]
			
 
				-	vld1.32		{q11}, [r3, :128]
			
 
				+	vld1.32		{q4}, [r5, :128]
			
 
				 	vdup.32		q13, d6[1]
			
 
				 	vdup.32		q12, d6[0]
			
 
				-	vadd.i32	q12, q12, q11		// x12 += counter values 0-3
			
 
				 	vdup.32		q11, d5[1]
			
 
				 	vdup.32		q10, d5[0]
			
 
				+	vadd.u32	q12, q12, q4		// x12 += counter values 0-3
			
 
				 	vdup.32		q9, d4[1]
			
 
				 	vdup.32		q8, d4[0]
			
 
				 	vdup.32		q7, d3[1]
			
@@ -185,9 +218,13 @@ ENTRY(chacha20_4block_xor_neon)
 
				 	vdup.32		q1, d0[1]
			
 
				 	vdup.32		q0, d0[0]
			
 
				 
			
 
				+	adr		ip, .Lrol8_table
			
 
				 	mov		r3, #10
			
 
				+	b		1f
			
 
				 
			
 
				 .Ldoubleround4:
			
 
				+	vld1.32		{q8-q9}, [sp, :256]
			
 
				+1:
			
 
				 	// x0 += x4, x12 = rotl32(x12 ^ x0, 16)
			
 
				 	// x1 += x5, x13 = rotl32(x13 ^ x1, 16)
			
 
				 	// x2 += x6, x14 = rotl32(x14 ^ x2, 16)
			
@@ -236,24 +273,25 @@ ENTRY(chacha20_4block_xor_neon)
 
				 	// x1 += x5, x13 = rotl32(x13 ^ x1, 8)
			
 
				 	// x2 += x6, x14 = rotl32(x14 ^ x2, 8)
			
 
				 	// x3 += x7, x15 = rotl32(x15 ^ x3, 8)
			
 
				+	vld1.8		{d16}, [ip, :64]
			
 
				 	vadd.i32	q0, q0, q4
			
 
				 	vadd.i32	q1, q1, q5
			
 
				 	vadd.i32	q2, q2, q6
			
 
				 	vadd.i32	q3, q3, q7
			
 
				 
			
 
				-	veor		q8, q12, q0
			
 
				-	veor		q9, q13, q1
			
 
				-	vshl.u32	q12, q8, #8
			
 
				-	vshl.u32	q13, q9, #8
			
 
				-	vsri.u32	q12, q8, #24
			
 
				-	vsri.u32	q13, q9, #24
			
 
				+	veor		q12, q12, q0
			
 
				+	veor		q13, q13, q1
			
 
				+	veor		q14, q14, q2
			
 
				+	veor		q15, q15, q3
			
 
				 
			
 
				-	veor		q8, q14, q2
			
 
				-	veor		q9, q15, q3
			
 
				-	vshl.u32	q14, q8, #8
			
 
				-	vshl.u32	q15, q9, #8
			
 
				-	vsri.u32	q14, q8, #24
			
 
				-	vsri.u32	q15, q9, #24
			
 
				+	vtbl.8		d24, {d24}, d16
			
 
				+	vtbl.8		d25, {d25}, d16
			
 
				+	vtbl.8		d26, {d26}, d16
			
 
				+	vtbl.8		d27, {d27}, d16
			
 
				+	vtbl.8		d28, {d28}, d16
			
 
				+	vtbl.8		d29, {d29}, d16
			
 
				+	vtbl.8		d30, {d30}, d16
			
 
				+	vtbl.8		d31, {d31}, d16
			
 
				 
			
 
				 	vld1.32		{q8-q9}, [sp, :256]
			
 
				 
			
@@ -332,24 +370,25 @@ ENTRY(chacha20_4block_xor_neon)
 
				 	// x1 += x6, x12 = rotl32(x12 ^ x1, 8)
			
 
				 	// x2 += x7, x13 = rotl32(x13 ^ x2, 8)
			
 
				 	// x3 += x4, x14 = rotl32(x14 ^ x3, 8)
			
 
				+	vld1.8		{d16}, [ip, :64]
			
 
				 	vadd.i32	q0, q0, q5
			
 
				 	vadd.i32	q1, q1, q6
			
 
				 	vadd.i32	q2, q2, q7
			
 
				 	vadd.i32	q3, q3, q4
			
 
				 
			
 
				-	veor		q8, q15, q0
			
 
				-	veor		q9, q12, q1
			
 
				-	vshl.u32	q15, q8, #8
			
 
				-	vshl.u32	q12, q9, #8
			
 
				-	vsri.u32	q15, q8, #24
			
 
				-	vsri.u32	q12, q9, #24
			
 
				+	veor		q15, q15, q0
			
 
				+	veor		q12, q12, q1
			
 
				+	veor		q13, q13, q2
			
 
				+	veor		q14, q14, q3
			
 
				 
			
 
				-	veor		q8, q13, q2
			
 
				-	veor		q9, q14, q3
			
 
				-	vshl.u32	q13, q8, #8
			
 
				-	vshl.u32	q14, q9, #8
			
 
				-	vsri.u32	q13, q8, #24
			
 
				-	vsri.u32	q14, q9, #24
			
 
				+	vtbl.8		d30, {d30}, d16
			
 
				+	vtbl.8		d31, {d31}, d16
			
 
				+	vtbl.8		d24, {d24}, d16
			
 
				+	vtbl.8		d25, {d25}, d16
			
 
				+	vtbl.8		d26, {d26}, d16
			
 
				+	vtbl.8		d27, {d27}, d16
			
 
				+	vtbl.8		d28, {d28}, d16
			
 
				+	vtbl.8		d29, {d29}, d16
			
 
				 
			
 
				 	vld1.32		{q8-q9}, [sp, :256]
			
 
				 
			
@@ -379,104 +418,76 @@ ENTRY(chacha20_4block_xor_neon)
 
				 	vsri.u32	q6, q9, #25
			
 
				 
			
 
				 	subs		r3, r3, #1
			
 
				-	beq		0f
			
 
				-
			
 
				-	vld1.32		{q8-q9}, [sp, :256]
			
 
				-	b		.Ldoubleround4
			
 
				-
			
 
				-	// x0[0-3] += s0[0]
			
 
				-	// x1[0-3] += s0[1]
			
 
				-	// x2[0-3] += s0[2]
			
 
				-	// x3[0-3] += s0[3]
			
 
				-0:	ldmia		r0!, {r3-r6}
			
 
				-	vdup.32		q8, r3
			
 
				-	vdup.32		q9, r4
			
 
				-	vadd.i32	q0, q0, q8
			
 
				-	vadd.i32	q1, q1, q9
			
 
				-	vdup.32		q8, r5
			
 
				-	vdup.32		q9, r6
			
 
				-	vadd.i32	q2, q2, q8
			
 
				-	vadd.i32	q3, q3, q9
			
 
				-
			
 
				-	// x4[0-3] += s1[0]
			
 
				-	// x5[0-3] += s1[1]
			
 
				-	// x6[0-3] += s1[2]
			
 
				-	// x7[0-3] += s1[3]
			
 
				-	ldmia		r0!, {r3-r6}
			
 
				-	vdup.32		q8, r3
			
 
				-	vdup.32		q9, r4
			
 
				-	vadd.i32	q4, q4, q8
			
 
				-	vadd.i32	q5, q5, q9
			
 
				-	vdup.32		q8, r5
			
 
				-	vdup.32		q9, r6
			
 
				-	vadd.i32	q6, q6, q8
			
 
				-	vadd.i32	q7, q7, q9
			
 
				-
			
 
				-	// interleave 32-bit words in state n, n+1
			
 
				-	vzip.32		q0, q1
			
 
				-	vzip.32		q2, q3
			
 
				-	vzip.32		q4, q5
			
 
				-	vzip.32		q6, q7
			
 
				-
			
 
				-	// interleave 64-bit words in state n, n+2
			
 
				+	bne		.Ldoubleround4
			
 
				+
			
 
				+	// x0..7[0-3] are in q0-q7, x10..15[0-3] are in q10-q15.
			
 
				+	// x8..9[0-3] are on the stack.
			
 
				+
			
 
				+	// Re-interleave the words in the first two rows of each block (x0..7).
			
 
				+	// Also add the counter values 0-3 to x12[0-3].
			
 
				+	  vld1.32	{q8}, [r5, :128]	// load counter values 0-3
			
 
				+	vzip.32		q0, q1			// => (0 1 0 1) (0 1 0 1)
			
 
				+	vzip.32		q2, q3			// => (2 3 2 3) (2 3 2 3)
			
 
				+	vzip.32		q4, q5			// => (4 5 4 5) (4 5 4 5)
			
 
				+	vzip.32		q6, q7			// => (6 7 6 7) (6 7 6 7)
			
 
				+	  vadd.u32	q12, q8			// x12 += counter values 0-3
			
 
				 	vswp		d1, d4
			
 
				 	vswp		d3, d6
			
 
				+	  vld1.32	{q8-q9}, [r0]!		// load s0..7
			
 
				 	vswp		d9, d12
			
 
				 	vswp		d11, d14
			
 
				 
			
 
				-	// xor with corresponding input, write to output
			
 
				+	// Swap q1 and q4 so that we'll free up consecutive registers (q0-q1)
			
 
				+	// after XORing the first 32 bytes.
			
 
				+	vswp		q1, q4
			
 
				+
			
 
				+	// First two rows of each block are (q0 q1) (q2 q6) (q4 q5) (q3 q7)
			
 
				+
			
 
				+	// x0..3[0-3] += s0..3[0-3]	(add orig state to 1st row of each block)
			
 
				+	vadd.u32	q0, q0, q8
			
 
				+	vadd.u32	q2, q2, q8
			
 
				+	vadd.u32	q4, q4, q8
			
 
				+	vadd.u32	q3, q3, q8
			
 
				+
			
 
				+	// x4..7[0-3] += s4..7[0-3]	(add orig state to 2nd row of each block)
			
 
				+	vadd.u32	q1, q1, q9
			
 
				+	vadd.u32	q6, q6, q9
			
 
				+	vadd.u32	q5, q5, q9
			
 
				+	vadd.u32	q7, q7, q9
			
 
				+
			
 
				+	// XOR first 32 bytes using keystream from first two rows of first block
			
 
				 	vld1.8		{q8-q9}, [r2]!
			
 
				 	veor		q8, q8, q0
			
 
				-	veor		q9, q9, q4
			
 
				+	veor		q9, q9, q1
			
 
				 	vst1.8		{q8-q9}, [r1]!
			
 
				 
			
 
				+	// Re-interleave the words in the last two rows of each block (x8..15).
			
 
				 	vld1.32		{q8-q9}, [sp, :256]
			
 
				-
			
 
				-	// x8[0-3] += s2[0]
			
 
				-	// x9[0-3] += s2[1]
			
 
				-	// x10[0-3] += s2[2]
			
 
				-	// x11[0-3] += s2[3]
			
 
				-	ldmia		r0!, {r3-r6}
			
 
				-	vdup.32		q0, r3
			
 
				-	vdup.32		q4, r4
			
 
				-	vadd.i32	q8, q8, q0
			
 
				-	vadd.i32	q9, q9, q4
			
 
				-	vdup.32		q0, r5
			
 
				-	vdup.32		q4, r6
			
 
				-	vadd.i32	q10, q10, q0
			
 
				-	vadd.i32	q11, q11, q4
			
 
				-
			
 
				-	// x12[0-3] += s3[0]
			
 
				-	// x13[0-3] += s3[1]
			
 
				-	// x14[0-3] += s3[2]
			
 
				-	// x15[0-3] += s3[3]
			
 
				-	ldmia		r0!, {r3-r6}
			
 
				-	vdup.32		q0, r3
			
 
				-	vdup.32		q4, r4
			
 
				-	adr		r3, CTRINC
			
 
				-	vadd.i32	q12, q12, q0
			
 
				-	vld1.32		{q0}, [r3, :128]
			
 
				-	vadd.i32	q13, q13, q4
			
 
				-	vadd.i32	q12, q12, q0		// x12 += counter values 0-3
			
 
				-
			
 
				-	vdup.32		q0, r5
			
 
				-	vdup.32		q4, r6
			
 
				-	vadd.i32	q14, q14, q0
			
 
				-	vadd.i32	q15, q15, q4
			
 
				-
			
 
				-	// interleave 32-bit words in state n, n+1
			
 
				-	vzip.32		q8, q9
			
 
				-	vzip.32		q10, q11
			
 
				-	vzip.32		q12, q13
			
 
				-	vzip.32		q14, q15
			
 
				-
			
 
				-	// interleave 64-bit words in state n, n+2
			
 
				-	vswp		d17, d20
			
 
				-	vswp		d19, d22
			
 
				+	vzip.32		q12, q13	// => (12 13 12 13) (12 13 12 13)
			
 
				+	vzip.32		q14, q15	// => (14 15 14 15) (14 15 14 15)
			
 
				+	vzip.32		q8, q9		// => (8 9 8 9) (8 9 8 9)
			
 
				+	vzip.32		q10, q11	// => (10 11 10 11) (10 11 10 11)
			
 
				+	  vld1.32	{q0-q1}, [r0]	// load s8..15
			
 
				 	vswp		d25, d28
			
 
				 	vswp		d27, d30
			
 
				+	vswp		d17, d20
			
 
				+	vswp		d19, d22
			
 
				+
			
 
				+	// Last two rows of each block are (q8 q12) (q10 q14) (q9 q13) (q11 q15)
			
 
				+
			
 
				+	// x8..11[0-3] += s8..11[0-3]	(add orig state to 3rd row of each block)
			
 
				+	vadd.u32	q8,  q8,  q0
			
 
				+	vadd.u32	q10, q10, q0
			
 
				+	vadd.u32	q9,  q9,  q0
			
 
				+	vadd.u32	q11, q11, q0
			
 
				+
			
 
				+	// x12..15[0-3] += s12..15[0-3] (add orig state to 4th row of each block)
			
 
				+	vadd.u32	q12, q12, q1
			
 
				+	vadd.u32	q14, q14, q1
			
 
				+	vadd.u32	q13, q13, q1
			
 
				+	vadd.u32	q15, q15, q1
			
 
				 
			
 
				-	vmov		q4, q1
			
 
				+	// XOR the rest of the data with the keystream
			
 
				 
			
 
				 	vld1.8		{q0-q1}, [r2]!
			
 
				 	veor		q0, q0, q8
			
@@ -509,13 +520,11 @@ ENTRY(chacha20_4block_xor_neon)
 
				 	vst1.8		{q0-q1}, [r1]!
			
 
				 
			
 
				 	vld1.8		{q0-q1}, [r2]
			
 
				+	  mov		sp, r4		// restore original stack pointer
			
 
				 	veor		q0, q0, q11
			
 
				 	veor		q1, q1, q15
			
 
				 	vst1.8		{q0-q1}, [r1]
			
 
				 
			
 
				-	mov		sp, ip
			
 
				-	pop		{r4-r6, pc}
			
 
				+	pop		{r4-r5}
			
 
				+	bx		lr
			
 
				 ENDPROC(chacha20_4block_xor_neon)
			
 
				-
			
 
				-	.align		4
			
 
				-CTRINC:	.word		0, 1, 2, 3
			
--- a/arch/arm/crypto/crc32-ce-glue.c
+++ b/arch/arm/crypto/crc32-ce-glue.c
@@ -236,7 +236,7 @@ static void __exit crc32_pmull_mod_exit(void)
 
				 				  ARRAY_SIZE(crc32_pmull_algs));
			
 
				 }
			
 
				 
			
 
				-static const struct cpu_feature crc32_cpu_feature[] = {
			
 
				+static const struct cpu_feature __maybe_unused crc32_cpu_feature[] = {
			
 
				 	{ cpu_feature(CRC32) }, { cpu_feature(PMULL) }, { }
			
 
				 };
			
 
				 MODULE_DEVICE_TABLE(cpu, crc32_cpu_feature);
			
--- a/arch/arm/crypto/ghash-ce-core.S
+++ b/arch/arm/crypto/ghash-ce-core.S
@@ -63,6 +63,33 @@
 
				 	k48		.req	d31
			
 
				 	SHASH2_p64	.req	d31
			
 
				 
			
 
				+	HH		.req	q10
			
 
				+	HH3		.req	q11
			
 
				+	HH4		.req	q12
			
 
				+	HH34		.req	q13
			
 
				+
			
 
				+	HH_L		.req	d20
			
 
				+	HH_H		.req	d21
			
 
				+	HH3_L		.req	d22
			
 
				+	HH3_H		.req	d23
			
 
				+	HH4_L		.req	d24
			
 
				+	HH4_H		.req	d25
			
 
				+	HH34_L		.req	d26
			
 
				+	HH34_H		.req	d27
			
 
				+	SHASH2_H	.req	d29
			
 
				+
			
 
				+	XL2		.req	q5
			
 
				+	XM2		.req	q6
			
 
				+	XH2		.req	q7
			
 
				+	T3		.req	q8
			
 
				+
			
 
				+	XL2_L		.req	d10
			
 
				+	XL2_H		.req	d11
			
 
				+	XM2_L		.req	d12
			
 
				+	XM2_H		.req	d13
			
 
				+	T3_L		.req	d16
			
 
				+	T3_H		.req	d17
			
 
				+
			
 
				 	.text
			
 
				 	.fpu		crypto-neon-fp-armv8
			
 
				 
			
@@ -175,12 +202,77 @@
 
				 	beq		0f
			
 
				 	vld1.64		{T1}, [ip]
			
 
				 	teq		r0, #0
			
 
				-	b		1f
			
 
				+	b		3f
			
 
				+
			
 
				+0:	.ifc		\pn, p64
			
 
				+	tst		r0, #3			// skip until #blocks is a
			
 
				+	bne		2f			// round multiple of 4
			
 
				+
			
 
				+	vld1.8		{XL2-XM2}, [r2]!
			
 
				+1:	vld1.8		{T3-T2}, [r2]!
			
 
				+	vrev64.8	XL2, XL2
			
 
				+	vrev64.8	XM2, XM2
			
 
				+
			
 
				+	subs		r0, r0, #4
			
 
				+
			
 
				+	vext.8		T1, XL2, XL2, #8
			
 
				+	veor		XL2_H, XL2_H, XL_L
			
 
				+	veor		XL, XL, T1
			
 
				+
			
 
				+	vrev64.8	T3, T3
			
 
				+	vrev64.8	T1, T2
			
 
				+
			
 
				+	vmull.p64	XH, HH4_H, XL_H			// a1 * b1
			
 
				+	veor		XL2_H, XL2_H, XL_H
			
 
				+	vmull.p64	XL, HH4_L, XL_L			// a0 * b0
			
 
				+	vmull.p64	XM, HH34_H, XL2_H		// (a1 + a0)(b1 + b0)
			
 
				+
			
 
				+	vmull.p64	XH2, HH3_H, XM2_L		// a1 * b1
			
 
				+	veor		XM2_L, XM2_L, XM2_H
			
 
				+	vmull.p64	XL2, HH3_L, XM2_H		// a0 * b0
			
 
				+	vmull.p64	XM2, HH34_L, XM2_L		// (a1 + a0)(b1 + b0)
			
 
				+
			
 
				+	veor		XH, XH, XH2
			
 
				+	veor		XL, XL, XL2
			
 
				+	veor		XM, XM, XM2
			
 
				+
			
 
				+	vmull.p64	XH2, HH_H, T3_L			// a1 * b1
			
 
				+	veor		T3_L, T3_L, T3_H
			
 
				+	vmull.p64	XL2, HH_L, T3_H			// a0 * b0
			
 
				+	vmull.p64	XM2, SHASH2_H, T3_L		// (a1 + a0)(b1 + b0)
			
 
				+
			
 
				+	veor		XH, XH, XH2
			
 
				+	veor		XL, XL, XL2
			
 
				+	veor		XM, XM, XM2
			
 
				+
			
 
				+	vmull.p64	XH2, SHASH_H, T1_L		// a1 * b1
			
 
				+	veor		T1_L, T1_L, T1_H
			
 
				+	vmull.p64	XL2, SHASH_L, T1_H		// a0 * b0
			
 
				+	vmull.p64	XM2, SHASH2_p64, T1_L		// (a1 + a0)(b1 + b0)
			
 
				+
			
 
				+	veor		XH, XH, XH2
			
 
				+	veor		XL, XL, XL2
			
 
				+	veor		XM, XM, XM2
			
 
				 
			
 
				-0:	vld1.64		{T1}, [r2]!
			
 
				+	beq		4f
			
 
				+
			
 
				+	vld1.8		{XL2-XM2}, [r2]!
			
 
				+
			
 
				+	veor		T1, XL, XH
			
 
				+	veor		XM, XM, T1
			
 
				+
			
 
				+	__pmull_reduce_p64
			
 
				+
			
 
				+	veor		T1, T1, XH
			
 
				+	veor		XL, XL, T1
			
 
				+
			
 
				+	b		1b
			
 
				+	.endif
			
 
				+
			
 
				+2:	vld1.64		{T1}, [r2]!
			
 
				 	subs		r0, r0, #1
			
 
				 
			
 
				-1:	/* multiply XL by SHASH in GF(2^128) */
			
 
				+3:	/* multiply XL by SHASH in GF(2^128) */
			
 
				 #ifndef CONFIG_CPU_BIG_ENDIAN
			
 
				 	vrev64.8	T1, T1
			
 
				 #endif
			
@@ -193,7 +285,7 @@
 
				 	__pmull_\pn	XL, XL_L, SHASH_L, s1l, s2l, s3l, s4l	@ a0 * b0
			
 
				 	__pmull_\pn	XM, T1_L, SHASH2_\pn			@ (a1+a0)(b1+b0)
			
 
				 
			
 
				-	veor		T1, XL, XH
			
 
				+4:	veor		T1, XL, XH
			
 
				 	veor		XM, XM, T1
			
 
				 
			
 
				 	__pmull_reduce_\pn
			
@@ -212,8 +304,14 @@
 
				 	 *			   struct ghash_key const *k, const char *head)
			
 
				 	 */
			
 
				 ENTRY(pmull_ghash_update_p64)
			
 
				-	vld1.64		{SHASH}, [r3]
			
 
				+	vld1.64		{SHASH}, [r3]!
			
 
				+	vld1.64		{HH}, [r3]!
			
 
				+	vld1.64		{HH3-HH4}, [r3]
			
 
				+
			
 
				 	veor		SHASH2_p64, SHASH_L, SHASH_H
			
 
				+	veor		SHASH2_H, HH_L, HH_H
			
 
				+	veor		HH34_L, HH3_L, HH3_H
			
 
				+	veor		HH34_H, HH4_L, HH4_H
			
 
				 
			
 
				 	vmov.i8		MASK, #0xe1
			
 
				 	vshl.u64	MASK, MASK, #57
			
--- a/arch/arm/crypto/ghash-ce-glue.c
+++ b/arch/arm/crypto/ghash-ce-glue.c
@@ -1,7 +1,7 @@
 
				 /*
			
 
				  * Accelerated GHASH implementation with ARMv8 vmull.p64 instructions.
			
 
				  *
			
 
				- * Copyright (C) 2015 Linaro Ltd. <ard.biesheuvel@linaro.org>
			
 
				+ * Copyright (C) 2015 - 2018 Linaro Ltd. <ard.biesheuvel@linaro.org>
			
 
				  *
			
 
				  * This program is free software; you can redistribute it and/or modify it
			
 
				  * under the terms of the GNU General Public License version 2 as published
			
@@ -28,8 +28,10 @@ MODULE_ALIAS_CRYPTO("ghash");
 
				 #define GHASH_DIGEST_SIZE	16
			
 
				 
			
 
				 struct ghash_key {
			
 
				-	u64	a;
			
 
				-	u64	b;
			
 
				+	u64	h[2];
			
 
				+	u64	h2[2];
			
 
				+	u64	h3[2];
			
 
				+	u64	h4[2];
			
 
				 };
			
 
				 
			
 
				 struct ghash_desc_ctx {
			
@@ -117,26 +119,40 @@ static int ghash_final(struct shash_desc *desc, u8 *dst)
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				+static void ghash_reflect(u64 h[], const be128 *k)
			
 
				+{
			
 
				+	u64 carry = be64_to_cpu(k->a) >> 63;
			
 
				+
			
 
				+	h[0] = (be64_to_cpu(k->b) << 1) | carry;
			
 
				+	h[1] = (be64_to_cpu(k->a) << 1) | (be64_to_cpu(k->b) >> 63);
			
 
				+
			
 
				+	if (carry)
			
 
				+		h[1] ^= 0xc200000000000000UL;
			
 
				+}
			
 
				+
			
 
				 static int ghash_setkey(struct crypto_shash *tfm,
			
 
				 			const u8 *inkey, unsigned int keylen)
			
 
				 {
			
 
				 	struct ghash_key *key = crypto_shash_ctx(tfm);
			
 
				-	u64 a, b;
			
 
				+	be128 h, k;
			
 
				 
			
 
				 	if (keylen != GHASH_BLOCK_SIZE) {
			
 
				 		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
			
 
				 		return -EINVAL;
			
 
				 	}
			
 
				 
			
 
				-	/* perform multiplication by 'x' in GF(2^128) */
			
 
				-	b = get_unaligned_be64(inkey);
			
 
				-	a = get_unaligned_be64(inkey + 8);
			
 
				+	memcpy(&k, inkey, GHASH_BLOCK_SIZE);
			
 
				+	ghash_reflect(key->h, &k);
			
 
				+
			
 
				+	h = k;
			
 
				+	gf128mul_lle(&h, &k);
			
 
				+	ghash_reflect(key->h2, &h);
			
 
				 
			
 
				-	key->a = (a << 1) | (b >> 63);
			
 
				-	key->b = (b << 1) | (a >> 63);
			
 
				+	gf128mul_lle(&h, &k);
			
 
				+	ghash_reflect(key->h3, &h);
			
 
				 
			
 
				-	if (b >> 63)
			
 
				-		key->b ^= 0xc200000000000000UL;
			
 
				+	gf128mul_lle(&h, &k);
			
 
				+	ghash_reflect(key->h4, &h);
			
 
				 
			
 
				 	return 0;
			
 
				 }
			
--- a/arch/arm/crypto/speck-neon-core.S
+++ b/arch/arm/crypto/speck-neon-core.S
@@ -1,434 +0,0 @@
 
				-// SPDX-License-Identifier: GPL-2.0
			
 
				-/*
			
 
				- * NEON-accelerated implementation of Speck128-XTS and Speck64-XTS
			
 
				- *
			
 
				- * Copyright (c) 2018 Google, Inc
			
 
				- *
			
 
				- * Author: Eric Biggers <ebiggers@google.com>
			
 
				- */
			
 
				-
			
 
				-#include <linux/linkage.h>
			
 
				-
			
 
				-	.text
			
 
				-	.fpu		neon
			
 
				-
			
 
				-	// arguments
			
 
				-	ROUND_KEYS	.req	r0	// const {u64,u32} *round_keys
			
 
				-	NROUNDS		.req	r1	// int nrounds
			
 
				-	DST		.req	r2	// void *dst
			
 
				-	SRC		.req	r3	// const void *src
			
 
				-	NBYTES		.req	r4	// unsigned int nbytes
			
 
				-	TWEAK		.req	r5	// void *tweak
			
 
				-
			
 
				-	// registers which hold the data being encrypted/decrypted
			
 
				-	X0		.req	q0
			
 
				-	X0_L		.req	d0
			
 
				-	X0_H		.req	d1
			
 
				-	Y0		.req	q1
			
 
				-	Y0_H		.req	d3
			
 
				-	X1		.req	q2
			
 
				-	X1_L		.req	d4
			
 
				-	X1_H		.req	d5
			
 
				-	Y1		.req	q3
			
 
				-	Y1_H		.req	d7
			
 
				-	X2		.req	q4
			
 
				-	X2_L		.req	d8
			
 
				-	X2_H		.req	d9
			
 
				-	Y2		.req	q5
			
 
				-	Y2_H		.req	d11
			
 
				-	X3		.req	q6
			
 
				-	X3_L		.req	d12
			
 
				-	X3_H		.req	d13
			
 
				-	Y3		.req	q7
			
 
				-	Y3_H		.req	d15
			
 
				-
			
 
				-	// the round key, duplicated in all lanes
			
 
				-	ROUND_KEY	.req	q8
			
 
				-	ROUND_KEY_L	.req	d16
			
 
				-	ROUND_KEY_H	.req	d17
			
 
				-
			
 
				-	// index vector for vtbl-based 8-bit rotates
			
 
				-	ROTATE_TABLE	.req	d18
			
 
				-
			
 
				-	// multiplication table for updating XTS tweaks
			
 
				-	GF128MUL_TABLE	.req	d19
			
 
				-	GF64MUL_TABLE	.req	d19
			
 
				-
			
 
				-	// current XTS tweak value(s)
			
 
				-	TWEAKV		.req	q10
			
 
				-	TWEAKV_L	.req	d20
			
 
				-	TWEAKV_H	.req	d21
			
 
				-
			
 
				-	TMP0		.req	q12
			
 
				-	TMP0_L		.req	d24
			
 
				-	TMP0_H		.req	d25
			
 
				-	TMP1		.req	q13
			
 
				-	TMP2		.req	q14
			
 
				-	TMP3		.req	q15
			
 
				-
			
 
				-	.align		4
			
 
				-.Lror64_8_table:
			
 
				-	.byte		1, 2, 3, 4, 5, 6, 7, 0
			
 
				-.Lror32_8_table:
			
 
				-	.byte		1, 2, 3, 0, 5, 6, 7, 4
			
 
				-.Lrol64_8_table:
			
 
				-	.byte		7, 0, 1, 2, 3, 4, 5, 6
			
 
				-.Lrol32_8_table:
			
 
				-	.byte		3, 0, 1, 2, 7, 4, 5, 6
			
 
				-.Lgf128mul_table:
			
 
				-	.byte		0, 0x87
			
 
				-	.fill		14
			
 
				-.Lgf64mul_table:
			
 
				-	.byte		0, 0x1b, (0x1b << 1), (0x1b << 1) ^ 0x1b
			
 
				-	.fill		12
			
 
				-
			
 
				-/*
			
 
				- * _speck_round_128bytes() - Speck encryption round on 128 bytes at a time
			
 
				- *
			
 
				- * Do one Speck encryption round on the 128 bytes (8 blocks for Speck128, 16 for
			
 
				- * Speck64) stored in X0-X3 and Y0-Y3, using the round key stored in all lanes
			
 
				- * of ROUND_KEY.  'n' is the lane size: 64 for Speck128, or 32 for Speck64.
			
 
				- *
			
 
				- * The 8-bit rotates are implemented using vtbl instead of vshr + vsli because
			
 
				- * the vtbl approach is faster on some processors and the same speed on others.
			
 
				- */
			
 
				-.macro _speck_round_128bytes	n
			
 
				-
			
 
				-	// x = ror(x, 8)
			
 
				-	vtbl.8		X0_L, {X0_L}, ROTATE_TABLE
			
 
				-	vtbl.8		X0_H, {X0_H}, ROTATE_TABLE
			
 
				-	vtbl.8		X1_L, {X1_L}, ROTATE_TABLE
			
 
				-	vtbl.8		X1_H, {X1_H}, ROTATE_TABLE
			
 
				-	vtbl.8		X2_L, {X2_L}, ROTATE_TABLE
			
 
				-	vtbl.8		X2_H, {X2_H}, ROTATE_TABLE
			
 
				-	vtbl.8		X3_L, {X3_L}, ROTATE_TABLE
			
 
				-	vtbl.8		X3_H, {X3_H}, ROTATE_TABLE
			
 
				-
			
 
				-	// x += y
			
 
				-	vadd.u\n	X0, Y0
			
 
				-	vadd.u\n	X1, Y1
			
 
				-	vadd.u\n	X2, Y2
			
 
				-	vadd.u\n	X3, Y3
			
 
				-
			
 
				-	// x ^= k
			
 
				-	veor		X0, ROUND_KEY
			
 
				-	veor		X1, ROUND_KEY
			
 
				-	veor		X2, ROUND_KEY
			
 
				-	veor		X3, ROUND_KEY
			
 
				-
			
 
				-	// y = rol(y, 3)
			
 
				-	vshl.u\n	TMP0, Y0, #3
			
 
				-	vshl.u\n	TMP1, Y1, #3
			
 
				-	vshl.u\n	TMP2, Y2, #3
			
 
				-	vshl.u\n	TMP3, Y3, #3
			
 
				-	vsri.u\n	TMP0, Y0, #(\n - 3)
			
 
				-	vsri.u\n	TMP1, Y1, #(\n - 3)
			
 
				-	vsri.u\n	TMP2, Y2, #(\n - 3)
			
 
				-	vsri.u\n	TMP3, Y3, #(\n - 3)
			
 
				-
			
 
				-	// y ^= x
			
 
				-	veor		Y0, TMP0, X0
			
 
				-	veor		Y1, TMP1, X1
			
 
				-	veor		Y2, TMP2, X2
			
 
				-	veor		Y3, TMP3, X3
			
 
				-.endm
			
 
				-
			
 
				-/*
			
 
				- * _speck_unround_128bytes() - Speck decryption round on 128 bytes at a time
			
 
				- *
			
 
				- * This is the inverse of _speck_round_128bytes().
			
 
				- */
			
 
				-.macro _speck_unround_128bytes	n
			
 
				-
			
 
				-	// y ^= x
			
 
				-	veor		TMP0, Y0, X0
			
 
				-	veor		TMP1, Y1, X1
			
 
				-	veor		TMP2, Y2, X2
			
 
				-	veor		TMP3, Y3, X3
			
 
				-
			
 
				-	// y = ror(y, 3)
			
 
				-	vshr.u\n	Y0, TMP0, #3
			
 
				-	vshr.u\n	Y1, TMP1, #3
			
 
				-	vshr.u\n	Y2, TMP2, #3
			
 
				-	vshr.u\n	Y3, TMP3, #3
			
 
				-	vsli.u\n	Y0, TMP0, #(\n - 3)
			
 
				-	vsli.u\n	Y1, TMP1, #(\n - 3)
			
 
				-	vsli.u\n	Y2, TMP2, #(\n - 3)
			
 
				-	vsli.u\n	Y3, TMP3, #(\n - 3)
			
 
				-
			
 
				-	// x ^= k
			
 
				-	veor		X0, ROUND_KEY
			
 
				-	veor		X1, ROUND_KEY
			
 
				-	veor		X2, ROUND_KEY
			
 
				-	veor		X3, ROUND_KEY
			
 
				-
			
 
				-	// x -= y
			
 
				-	vsub.u\n	X0, Y0
			
 
				-	vsub.u\n	X1, Y1
			
 
				-	vsub.u\n	X2, Y2
			
 
				-	vsub.u\n	X3, Y3
			
 
				-
			
 
				-	// x = rol(x, 8);
			
 
				-	vtbl.8		X0_L, {X0_L}, ROTATE_TABLE
			
 
				-	vtbl.8		X0_H, {X0_H}, ROTATE_TABLE
			
 
				-	vtbl.8		X1_L, {X1_L}, ROTATE_TABLE
			
 
				-	vtbl.8		X1_H, {X1_H}, ROTATE_TABLE
			
 
				-	vtbl.8		X2_L, {X2_L}, ROTATE_TABLE
			
 
				-	vtbl.8		X2_H, {X2_H}, ROTATE_TABLE
			
 
				-	vtbl.8		X3_L, {X3_L}, ROTATE_TABLE
			
 
				-	vtbl.8		X3_H, {X3_H}, ROTATE_TABLE
			
 
				-.endm
			
 
				-
			
 
				-.macro _xts128_precrypt_one	dst_reg, tweak_buf, tmp
			
 
				-
			
 
				-	// Load the next source block
			
 
				-	vld1.8		{\dst_reg}, [SRC]!
			
 
				-
			
 
				-	// Save the current tweak in the tweak buffer
			
 
				-	vst1.8		{TWEAKV}, [\tweak_buf:128]!
			
 
				-
			
 
				-	// XOR the next source block with the current tweak
			
 
				-	veor		\dst_reg, TWEAKV
			
 
				-
			
 
				-	/*
			
 
				-	 * Calculate the next tweak by multiplying the current one by x,
			
 
				-	 * modulo p(x) = x^128 + x^7 + x^2 + x + 1.
			
 
				-	 */
			
 
				-	vshr.u64	\tmp, TWEAKV, #63
			
 
				-	vshl.u64	TWEAKV, #1
			
 
				-	veor		TWEAKV_H, \tmp\()_L
			
 
				-	vtbl.8		\tmp\()_H, {GF128MUL_TABLE}, \tmp\()_H
			
 
				-	veor		TWEAKV_L, \tmp\()_H
			
 
				-.endm
			
 
				-
			
 
				-.macro _xts64_precrypt_two	dst_reg, tweak_buf, tmp
			
 
				-
			
 
				-	// Load the next two source blocks
			
 
				-	vld1.8		{\dst_reg}, [SRC]!
			
 
				-
			
 
				-	// Save the current two tweaks in the tweak buffer
			
 
				-	vst1.8		{TWEAKV}, [\tweak_buf:128]!
			
 
				-
			
 
				-	// XOR the next two source blocks with the current two tweaks
			
 
				-	veor		\dst_reg, TWEAKV
			
 
				-
			
 
				-	/*
			
 
				-	 * Calculate the next two tweaks by multiplying the current ones by x^2,
			
 
				-	 * modulo p(x) = x^64 + x^4 + x^3 + x + 1.
			
 
				-	 */
			
 
				-	vshr.u64	\tmp, TWEAKV, #62
			
 
				-	vshl.u64	TWEAKV, #2
			
 
				-	vtbl.8		\tmp\()_L, {GF64MUL_TABLE}, \tmp\()_L
			
 
				-	vtbl.8		\tmp\()_H, {GF64MUL_TABLE}, \tmp\()_H
			
 
				-	veor		TWEAKV, \tmp
			
 
				-.endm
			
 
				-
			
 
				-/*
			
 
				- * _speck_xts_crypt() - Speck-XTS encryption/decryption
			
 
				- *
			
 
				- * Encrypt or decrypt NBYTES bytes of data from the SRC buffer to the DST buffer
			
 
				- * using Speck-XTS, specifically the variant with a block size of '2n' and round
			
 
				- * count given by NROUNDS.  The expanded round keys are given in ROUND_KEYS, and
			
 
				- * the current XTS tweak value is given in TWEAK.  It's assumed that NBYTES is a
			
 
				- * nonzero multiple of 128.
			
 
				- */
			
 
				-.macro _speck_xts_crypt	n, decrypting
			
 
				-	push		{r4-r7}
			
 
				-	mov		r7, sp
			
 
				-
			
 
				-	/*
			
 
				-	 * The first four parameters were passed in registers r0-r3.  Load the
			
 
				-	 * additional parameters, which were passed on the stack.
			
 
				-	 */
			
 
				-	ldr		NBYTES, [sp, #16]
			
 
				-	ldr		TWEAK, [sp, #20]
			
 
				-
			
 
				-	/*
			
 
				-	 * If decrypting, modify the ROUND_KEYS parameter to point to the last
			
 
				-	 * round key rather than the first, since for decryption the round keys
			
 
				-	 * are used in reverse order.
			
 
				-	 */
			
 
				-.if \decrypting
			
 
				-.if \n == 64
			
 
				-	add		ROUND_KEYS, ROUND_KEYS, NROUNDS, lsl #3
			
 
				-	sub		ROUND_KEYS, #8
			
 
				-.else
			
 
				-	add		ROUND_KEYS, ROUND_KEYS, NROUNDS, lsl #2
			
 
				-	sub		ROUND_KEYS, #4
			
 
				-.endif
			
 
				-.endif
			
 
				-
			
 
				-	// Load the index vector for vtbl-based 8-bit rotates
			
 
				-.if \decrypting
			
 
				-	ldr		r12, =.Lrol\n\()_8_table
			
 
				-.else
			
 
				-	ldr		r12, =.Lror\n\()_8_table
			
 
				-.endif
			
 
				-	vld1.8		{ROTATE_TABLE}, [r12:64]
			
 
				-
			
 
				-	// One-time XTS preparation
			
 
				-
			
 
				-	/*
			
 
				-	 * Allocate stack space to store 128 bytes worth of tweaks.  For
			
 
				-	 * performance, this space is aligned to a 16-byte boundary so that we
			
 
				-	 * can use the load/store instructions that declare 16-byte alignment.
			
 
				-	 * For Thumb2 compatibility, don't do the 'bic' directly on 'sp'.
			
 
				-	 */
			
 
				-	sub		r12, sp, #128
			
 
				-	bic		r12, #0xf
			
 
				-	mov		sp, r12
			
 
				-
			
 
				-.if \n == 64
			
 
				-	// Load first tweak
			
 
				-	vld1.8		{TWEAKV}, [TWEAK]
			
 
				-
			
 
				-	// Load GF(2^128) multiplication table
			
 
				-	ldr		r12, =.Lgf128mul_table
			
 
				-	vld1.8		{GF128MUL_TABLE}, [r12:64]
			
 
				-.else
			
 
				-	// Load first tweak
			
 
				-	vld1.8		{TWEAKV_L}, [TWEAK]
			
 
				-
			
 
				-	// Load GF(2^64) multiplication table
			
 
				-	ldr		r12, =.Lgf64mul_table
			
 
				-	vld1.8		{GF64MUL_TABLE}, [r12:64]
			
 
				-
			
 
				-	// Calculate second tweak, packing it together with the first
			
 
				-	vshr.u64	TMP0_L, TWEAKV_L, #63
			
 
				-	vtbl.u8		TMP0_L, {GF64MUL_TABLE}, TMP0_L
			
 
				-	vshl.u64	TWEAKV_H, TWEAKV_L, #1
			
 
				-	veor		TWEAKV_H, TMP0_L
			
 
				-.endif
			
 
				-
			
 
				-.Lnext_128bytes_\@:
			
 
				-
			
 
				-	/*
			
 
				-	 * Load the source blocks into {X,Y}[0-3], XOR them with their XTS tweak
			
 
				-	 * values, and save the tweaks on the stack for later.  Then
			
 
				-	 * de-interleave the 'x' and 'y' elements of each block, i.e. make it so
			
 
				-	 * that the X[0-3] registers contain only the second halves of blocks,
			
 
				-	 * and the Y[0-3] registers contain only the first halves of blocks.
			
 
				-	 * (Speck uses the order (y, x) rather than the more intuitive (x, y).)
			
 
				-	 */
			
 
				-	mov		r12, sp
			
 
				-.if \n == 64
			
 
				-	_xts128_precrypt_one	X0, r12, TMP0
			
 
				-	_xts128_precrypt_one	Y0, r12, TMP0
			
 
				-	_xts128_precrypt_one	X1, r12, TMP0
			
 
				-	_xts128_precrypt_one	Y1, r12, TMP0
			
 
				-	_xts128_precrypt_one	X2, r12, TMP0
			
 
				-	_xts128_precrypt_one	Y2, r12, TMP0
			
 
				-	_xts128_precrypt_one	X3, r12, TMP0
			
 
				-	_xts128_precrypt_one	Y3, r12, TMP0
			
 
				-	vswp		X0_L, Y0_H
			
 
				-	vswp		X1_L, Y1_H
			
 
				-	vswp		X2_L, Y2_H
			
 
				-	vswp		X3_L, Y3_H
			
 
				-.else
			
 
				-	_xts64_precrypt_two	X0, r12, TMP0
			
 
				-	_xts64_precrypt_two	Y0, r12, TMP0
			
 
				-	_xts64_precrypt_two	X1, r12, TMP0
			
 
				-	_xts64_precrypt_two	Y1, r12, TMP0
			
 
				-	_xts64_precrypt_two	X2, r12, TMP0
			
 
				-	_xts64_precrypt_two	Y2, r12, TMP0
			
 
				-	_xts64_precrypt_two	X3, r12, TMP0
			
 
				-	_xts64_precrypt_two	Y3, r12, TMP0
			
 
				-	vuzp.32		Y0, X0
			
 
				-	vuzp.32		Y1, X1
			
 
				-	vuzp.32		Y2, X2
			
 
				-	vuzp.32		Y3, X3
			
 
				-.endif
			
 
				-
			
 
				-	// Do the cipher rounds
			
 
				-
			
 
				-	mov		r12, ROUND_KEYS
			
 
				-	mov		r6, NROUNDS
			
 
				-
			
 
				-.Lnext_round_\@:
			
 
				-.if \decrypting
			
 
				-.if \n == 64
			
 
				-	vld1.64		ROUND_KEY_L, [r12]
			
 
				-	sub		r12, #8
			
 
				-	vmov		ROUND_KEY_H, ROUND_KEY_L
			
 
				-.else
			
 
				-	vld1.32		{ROUND_KEY_L[],ROUND_KEY_H[]}, [r12]
			
 
				-	sub		r12, #4
			
 
				-.endif
			
 
				-	_speck_unround_128bytes	\n
			
 
				-.else
			
 
				-.if \n == 64
			
 
				-	vld1.64		ROUND_KEY_L, [r12]!
			
 
				-	vmov		ROUND_KEY_H, ROUND_KEY_L
			
 
				-.else
			
 
				-	vld1.32		{ROUND_KEY_L[],ROUND_KEY_H[]}, [r12]!
			
 
				-.endif
			
 
				-	_speck_round_128bytes	\n
			
 
				-.endif
			
 
				-	subs		r6, r6, #1
			
 
				-	bne		.Lnext_round_\@
			
 
				-
			
 
				-	// Re-interleave the 'x' and 'y' elements of each block
			
 
				-.if \n == 64
			
 
				-	vswp		X0_L, Y0_H
			
 
				-	vswp		X1_L, Y1_H
			
 
				-	vswp		X2_L, Y2_H
			
 
				-	vswp		X3_L, Y3_H
			
 
				-.else
			
 
				-	vzip.32		Y0, X0
			
 
				-	vzip.32		Y1, X1
			
 
				-	vzip.32		Y2, X2
			
 
				-	vzip.32		Y3, X3
			
 
				-.endif
			
 
				-
			
 
				-	// XOR the encrypted/decrypted blocks with the tweaks we saved earlier
			
 
				-	mov		r12, sp
			
 
				-	vld1.8		{TMP0, TMP1}, [r12:128]!
			
 
				-	vld1.8		{TMP2, TMP3}, [r12:128]!
			
 
				-	veor		X0, TMP0
			
 
				-	veor		Y0, TMP1
			
 
				-	veor		X1, TMP2
			
 
				-	veor		Y1, TMP3
			
 
				-	vld1.8		{TMP0, TMP1}, [r12:128]!
			
 
				-	vld1.8		{TMP2, TMP3}, [r12:128]!
			
 
				-	veor		X2, TMP0
			
 
				-	veor		Y2, TMP1
			
 
				-	veor		X3, TMP2
			
 
				-	veor		Y3, TMP3
			
 
				-
			
 
				-	// Store the ciphertext in the destination buffer
			
 
				-	vst1.8		{X0, Y0}, [DST]!
			
 
				-	vst1.8		{X1, Y1}, [DST]!
			
 
				-	vst1.8		{X2, Y2}, [DST]!
			
 
				-	vst1.8		{X3, Y3}, [DST]!
			
 
				-
			
 
				-	// Continue if there are more 128-byte chunks remaining, else return
			
 
				-	subs		NBYTES, #128
			
 
				-	bne		.Lnext_128bytes_\@
			
 
				-
			
 
				-	// Store the next tweak
			
 
				-.if \n == 64
			
 
				-	vst1.8		{TWEAKV}, [TWEAK]
			
 
				-.else
			
 
				-	vst1.8		{TWEAKV_L}, [TWEAK]
			
 
				-.endif
			
 
				-
			
 
				-	mov		sp, r7
			
 
				-	pop		{r4-r7}
			
 
				-	bx		lr
			
 
				-.endm
			
 
				-
			
 
				-ENTRY(speck128_xts_encrypt_neon)
			
 
				-	_speck_xts_crypt	n=64, decrypting=0
			
 
				-ENDPROC(speck128_xts_encrypt_neon)
			
 
				-
			
 
				-ENTRY(speck128_xts_decrypt_neon)
			
 
				-	_speck_xts_crypt	n=64, decrypting=1
			
 
				-ENDPROC(speck128_xts_decrypt_neon)
			
 
				-
			
 
				-ENTRY(speck64_xts_encrypt_neon)
			
 
				-	_speck_xts_crypt	n=32, decrypting=0
			
 
				-ENDPROC(speck64_xts_encrypt_neon)
			
 
				-
			
 
				-ENTRY(speck64_xts_decrypt_neon)
			
 
				-	_speck_xts_crypt	n=32, decrypting=1
			
 
				-ENDPROC(speck64_xts_decrypt_neon)
			
--- a/arch/arm/crypto/speck-neon-glue.c
+++ b/arch/arm/crypto/speck-neon-glue.c
@@ -1,288 +0,0 @@
 
				-// SPDX-License-Identifier: GPL-2.0
			
 
				-/*
			
 
				- * NEON-accelerated implementation of Speck128-XTS and Speck64-XTS
			
 
				- *
			
 
				- * Copyright (c) 2018 Google, Inc
			
 
				- *
			
 
				- * Note: the NIST recommendation for XTS only specifies a 128-bit block size,
			
 
				- * but a 64-bit version (needed for Speck64) is fairly straightforward; the math
			
 
				- * is just done in GF(2^64) instead of GF(2^128), with the reducing polynomial
			
 
				- * x^64 + x^4 + x^3 + x + 1 from the original XEX paper (Rogaway, 2004:
			
 
				- * "Efficient Instantiations of Tweakable Blockciphers and Refinements to Modes
			
 
				- * OCB and PMAC"), represented as 0x1B.
			
 
				- */
			
 
				-
			
 
				-#include <asm/hwcap.h>
			
 
				-#include <asm/neon.h>
			
 
				-#include <asm/simd.h>
			
 
				-#include <crypto/algapi.h>
			
 
				-#include <crypto/gf128mul.h>
			
 
				-#include <crypto/internal/skcipher.h>
			
 
				-#include <crypto/speck.h>
			
 
				-#include <crypto/xts.h>
			
 
				-#include <linux/kernel.h>
			
 
				-#include <linux/module.h>
			
 
				-
			
 
				-/* The assembly functions only handle multiples of 128 bytes */
			
 
				-#define SPECK_NEON_CHUNK_SIZE	128
			
 
				-
			
 
				-/* Speck128 */
			
 
				-
			
 
				-struct speck128_xts_tfm_ctx {
			
 
				-	struct speck128_tfm_ctx main_key;
			
 
				-	struct speck128_tfm_ctx tweak_key;
			
 
				-};
			
 
				-
			
 
				-asmlinkage void speck128_xts_encrypt_neon(const u64 *round_keys, int nrounds,
			
 
				-					  void *dst, const void *src,
			
 
				-					  unsigned int nbytes, void *tweak);
			
 
				-
			
 
				-asmlinkage void speck128_xts_decrypt_neon(const u64 *round_keys, int nrounds,
			
 
				-					  void *dst, const void *src,
			
 
				-					  unsigned int nbytes, void *tweak);
			
 
				-
			
 
				-typedef void (*speck128_crypt_one_t)(const struct speck128_tfm_ctx *,
			
 
				-				     u8 *, const u8 *);
			
 
				-typedef void (*speck128_xts_crypt_many_t)(const u64 *, int, void *,
			
 
				-					  const void *, unsigned int, void *);
			
 
				-
			
 
				-static __always_inline int
			
 
				-__speck128_xts_crypt(struct skcipher_request *req,
			
 
				-		     speck128_crypt_one_t crypt_one,
			
 
				-		     speck128_xts_crypt_many_t crypt_many)
			
 
				-{
			
 
				-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
			
 
				-	const struct speck128_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
			
 
				-	struct skcipher_walk walk;
			
 
				-	le128 tweak;
			
 
				-	int err;
			
 
				-
			
 
				-	err = skcipher_walk_virt(&walk, req, true);
			
 
				-
			
 
				-	crypto_speck128_encrypt(&ctx->tweak_key, (u8 *)&tweak, walk.iv);
			
 
				-
			
 
				-	while (walk.nbytes > 0) {
			
 
				-		unsigned int nbytes = walk.nbytes;
			
 
				-		u8 *dst = walk.dst.virt.addr;
			
 
				-		const u8 *src = walk.src.virt.addr;
			
 
				-
			
 
				-		if (nbytes >= SPECK_NEON_CHUNK_SIZE && may_use_simd()) {
			
 
				-			unsigned int count;
			
 
				-
			
 
				-			count = round_down(nbytes, SPECK_NEON_CHUNK_SIZE);
			
 
				-			kernel_neon_begin();
			
 
				-			(*crypt_many)(ctx->main_key.round_keys,
			
 
				-				      ctx->main_key.nrounds,
			
 
				-				      dst, src, count, &tweak);
			
 
				-			kernel_neon_end();
			
 
				-			dst += count;
			
 
				-			src += count;
			
 
				-			nbytes -= count;
			
 
				-		}
			
 
				-
			
 
				-		/* Handle any remainder with generic code */
			
 
				-		while (nbytes >= sizeof(tweak)) {
			
 
				-			le128_xor((le128 *)dst, (const le128 *)src, &tweak);
			
 
				-			(*crypt_one)(&ctx->main_key, dst, dst);
			
 
				-			le128_xor((le128 *)dst, (const le128 *)dst, &tweak);
			
 
				-			gf128mul_x_ble(&tweak, &tweak);
			
 
				-
			
 
				-			dst += sizeof(tweak);
			
 
				-			src += sizeof(tweak);
			
 
				-			nbytes -= sizeof(tweak);
			
 
				-		}
			
 
				-		err = skcipher_walk_done(&walk, nbytes);
			
 
				-	}
			
 
				-
			
 
				-	return err;
			
 
				-}
			
 
				-
			
 
				-static int speck128_xts_encrypt(struct skcipher_request *req)
			
 
				-{
			
 
				-	return __speck128_xts_crypt(req, crypto_speck128_encrypt,
			
 
				-				    speck128_xts_encrypt_neon);
			
 
				-}
			
 
				-
			
 
				-static int speck128_xts_decrypt(struct skcipher_request *req)
			
 
				-{
			
 
				-	return __speck128_xts_crypt(req, crypto_speck128_decrypt,
			
 
				-				    speck128_xts_decrypt_neon);
			
 
				-}
			
 
				-
			
 
				-static int speck128_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
			
 
				-			       unsigned int keylen)
			
 
				-{
			
 
				-	struct speck128_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
			
 
				-	int err;
			
 
				-
			
 
				-	err = xts_verify_key(tfm, key, keylen);
			
 
				-	if (err)
			
 
				-		return err;
			
 
				-
			
 
				-	keylen /= 2;
			
 
				-
			
 
				-	err = crypto_speck128_setkey(&ctx->main_key, key, keylen);
			
 
				-	if (err)
			
 
				-		return err;
			
 
				-
			
 
				-	return crypto_speck128_setkey(&ctx->tweak_key, key + keylen, keylen);
			
 
				-}
			
 
				-
			
 
				-/* Speck64 */
			
 
				-
			
 
				-struct speck64_xts_tfm_ctx {
			
 
				-	struct speck64_tfm_ctx main_key;
			
 
				-	struct speck64_tfm_ctx tweak_key;
			
 
				-};
			
 
				-
			
 
				-asmlinkage void speck64_xts_encrypt_neon(const u32 *round_keys, int nrounds,
			
 
				-					 void *dst, const void *src,
			
 
				-					 unsigned int nbytes, void *tweak);
			
 
				-
			
 
				-asmlinkage void speck64_xts_decrypt_neon(const u32 *round_keys, int nrounds,
			
 
				-					 void *dst, const void *src,
			
 
				-					 unsigned int nbytes, void *tweak);
			
 
				-
			
 
				-typedef void (*speck64_crypt_one_t)(const struct speck64_tfm_ctx *,
			
 
				-				    u8 *, const u8 *);
			
 
				-typedef void (*speck64_xts_crypt_many_t)(const u32 *, int, void *,
			
 
				-					 const void *, unsigned int, void *);
			
 
				-
			
 
				-static __always_inline int
			
 
				-__speck64_xts_crypt(struct skcipher_request *req, speck64_crypt_one_t crypt_one,
			
 
				-		    speck64_xts_crypt_many_t crypt_many)
			
 
				-{
			
 
				-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
			
 
				-	const struct speck64_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
			
 
				-	struct skcipher_walk walk;
			
 
				-	__le64 tweak;
			
 
				-	int err;
			
 
				-
			
 
				-	err = skcipher_walk_virt(&walk, req, true);
			
 
				-
			
 
				-	crypto_speck64_encrypt(&ctx->tweak_key, (u8 *)&tweak, walk.iv);
			
 
				-
			
 
				-	while (walk.nbytes > 0) {
			
 
				-		unsigned int nbytes = walk.nbytes;
			
 
				-		u8 *dst = walk.dst.virt.addr;
			
 
				-		const u8 *src = walk.src.virt.addr;
			
 
				-
			
 
				-		if (nbytes >= SPECK_NEON_CHUNK_SIZE && may_use_simd()) {
			
 
				-			unsigned int count;
			
 
				-
			
 
				-			count = round_down(nbytes, SPECK_NEON_CHUNK_SIZE);
			
 
				-			kernel_neon_begin();
			
 
				-			(*crypt_many)(ctx->main_key.round_keys,
			
 
				-				      ctx->main_key.nrounds,
			
 
				-				      dst, src, count, &tweak);
			
 
				-			kernel_neon_end();
			
 
				-			dst += count;
			
 
				-			src += count;
			
 
				-			nbytes -= count;
			
 
				-		}
			
 
				-
			
 
				-		/* Handle any remainder with generic code */
			
 
				-		while (nbytes >= sizeof(tweak)) {
			
 
				-			*(__le64 *)dst = *(__le64 *)src ^ tweak;
			
 
				-			(*crypt_one)(&ctx->main_key, dst, dst);
			
 
				-			*(__le64 *)dst ^= tweak;
			
 
				-			tweak = cpu_to_le64((le64_to_cpu(tweak) << 1) ^
			
 
				-					    ((tweak & cpu_to_le64(1ULL << 63)) ?
			
 
				-					     0x1B : 0));
			
 
				-			dst += sizeof(tweak);
			
 
				-			src += sizeof(tweak);
			
 
				-			nbytes -= sizeof(tweak);
			
 
				-		}
			
 
				-		err = skcipher_walk_done(&walk, nbytes);
			
 
				-	}
			
 
				-
			
 
				-	return err;
			
 
				-}
			
 
				-
			
 
				-static int speck64_xts_encrypt(struct skcipher_request *req)
			
 
				-{
			
 
				-	return __speck64_xts_crypt(req, crypto_speck64_encrypt,
			
 
				-				   speck64_xts_encrypt_neon);
			
 
				-}
			
 
				-
			
 
				-static int speck64_xts_decrypt(struct skcipher_request *req)
			
 
				-{
			
 
				-	return __speck64_xts_crypt(req, crypto_speck64_decrypt,
			
 
				-				   speck64_xts_decrypt_neon);
			
 
				-}
			
 
				-
			
 
				-static int speck64_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
			
 
				-			      unsigned int keylen)
			
 
				-{
			
 
				-	struct speck64_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
			
 
				-	int err;
			
 
				-
			
 
				-	err = xts_verify_key(tfm, key, keylen);
			
 
				-	if (err)
			
 
				-		return err;
			
 
				-
			
 
				-	keylen /= 2;
			
 
				-
			
 
				-	err = crypto_speck64_setkey(&ctx->main_key, key, keylen);
			
 
				-	if (err)
			
 
				-		return err;
			
 
				-
			
 
				-	return crypto_speck64_setkey(&ctx->tweak_key, key + keylen, keylen);
			
 
				-}
			
 
				-
			
 
				-static struct skcipher_alg speck_algs[] = {
			
 
				-	{
			
 
				-		.base.cra_name		= "xts(speck128)",
			
 
				-		.base.cra_driver_name	= "xts-speck128-neon",
			
 
				-		.base.cra_priority	= 300,
			
 
				-		.base.cra_blocksize	= SPECK128_BLOCK_SIZE,
			
 
				-		.base.cra_ctxsize	= sizeof(struct speck128_xts_tfm_ctx),
			
 
				-		.base.cra_alignmask	= 7,
			
 
				-		.base.cra_module	= THIS_MODULE,
			
 
				-		.min_keysize		= 2 * SPECK128_128_KEY_SIZE,
			
 
				-		.max_keysize		= 2 * SPECK128_256_KEY_SIZE,
			
 
				-		.ivsize			= SPECK128_BLOCK_SIZE,
			
 
				-		.walksize		= SPECK_NEON_CHUNK_SIZE,
			
 
				-		.setkey			= speck128_xts_setkey,
			
 
				-		.encrypt		= speck128_xts_encrypt,
			
 
				-		.decrypt		= speck128_xts_decrypt,
			
 
				-	}, {
			
 
				-		.base.cra_name		= "xts(speck64)",
			
 
				-		.base.cra_driver_name	= "xts-speck64-neon",
			
 
				-		.base.cra_priority	= 300,
			
 
				-		.base.cra_blocksize	= SPECK64_BLOCK_SIZE,
			
 
				-		.base.cra_ctxsize	= sizeof(struct speck64_xts_tfm_ctx),
			
 
				-		.base.cra_alignmask	= 7,
			
 
				-		.base.cra_module	= THIS_MODULE,
			
 
				-		.min_keysize		= 2 * SPECK64_96_KEY_SIZE,
			
 
				-		.max_keysize		= 2 * SPECK64_128_KEY_SIZE,
			
 
				-		.ivsize			= SPECK64_BLOCK_SIZE,
			
 
				-		.walksize		= SPECK_NEON_CHUNK_SIZE,
			
 
				-		.setkey			= speck64_xts_setkey,
			
 
				-		.encrypt		= speck64_xts_encrypt,
			
 
				-		.decrypt		= speck64_xts_decrypt,
			
 
				-	}
			
 
				-};
			
 
				-
			
 
				-static int __init speck_neon_module_init(void)
			
 
				-{
			
 
				-	if (!(elf_hwcap & HWCAP_NEON))
			
 
				-		return -ENODEV;
			
 
				-	return crypto_register_skciphers(speck_algs, ARRAY_SIZE(speck_algs));
			
 
				-}
			
 
				-
			
 
				-static void __exit speck_neon_module_exit(void)
			
 
				-{
			
 
				-	crypto_unregister_skciphers(speck_algs, ARRAY_SIZE(speck_algs));
			
 
				-}
			
 
				-
			
 
				-module_init(speck_neon_module_init);
			
 
				-module_exit(speck_neon_module_exit);
			
 
				-
			
 
				-MODULE_DESCRIPTION("Speck block cipher (NEON-accelerated)");
			
 
				-MODULE_LICENSE("GPL");
			
 
				-MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>");
			
 
				-MODULE_ALIAS_CRYPTO("xts(speck128)");
			
 
				-MODULE_ALIAS_CRYPTO("xts-speck128-neon");
			
 
				-MODULE_ALIAS_CRYPTO("xts(speck64)");
			
 
				-MODULE_ALIAS_CRYPTO("xts-speck64-neon");
			
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -698,6 +698,7 @@ CONFIG_MEMTEST=y
 
				 CONFIG_SECURITY=y
			
 
				 CONFIG_CRYPTO_ECHAINIV=y
			
 
				 CONFIG_CRYPTO_ANSI_CPRNG=y
			
 
				+CONFIG_CRYPTO_DEV_FSL_DPAA2_CAAM=y
			
 
				 CONFIG_ARM64_CRYPTO=y
			
 
				 CONFIG_CRYPTO_SHA1_ARM64_CE=y
			
 
				 CONFIG_CRYPTO_SHA2_ARM64_CE=y
			
@@ -706,7 +707,6 @@ CONFIG_CRYPTO_SHA3_ARM64=m
 
				 CONFIG_CRYPTO_SM3_ARM64_CE=m
			
 
				 CONFIG_CRYPTO_GHASH_ARM64_CE=y
			
 
				 CONFIG_CRYPTO_CRCT10DIF_ARM64_CE=m
			
 
				-CONFIG_CRYPTO_CRC32_ARM64_CE=m
			
 
				 CONFIG_CRYPTO_AES_ARM64_CE_CCM=y
			
 
				 CONFIG_CRYPTO_AES_ARM64_CE_BLK=y
			
 
				 CONFIG_CRYPTO_CHACHA20_NEON=m
			
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -66,11 +66,6 @@ config CRYPTO_CRCT10DIF_ARM64_CE
 
				 	depends on KERNEL_MODE_NEON && CRC_T10DIF
			
 
				 	select CRYPTO_HASH
			
 
				 
			
 
				-config CRYPTO_CRC32_ARM64_CE
			
 
				-	tristate "CRC32 and CRC32C digest algorithms using ARMv8 extensions"
			
 
				-	depends on CRC32
			
 
				-	select CRYPTO_HASH
			
 
				-
			
 
				 config CRYPTO_AES_ARM64
			
 
				 	tristate "AES core cipher using scalar instructions"
			
 
				 	select CRYPTO_AES
			
@@ -119,10 +114,4 @@ config CRYPTO_AES_ARM64_BS
 
				 	select CRYPTO_AES_ARM64
			
 
				 	select CRYPTO_SIMD
			
 
				 
			
 
				-config CRYPTO_SPECK_NEON
			
 
				-	tristate "NEON accelerated Speck cipher algorithms"
			
 
				-	depends on KERNEL_MODE_NEON
			
 
				-	select CRYPTO_BLKCIPHER
			
 
				-	select CRYPTO_SPECK
			
 
				-
			
 
				 endif
			
--- a/arch/arm64/crypto/Makefile
+++ b/arch/arm64/crypto/Makefile
@@ -32,9 +32,6 @@ ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o
 
				 obj-$(CONFIG_CRYPTO_CRCT10DIF_ARM64_CE) += crct10dif-ce.o
			
 
				 crct10dif-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o
			
 
				 
			
 
				-obj-$(CONFIG_CRYPTO_CRC32_ARM64_CE) += crc32-ce.o
			
 
				-crc32-ce-y:= crc32-ce-core.o crc32-ce-glue.o
			
 
				-
			
 
				 obj-$(CONFIG_CRYPTO_AES_ARM64_CE) += aes-ce-cipher.o
			
 
				 aes-ce-cipher-y := aes-ce-core.o aes-ce-glue.o
			
 
				 
			
@@ -56,9 +53,6 @@ sha512-arm64-y := sha512-glue.o sha512-core.o
 
				 obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o
			
 
				 chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o
			
 
				 
			
 
				-obj-$(CONFIG_CRYPTO_SPECK_NEON) += speck-neon.o
			
 
				-speck-neon-y := speck-neon-core.o speck-neon-glue.o
			
 
				-
			
 
				 obj-$(CONFIG_CRYPTO_AES_ARM64) += aes-arm64.o
			
 
				 aes-arm64-y := aes-cipher-core.o aes-cipher-glue.o
			
 
				 
			
--- a/arch/arm64/crypto/aes-ce.S
+++ b/arch/arm64/crypto/aes-ce.S
@@ -17,6 +17,11 @@
 
				 
			
 
				 	.arch		armv8-a+crypto
			
 
				 
			
 
				+	xtsmask		.req	v16
			
 
				+
			
 
				+	.macro		xts_reload_mask, tmp
			
 
				+	.endm
			
 
				+
			
 
				 	/* preload all round keys */
			
 
				 	.macro		load_round_keys, rounds, rk
			
 
				 	cmp		\rounds, #12
			
--- a/arch/arm64/crypto/aes-glue.c
+++ b/arch/arm64/crypto/aes-glue.c
@@ -15,6 +15,7 @@
 
				 #include <crypto/internal/hash.h>
			
 
				 #include <crypto/internal/simd.h>
			
 
				 #include <crypto/internal/skcipher.h>
			
 
				+#include <crypto/scatterwalk.h>
			
 
				 #include <linux/module.h>
			
 
				 #include <linux/cpufeature.h>
			
 
				 #include <crypto/xts.h>
			
@@ -31,6 +32,8 @@
 
				 #define aes_ecb_decrypt		ce_aes_ecb_decrypt
			
 
				 #define aes_cbc_encrypt		ce_aes_cbc_encrypt
			
 
				 #define aes_cbc_decrypt		ce_aes_cbc_decrypt
			
 
				+#define aes_cbc_cts_encrypt	ce_aes_cbc_cts_encrypt
			
 
				+#define aes_cbc_cts_decrypt	ce_aes_cbc_cts_decrypt
			
 
				 #define aes_ctr_encrypt		ce_aes_ctr_encrypt
			
 
				 #define aes_xts_encrypt		ce_aes_xts_encrypt
			
 
				 #define aes_xts_decrypt		ce_aes_xts_decrypt
			
@@ -45,6 +48,8 @@ MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions");
 
				 #define aes_ecb_decrypt		neon_aes_ecb_decrypt
			
 
				 #define aes_cbc_encrypt		neon_aes_cbc_encrypt
			
 
				 #define aes_cbc_decrypt		neon_aes_cbc_decrypt
			
 
				+#define aes_cbc_cts_encrypt	neon_aes_cbc_cts_encrypt
			
 
				+#define aes_cbc_cts_decrypt	neon_aes_cbc_cts_decrypt
			
 
				 #define aes_ctr_encrypt		neon_aes_ctr_encrypt
			
 
				 #define aes_xts_encrypt		neon_aes_xts_encrypt
			
 
				 #define aes_xts_decrypt		neon_aes_xts_decrypt
			
@@ -63,30 +68,41 @@ MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
 
				 MODULE_LICENSE("GPL v2");
			
 
				 
			
 
				 /* defined in aes-modes.S */
			
 
				-asmlinkage void aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[],
			
 
				+asmlinkage void aes_ecb_encrypt(u8 out[], u8 const in[], u32 const rk[],
			
 
				 				int rounds, int blocks);
			
 
				-asmlinkage void aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[],
			
 
				+asmlinkage void aes_ecb_decrypt(u8 out[], u8 const in[], u32 const rk[],
			
 
				 				int rounds, int blocks);
			
 
				 
			
 
				-asmlinkage void aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[],
			
 
				+asmlinkage void aes_cbc_encrypt(u8 out[], u8 const in[], u32 const rk[],
			
 
				 				int rounds, int blocks, u8 iv[]);
			
 
				-asmlinkage void aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[],
			
 
				+asmlinkage void aes_cbc_decrypt(u8 out[], u8 const in[], u32 const rk[],
			
 
				 				int rounds, int blocks, u8 iv[]);
			
 
				 
			
 
				-asmlinkage void aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
			
 
				+asmlinkage void aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[],
			
 
				+				int rounds, int bytes, u8 const iv[]);
			
 
				+asmlinkage void aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
			
 
				+				int rounds, int bytes, u8 const iv[]);
			
 
				+
			
 
				+asmlinkage void aes_ctr_encrypt(u8 out[], u8 const in[], u32 const rk[],
			
 
				 				int rounds, int blocks, u8 ctr[]);
			
 
				 
			
 
				-asmlinkage void aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[],
			
 
				-				int rounds, int blocks, u8 const rk2[], u8 iv[],
			
 
				+asmlinkage void aes_xts_encrypt(u8 out[], u8 const in[], u32 const rk1[],
			
 
				+				int rounds, int blocks, u32 const rk2[], u8 iv[],
			
 
				 				int first);
			
 
				-asmlinkage void aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[],
			
 
				-				int rounds, int blocks, u8 const rk2[], u8 iv[],
			
 
				+asmlinkage void aes_xts_decrypt(u8 out[], u8 const in[], u32 const rk1[],
			
 
				+				int rounds, int blocks, u32 const rk2[], u8 iv[],
			
 
				 				int first);
			
 
				 
			
 
				 asmlinkage void aes_mac_update(u8 const in[], u32 const rk[], int rounds,
			
 
				 			       int blocks, u8 dg[], int enc_before,
			
 
				 			       int enc_after);
			
 
				 
			
 
				+struct cts_cbc_req_ctx {
			
 
				+	struct scatterlist sg_src[2];
			
 
				+	struct scatterlist sg_dst[2];
			
 
				+	struct skcipher_request subreq;
			
 
				+};
			
 
				+
			
 
				 struct crypto_aes_xts_ctx {
			
 
				 	struct crypto_aes_ctx key1;
			
 
				 	struct crypto_aes_ctx __aligned(8) key2;
			
@@ -142,7 +158,7 @@ static int ecb_encrypt(struct skcipher_request *req)
 
				 	while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
			
 
				 		kernel_neon_begin();
			
 
				 		aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
			
 
				-				(u8 *)ctx->key_enc, rounds, blocks);
			
 
				+				ctx->key_enc, rounds, blocks);
			
 
				 		kernel_neon_end();
			
 
				 		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
			
 
				 	}
			
@@ -162,7 +178,7 @@ static int ecb_decrypt(struct skcipher_request *req)
 
				 	while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
			
 
				 		kernel_neon_begin();
			
 
				 		aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
			
 
				-				(u8 *)ctx->key_dec, rounds, blocks);
			
 
				+				ctx->key_dec, rounds, blocks);
			
 
				 		kernel_neon_end();
			
 
				 		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
			
 
				 	}
			
@@ -182,7 +198,7 @@ static int cbc_encrypt(struct skcipher_request *req)
 
				 	while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
			
 
				 		kernel_neon_begin();
			
 
				 		aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
			
 
				-				(u8 *)ctx->key_enc, rounds, blocks, walk.iv);
			
 
				+				ctx->key_enc, rounds, blocks, walk.iv);
			
 
				 		kernel_neon_end();
			
 
				 		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
			
 
				 	}
			
@@ -202,13 +218,149 @@ static int cbc_decrypt(struct skcipher_request *req)
 
				 	while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
			
 
				 		kernel_neon_begin();
			
 
				 		aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
			
 
				-				(u8 *)ctx->key_dec, rounds, blocks, walk.iv);
			
 
				+				ctx->key_dec, rounds, blocks, walk.iv);
			
 
				 		kernel_neon_end();
			
 
				 		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
			
 
				 	}
			
 
				 	return err;
			
 
				 }
			
 
				 
			
 
				+static int cts_cbc_init_tfm(struct crypto_skcipher *tfm)	/* .init hook of the "__cts(cbc(aes))" algorithm; always returns 0 */
			
 
				+{
			
 
				+	crypto_skcipher_set_reqsize(tfm, sizeof(struct cts_cbc_req_ctx));	/* request context must hold a struct cts_cbc_req_ctx */
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+static int cts_cbc_encrypt(struct skcipher_request *req)	/* CTS-CBC encrypt: plain CBC on the leading blocks, then aes_cbc_cts_encrypt() on the tail; returns 0 or a negative error */
			
 
				+{
			
 
				+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
			
 
				+	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
			
 
				+	struct cts_cbc_req_ctx *rctx = skcipher_request_ctx(req);
			
 
				+	int err, rounds = 6 + ctx->key_length / 4;	/* e.g. key_length 16 gives 10 rounds */
			
 
				+	int cbc_blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2;	/* blocks for plain CBC once the last two (the final one possibly partial) are set aside */
			
 
				+	struct scatterlist *src = req->src, *dst = req->dst;
			
 
				+	struct skcipher_walk walk;
			
 
				+
			
 
				+	skcipher_request_set_tfm(&rctx->subreq, tfm);
			
 
				+
			
 
				+	if (req->cryptlen <= AES_BLOCK_SIZE) {
			
 
				+		if (req->cryptlen < AES_BLOCK_SIZE)
			
 
				+			return -EINVAL;	/* less than one block is rejected */
			
 
				+		cbc_blocks = 1;	/* exactly one block: plain CBC only, see the early return 0 below */
			
 
				+	}
			
 
				+
			
 
				+	if (cbc_blocks > 0) {	/* leading part: ordinary CBC through the sub-request */
			
 
				+		unsigned int blocks;
			
 
				+
			
 
				+		skcipher_request_set_crypt(&rctx->subreq, req->src, req->dst,
			
 
				+					   cbc_blocks * AES_BLOCK_SIZE,
			
 
				+					   req->iv);
			
 
				+
			
 
				+		err = skcipher_walk_virt(&walk, &rctx->subreq, false);
			
 
				+
			
 
				+		while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
			
 
				+			kernel_neon_begin();
			
 
				+			aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
			
 
				+					ctx->key_enc, rounds, blocks, walk.iv);
			
 
				+			kernel_neon_end();
			
 
				+			err = skcipher_walk_done(&walk,
			
 
				+						 walk.nbytes % AES_BLOCK_SIZE);
			
 
				+		}
			
 
				+		if (err)	/* covers both the walk setup and the last walk step */
			
 
				+			return err;
			
 
				+
			
 
				+		if (req->cryptlen == AES_BLOCK_SIZE)	/* single-block request: nothing is left for the CTS step */
			
 
				+			return 0;
			
 
				+
			
 
				+		dst = src = scatterwalk_ffwd(rctx->sg_src, req->src,	/* skip the bytes the CBC pass already handled */
			
 
				+					     rctx->subreq.cryptlen);
			
 
				+		if (req->dst != req->src)	/* out-of-place request: advance the destination list separately */
			
 
				+			dst = scatterwalk_ffwd(rctx->sg_dst, req->dst,
			
 
				+					       rctx->subreq.cryptlen);
			
 
				+	}
			
 
				+
			
 
				+	/* handle ciphertext stealing */
			
 
				+	skcipher_request_set_crypt(&rctx->subreq, src, dst,
			
 
				+				   req->cryptlen - cbc_blocks * AES_BLOCK_SIZE,	/* bytes remaining after the CBC part */
			
 
				+				   req->iv);
			
 
				+
			
 
				+	err = skcipher_walk_virt(&walk, &rctx->subreq, false);
			
 
				+	if (err)
			
 
				+		return err;
			
 
				+
			
 
				+	kernel_neon_begin();
			
 
				+	aes_cbc_cts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,	/* single call; the length passed is walk.nbytes */
			
 
				+			    ctx->key_enc, rounds, walk.nbytes, walk.iv);
			
 
				+	kernel_neon_end();
			
 
				+
			
 
				+	return skcipher_walk_done(&walk, 0);
			
 
				+}
			
 
				+
			
 
				+static int cts_cbc_decrypt(struct skcipher_request *req)	/* CTS-CBC decrypt: plain CBC on the leading blocks, then aes_cbc_cts_decrypt() on the tail; returns 0 or a negative error */
			
 
				+{
			
 
				+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
			
 
				+	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
			
 
				+	struct cts_cbc_req_ctx *rctx = skcipher_request_ctx(req);
			
 
				+	int err, rounds = 6 + ctx->key_length / 4;	/* e.g. key_length 16 gives 10 rounds */
			
 
				+	int cbc_blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2;	/* blocks for plain CBC once the last two (the final one possibly partial) are set aside */
			
 
				+	struct scatterlist *src = req->src, *dst = req->dst;
			
 
				+	struct skcipher_walk walk;
			
 
				+
			
 
				+	skcipher_request_set_tfm(&rctx->subreq, tfm);
			
 
				+
			
 
				+	if (req->cryptlen <= AES_BLOCK_SIZE) {
			
 
				+		if (req->cryptlen < AES_BLOCK_SIZE)
			
 
				+			return -EINVAL;	/* less than one block is rejected */
			
 
				+		cbc_blocks = 1;	/* exactly one block: plain CBC only, see the early return 0 below */
			
 
				+	}
			
 
				+
			
 
				+	if (cbc_blocks > 0) {	/* leading part: ordinary CBC through the sub-request */
			
 
				+		unsigned int blocks;
			
 
				+
			
 
				+		skcipher_request_set_crypt(&rctx->subreq, req->src, req->dst,
			
 
				+					   cbc_blocks * AES_BLOCK_SIZE,
			
 
				+					   req->iv);
			
 
				+
			
 
				+		err = skcipher_walk_virt(&walk, &rctx->subreq, false);
			
 
				+
			
 
				+		while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
			
 
				+			kernel_neon_begin();
			
 
				+			aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
			
 
				+					ctx->key_dec, rounds, blocks, walk.iv);
			
 
				+			kernel_neon_end();
			
 
				+			err = skcipher_walk_done(&walk,
			
 
				+						 walk.nbytes % AES_BLOCK_SIZE);
			
 
				+		}
			
 
				+		if (err)	/* covers both the walk setup and the last walk step */
			
 
				+			return err;
			
 
				+
			
 
				+		if (req->cryptlen == AES_BLOCK_SIZE)	/* single-block request: nothing is left for the CTS step */
			
 
				+			return 0;
			
 
				+
			
 
				+		dst = src = scatterwalk_ffwd(rctx->sg_src, req->src,	/* skip the bytes the CBC pass already handled */
			
 
				+					     rctx->subreq.cryptlen);
			
 
				+		if (req->dst != req->src)	/* out-of-place request: advance the destination list separately */
			
 
				+			dst = scatterwalk_ffwd(rctx->sg_dst, req->dst,
			
 
				+					       rctx->subreq.cryptlen);
			
 
				+	}
			
 
				+
			
 
				+	/* handle ciphertext stealing */
			
 
				+	skcipher_request_set_crypt(&rctx->subreq, src, dst,
			
 
				+				   req->cryptlen - cbc_blocks * AES_BLOCK_SIZE,	/* bytes remaining after the CBC part */
			
 
				+				   req->iv);
			
 
				+
			
 
				+	err = skcipher_walk_virt(&walk, &rctx->subreq, false);
			
 
				+	if (err)
			
 
				+		return err;
			
 
				+
			
 
				+	kernel_neon_begin();
			
 
				+	aes_cbc_cts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,	/* single call; the length passed is walk.nbytes */
			
 
				+			    ctx->key_dec, rounds, walk.nbytes, walk.iv);
			
 
				+	kernel_neon_end();
			
 
				+
			
 
				+	return skcipher_walk_done(&walk, 0);
			
 
				+}
			
 
				+
			
 
				 static int ctr_encrypt(struct skcipher_request *req)
			
 
				 {
			
 
				 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
			
@@ -222,7 +374,7 @@ static int ctr_encrypt(struct skcipher_request *req)
 
				 	while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
			
 
				 		kernel_neon_begin();
			
 
				 		aes_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
			
 
				-				(u8 *)ctx->key_enc, rounds, blocks, walk.iv);
			
 
				+				ctx->key_enc, rounds, blocks, walk.iv);
			
 
				 		kernel_neon_end();
			
 
				 		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
			
 
				 	}
			
@@ -238,7 +390,7 @@ static int ctr_encrypt(struct skcipher_request *req)
 
				 		blocks = -1;
			
 
				 
			
 
				 		kernel_neon_begin();
			
 
				-		aes_ctr_encrypt(tail, NULL, (u8 *)ctx->key_enc, rounds,
			
 
				+		aes_ctr_encrypt(tail, NULL, ctx->key_enc, rounds,
			
 
				 				blocks, walk.iv);
			
 
				 		kernel_neon_end();
			
 
				 		crypto_xor_cpy(tdst, tsrc, tail, nbytes);
			
@@ -272,8 +424,8 @@ static int xts_encrypt(struct skcipher_request *req)
 
				 	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
			
 
				 		kernel_neon_begin();
			
 
				 		aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
			
 
				-				(u8 *)ctx->key1.key_enc, rounds, blocks,
			
 
				-				(u8 *)ctx->key2.key_enc, walk.iv, first);
			
 
				+				ctx->key1.key_enc, rounds, blocks,
			
 
				+				ctx->key2.key_enc, walk.iv, first);
			
 
				 		kernel_neon_end();
			
 
				 		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
			
 
				 	}
			
@@ -294,8 +446,8 @@ static int xts_decrypt(struct skcipher_request *req)
 
				 	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
			
 
				 		kernel_neon_begin();
			
 
				 		aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
			
 
				-				(u8 *)ctx->key1.key_dec, rounds, blocks,
			
 
				-				(u8 *)ctx->key2.key_enc, walk.iv, first);
			
 
				+				ctx->key1.key_dec, rounds, blocks,
			
 
				+				ctx->key2.key_enc, walk.iv, first);
			
 
				 		kernel_neon_end();
			
 
				 		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
			
 
				 	}
			
@@ -334,6 +486,24 @@ static struct skcipher_alg aes_algs[] = { {
 
				 	.setkey		= skcipher_aes_setkey,
			
 
				 	.encrypt	= cbc_encrypt,
			
 
				 	.decrypt	= cbc_decrypt,
			
 
				+}, {
			
 
				+	.base = {
			
 
				+		.cra_name		= "__cts(cbc(aes))",
			
 
				+		.cra_driver_name	= "__cts-cbc-aes-" MODE,
			
 
				+		.cra_priority		= PRIO,
			
 
				+		.cra_flags		= CRYPTO_ALG_INTERNAL,
			
 
				+		.cra_blocksize		= AES_BLOCK_SIZE,
			
 
				+		.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
			
 
				+		.cra_module		= THIS_MODULE,
			
 
				+	},
			
 
				+	.min_keysize	= AES_MIN_KEY_SIZE,
			
 
				+	.max_keysize	= AES_MAX_KEY_SIZE,
			
 
				+	.ivsize		= AES_BLOCK_SIZE,
			
 
				+	.walksize	= 2 * AES_BLOCK_SIZE,
			
 
				+	.setkey		= skcipher_aes_setkey,
			
 
				+	.encrypt	= cts_cbc_encrypt,
			
 
				+	.decrypt	= cts_cbc_decrypt,
			
 
				+	.init		= cts_cbc_init_tfm,
			
 
				 }, {
			
 
				 	.base = {
			
 
				 		.cra_name		= "__ctr(aes)",
			
@@ -412,7 +582,6 @@ static int cmac_setkey(struct crypto_shash *tfm, const u8 *in_key,
 
				 {
			
 
				 	struct mac_tfm_ctx *ctx = crypto_shash_ctx(tfm);
			
 
				 	be128 *consts = (be128 *)ctx->consts;
			
 
				-	u8 *rk = (u8 *)ctx->key.key_enc;
			
 
				 	int rounds = 6 + key_len / 4;
			
 
				 	int err;
			
 
				 
			
@@ -422,7 +591,8 @@ static int cmac_setkey(struct crypto_shash *tfm, const u8 *in_key,
 
				 
			
 
				 	/* encrypt the zero vector */
			
 
				 	kernel_neon_begin();
			
 
				-	aes_ecb_encrypt(ctx->consts, (u8[AES_BLOCK_SIZE]){}, rk, rounds, 1);
			
 
				+	aes_ecb_encrypt(ctx->consts, (u8[AES_BLOCK_SIZE]){}, ctx->key.key_enc,
			
 
				+			rounds, 1);
			
 
				 	kernel_neon_end();
			
 
				 
			
 
				 	cmac_gf128_mul_by_x(consts, consts);
			
@@ -441,7 +611,6 @@ static int xcbc_setkey(struct crypto_shash *tfm, const u8 *in_key,
 
				 	};
			
 
				 
			
 
				 	struct mac_tfm_ctx *ctx = crypto_shash_ctx(tfm);
			
 
				-	u8 *rk = (u8 *)ctx->key.key_enc;
			
 
				 	int rounds = 6 + key_len / 4;
			
 
				 	u8 key[AES_BLOCK_SIZE];
			
 
				 	int err;
			
@@ -451,8 +620,8 @@ static int xcbc_setkey(struct crypto_shash *tfm, const u8 *in_key,
 
				 		return err;
			
 
				 
			
 
				 	kernel_neon_begin();
			
 
				-	aes_ecb_encrypt(key, ks[0], rk, rounds, 1);
			
 
				-	aes_ecb_encrypt(ctx->consts, ks[1], rk, rounds, 2);
			
 
				+	aes_ecb_encrypt(key, ks[0], ctx->key.key_enc, rounds, 1);
			
 
				+	aes_ecb_encrypt(ctx->consts, ks[1], ctx->key.key_enc, rounds, 2);
			
 
				 	kernel_neon_end();
			
 
				 
			
 
				 	return cbcmac_setkey(tfm, key, sizeof(key));
			
--- a/arch/arm64/crypto/aes-modes.S
+++ b/arch/arm64/crypto/aes-modes.S
@@ -14,12 +14,12 @@
 
				 	.align		4
			
 
				 
			
 
				 aes_encrypt_block4x:
			
 
				-	encrypt_block4x	v0, v1, v2, v3, w22, x21, x8, w7
			
 
				+	encrypt_block4x	v0, v1, v2, v3, w3, x2, x8, w7
			
 
				 	ret
			
 
				 ENDPROC(aes_encrypt_block4x)
			
 
				 
			
 
				 aes_decrypt_block4x:
			
 
				-	decrypt_block4x	v0, v1, v2, v3, w22, x21, x8, w7
			
 
				+	decrypt_block4x	v0, v1, v2, v3, w3, x2, x8, w7
			
 
				 	ret
			
 
				 ENDPROC(aes_decrypt_block4x)
			
 
				 
			
@@ -31,71 +31,57 @@ ENDPROC(aes_decrypt_block4x)
 
				 	 */
			
 
				 
			
 
				 AES_ENTRY(aes_ecb_encrypt)
			
 
				-	frame_push	5
			
 
				+	stp		x29, x30, [sp, #-16]!
			
 
				+	mov		x29, sp
			
 
				 
			
 
				-	mov		x19, x0
			
 
				-	mov		x20, x1
			
 
				-	mov		x21, x2
			
 
				-	mov		x22, x3
			
 
				-	mov		x23, x4
			
 
				-
			
 
				-.Lecbencrestart:
			
 
				-	enc_prepare	w22, x21, x5
			
 
				+	enc_prepare	w3, x2, x5
			
 
				 
			
 
				 .LecbencloopNx:
			
 
				-	subs		w23, w23, #4
			
 
				+	subs		w4, w4, #4
			
 
				 	bmi		.Lecbenc1x
			
 
				-	ld1		{v0.16b-v3.16b}, [x20], #64	/* get 4 pt blocks */
			
 
				+	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
			
 
				 	bl		aes_encrypt_block4x
			
 
				-	st1		{v0.16b-v3.16b}, [x19], #64
			
 
				-	cond_yield_neon	.Lecbencrestart
			
 
				+	st1		{v0.16b-v3.16b}, [x0], #64
			
 
				 	b		.LecbencloopNx
			
 
				 .Lecbenc1x:
			
 
				-	adds		w23, w23, #4
			
 
				+	adds		w4, w4, #4
			
 
				 	beq		.Lecbencout
			
 
				 .Lecbencloop:
			
 
				-	ld1		{v0.16b}, [x20], #16		/* get next pt block */
			
 
				-	encrypt_block	v0, w22, x21, x5, w6
			
 
				-	st1		{v0.16b}, [x19], #16
			
 
				-	subs		w23, w23, #1
			
 
				+	ld1		{v0.16b}, [x1], #16		/* get next pt block */
			
 
				+	encrypt_block	v0, w3, x2, x5, w6
			
 
				+	st1		{v0.16b}, [x0], #16
			
 
				+	subs		w4, w4, #1
			
 
				 	bne		.Lecbencloop
			
 
				 .Lecbencout:
			
 
				-	frame_pop
			
 
				+	ldp		x29, x30, [sp], #16
			
 
				 	ret
			
 
				 AES_ENDPROC(aes_ecb_encrypt)
			
 
				 
			
 
				 
			
 
				 AES_ENTRY(aes_ecb_decrypt)
			
 
				-	frame_push	5
			
 
				+	stp		x29, x30, [sp, #-16]!
			
 
				+	mov		x29, sp
			
 
				 
			
 
				-	mov		x19, x0
			
 
				-	mov		x20, x1
			
 
				-	mov		x21, x2
			
 
				-	mov		x22, x3
			
 
				-	mov		x23, x4
			
 
				-
			
 
				-.Lecbdecrestart:
			
 
				-	dec_prepare	w22, x21, x5
			
 
				+	dec_prepare	w3, x2, x5
			
 
				 
			
 
				 .LecbdecloopNx:
			
 
				-	subs		w23, w23, #4
			
 
				+	subs		w4, w4, #4
			
 
				 	bmi		.Lecbdec1x
			
 
				-	ld1		{v0.16b-v3.16b}, [x20], #64	/* get 4 ct blocks */
			
 
				+	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
			
 
				 	bl		aes_decrypt_block4x
			
 
				-	st1		{v0.16b-v3.16b}, [x19], #64
			
 
				-	cond_yield_neon	.Lecbdecrestart
			
 
				+	st1		{v0.16b-v3.16b}, [x0], #64
			
 
				 	b		.LecbdecloopNx
			
 
				 .Lecbdec1x:
			
 
				-	adds		w23, w23, #4
			
 
				+	adds		w4, w4, #4
			
 
				 	beq		.Lecbdecout
			
 
				 .Lecbdecloop:
			
 
				-	ld1		{v0.16b}, [x20], #16		/* get next ct block */
			
 
				-	decrypt_block	v0, w22, x21, x5, w6
			
 
				-	st1		{v0.16b}, [x19], #16
			
 
				-	subs		w23, w23, #1
			
 
				+	ld1		{v0.16b}, [x1], #16		/* get next ct block */
			
 
				+	decrypt_block	v0, w3, x2, x5, w6
			
 
				+	st1		{v0.16b}, [x0], #16
			
 
				+	subs		w4, w4, #1
			
 
				 	bne		.Lecbdecloop
			
 
				 .Lecbdecout:
			
 
				-	frame_pop
			
 
				+	ldp		x29, x30, [sp], #16
			
 
				 	ret
			
 
				 AES_ENDPROC(aes_ecb_decrypt)
			
 
				 
			
@@ -108,162 +94,211 @@ AES_ENDPROC(aes_ecb_decrypt)
 
				 	 */
			
 
				 
			
 
				 AES_ENTRY(aes_cbc_encrypt)
			
 
				-	frame_push	6
			
 
				-
			
 
				-	mov		x19, x0
			
 
				-	mov		x20, x1
			
 
				-	mov		x21, x2
			
 
				-	mov		x22, x3
			
 
				-	mov		x23, x4
			
 
				-	mov		x24, x5
			
 
				-
			
 
				-.Lcbcencrestart:
			
 
				-	ld1		{v4.16b}, [x24]			/* get iv */
			
 
				-	enc_prepare	w22, x21, x6
			
 
				+	ld1		{v4.16b}, [x5]			/* get iv */
			
 
				+	enc_prepare	w3, x2, x6
			
 
				 
			
 
				 .Lcbcencloop4x:
			
 
				-	subs		w23, w23, #4
			
 
				+	subs		w4, w4, #4
			
 
				 	bmi		.Lcbcenc1x
			
 
				-	ld1		{v0.16b-v3.16b}, [x20], #64	/* get 4 pt blocks */
			
 
				+	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
			
 
				 	eor		v0.16b, v0.16b, v4.16b		/* ..and xor with iv */
			
 
				-	encrypt_block	v0, w22, x21, x6, w7
			
 
				+	encrypt_block	v0, w3, x2, x6, w7
			
 
				 	eor		v1.16b, v1.16b, v0.16b
			
 
				-	encrypt_block	v1, w22, x21, x6, w7
			
 
				+	encrypt_block	v1, w3, x2, x6, w7
			
 
				 	eor		v2.16b, v2.16b, v1.16b
			
 
				-	encrypt_block	v2, w22, x21, x6, w7
			
 
				+	encrypt_block	v2, w3, x2, x6, w7
			
 
				 	eor		v3.16b, v3.16b, v2.16b
			
 
				-	encrypt_block	v3, w22, x21, x6, w7
			
 
				-	st1		{v0.16b-v3.16b}, [x19], #64
			
 
				+	encrypt_block	v3, w3, x2, x6, w7
			
 
				+	st1		{v0.16b-v3.16b}, [x0], #64
			
 
				 	mov		v4.16b, v3.16b
			
 
				-	st1		{v4.16b}, [x24]			/* return iv */
			
 
				-	cond_yield_neon	.Lcbcencrestart
			
 
				 	b		.Lcbcencloop4x
			
 
				 .Lcbcenc1x:
			
 
				-	adds		w23, w23, #4
			
 
				+	adds		w4, w4, #4
			
 
				 	beq		.Lcbcencout
			
 
				 .Lcbcencloop:
			
 
				-	ld1		{v0.16b}, [x20], #16		/* get next pt block */
			
 
				+	ld1		{v0.16b}, [x1], #16		/* get next pt block */
			
 
				 	eor		v4.16b, v4.16b, v0.16b		/* ..and xor with iv */
			
 
				-	encrypt_block	v4, w22, x21, x6, w7
			
 
				-	st1		{v4.16b}, [x19], #16
			
 
				-	subs		w23, w23, #1
			
 
				+	encrypt_block	v4, w3, x2, x6, w7
			
 
				+	st1		{v4.16b}, [x0], #16
			
 
				+	subs		w4, w4, #1
			
 
				 	bne		.Lcbcencloop
			
 
				 .Lcbcencout:
			
 
				-	st1		{v4.16b}, [x24]			/* return iv */
			
 
				-	frame_pop
			
 
				+	st1		{v4.16b}, [x5]			/* return iv */
			
 
				 	ret
			
 
				 AES_ENDPROC(aes_cbc_encrypt)
			
 
				 
			
 
				 
			
 
				 AES_ENTRY(aes_cbc_decrypt)
			
 
				-	frame_push	6
			
 
				-
			
 
				-	mov		x19, x0
			
 
				-	mov		x20, x1
			
 
				-	mov		x21, x2
			
 
				-	mov		x22, x3
			
 
				-	mov		x23, x4
			
 
				-	mov		x24, x5
			
 
				+	stp		x29, x30, [sp, #-16]!
			
 
				+	mov		x29, sp
			
 
				 
			
 
				-.Lcbcdecrestart:
			
 
				-	ld1		{v7.16b}, [x24]			/* get iv */
			
 
				-	dec_prepare	w22, x21, x6
			
 
				+	ld1		{v7.16b}, [x5]			/* get iv */
			
 
				+	dec_prepare	w3, x2, x6
			
 
				 
			
 
				 .LcbcdecloopNx:
			
 
				-	subs		w23, w23, #4
			
 
				+	subs		w4, w4, #4
			
 
				 	bmi		.Lcbcdec1x
			
 
				-	ld1		{v0.16b-v3.16b}, [x20], #64	/* get 4 ct blocks */
			
 
				+	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
			
 
				 	mov		v4.16b, v0.16b
			
 
				 	mov		v5.16b, v1.16b
			
 
				 	mov		v6.16b, v2.16b
			
 
				 	bl		aes_decrypt_block4x
			
 
				-	sub		x20, x20, #16
			
 
				+	sub		x1, x1, #16
			
 
				 	eor		v0.16b, v0.16b, v7.16b
			
 
				 	eor		v1.16b, v1.16b, v4.16b
			
 
				-	ld1		{v7.16b}, [x20], #16		/* reload 1 ct block */
			
 
				+	ld1		{v7.16b}, [x1], #16		/* reload 1 ct block */
			
 
				 	eor		v2.16b, v2.16b, v5.16b
			
 
				 	eor		v3.16b, v3.16b, v6.16b
			
 
				-	st1		{v0.16b-v3.16b}, [x19], #64
			
 
				-	st1		{v7.16b}, [x24]			/* return iv */
			
 
				-	cond_yield_neon	.Lcbcdecrestart
			
 
				+	st1		{v0.16b-v3.16b}, [x0], #64
			
 
				 	b		.LcbcdecloopNx
			
 
				 .Lcbcdec1x:
			
 
				-	adds		w23, w23, #4
			
 
				+	adds		w4, w4, #4
			
 
				 	beq		.Lcbcdecout
			
 
				 .Lcbcdecloop:
			
 
				-	ld1		{v1.16b}, [x20], #16		/* get next ct block */
			
 
				+	ld1		{v1.16b}, [x1], #16		/* get next ct block */
			
 
				 	mov		v0.16b, v1.16b			/* ...and copy to v0 */
			
 
				-	decrypt_block	v0, w22, x21, x6, w7
			
 
				+	decrypt_block	v0, w3, x2, x6, w7
			
 
				 	eor		v0.16b, v0.16b, v7.16b		/* xor with iv => pt */
			
 
				 	mov		v7.16b, v1.16b			/* ct is next iv */
			
 
				-	st1		{v0.16b}, [x19], #16
			
 
				-	subs		w23, w23, #1
			
 
				+	st1		{v0.16b}, [x0], #16
			
 
				+	subs		w4, w4, #1
			
 
				 	bne		.Lcbcdecloop
			
 
				 .Lcbcdecout:
			
 
				-	st1		{v7.16b}, [x24]			/* return iv */
			
 
				-	frame_pop
			
 
				+	st1		{v7.16b}, [x5]			/* return iv */
			
 
				+	ldp		x29, x30, [sp], #16
			
 
				 	ret
			
 
				 AES_ENDPROC(aes_cbc_decrypt)
			
 
				 
			
 
				 
			
 
				+	/*
			
 
				+	 * aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[],
			
 
				+	 *		       int rounds, int bytes, u8 const iv[])
			
 
				+	 * aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
			
 
				+	 *		       int rounds, int bytes, u8 const iv[])
			
 
				+	 */
			
 
				+
			
 
				+AES_ENTRY(aes_cbc_cts_encrypt)	/* encrypts the final full block plus the trailing (possibly partial) block with ciphertext stealing */
			
 
				+	adr_l		x8, .Lcts_permute_table
			
 
				+	sub		x4, x4, #16			/* x4 = d = bytes - 16 */
			
 
				+	add		x9, x8, #32
			
 
				+	add		x8, x8, x4			/* x8 = table + d */
			
 
				+	sub		x9, x9, x4			/* x9 = table + 32 - d */
			
 
				+	ld1		{v3.16b}, [x8]			/* permute vector applied to v0 below */
			
 
				+	ld1		{v4.16b}, [x9]			/* permute vector applied to v1 below */
			
 
				+
			
 
				+	ld1		{v0.16b}, [x1], x4		/* overlapping loads */
			
 
				+	ld1		{v1.16b}, [x1]			/* 16 bytes starting at in + d, i.e. the end of the input */
			
 
				+
			
 
				+	ld1		{v5.16b}, [x5]			/* get iv */
			
 
				+	enc_prepare	w3, x2, x6
			
 
				+
			
 
				+	eor		v0.16b, v0.16b, v5.16b		/* xor with iv */
			
 
				+	tbl		v1.16b, {v1.16b}, v4.16b	/* last d input bytes moved to the front; 0xff indices give zero bytes */
			
 
				+	encrypt_block	v0, w3, x2, x6, w7
			
 
				+
			
 
				+	eor		v1.16b, v1.16b, v0.16b		/* chain: xor padded tail with the block just encrypted */
			
 
				+	tbl		v0.16b, {v0.16b}, v3.16b	/* first d bytes of that block moved to the top of the vector */
			
 
				+	encrypt_block	v1, w3, x2, x6, w7
			
 
				+
			
 
				+	add		x4, x0, x4			/* x4 = out + d */
			
 
				+	st1		{v0.16b}, [x4]			/* overlapping stores */
			
 
				+	st1		{v1.16b}, [x0]			/* full block at out, overwriting the low bytes of the store above */
			
 
				+	ret
			
 
				+AES_ENDPROC(aes_cbc_cts_encrypt)
			
 
				+
			
 
				+AES_ENTRY(aes_cbc_cts_decrypt)	/* decrypts the final full block plus the trailing (possibly partial) block with ciphertext stealing */
			
 
				+	adr_l		x8, .Lcts_permute_table
			
 
				+	sub		x4, x4, #16			/* x4 = d = bytes - 16 */
			
 
				+	add		x9, x8, #32
			
 
				+	add		x8, x8, x4			/* x8 = table + d */
			
 
				+	sub		x9, x9, x4			/* x9 = table + 32 - d */
			
 
				+	ld1		{v3.16b}, [x8]			/* permute vector applied to v2 below */
			
 
				+	ld1		{v4.16b}, [x9]			/* permute vector applied to v1 below */
			
 
				+
			
 
				+	ld1		{v0.16b}, [x1], x4		/* overlapping loads */
			
 
				+	ld1		{v1.16b}, [x1]			/* 16 bytes starting at in + d, i.e. the end of the input */
			
 
				+
			
 
				+	ld1		{v5.16b}, [x5]			/* get iv */
			
 
				+	dec_prepare	w3, x2, x6
			
 
				+
			
 
				+	tbl		v2.16b, {v1.16b}, v4.16b	/* last d input bytes moved to the front; 0xff indices give zero bytes */
			
 
				+	decrypt_block	v0, w3, x2, x6, w7
			
 
				+	eor		v2.16b, v2.16b, v0.16b		/* xor padded tail with the decrypted first block */
			
 
				+
			
 
				+	tbx		v0.16b, {v1.16b}, v4.16b	/* replace the first d bytes of v0 with the tail bytes; tbx keeps the rest */
			
 
				+	tbl		v2.16b, {v2.16b}, v3.16b	/* first d bytes of v2 moved to the top of the vector */
			
 
				+	decrypt_block	v0, w3, x2, x6, w7
			
 
				+	eor		v0.16b, v0.16b, v5.16b		/* xor with iv */
			
 
				+
			
 
				+	add		x4, x0, x4			/* x4 = out + d */
			
 
				+	st1		{v2.16b}, [x4]			/* overlapping stores */
			
 
				+	st1		{v0.16b}, [x0]			/* full block at out, overwriting the low bytes of the store above */
			
 
				+	ret
			
 
				+AES_ENDPROC(aes_cbc_cts_decrypt)
			
 
				+
			
 
				+	.section	".rodata", "a"
			
 
				+	.align		6
			
 
				+.Lcts_permute_table:	/* 48 bytes: 16 x 0xff, indices 0x0-0xf, 16 x 0xff; aes_cbc_cts_{en,de}crypt load 16-byte windows from it as tbl/tbx index vectors */
			
 
				+	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
			
 
				+	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
			
 
				+	.byte		 0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
			
 
				+	.byte		 0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe,  0xf
			
 
				+	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
			
 
				+	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
			
 
				+	.previous
			
 
				+
			
 
				+
			
 
				 	/*
			
 
				 	 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
			
 
				 	 *		   int blocks, u8 ctr[])
			
 
				 	 */
			
 
				 
			
 
				 AES_ENTRY(aes_ctr_encrypt)
			
 
				-	frame_push	6
			
 
				+	stp		x29, x30, [sp, #-16]!
			
 
				+	mov		x29, sp
			
 
				 
			
 
				-	mov		x19, x0
			
 
				-	mov		x20, x1
			
 
				-	mov		x21, x2
			
 
				-	mov		x22, x3
			
 
				-	mov		x23, x4
			
 
				-	mov		x24, x5
			
 
				-
			
 
				-.Lctrrestart:
			
 
				-	enc_prepare	w22, x21, x6
			
 
				-	ld1		{v4.16b}, [x24]
			
 
				+	enc_prepare	w3, x2, x6
			
 
				+	ld1		{v4.16b}, [x5]
			
 
				 
			
 
				 	umov		x6, v4.d[1]		/* keep swabbed ctr in reg */
			
 
				 	rev		x6, x6
			
 
				+	cmn		w6, w4			/* 32 bit overflow? */
			
 
				+	bcs		.Lctrloop
			
 
				 .LctrloopNx:
			
 
				-	subs		w23, w23, #4
			
 
				+	subs		w4, w4, #4
			
 
				 	bmi		.Lctr1x
			
 
				-	cmn		w6, #4			/* 32 bit overflow? */
			
 
				-	bcs		.Lctr1x
			
 
				-	ldr		q8, =0x30000000200000001	/* addends 1,2,3[,0] */
			
 
				-	dup		v7.4s, w6
			
 
				+	add		w7, w6, #1
			
 
				 	mov		v0.16b, v4.16b
			
 
				-	add		v7.4s, v7.4s, v8.4s
			
 
				+	add		w8, w6, #2
			
 
				 	mov		v1.16b, v4.16b
			
 
				-	rev32		v8.16b, v7.16b
			
 
				+	add		w9, w6, #3
			
 
				 	mov		v2.16b, v4.16b
			
 
				+	rev		w7, w7
			
 
				 	mov		v3.16b, v4.16b
			
 
				-	mov		v1.s[3], v8.s[0]
			
 
				-	mov		v2.s[3], v8.s[1]
			
 
				-	mov		v3.s[3], v8.s[2]
			
 
				-	ld1		{v5.16b-v7.16b}, [x20], #48	/* get 3 input blocks */
			
 
				+	rev		w8, w8
			
 
				+	mov		v1.s[3], w7
			
 
				+	rev		w9, w9
			
 
				+	mov		v2.s[3], w8
			
 
				+	mov		v3.s[3], w9
			
 
				+	ld1		{v5.16b-v7.16b}, [x1], #48	/* get 3 input blocks */
			
 
				 	bl		aes_encrypt_block4x
			
 
				 	eor		v0.16b, v5.16b, v0.16b
			
 
				-	ld1		{v5.16b}, [x20], #16		/* get 1 input block  */
			
 
				+	ld1		{v5.16b}, [x1], #16		/* get 1 input block  */
			
 
				 	eor		v1.16b, v6.16b, v1.16b
			
 
				 	eor		v2.16b, v7.16b, v2.16b
			
 
				 	eor		v3.16b, v5.16b, v3.16b
			
 
				-	st1		{v0.16b-v3.16b}, [x19], #64
			
 
				+	st1		{v0.16b-v3.16b}, [x0], #64
			
 
				 	add		x6, x6, #4
			
 
				 	rev		x7, x6
			
 
				 	ins		v4.d[1], x7
			
 
				-	cbz		w23, .Lctrout
			
 
				-	st1		{v4.16b}, [x24]		/* return next CTR value */
			
 
				-	cond_yield_neon	.Lctrrestart
			
 
				+	cbz		w4, .Lctrout
			
 
				 	b		.LctrloopNx
			
 
				 .Lctr1x:
			
 
				-	adds		w23, w23, #4
			
 
				+	adds		w4, w4, #4
			
 
				 	beq		.Lctrout
			
 
				 .Lctrloop:
			
 
				 	mov		v0.16b, v4.16b
			
 
				-	encrypt_block	v0, w22, x21, x8, w7
			
 
				+	encrypt_block	v0, w3, x2, x8, w7
			
 
				 
			
 
				 	adds		x6, x6, #1		/* increment BE ctr */
			
 
				 	rev		x7, x6
			
@@ -271,22 +306,22 @@ AES_ENTRY(aes_ctr_encrypt)
 
				 	bcs		.Lctrcarry		/* overflow? */
			
 
				 
			
 
				 .Lctrcarrydone:
			
 
				-	subs		w23, w23, #1
			
 
				+	subs		w4, w4, #1
			
 
				 	bmi		.Lctrtailblock		/* blocks <0 means tail block */
			
 
				-	ld1		{v3.16b}, [x20], #16
			
 
				+	ld1		{v3.16b}, [x1], #16
			
 
				 	eor		v3.16b, v0.16b, v3.16b
			
 
				-	st1		{v3.16b}, [x19], #16
			
 
				+	st1		{v3.16b}, [x0], #16
			
 
				 	bne		.Lctrloop
			
 
				 
			
 
				 .Lctrout:
			
 
				-	st1		{v4.16b}, [x24]		/* return next CTR value */
			
 
				-.Lctrret:
			
 
				-	frame_pop
			
 
				+	st1		{v4.16b}, [x5]		/* return next CTR value */
			
 
				+	ldp		x29, x30, [sp], #16
			
 
				 	ret
			
 
				 
			
 
				 .Lctrtailblock:
			
 
				-	st1		{v0.16b}, [x19]
			
 
				-	b		.Lctrret
			
 
				+	st1		{v0.16b}, [x0]
			
 
				+	ldp		x29, x30, [sp], #16
			
 
				+	ret
			
 
				 
			
 
				 .Lctrcarry:
			
 
				 	umov		x7, v4.d[0]		/* load upper word of ctr  */
			
@@ -296,7 +331,6 @@ AES_ENTRY(aes_ctr_encrypt)
 
				 	ins		v4.d[0], x7
			
 
				 	b		.Lctrcarrydone
			
 
				 AES_ENDPROC(aes_ctr_encrypt)
			
 
				-	.ltorg
			
 
				 
			
 
				 
			
 
				 	/*
			
@@ -306,150 +340,132 @@ AES_ENDPROC(aes_ctr_encrypt)
 
				 	 *		   int blocks, u8 const rk2[], u8 iv[], int first)
			
 
				 	 */
			
 
				 
			
 
				-	.macro		next_tweak, out, in, const, tmp
			
 
				+	.macro		next_tweak, out, in, tmp
			
 
				 	sshr		\tmp\().2d,  \in\().2d,   #63
			
 
				-	and		\tmp\().16b, \tmp\().16b, \const\().16b
			
 
				+	and		\tmp\().16b, \tmp\().16b, xtsmask.16b
			
 
				 	add		\out\().2d,  \in\().2d,   \in\().2d
			
 
				 	ext		\tmp\().16b, \tmp\().16b, \tmp\().16b, #8
			
 
				 	eor		\out\().16b, \out\().16b, \tmp\().16b
			
 
				 	.endm
			
 
				 
			
 
				-.Lxts_mul_x:
			
 
				-CPU_LE(	.quad		1, 0x87		)
			
 
				-CPU_BE(	.quad		0x87, 1		)
			
 
				+	.macro		xts_load_mask, tmp		/* build the constant used by next_tweak in xtsmask; tmp is scratch */
			
 
				+	movi		xtsmask.2s, #0x1
			
 
				+	movi		\tmp\().2s, #0x87
			
 
				+	uzp1		xtsmask.4s, xtsmask.4s, \tmp\().4s	/* NOTE(review): expected to leave 64-bit lanes { 0x1, 0x87 }, the values of the removed .Lxts_mul_x literal - confirm */
			
 
				+	.endm
			
 
				 
			
 
				 AES_ENTRY(aes_xts_encrypt)
			
 
				-	frame_push	6
			
 
				+	stp		x29, x30, [sp, #-16]!
			
 
				+	mov		x29, sp
			
 
				 
			
 
				-	mov		x19, x0
			
 
				-	mov		x20, x1
			
 
				-	mov		x21, x2
			
 
				-	mov		x22, x3
			
 
				-	mov		x23, x4
			
 
				-	mov		x24, x6
			
 
				-
			
 
				-	ld1		{v4.16b}, [x24]
			
 
				+	ld1		{v4.16b}, [x6]
			
 
				+	xts_load_mask	v8
			
 
				 	cbz		w7, .Lxtsencnotfirst
			
 
				 
			
 
				 	enc_prepare	w3, x5, x8
			
 
				 	encrypt_block	v4, w3, x5, x8, w7		/* first tweak */
			
 
				 	enc_switch_key	w3, x2, x8
			
 
				-	ldr		q7, .Lxts_mul_x
			
 
				 	b		.LxtsencNx
			
 
				 
			
 
				-.Lxtsencrestart:
			
 
				-	ld1		{v4.16b}, [x24]
			
 
				 .Lxtsencnotfirst:
			
 
				-	enc_prepare	w22, x21, x8
			
 
				+	enc_prepare	w3, x2, x8
			
 
				 .LxtsencloopNx:
			
 
				-	ldr		q7, .Lxts_mul_x
			
 
				-	next_tweak	v4, v4, v7, v8
			
 
				+	next_tweak	v4, v4, v8
			
 
				 .LxtsencNx:
			
 
				-	subs		w23, w23, #4
			
 
				+	subs		w4, w4, #4
			
 
				 	bmi		.Lxtsenc1x
			
 
				-	ld1		{v0.16b-v3.16b}, [x20], #64	/* get 4 pt blocks */
			
 
				-	next_tweak	v5, v4, v7, v8
			
 
				+	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
			
 
				+	next_tweak	v5, v4, v8
			
 
				 	eor		v0.16b, v0.16b, v4.16b
			
 
				-	next_tweak	v6, v5, v7, v8
			
 
				+	next_tweak	v6, v5, v8
			
 
				 	eor		v1.16b, v1.16b, v5.16b
			
 
				 	eor		v2.16b, v2.16b, v6.16b
			
 
				-	next_tweak	v7, v6, v7, v8
			
 
				+	next_tweak	v7, v6, v8
			
 
				 	eor		v3.16b, v3.16b, v7.16b
			
 
				 	bl		aes_encrypt_block4x
			
 
				 	eor		v3.16b, v3.16b, v7.16b
			
 
				 	eor		v0.16b, v0.16b, v4.16b
			
 
				 	eor		v1.16b, v1.16b, v5.16b
			
 
				 	eor		v2.16b, v2.16b, v6.16b
			
 
				-	st1		{v0.16b-v3.16b}, [x19], #64
			
 
				+	st1		{v0.16b-v3.16b}, [x0], #64
			
 
				 	mov		v4.16b, v7.16b
			
 
				-	cbz		w23, .Lxtsencout
			
 
				-	st1		{v4.16b}, [x24]
			
 
				-	cond_yield_neon	.Lxtsencrestart
			
 
				+	cbz		w4, .Lxtsencout
			
 
				+	xts_reload_mask	v8
			
 
				 	b		.LxtsencloopNx
			
 
				 .Lxtsenc1x:
			
 
				-	adds		w23, w23, #4
			
 
				+	adds		w4, w4, #4
			
 
				 	beq		.Lxtsencout
			
 
				 .Lxtsencloop:
			
 
				-	ld1		{v1.16b}, [x20], #16
			
 
				+	ld1		{v1.16b}, [x1], #16
			
 
				 	eor		v0.16b, v1.16b, v4.16b
			
 
				-	encrypt_block	v0, w22, x21, x8, w7
			
 
				+	encrypt_block	v0, w3, x2, x8, w7
			
 
				 	eor		v0.16b, v0.16b, v4.16b
			
 
				-	st1		{v0.16b}, [x19], #16
			
 
				-	subs		w23, w23, #1
			
 
				+	st1		{v0.16b}, [x0], #16
			
 
				+	subs		w4, w4, #1
			
 
				 	beq		.Lxtsencout
			
 
				-	next_tweak	v4, v4, v7, v8
			
 
				+	next_tweak	v4, v4, v8
			
 
				 	b		.Lxtsencloop
			
 
				 .Lxtsencout:
			
 
				-	st1		{v4.16b}, [x24]
			
 
				-	frame_pop
			
 
				+	st1		{v4.16b}, [x6]
			
 
				+	ldp		x29, x30, [sp], #16
			
 
				 	ret
			
 
				 AES_ENDPROC(aes_xts_encrypt)
			
 
				 
			
 
				 
			
 
				 AES_ENTRY(aes_xts_decrypt)
			
 
				-	frame_push	6
			
 
				+	stp		x29, x30, [sp, #-16]!
			
 
				+	mov		x29, sp
			
 
				 
			
 
				-	mov		x19, x0
			
 
				-	mov		x20, x1
			
 
				-	mov		x21, x2
			
 
				-	mov		x22, x3
			
 
				-	mov		x23, x4
			
 
				-	mov		x24, x6
			
 
				-
			
 
				-	ld1		{v4.16b}, [x24]
			
 
				+	ld1		{v4.16b}, [x6]
			
 
				+	xts_load_mask	v8
			
 
				 	cbz		w7, .Lxtsdecnotfirst
			
 
				 
			
 
				 	enc_prepare	w3, x5, x8
			
 
				 	encrypt_block	v4, w3, x5, x8, w7		/* first tweak */
			
 
				 	dec_prepare	w3, x2, x8
			
 
				-	ldr		q7, .Lxts_mul_x
			
 
				 	b		.LxtsdecNx
			
 
				 
			
 
				-.Lxtsdecrestart:
			
 
				-	ld1		{v4.16b}, [x24]
			
 
				 .Lxtsdecnotfirst:
			
 
				-	dec_prepare	w22, x21, x8
			
 
				+	dec_prepare	w3, x2, x8
			
 
				 .LxtsdecloopNx:
			
 
				-	ldr		q7, .Lxts_mul_x
			
 
				-	next_tweak	v4, v4, v7, v8
			
 
				+	next_tweak	v4, v4, v8
			
 
				 .LxtsdecNx:
			
 
				-	subs		w23, w23, #4
			
 
				+	subs		w4, w4, #4
			
 
				 	bmi		.Lxtsdec1x
			
 
				-	ld1		{v0.16b-v3.16b}, [x20], #64	/* get 4 ct blocks */
			
 
				-	next_tweak	v5, v4, v7, v8
			
 
				+	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
			
 
				+	next_tweak	v5, v4, v8
			
 
				 	eor		v0.16b, v0.16b, v4.16b
			
 
				-	next_tweak	v6, v5, v7, v8
			
 
				+	next_tweak	v6, v5, v8
			
 
				 	eor		v1.16b, v1.16b, v5.16b
			
 
				 	eor		v2.16b, v2.16b, v6.16b
			
 
				-	next_tweak	v7, v6, v7, v8
			
 
				+	next_tweak	v7, v6, v8
			
 
				 	eor		v3.16b, v3.16b, v7.16b
			
 
				 	bl		aes_decrypt_block4x
			
 
				 	eor		v3.16b, v3.16b, v7.16b
			
 
				 	eor		v0.16b, v0.16b, v4.16b
			
 
				 	eor		v1.16b, v1.16b, v5.16b
			
 
				 	eor		v2.16b, v2.16b, v6.16b
			
 
				-	st1		{v0.16b-v3.16b}, [x19], #64
			
 
				+	st1		{v0.16b-v3.16b}, [x0], #64
			
 
				 	mov		v4.16b, v7.16b
			
 
				-	cbz		w23, .Lxtsdecout
			
 
				-	st1		{v4.16b}, [x24]
			
 
				-	cond_yield_neon	.Lxtsdecrestart
			
 
				+	cbz		w4, .Lxtsdecout
			
 
				+	xts_reload_mask	v8
			
 
				 	b		.LxtsdecloopNx
			
 
				 .Lxtsdec1x:
			
 
				-	adds		w23, w23, #4
			
 
				+	adds		w4, w4, #4
			
 
				 	beq		.Lxtsdecout
			
 
				 .Lxtsdecloop:
			
 
				-	ld1		{v1.16b}, [x20], #16
			
 
				+	ld1		{v1.16b}, [x1], #16
			
 
				 	eor		v0.16b, v1.16b, v4.16b
			
 
				-	decrypt_block	v0, w22, x21, x8, w7
			
 
				+	decrypt_block	v0, w3, x2, x8, w7
			
 
				 	eor		v0.16b, v0.16b, v4.16b
			
 
				-	st1		{v0.16b}, [x19], #16
			
 
				-	subs		w23, w23, #1
			
 
				+	st1		{v0.16b}, [x0], #16
			
 
				+	subs		w4, w4, #1
			
 
				 	beq		.Lxtsdecout
			
 
				-	next_tweak	v4, v4, v7, v8
			
 
				+	next_tweak	v4, v4, v8
			
 
				 	b		.Lxtsdecloop
			
 
				 .Lxtsdecout:
			
 
				-	st1		{v4.16b}, [x24]
			
 
				-	frame_pop
			
 
				+	st1		{v4.16b}, [x6]
			
 
				+	ldp		x29, x30, [sp], #16
			
 
				 	ret
			
 
				 AES_ENDPROC(aes_xts_decrypt)
			
 
				 
			
--- a/arch/arm64/crypto/aes-neon.S
+++ b/arch/arm64/crypto/aes-neon.S
@@ -14,6 +14,12 @@
 
				 #define AES_ENTRY(func)		ENTRY(neon_ ## func)
			
 
				 #define AES_ENDPROC(func)	ENDPROC(neon_ ## func)
			
 
				 
			
 
				+	xtsmask		.req	v7
			
 
				+
			
 
				+	.macro		xts_reload_mask, tmp
			
 
				+	xts_load_mask	\tmp
			
 
				+	.endm
			
 
				+
			
 
				 	/* multiply by polynomial 'x' in GF(2^8) */
			
 
				 	.macro		mul_by_x, out, in, temp, const
			
 
				 	sshr		\temp, \in, #7
			
--- a/arch/arm64/crypto/crc32-ce-core.S
+++ b/arch/arm64/crypto/crc32-ce-core.S
@@ -1,287 +0,0 @@
 
				-/*
			
 
				- * Accelerated CRC32(C) using arm64 CRC, NEON and Crypto Extensions instructions
			
 
				- *
			
 
				- * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
			
 
				- *
			
 
				- * This program is free software; you can redistribute it and/or modify
			
 
				- * it under the terms of the GNU General Public License version 2 as
			
 
				- * published by the Free Software Foundation.
			
 
				- */
			
 
				-
			
 
				-/* GPL HEADER START
			
 
				- *
			
 
				- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
			
 
				- *
			
 
				- * This program is free software; you can redistribute it and/or modify
			
 
				- * it under the terms of the GNU General Public License version 2 only,
			
 
				- * as published by the Free Software Foundation.
			
 
				- *
			
 
				- * This program is distributed in the hope that it will be useful, but
			
 
				- * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
			
 
				- * General Public License version 2 for more details (a copy is included
			
 
				- * in the LICENSE file that accompanied this code).
			
 
				- *
			
 
				- * You should have received a copy of the GNU General Public License
			
 
				- * version 2 along with this program; If not, see http://www.gnu.org/licenses
			
 
				- *
			
 
				- * Please  visit http://www.xyratex.com/contact if you need additional
			
 
				- * information or have any questions.
			
 
				- *
			
 
				- * GPL HEADER END
			
 
				- */
			
 
				-
			
 
				-/*
			
 
				- * Copyright 2012 Xyratex Technology Limited
			
 
				- *
			
 
				- * Using hardware provided PCLMULQDQ instruction to accelerate the CRC32
			
 
				- * calculation.
			
 
				- * CRC32 polynomial:0x04c11db7(BE)/0xEDB88320(LE)
			
 
				- * PCLMULQDQ is a new instruction in Intel SSE4.2, the reference can be found
			
 
				- * at:
			
 
				- * http://www.intel.com/products/processor/manuals/
			
 
				- * Intel(R) 64 and IA-32 Architectures Software Developer's Manual
			
 
				- * Volume 2B: Instruction Set Reference, N-Z
			
 
				- *
			
 
				- * Authors:   Gregory Prestas <Gregory_Prestas@us.xyratex.com>
			
 
				- *	      Alexander Boyko <Alexander_Boyko@xyratex.com>
			
 
				- */
			
 
				-
			
 
				-#include <linux/linkage.h>
			
 
				-#include <asm/assembler.h>
			
 
				-
			
 
				-	.section	".rodata", "a"
			
 
				-	.align		6
			
 
				-	.cpu		generic+crypto+crc
			
 
				-
			
 
				-.Lcrc32_constants:
			
 
				-	/*
			
 
				-	 * [x4*128+32 mod P(x) << 32)]'  << 1   = 0x154442bd4
			
 
				-	 * #define CONSTANT_R1  0x154442bd4LL
			
 
				-	 *
			
 
				-	 * [(x4*128-32 mod P(x) << 32)]' << 1   = 0x1c6e41596
			
 
				-	 * #define CONSTANT_R2  0x1c6e41596LL
			
 
				-	 */
			
 
				-	.octa		0x00000001c6e415960000000154442bd4
			
 
				-
			
 
				-	/*
			
 
				-	 * [(x128+32 mod P(x) << 32)]'   << 1   = 0x1751997d0
			
 
				-	 * #define CONSTANT_R3  0x1751997d0LL
			
 
				-	 *
			
 
				-	 * [(x128-32 mod P(x) << 32)]'   << 1   = 0x0ccaa009e
			
 
				-	 * #define CONSTANT_R4  0x0ccaa009eLL
			
 
				-	 */
			
 
				-	.octa		0x00000000ccaa009e00000001751997d0
			
 
				-
			
 
				-	/*
			
 
				-	 * [(x64 mod P(x) << 32)]'       << 1   = 0x163cd6124
			
 
				-	 * #define CONSTANT_R5  0x163cd6124LL
			
 
				-	 */
			
 
				-	.quad		0x0000000163cd6124
			
 
				-	.quad		0x00000000FFFFFFFF
			
 
				-
			
 
				-	/*
			
 
				-	 * #define CRCPOLY_TRUE_LE_FULL 0x1DB710641LL
			
 
				-	 *
			
 
				-	 * Barrett Reduction constant (u64`) = u` = (x**64 / P(x))`
			
 
				-	 *                                                      = 0x1F7011641LL
			
 
				-	 * #define CONSTANT_RU  0x1F7011641LL
			
 
				-	 */
			
 
				-	.octa		0x00000001F701164100000001DB710641
			
 
				-
			
 
				-.Lcrc32c_constants:
			
 
				-	.octa		0x000000009e4addf800000000740eef02
			
 
				-	.octa		0x000000014cd00bd600000000f20c0dfe
			
 
				-	.quad		0x00000000dd45aab8
			
 
				-	.quad		0x00000000FFFFFFFF
			
 
				-	.octa		0x00000000dea713f10000000105ec76f0
			
 
				-
			
 
				-	vCONSTANT	.req	v0
			
 
				-	dCONSTANT	.req	d0
			
 
				-	qCONSTANT	.req	q0
			
 
				-
			
 
				-	BUF		.req	x19
			
 
				-	LEN		.req	x20
			
 
				-	CRC		.req	x21
			
 
				-	CONST		.req	x22
			
 
				-
			
 
				-	vzr		.req	v9
			
 
				-
			
 
				-	/**
			
 
				-	 * Calculate crc32
			
 
				-	 * BUF - buffer
			
 
				-	 * LEN - sizeof buffer (multiple of 16 bytes), LEN should be > 63
			
 
				-	 * CRC - initial crc32
			
 
				-	 * return %eax crc32
			
 
				-	 * uint crc32_pmull_le(unsigned char const *buffer,
			
 
				-	 *                     size_t len, uint crc32)
			
 
				-	 */
			
 
				-	.text
			
 
				-ENTRY(crc32_pmull_le)
			
 
				-	adr_l		x3, .Lcrc32_constants
			
 
				-	b		0f
			
 
				-
			
 
				-ENTRY(crc32c_pmull_le)
			
 
				-	adr_l		x3, .Lcrc32c_constants
			
 
				-
			
 
				-0:	frame_push	4, 64
			
 
				-
			
 
				-	mov		BUF, x0
			
 
				-	mov		LEN, x1
			
 
				-	mov		CRC, x2
			
 
				-	mov		CONST, x3
			
 
				-
			
 
				-	bic		LEN, LEN, #15
			
 
				-	ld1		{v1.16b-v4.16b}, [BUF], #0x40
			
 
				-	movi		vzr.16b, #0
			
 
				-	fmov		dCONSTANT, CRC
			
 
				-	eor		v1.16b, v1.16b, vCONSTANT.16b
			
 
				-	sub		LEN, LEN, #0x40
			
 
				-	cmp		LEN, #0x40
			
 
				-	b.lt		less_64
			
 
				-
			
 
				-	ldr		qCONSTANT, [CONST]
			
 
				-
			
 
				-loop_64:		/* 64 bytes Full cache line folding */
			
 
				-	sub		LEN, LEN, #0x40
			
 
				-
			
 
				-	pmull2		v5.1q, v1.2d, vCONSTANT.2d
			
 
				-	pmull2		v6.1q, v2.2d, vCONSTANT.2d
			
 
				-	pmull2		v7.1q, v3.2d, vCONSTANT.2d
			
 
				-	pmull2		v8.1q, v4.2d, vCONSTANT.2d
			
 
				-
			
 
				-	pmull		v1.1q, v1.1d, vCONSTANT.1d
			
 
				-	pmull		v2.1q, v2.1d, vCONSTANT.1d
			
 
				-	pmull		v3.1q, v3.1d, vCONSTANT.1d
			
 
				-	pmull		v4.1q, v4.1d, vCONSTANT.1d
			
 
				-
			
 
				-	eor		v1.16b, v1.16b, v5.16b
			
 
				-	ld1		{v5.16b}, [BUF], #0x10
			
 
				-	eor		v2.16b, v2.16b, v6.16b
			
 
				-	ld1		{v6.16b}, [BUF], #0x10
			
 
				-	eor		v3.16b, v3.16b, v7.16b
			
 
				-	ld1		{v7.16b}, [BUF], #0x10
			
 
				-	eor		v4.16b, v4.16b, v8.16b
			
 
				-	ld1		{v8.16b}, [BUF], #0x10
			
 
				-
			
 
				-	eor		v1.16b, v1.16b, v5.16b
			
 
				-	eor		v2.16b, v2.16b, v6.16b
			
 
				-	eor		v3.16b, v3.16b, v7.16b
			
 
				-	eor		v4.16b, v4.16b, v8.16b
			
 
				-
			
 
				-	cmp		LEN, #0x40
			
 
				-	b.lt		less_64
			
 
				-
			
 
				-	if_will_cond_yield_neon
			
 
				-	stp		q1, q2, [sp, #.Lframe_local_offset]
			
 
				-	stp		q3, q4, [sp, #.Lframe_local_offset + 32]
			
 
				-	do_cond_yield_neon
			
 
				-	ldp		q1, q2, [sp, #.Lframe_local_offset]
			
 
				-	ldp		q3, q4, [sp, #.Lframe_local_offset + 32]
			
 
				-	ldr		qCONSTANT, [CONST]
			
 
				-	movi		vzr.16b, #0
			
 
				-	endif_yield_neon
			
 
				-	b		loop_64
			
 
				-
			
 
				-less_64:		/* Folding cache line into 128bit */
			
 
				-	ldr		qCONSTANT, [CONST, #16]
			
 
				-
			
 
				-	pmull2		v5.1q, v1.2d, vCONSTANT.2d
			
 
				-	pmull		v1.1q, v1.1d, vCONSTANT.1d
			
 
				-	eor		v1.16b, v1.16b, v5.16b
			
 
				-	eor		v1.16b, v1.16b, v2.16b
			
 
				-
			
 
				-	pmull2		v5.1q, v1.2d, vCONSTANT.2d
			
 
				-	pmull		v1.1q, v1.1d, vCONSTANT.1d
			
 
				-	eor		v1.16b, v1.16b, v5.16b
			
 
				-	eor		v1.16b, v1.16b, v3.16b
			
 
				-
			
 
				-	pmull2		v5.1q, v1.2d, vCONSTANT.2d
			
 
				-	pmull		v1.1q, v1.1d, vCONSTANT.1d
			
 
				-	eor		v1.16b, v1.16b, v5.16b
			
 
				-	eor		v1.16b, v1.16b, v4.16b
			
 
				-
			
 
				-	cbz		LEN, fold_64
			
 
				-
			
 
				-loop_16:		/* Folding rest buffer into 128bit */
			
 
				-	subs		LEN, LEN, #0x10
			
 
				-
			
 
				-	ld1		{v2.16b}, [BUF], #0x10
			
 
				-	pmull2		v5.1q, v1.2d, vCONSTANT.2d
			
 
				-	pmull		v1.1q, v1.1d, vCONSTANT.1d
			
 
				-	eor		v1.16b, v1.16b, v5.16b
			
 
				-	eor		v1.16b, v1.16b, v2.16b
			
 
				-
			
 
				-	b.ne		loop_16
			
 
				-
			
 
				-fold_64:
			
 
				-	/* perform the last 64 bit fold, also adds 32 zeroes
			
 
				-	 * to the input stream */
			
 
				-	ext		v2.16b, v1.16b, v1.16b, #8
			
 
				-	pmull2		v2.1q, v2.2d, vCONSTANT.2d
			
 
				-	ext		v1.16b, v1.16b, vzr.16b, #8
			
 
				-	eor		v1.16b, v1.16b, v2.16b
			
 
				-
			
 
				-	/* final 32-bit fold */
			
 
				-	ldr		dCONSTANT, [CONST, #32]
			
 
				-	ldr		d3, [CONST, #40]
			
 
				-
			
 
				-	ext		v2.16b, v1.16b, vzr.16b, #4
			
 
				-	and		v1.16b, v1.16b, v3.16b
			
 
				-	pmull		v1.1q, v1.1d, vCONSTANT.1d
			
 
				-	eor		v1.16b, v1.16b, v2.16b
			
 
				-
			
 
				-	/* Finish up with the bit-reversed barrett reduction 64 ==> 32 bits */
			
 
				-	ldr		qCONSTANT, [CONST, #48]
			
 
				-
			
 
				-	and		v2.16b, v1.16b, v3.16b
			
 
				-	ext		v2.16b, vzr.16b, v2.16b, #8
			
 
				-	pmull2		v2.1q, v2.2d, vCONSTANT.2d
			
 
				-	and		v2.16b, v2.16b, v3.16b
			
 
				-	pmull		v2.1q, v2.1d, vCONSTANT.1d
			
 
				-	eor		v1.16b, v1.16b, v2.16b
			
 
				-	mov		w0, v1.s[1]
			
 
				-
			
 
				-	frame_pop
			
 
				-	ret
			
 
				-ENDPROC(crc32_pmull_le)
			
 
				-ENDPROC(crc32c_pmull_le)
			
 
				-
			
 
				-	.macro		__crc32, c
			
 
				-0:	subs		x2, x2, #16
			
 
				-	b.mi		8f
			
 
				-	ldp		x3, x4, [x1], #16
			
 
				-CPU_BE(	rev		x3, x3		)
			
 
				-CPU_BE(	rev		x4, x4		)
			
 
				-	crc32\c\()x	w0, w0, x3
			
 
				-	crc32\c\()x	w0, w0, x4
			
 
				-	b.ne		0b
			
 
				-	ret
			
 
				-
			
 
				-8:	tbz		x2, #3, 4f
			
 
				-	ldr		x3, [x1], #8
			
 
				-CPU_BE(	rev		x3, x3		)
			
 
				-	crc32\c\()x	w0, w0, x3
			
 
				-4:	tbz		x2, #2, 2f
			
 
				-	ldr		w3, [x1], #4
			
 
				-CPU_BE(	rev		w3, w3		)
			
 
				-	crc32\c\()w	w0, w0, w3
			
 
				-2:	tbz		x2, #1, 1f
			
 
				-	ldrh		w3, [x1], #2
			
 
				-CPU_BE(	rev16		w3, w3		)
			
 
				-	crc32\c\()h	w0, w0, w3
			
 
				-1:	tbz		x2, #0, 0f
			
 
				-	ldrb		w3, [x1]
			
 
				-	crc32\c\()b	w0, w0, w3
			
 
				-0:	ret
			
 
				-	.endm
			
 
				-
			
 
				-	.align		5
			
 
				-ENTRY(crc32_armv8_le)
			
 
				-	__crc32
			
 
				-ENDPROC(crc32_armv8_le)
			
 
				-
			
 
				-	.align		5
			
 
				-ENTRY(crc32c_armv8_le)
			
 
				-	__crc32		c
			
 
				-ENDPROC(crc32c_armv8_le)
			
--- a/arch/arm64/crypto/crc32-ce-glue.c
+++ b/arch/arm64/crypto/crc32-ce-glue.c
@@ -1,244 +0,0 @@
 
				-/*
			
 
				- * Accelerated CRC32(C) using arm64 NEON and Crypto Extensions instructions
			
 
				- *
			
 
				- * Copyright (C) 2016 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
			
 
				- *
			
 
				- * This program is free software; you can redistribute it and/or modify
			
 
				- * it under the terms of the GNU General Public License version 2 as
			
 
				- * published by the Free Software Foundation.
			
 
				- */
			
 
				-
			
 
				-#include <linux/cpufeature.h>
			
 
				-#include <linux/crc32.h>
			
 
				-#include <linux/init.h>
			
 
				-#include <linux/kernel.h>
			
 
				-#include <linux/module.h>
			
 
				-#include <linux/string.h>
			
 
				-
			
 
				-#include <crypto/internal/hash.h>
			
 
				-
			
 
				-#include <asm/hwcap.h>
			
 
				-#include <asm/neon.h>
			
 
				-#include <asm/simd.h>
			
 
				-#include <asm/unaligned.h>
			
 
				-
			
 
				-#define PMULL_MIN_LEN		64L	/* minimum size of buffer
			
 
				-					 * for crc32_pmull_le_16 */
			
 
				-#define SCALE_F			16L	/* size of NEON register */
			
 
				-
			
 
				-asmlinkage u32 crc32_pmull_le(const u8 buf[], u64 len, u32 init_crc);
			
 
				-asmlinkage u32 crc32_armv8_le(u32 init_crc, const u8 buf[], size_t len);
			
 
				-
			
 
				-asmlinkage u32 crc32c_pmull_le(const u8 buf[], u64 len, u32 init_crc);
			
 
				-asmlinkage u32 crc32c_armv8_le(u32 init_crc, const u8 buf[], size_t len);
			
 
				-
			
 
				-static u32 (*fallback_crc32)(u32 init_crc, const u8 buf[], size_t len);
			
 
				-static u32 (*fallback_crc32c)(u32 init_crc, const u8 buf[], size_t len);
			
 
				-
			
 
				-static int crc32_pmull_cra_init(struct crypto_tfm *tfm)
			
 
				-{
			
 
				-	u32 *key = crypto_tfm_ctx(tfm);
			
 
				-
			
 
				-	*key = 0;
			
 
				-	return 0;
			
 
				-}
			
 
				-
			
 
				-static int crc32c_pmull_cra_init(struct crypto_tfm *tfm)
			
 
				-{
			
 
				-	u32 *key = crypto_tfm_ctx(tfm);
			
 
				-
			
 
				-	*key = ~0;
			
 
				-	return 0;
			
 
				-}
			
 
				-
			
 
				-static int crc32_pmull_setkey(struct crypto_shash *hash, const u8 *key,
			
 
				-			      unsigned int keylen)
			
 
				-{
			
 
				-	u32 *mctx = crypto_shash_ctx(hash);
			
 
				-
			
 
				-	if (keylen != sizeof(u32)) {
			
 
				-		crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN);
			
 
				-		return -EINVAL;
			
 
				-	}
			
 
				-	*mctx = le32_to_cpup((__le32 *)key);
			
 
				-	return 0;
			
 
				-}
			
 
				-
			
 
				-static int crc32_pmull_init(struct shash_desc *desc)
			
 
				-{
			
 
				-	u32 *mctx = crypto_shash_ctx(desc->tfm);
			
 
				-	u32 *crc = shash_desc_ctx(desc);
			
 
				-
			
 
				-	*crc = *mctx;
			
 
				-	return 0;
			
 
				-}
			
 
				-
			
 
				-static int crc32_update(struct shash_desc *desc, const u8 *data,
			
 
				-			unsigned int length)
			
 
				-{
			
 
				-	u32 *crc = shash_desc_ctx(desc);
			
 
				-
			
 
				-	*crc = crc32_armv8_le(*crc, data, length);
			
 
				-	return 0;
			
 
				-}
			
 
				-
			
 
				-static int crc32c_update(struct shash_desc *desc, const u8 *data,
			
 
				-			 unsigned int length)
			
 
				-{
			
 
				-	u32 *crc = shash_desc_ctx(desc);
			
 
				-
			
 
				-	*crc = crc32c_armv8_le(*crc, data, length);
			
 
				-	return 0;
			
 
				-}
			
 
				-
			
 
				-static int crc32_pmull_update(struct shash_desc *desc, const u8 *data,
			
 
				-			 unsigned int length)
			
 
				-{
			
 
				-	u32 *crc = shash_desc_ctx(desc);
			
 
				-	unsigned int l;
			
 
				-
			
 
				-	if ((u64)data % SCALE_F) {
			
 
				-		l = min_t(u32, length, SCALE_F - ((u64)data % SCALE_F));
			
 
				-
			
 
				-		*crc = fallback_crc32(*crc, data, l);
			
 
				-
			
 
				-		data += l;
			
 
				-		length -= l;
			
 
				-	}
			
 
				-
			
 
				-	if (length >= PMULL_MIN_LEN && may_use_simd()) {
			
 
				-		l = round_down(length, SCALE_F);
			
 
				-
			
 
				-		kernel_neon_begin();
			
 
				-		*crc = crc32_pmull_le(data, l, *crc);
			
 
				-		kernel_neon_end();
			
 
				-
			
 
				-		data += l;
			
 
				-		length -= l;
			
 
				-	}
			
 
				-
			
 
				-	if (length > 0)
			
 
				-		*crc = fallback_crc32(*crc, data, length);
			
 
				-
			
 
				-	return 0;
			
 
				-}
			
 
				-
			
 
				-static int crc32c_pmull_update(struct shash_desc *desc, const u8 *data,
			
 
				-			 unsigned int length)
			
 
				-{
			
 
				-	u32 *crc = shash_desc_ctx(desc);
			
 
				-	unsigned int l;
			
 
				-
			
 
				-	if ((u64)data % SCALE_F) {
			
 
				-		l = min_t(u32, length, SCALE_F - ((u64)data % SCALE_F));
			
 
				-
			
 
				-		*crc = fallback_crc32c(*crc, data, l);
			
 
				-
			
 
				-		data += l;
			
 
				-		length -= l;
			
 
				-	}
			
 
				-
			
 
				-	if (length >= PMULL_MIN_LEN && may_use_simd()) {
			
 
				-		l = round_down(length, SCALE_F);
			
 
				-
			
 
				-		kernel_neon_begin();
			
 
				-		*crc = crc32c_pmull_le(data, l, *crc);
			
 
				-		kernel_neon_end();
			
 
				-
			
 
				-		data += l;
			
 
				-		length -= l;
			
 
				-	}
			
 
				-
			
 
				-	if (length > 0) {
			
 
				-		*crc = fallback_crc32c(*crc, data, length);
			
 
				-	}
			
 
				-
			
 
				-	return 0;
			
 
				-}
			
 
				-
			
 
				-static int crc32_pmull_final(struct shash_desc *desc, u8 *out)
			
 
				-{
			
 
				-	u32 *crc = shash_desc_ctx(desc);
			
 
				-
			
 
				-	put_unaligned_le32(*crc, out);
			
 
				-	return 0;
			
 
				-}
			
 
				-
			
 
				-static int crc32c_pmull_final(struct shash_desc *desc, u8 *out)
			
 
				-{
			
 
				-	u32 *crc = shash_desc_ctx(desc);
			
 
				-
			
 
				-	put_unaligned_le32(~*crc, out);
			
 
				-	return 0;
			
 
				-}
			
 
				-
			
 
				-static struct shash_alg crc32_pmull_algs[] = { {
			
 
				-	.setkey			= crc32_pmull_setkey,
			
 
				-	.init			= crc32_pmull_init,
			
 
				-	.update			= crc32_update,
			
 
				-	.final			= crc32_pmull_final,
			
 
				-	.descsize		= sizeof(u32),
			
 
				-	.digestsize		= sizeof(u32),
			
 
				-
			
 
				-	.base.cra_ctxsize	= sizeof(u32),
			
 
				-	.base.cra_init		= crc32_pmull_cra_init,
			
 
				-	.base.cra_name		= "crc32",
			
 
				-	.base.cra_driver_name	= "crc32-arm64-ce",
			
 
				-	.base.cra_priority	= 200,
			
 
				-	.base.cra_flags		= CRYPTO_ALG_OPTIONAL_KEY,
			
 
				-	.base.cra_blocksize	= 1,
			
 
				-	.base.cra_module	= THIS_MODULE,
			
 
				-}, {
			
 
				-	.setkey			= crc32_pmull_setkey,
			
 
				-	.init			= crc32_pmull_init,
			
 
				-	.update			= crc32c_update,
			
 
				-	.final			= crc32c_pmull_final,
			
 
				-	.descsize		= sizeof(u32),
			
 
				-	.digestsize		= sizeof(u32),
			
 
				-
			
 
				-	.base.cra_ctxsize	= sizeof(u32),
			
 
				-	.base.cra_init		= crc32c_pmull_cra_init,
			
 
				-	.base.cra_name		= "crc32c",
			
 
				-	.base.cra_driver_name	= "crc32c-arm64-ce",
			
 
				-	.base.cra_priority	= 200,
			
 
				-	.base.cra_flags		= CRYPTO_ALG_OPTIONAL_KEY,
			
 
				-	.base.cra_blocksize	= 1,
			
 
				-	.base.cra_module	= THIS_MODULE,
			
 
				-} };
			
 
				-
			
 
				-static int __init crc32_pmull_mod_init(void)
			
 
				-{
			
 
				-	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_PMULL)) {
			
 
				-		crc32_pmull_algs[0].update = crc32_pmull_update;
			
 
				-		crc32_pmull_algs[1].update = crc32c_pmull_update;
			
 
				-
			
 
				-		if (elf_hwcap & HWCAP_CRC32) {
			
 
				-			fallback_crc32 = crc32_armv8_le;
			
 
				-			fallback_crc32c = crc32c_armv8_le;
			
 
				-		} else {
			
 
				-			fallback_crc32 = crc32_le;
			
 
				-			fallback_crc32c = __crc32c_le;
			
 
				-		}
			
 
				-	} else if (!(elf_hwcap & HWCAP_CRC32)) {
			
 
				-		return -ENODEV;
			
 
				-	}
			
 
				-	return crypto_register_shashes(crc32_pmull_algs,
			
 
				-				       ARRAY_SIZE(crc32_pmull_algs));
			
 
				-}
			
 
				-
			
 
				-static void __exit crc32_pmull_mod_exit(void)
			
 
				-{
			
 
				-	crypto_unregister_shashes(crc32_pmull_algs,
			
 
				-				  ARRAY_SIZE(crc32_pmull_algs));
			
 
				-}
			
 
				-
			
 
				-static const struct cpu_feature crc32_cpu_feature[] = {
			
 
				-	{ cpu_feature(CRC32) }, { cpu_feature(PMULL) }, { }
			
 
				-};
			
 
				-MODULE_DEVICE_TABLE(cpu, crc32_cpu_feature);
			
 
				-
			
 
				-module_init(crc32_pmull_mod_init);
			
 
				-module_exit(crc32_pmull_mod_exit);
			
 
				-
			
 
				-MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
			
 
				-MODULE_LICENSE("GPL v2");
			
--- a/arch/arm64/crypto/crct10dif-ce-core.S
+++ b/arch/arm64/crypto/crct10dif-ce-core.S
@@ -80,7 +80,186 @@
 
				 
			
 
				 	vzr		.req	v13
			
 
				 
			
 
				-ENTRY(crc_t10dif_pmull)
			
 
				+	ad		.req	v14
			
 
				+	bd		.req	v10
			
 
				+
			
 
				+	k00_16		.req	v15
			
 
				+	k32_48		.req	v16
			
 
				+
			
 
				+	t3		.req	v17
			
 
				+	t4		.req	v18
			
 
				+	t5		.req	v19
			
 
				+	t6		.req	v20
			
 
				+	t7		.req	v21
			
 
				+	t8		.req	v22
			
 
				+	t9		.req	v23
			
 
				+
			
 
				+	perm1		.req	v24
			
 
				+	perm2		.req	v25
			
 
				+	perm3		.req	v26
			
 
				+	perm4		.req	v27
			
 
				+
			
 
				+	bd1		.req	v28
			
 
				+	bd2		.req	v29
			
 
				+	bd3		.req	v30
			
 
				+	bd4		.req	v31
			
 
				+
			
 
				+	.macro		__pmull_init_p64
			
 
				+	.endm
			
 
				+
			
 
				+	.macro		__pmull_pre_p64, bd
			
 
				+	.endm
			
 
				+
			
 
				+	.macro		__pmull_init_p8
			
 
				+	// k00_16 := 0x0000000000000000_000000000000ffff
			
 
				+	// k32_48 := 0x00000000ffffffff_0000ffffffffffff
			
 
				+	movi		k32_48.2d, #0xffffffff
			
 
				+	mov		k32_48.h[2], k32_48.h[0]
			
 
				+	ushr		k00_16.2d, k32_48.2d, #32
			
 
				+
			
 
				+	// prepare the permutation vectors
			
 
				+	mov_q		x5, 0x080f0e0d0c0b0a09
			
 
				+	movi		perm4.8b, #8
			
 
				+	dup		perm1.2d, x5
			
 
				+	eor		perm1.16b, perm1.16b, perm4.16b
			
 
				+	ushr		perm2.2d, perm1.2d, #8
			
 
				+	ushr		perm3.2d, perm1.2d, #16
			
 
				+	ushr		perm4.2d, perm1.2d, #24
			
 
				+	sli		perm2.2d, perm1.2d, #56
			
 
				+	sli		perm3.2d, perm1.2d, #48
			
 
				+	sli		perm4.2d, perm1.2d, #40
			
 
				+	.endm
			
 
				+
			
 
				+	.macro		__pmull_pre_p8, bd
			
 
				+	tbl		bd1.16b, {\bd\().16b}, perm1.16b
			
 
				+	tbl		bd2.16b, {\bd\().16b}, perm2.16b
			
 
				+	tbl		bd3.16b, {\bd\().16b}, perm3.16b
			
 
				+	tbl		bd4.16b, {\bd\().16b}, perm4.16b
			
 
				+	.endm
			
 
				+
			
 
				+__pmull_p8_core:
			
 
				+.L__pmull_p8_core:
			
 
				+	ext		t4.8b, ad.8b, ad.8b, #1			// A1
			
 
				+	ext		t5.8b, ad.8b, ad.8b, #2			// A2
			
 
				+	ext		t6.8b, ad.8b, ad.8b, #3			// A3
			
 
				+
			
 
				+	pmull		t4.8h, t4.8b, bd.8b			// F = A1*B
			
 
				+	pmull		t8.8h, ad.8b, bd1.8b			// E = A*B1
			
 
				+	pmull		t5.8h, t5.8b, bd.8b			// H = A2*B
			
 
				+	pmull		t7.8h, ad.8b, bd2.8b			// G = A*B2
			
 
				+	pmull		t6.8h, t6.8b, bd.8b			// J = A3*B
			
 
				+	pmull		t9.8h, ad.8b, bd3.8b			// I = A*B3
			
 
				+	pmull		t3.8h, ad.8b, bd4.8b			// K = A*B4
			
 
				+	b		0f
			
 
				+
			
 
				+.L__pmull_p8_core2:
			
 
				+	tbl		t4.16b, {ad.16b}, perm1.16b		// A1
			
 
				+	tbl		t5.16b, {ad.16b}, perm2.16b		// A2
			
 
				+	tbl		t6.16b, {ad.16b}, perm3.16b		// A3
			
 
				+
			
 
				+	pmull2		t4.8h, t4.16b, bd.16b			// F = A1*B
			
 
				+	pmull2		t8.8h, ad.16b, bd1.16b			// E = A*B1
			
 
				+	pmull2		t5.8h, t5.16b, bd.16b			// H = A2*B
			
 
				+	pmull2		t7.8h, ad.16b, bd2.16b			// G = A*B2
			
 
				+	pmull2		t6.8h, t6.16b, bd.16b			// J = A3*B
			
 
				+	pmull2		t9.8h, ad.16b, bd3.16b			// I = A*B3
			
 
				+	pmull2		t3.8h, ad.16b, bd4.16b			// K = A*B4
			
 
				+
			
 
				+0:	eor		t4.16b, t4.16b, t8.16b			// L = E + F
			
 
				+	eor		t5.16b, t5.16b, t7.16b			// M = G + H
			
 
				+	eor		t6.16b, t6.16b, t9.16b			// N = I + J
			
 
				+
			
 
				+	uzp1		t8.2d, t4.2d, t5.2d
			
 
				+	uzp2		t4.2d, t4.2d, t5.2d
			
 
				+	uzp1		t7.2d, t6.2d, t3.2d
			
 
				+	uzp2		t6.2d, t6.2d, t3.2d
			
 
				+
			
 
				+	// t4 = (L) (P0 + P1) << 8
			
 
				+	// t5 = (M) (P2 + P3) << 16
			
 
				+	eor		t8.16b, t8.16b, t4.16b
			
 
				+	and		t4.16b, t4.16b, k32_48.16b
			
 
				+
			
 
				+	// t6 = (N) (P4 + P5) << 24
			
 
				+	// t7 = (K) (P6 + P7) << 32
			
 
				+	eor		t7.16b, t7.16b, t6.16b
			
 
				+	and		t6.16b, t6.16b, k00_16.16b
			
 
				+
			
 
				+	eor		t8.16b, t8.16b, t4.16b
			
 
				+	eor		t7.16b, t7.16b, t6.16b
			
 
				+
			
 
				+	zip2		t5.2d, t8.2d, t4.2d
			
 
				+	zip1		t4.2d, t8.2d, t4.2d
			
 
				+	zip2		t3.2d, t7.2d, t6.2d
			
 
				+	zip1		t6.2d, t7.2d, t6.2d
			
 
				+
			
 
				+	ext		t4.16b, t4.16b, t4.16b, #15
			
 
				+	ext		t5.16b, t5.16b, t5.16b, #14
			
 
				+	ext		t6.16b, t6.16b, t6.16b, #13
			
 
				+	ext		t3.16b, t3.16b, t3.16b, #12
			
 
				+
			
 
				+	eor		t4.16b, t4.16b, t5.16b
			
 
				+	eor		t6.16b, t6.16b, t3.16b
			
 
				+	ret
			
 
				+ENDPROC(__pmull_p8_core)
			
 
				+
			
 
				+	.macro		__pmull_p8, rq, ad, bd, i
			
 
				+	.ifnc		\bd, v10
			
 
				+	.err
			
 
				+	.endif
			
 
				+	mov		ad.16b, \ad\().16b
			
 
				+	.ifb		\i
			
 
				+	pmull		\rq\().8h, \ad\().8b, bd.8b		// D = A*B
			
 
				+	.else
			
 
				+	pmull2		\rq\().8h, \ad\().16b, bd.16b		// D = A*B
			
 
				+	.endif
			
 
				+
			
 
				+	bl		.L__pmull_p8_core\i
			
 
				+
			
 
				+	eor		\rq\().16b, \rq\().16b, t4.16b
			
 
				+	eor		\rq\().16b, \rq\().16b, t6.16b
			
 
				+	.endm
			
 
				+
			
 
				+	.macro		fold64, p, reg1, reg2
			
 
				+	ldp		q11, q12, [arg2], #0x20
			
 
				+
			
 
				+	__pmull_\p	v8, \reg1, v10, 2
			
 
				+	__pmull_\p	\reg1, \reg1, v10
			
 
				+
			
 
				+CPU_LE(	rev64		v11.16b, v11.16b		)
			
 
				+CPU_LE(	rev64		v12.16b, v12.16b		)
			
 
				+
			
 
				+	__pmull_\p	v9, \reg2, v10, 2
			
 
				+	__pmull_\p	\reg2, \reg2, v10
			
 
				+
			
 
				+CPU_LE(	ext		v11.16b, v11.16b, v11.16b, #8	)
			
 
				+CPU_LE(	ext		v12.16b, v12.16b, v12.16b, #8	)
			
 
				+
			
 
				+	eor		\reg1\().16b, \reg1\().16b, v8.16b
			
 
				+	eor		\reg2\().16b, \reg2\().16b, v9.16b
			
 
				+	eor		\reg1\().16b, \reg1\().16b, v11.16b
			
 
				+	eor		\reg2\().16b, \reg2\().16b, v12.16b
			
 
				+	.endm
			
 
				+
			
 
				+	.macro		fold16, p, reg, rk
			
 
				+	__pmull_\p	v8, \reg, v10
			
 
				+	__pmull_\p	\reg, \reg, v10, 2
			
 
				+	.ifnb		\rk
			
 
				+	ldr_l		q10, \rk, x8
			
 
				+	__pmull_pre_\p	v10
			
 
				+	.endif
			
 
				+	eor		v7.16b, v7.16b, v8.16b
			
 
				+	eor		v7.16b, v7.16b, \reg\().16b
			
 
				+	.endm
			
 
				+
			
 
				+	.macro		__pmull_p64, rd, rn, rm, n
			
 
				+	.ifb		\n
			
 
				+	pmull		\rd\().1q, \rn\().1d, \rm\().1d
			
 
				+	.else
			
 
				+	pmull2		\rd\().1q, \rn\().2d, \rm\().2d
			
 
				+	.endif
			
 
				+	.endm
			
 
				+
			
 
				+	.macro		crc_t10dif_pmull, p
			
 
				 	frame_push	3, 128
			
 
				 
			
 
				 	mov		arg1_low32, w0
			
@@ -89,6 +268,8 @@ ENTRY(crc_t10dif_pmull)
 
				 
			
 
				 	movi		vzr.16b, #0		// init zero register
			
 
				 
			
 
				+	__pmull_init_\p
			
 
				+
			
 
				 	// adjust the 16-bit initial_crc value, scale it to 32 bits
			
 
				 	lsl		arg1_low32, arg1_low32, #16
			
 
				 
			
@@ -96,7 +277,7 @@ ENTRY(crc_t10dif_pmull)
 
				 	cmp		arg3, #256
			
 
				 
			
 
				 	// for sizes less than 128, we can't fold 64B at a time...
			
 
				-	b.lt		_less_than_128
			
 
				+	b.lt		.L_less_than_128_\@
			
 
				 
			
 
				 	// load the initial crc value
			
 
				 	// crc value does not need to be byte-reflected, but it needs
			
@@ -137,6 +318,7 @@ CPU_LE(	ext		v7.16b, v7.16b, v7.16b, #8	)
 
				 	ldr_l		q10, rk3, x8	// xmm10 has rk3 and rk4
			
 
				 					// type of pmull instruction
			
 
				 					// will determine which constant to use
			
 
				+	__pmull_pre_\p	v10
			
 
				 
			
 
				 	//
			
 
				 	// we subtract 256 instead of 128 to save one instruction from the loop
			
@@ -147,41 +329,19 @@ CPU_LE(	ext		v7.16b, v7.16b, v7.16b, #8	)
 
				 	// buffer. The _fold_64_B_loop will fold 64B at a time
			
 
				 	// until we have 64+y Bytes of buffer
			
 
				 
			
 
				-
			
 
				 	// fold 64B at a time. This section of the code folds 4 vector
			
 
				 	// registers in parallel
			
 
				-_fold_64_B_loop:
			
 
				+.L_fold_64_B_loop_\@:
			
 
				 
			
 
				-	.macro		fold64, reg1, reg2
			
 
				-	ldp		q11, q12, [arg2], #0x20
			
 
				-
			
 
				-	pmull2		v8.1q, \reg1\().2d, v10.2d
			
 
				-	pmull		\reg1\().1q, \reg1\().1d, v10.1d
			
 
				-
			
 
				-CPU_LE(	rev64		v11.16b, v11.16b		)
			
 
				-CPU_LE(	rev64		v12.16b, v12.16b		)
			
 
				-
			
 
				-	pmull2		v9.1q, \reg2\().2d, v10.2d
			
 
				-	pmull		\reg2\().1q, \reg2\().1d, v10.1d
			
 
				-
			
 
				-CPU_LE(	ext		v11.16b, v11.16b, v11.16b, #8	)
			
 
				-CPU_LE(	ext		v12.16b, v12.16b, v12.16b, #8	)
			
 
				-
			
 
				-	eor		\reg1\().16b, \reg1\().16b, v8.16b
			
 
				-	eor		\reg2\().16b, \reg2\().16b, v9.16b
			
 
				-	eor		\reg1\().16b, \reg1\().16b, v11.16b
			
 
				-	eor		\reg2\().16b, \reg2\().16b, v12.16b
			
 
				-	.endm
			
 
				-
			
 
				-	fold64		v0, v1
			
 
				-	fold64		v2, v3
			
 
				-	fold64		v4, v5
			
 
				-	fold64		v6, v7
			
 
				+	fold64		\p, v0, v1
			
 
				+	fold64		\p, v2, v3
			
 
				+	fold64		\p, v4, v5
			
 
				+	fold64		\p, v6, v7
			
 
				 
			
 
				 	subs		arg3, arg3, #128
			
 
				 
			
 
				 	// check if there is another 64B in the buffer to be able to fold
			
 
				-	b.lt		_fold_64_B_end
			
 
				+	b.lt		.L_fold_64_B_end_\@
			
 
				 
			
 
				 	if_will_cond_yield_neon
			
 
				 	stp		q0, q1, [sp, #.Lframe_local_offset]
			
@@ -195,11 +355,13 @@ CPU_LE(	ext		v12.16b, v12.16b, v12.16b, #8	)
 
				 	ldp		q6, q7, [sp, #.Lframe_local_offset + 96]
			
 
				 	ldr_l		q10, rk3, x8
			
 
				 	movi		vzr.16b, #0		// init zero register
			
 
				+	__pmull_init_\p
			
 
				+	__pmull_pre_\p	v10
			
 
				 	endif_yield_neon
			
 
				 
			
 
				-	b		_fold_64_B_loop
			
 
				+	b		.L_fold_64_B_loop_\@
			
 
				 
			
 
				-_fold_64_B_end:
			
 
				+.L_fold_64_B_end_\@:
			
 
				 	// at this point, the buffer pointer is pointing at the last y Bytes
			
 
				 	// of the buffer the 64B of folded data is in 4 of the vector
			
 
				 	// registers: v0, v1, v2, v3
			
@@ -208,38 +370,29 @@ _fold_64_B_end:
 
				 	// constants
			
 
				 
			
 
				 	ldr_l		q10, rk9, x8
			
 
				+	__pmull_pre_\p	v10
			
 
				 
			
 
				-	.macro		fold16, reg, rk
			
 
				-	pmull		v8.1q, \reg\().1d, v10.1d
			
 
				-	pmull2		\reg\().1q, \reg\().2d, v10.2d
			
 
				-	.ifnb		\rk
			
 
				-	ldr_l		q10, \rk, x8
			
 
				-	.endif
			
 
				-	eor		v7.16b, v7.16b, v8.16b
			
 
				-	eor		v7.16b, v7.16b, \reg\().16b
			
 
				-	.endm
			
 
				-
			
 
				-	fold16		v0, rk11
			
 
				-	fold16		v1, rk13
			
 
				-	fold16		v2, rk15
			
 
				-	fold16		v3, rk17
			
 
				-	fold16		v4, rk19
			
 
				-	fold16		v5, rk1
			
 
				-	fold16		v6
			
 
				+	fold16		\p, v0, rk11
			
 
				+	fold16		\p, v1, rk13
			
 
				+	fold16		\p, v2, rk15
			
 
				+	fold16		\p, v3, rk17
			
 
				+	fold16		\p, v4, rk19
			
 
				+	fold16		\p, v5, rk1
			
 
				+	fold16		\p, v6
			
 
				 
			
 
				 	// instead of 64, we add 48 to the loop counter to save 1 instruction
			
 
				 	// from the loop instead of a cmp instruction, we use the negative
			
 
				 	// flag with the jl instruction
			
 
				 	adds		arg3, arg3, #(128-16)
			
 
				-	b.lt		_final_reduction_for_128
			
 
				+	b.lt		.L_final_reduction_for_128_\@
			
 
				 
			
 
				 	// now we have 16+y bytes left to reduce. 16 Bytes is in register v7
			
 
				 	// and the rest is in memory. We can fold 16 bytes at a time if y>=16
			
 
				 	// continue folding 16B at a time
			
 
				 
			
 
				-_16B_reduction_loop:
			
 
				-	pmull		v8.1q, v7.1d, v10.1d
			
 
				-	pmull2		v7.1q, v7.2d, v10.2d
			
 
				+.L_16B_reduction_loop_\@:
			
 
				+	__pmull_\p	v8, v7, v10
			
 
				+	__pmull_\p	v7, v7, v10, 2
			
 
				 	eor		v7.16b, v7.16b, v8.16b
			
 
				 
			
 
				 	ldr		q0, [arg2], #16
			
@@ -251,22 +404,22 @@ CPU_LE(	ext		v0.16b, v0.16b, v0.16b, #8	)
 
				 	// instead of a cmp instruction, we utilize the flags with the
			
 
				 	// jge instruction equivalent of: cmp arg3, 16-16
			
 
				 	// check if there is any more 16B in the buffer to be able to fold
			
 
				-	b.ge		_16B_reduction_loop
			
 
				+	b.ge		.L_16B_reduction_loop_\@
			
 
				 
			
 
				 	// now we have 16+z bytes left to reduce, where 0<= z < 16.
			
 
				 	// first, we reduce the data in the xmm7 register
			
 
				 
			
 
				-_final_reduction_for_128:
			
 
				+.L_final_reduction_for_128_\@:
			
 
				 	// check if any more data to fold. If not, compute the CRC of
			
 
				 	// the final 128 bits
			
 
				 	adds		arg3, arg3, #16
			
 
				-	b.eq		_128_done
			
 
				+	b.eq		.L_128_done_\@
			
 
				 
			
 
				 	// here we are getting data that is less than 16 bytes.
			
 
				 	// since we know that there was data before the pointer, we can
			
 
				 	// offset the input pointer before the actual point, to receive
			
 
				 	// exactly 16 bytes. after that the registers need to be adjusted.
			
 
				-_get_last_two_regs:
			
 
				+.L_get_last_two_regs_\@:
			
 
				 	add		arg2, arg2, arg3
			
 
				 	ldr		q1, [arg2, #-16]
			
 
				 CPU_LE(	rev64		v1.16b, v1.16b			)
			
@@ -291,47 +444,48 @@ CPU_LE(	ext		v1.16b, v1.16b, v1.16b, #8	)
 
				 	bsl		v0.16b, v2.16b, v1.16b
			
 
				 
			
 
				 	// fold 16 Bytes
			
 
				-	pmull		v8.1q, v7.1d, v10.1d
			
 
				-	pmull2		v7.1q, v7.2d, v10.2d
			
 
				+	__pmull_\p	v8, v7, v10
			
 
				+	__pmull_\p	v7, v7, v10, 2
			
 
				 	eor		v7.16b, v7.16b, v8.16b
			
 
				 	eor		v7.16b, v7.16b, v0.16b
			
 
				 
			
 
				-_128_done:
			
 
				+.L_128_done_\@:
			
 
				 	// compute crc of a 128-bit value
			
 
				 	ldr_l		q10, rk5, x8		// rk5 and rk6 in xmm10
			
 
				+	__pmull_pre_\p	v10
			
 
				 
			
 
				 	// 64b fold
			
 
				 	ext		v0.16b, vzr.16b, v7.16b, #8
			
 
				 	mov		v7.d[0], v7.d[1]
			
 
				-	pmull		v7.1q, v7.1d, v10.1d
			
 
				+	__pmull_\p	v7, v7, v10
			
 
				 	eor		v7.16b, v7.16b, v0.16b
			
 
				 
			
 
				 	// 32b fold
			
 
				 	ext		v0.16b, v7.16b, vzr.16b, #4
			
 
				 	mov		v7.s[3], vzr.s[0]
			
 
				-	pmull2		v0.1q, v0.2d, v10.2d
			
 
				+	__pmull_\p	v0, v0, v10, 2
			
 
				 	eor		v7.16b, v7.16b, v0.16b
			
 
				 
			
 
				 	// barrett reduction
			
 
				-_barrett:
			
 
				 	ldr_l		q10, rk7, x8
			
 
				+	__pmull_pre_\p	v10
			
 
				 	mov		v0.d[0], v7.d[1]
			
 
				 
			
 
				-	pmull		v0.1q, v0.1d, v10.1d
			
 
				+	__pmull_\p	v0, v0, v10
			
 
				 	ext		v0.16b, vzr.16b, v0.16b, #12
			
 
				-	pmull2		v0.1q, v0.2d, v10.2d
			
 
				+	__pmull_\p	v0, v0, v10, 2
			
 
				 	ext		v0.16b, vzr.16b, v0.16b, #12
			
 
				 	eor		v7.16b, v7.16b, v0.16b
			
 
				 	mov		w0, v7.s[1]
			
 
				 
			
 
				-_cleanup:
			
 
				+.L_cleanup_\@:
			
 
				 	// scale the result back to 16 bits
			
 
				 	lsr		x0, x0, #16
			
 
				 	frame_pop
			
 
				 	ret
			
 
				 
			
 
				-_less_than_128:
			
 
				-	cbz		arg3, _cleanup
			
 
				+.L_less_than_128_\@:
			
 
				+	cbz		arg3, .L_cleanup_\@
			
 
				 
			
 
				 	movi		v0.16b, #0
			
 
				 	mov		v0.s[3], arg1_low32	// get the initial crc value
			
@@ -342,20 +496,21 @@ CPU_LE(	ext		v7.16b, v7.16b, v7.16b, #8	)
 
				 	eor		v7.16b, v7.16b, v0.16b	// xor the initial crc value
			
 
				 
			
 
				 	cmp		arg3, #16
			
 
				-	b.eq		_128_done		// exactly 16 left
			
 
				-	b.lt		_less_than_16_left
			
 
				+	b.eq		.L_128_done_\@		// exactly 16 left
			
 
				+	b.lt		.L_less_than_16_left_\@
			
 
				 
			
 
				 	ldr_l		q10, rk1, x8		// rk1 and rk2 in xmm10
			
 
				+	__pmull_pre_\p	v10
			
 
				 
			
 
				 	// update the counter. subtract 32 instead of 16 to save one
			
 
				 	// instruction from the loop
			
 
				 	subs		arg3, arg3, #32
			
 
				-	b.ge		_16B_reduction_loop
			
 
				+	b.ge		.L_16B_reduction_loop_\@
			
 
				 
			
 
				 	add		arg3, arg3, #16
			
 
				-	b		_get_last_two_regs
			
 
				+	b		.L_get_last_two_regs_\@
			
 
				 
			
 
				-_less_than_16_left:
			
 
				+.L_less_than_16_left_\@:
			
 
				 	// shl r9, 4
			
 
				 	adr_l		x0, tbl_shf_table + 16
			
 
				 	sub		x0, x0, arg3
			
@@ -363,8 +518,17 @@ _less_than_16_left:
 
				 	movi		v9.16b, #0x80
			
 
				 	eor		v0.16b, v0.16b, v9.16b
			
 
				 	tbl		v7.16b, {v7.16b}, v0.16b
			
 
				-	b		_128_done
			
 
				-ENDPROC(crc_t10dif_pmull)
			
 
				+	b		.L_128_done_\@
			
 
				+	.endm
			
 
				+
			
 
				+ENTRY(crc_t10dif_pmull_p8)
			
 
				+	crc_t10dif_pmull	p8
			
 
				+ENDPROC(crc_t10dif_pmull_p8)
			
 
				+
			
 
				+	.align		5
			
 
				+ENTRY(crc_t10dif_pmull_p64)
			
 
				+	crc_t10dif_pmull	p64
			
 
				+ENDPROC(crc_t10dif_pmull_p64)
			
 
				 
			
 
				 // precomputed constants
			
 
				 // these constants are precomputed from the poly:
			
--- a/arch/arm64/crypto/crct10dif-ce-glue.c
+++ b/arch/arm64/crypto/crct10dif-ce-glue.c
@@ -22,7 +22,10 @@
 
				 
			
 
				 #define CRC_T10DIF_PMULL_CHUNK_SIZE	16U
			
 
				 
			
 
				-asmlinkage u16 crc_t10dif_pmull(u16 init_crc, const u8 buf[], u64 len);
			
 
				+asmlinkage u16 crc_t10dif_pmull_p64(u16 init_crc, const u8 buf[], u64 len);
			
 
				+asmlinkage u16 crc_t10dif_pmull_p8(u16 init_crc, const u8 buf[], u64 len);
			
 
				+
			
 
				+static u16 (*crc_t10dif_pmull)(u16 init_crc, const u8 buf[], u64 len);
			
 
				 
			
 
				 static int crct10dif_init(struct shash_desc *desc)
			
 
				 {
			
@@ -85,6 +88,11 @@ static struct shash_alg crc_t10dif_alg = {
 
				 
			
 
				 static int __init crc_t10dif_mod_init(void)
			
 
				 {
			
 
				+	if (elf_hwcap & HWCAP_PMULL)
			
 
				+		crc_t10dif_pmull = crc_t10dif_pmull_p64;
			
 
				+	else
			
 
				+		crc_t10dif_pmull = crc_t10dif_pmull_p8;
			
 
				+
			
 
				 	return crypto_register_shash(&crc_t10dif_alg);
			
 
				 }
			
 
				 
			
@@ -93,8 +101,10 @@ static void __exit crc_t10dif_mod_exit(void)
 
				 	crypto_unregister_shash(&crc_t10dif_alg);
			
 
				 }
			
 
				 
			
 
				-module_cpu_feature_match(PMULL, crc_t10dif_mod_init);
			
 
				+module_cpu_feature_match(ASIMD, crc_t10dif_mod_init);
			
 
				 module_exit(crc_t10dif_mod_exit);
			
 
				 
			
 
				 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
			
 
				 MODULE_LICENSE("GPL v2");
			
 
				+MODULE_ALIAS_CRYPTO("crct10dif");
			
 
				+MODULE_ALIAS_CRYPTO("crct10dif-arm64-ce");
			
--- a/arch/arm64/crypto/speck-neon-core.S
+++ b/arch/arm64/crypto/speck-neon-core.S
@@ -1,352 +0,0 @@
 
				-// SPDX-License-Identifier: GPL-2.0
			
 
				-/*
			
 
				- * ARM64 NEON-accelerated implementation of Speck128-XTS and Speck64-XTS
			
 
				- *
			
 
				- * Copyright (c) 2018 Google, Inc
			
 
				- *
			
 
				- * Author: Eric Biggers <ebiggers@google.com>
			
 
				- */
			
 
				-
			
 
				-#include <linux/linkage.h>
			
 
				-
			
 
				-	.text
			
 
				-
			
 
				-	// arguments
			
 
				-	ROUND_KEYS	.req	x0	// const {u64,u32} *round_keys
			
 
				-	NROUNDS		.req	w1	// int nrounds
			
 
				-	NROUNDS_X	.req	x1
			
 
				-	DST		.req	x2	// void *dst
			
 
				-	SRC		.req	x3	// const void *src
			
 
				-	NBYTES		.req	w4	// unsigned int nbytes
			
 
				-	TWEAK		.req	x5	// void *tweak
			
 
				-
			
 
				-	// registers which hold the data being encrypted/decrypted
			
 
				-	// (underscores avoid a naming collision with ARM64 registers x0-x3)
			
 
				-	X_0		.req	v0
			
 
				-	Y_0		.req	v1
			
 
				-	X_1		.req	v2
			
 
				-	Y_1		.req	v3
			
 
				-	X_2		.req	v4
			
 
				-	Y_2		.req	v5
			
 
				-	X_3		.req	v6
			
 
				-	Y_3		.req	v7
			
 
				-
			
 
				-	// the round key, duplicated in all lanes
			
 
				-	ROUND_KEY	.req	v8
			
 
				-
			
 
				-	// index vector for tbl-based 8-bit rotates
			
 
				-	ROTATE_TABLE	.req	v9
			
 
				-	ROTATE_TABLE_Q	.req	q9
			
 
				-
			
 
				-	// temporary registers
			
 
				-	TMP0		.req	v10
			
 
				-	TMP1		.req	v11
			
 
				-	TMP2		.req	v12
			
 
				-	TMP3		.req	v13
			
 
				-
			
 
				-	// multiplication table for updating XTS tweaks
			
 
				-	GFMUL_TABLE	.req	v14
			
 
				-	GFMUL_TABLE_Q	.req	q14
			
 
				-
			
 
				-	// next XTS tweak value(s)
			
 
				-	TWEAKV_NEXT	.req	v15
			
 
				-
			
 
				-	// XTS tweaks for the blocks currently being encrypted/decrypted
			
 
				-	TWEAKV0		.req	v16
			
 
				-	TWEAKV1		.req	v17
			
 
				-	TWEAKV2		.req	v18
			
 
				-	TWEAKV3		.req	v19
			
 
				-	TWEAKV4		.req	v20
			
 
				-	TWEAKV5		.req	v21
			
 
				-	TWEAKV6		.req	v22
			
 
				-	TWEAKV7		.req	v23
			
 
				-
			
 
				-	.align		4
			
 
				-.Lror64_8_table:
			
 
				-	.octa		0x080f0e0d0c0b0a090007060504030201
			
 
				-.Lror32_8_table:
			
 
				-	.octa		0x0c0f0e0d080b0a090407060500030201
			
 
				-.Lrol64_8_table:
			
 
				-	.octa		0x0e0d0c0b0a09080f0605040302010007
			
 
				-.Lrol32_8_table:
			
 
				-	.octa		0x0e0d0c0f0a09080b0605040702010003
			
 
				-.Lgf128mul_table:
			
 
				-	.octa		0x00000000000000870000000000000001
			
 
				-.Lgf64mul_table:
			
 
				-	.octa		0x0000000000000000000000002d361b00
			
 
				-
			
 
				-/*
			
 
				- * _speck_round_128bytes() - Speck encryption round on 128 bytes at a time
			
 
				- *
			
 
				- * Do one Speck encryption round on the 128 bytes (8 blocks for Speck128, 16 for
			
 
				- * Speck64) stored in X0-X3 and Y0-Y3, using the round key stored in all lanes
			
 
				- * of ROUND_KEY.  'n' is the lane size: 64 for Speck128, or 32 for Speck64.
			
 
				- * 'lanes' is the lane specifier: "2d" for Speck128 or "4s" for Speck64.
			
 
				- */
			
 
				-.macro _speck_round_128bytes	n, lanes
			
 
				-
			
 
				-	// x = ror(x, 8)
			
 
				-	tbl		X_0.16b, {X_0.16b}, ROTATE_TABLE.16b
			
 
				-	tbl		X_1.16b, {X_1.16b}, ROTATE_TABLE.16b
			
 
				-	tbl		X_2.16b, {X_2.16b}, ROTATE_TABLE.16b
			
 
				-	tbl		X_3.16b, {X_3.16b}, ROTATE_TABLE.16b
			
 
				-
			
 
				-	// x += y
			
 
				-	add		X_0.\lanes, X_0.\lanes, Y_0.\lanes
			
 
				-	add		X_1.\lanes, X_1.\lanes, Y_1.\lanes
			
 
				-	add		X_2.\lanes, X_2.\lanes, Y_2.\lanes
			
 
				-	add		X_3.\lanes, X_3.\lanes, Y_3.\lanes
			
 
				-
			
 
				-	// x ^= k
			
 
				-	eor		X_0.16b, X_0.16b, ROUND_KEY.16b
			
 
				-	eor		X_1.16b, X_1.16b, ROUND_KEY.16b
			
 
				-	eor		X_2.16b, X_2.16b, ROUND_KEY.16b
			
 
				-	eor		X_3.16b, X_3.16b, ROUND_KEY.16b
			
 
				-
			
 
				-	// y = rol(y, 3)
			
 
				-	shl		TMP0.\lanes, Y_0.\lanes, #3
			
 
				-	shl		TMP1.\lanes, Y_1.\lanes, #3
			
 
				-	shl		TMP2.\lanes, Y_2.\lanes, #3
			
 
				-	shl		TMP3.\lanes, Y_3.\lanes, #3
			
 
				-	sri		TMP0.\lanes, Y_0.\lanes, #(\n - 3)
			
 
				-	sri		TMP1.\lanes, Y_1.\lanes, #(\n - 3)
			
 
				-	sri		TMP2.\lanes, Y_2.\lanes, #(\n - 3)
			
 
				-	sri		TMP3.\lanes, Y_3.\lanes, #(\n - 3)
			
 
				-
			
 
				-	// y ^= x
			
 
				-	eor		Y_0.16b, TMP0.16b, X_0.16b
			
 
				-	eor		Y_1.16b, TMP1.16b, X_1.16b
			
 
				-	eor		Y_2.16b, TMP2.16b, X_2.16b
			
 
				-	eor		Y_3.16b, TMP3.16b, X_3.16b
			
 
				-.endm
			
 
				-
			
 
				-/*
			
 
				- * _speck_unround_128bytes() - Speck decryption round on 128 bytes at a time
			
 
				- *
			
 
				- * This is the inverse of _speck_round_128bytes().
			
 
				- */
			
 
				-.macro _speck_unround_128bytes	n, lanes
			
 
				-
			
 
				-	// y ^= x
			
 
				-	eor		TMP0.16b, Y_0.16b, X_0.16b
			
 
				-	eor		TMP1.16b, Y_1.16b, X_1.16b
			
 
				-	eor		TMP2.16b, Y_2.16b, X_2.16b
			
 
				-	eor		TMP3.16b, Y_3.16b, X_3.16b
			
 
				-
			
 
				-	// y = ror(y, 3)
			
 
				-	ushr		Y_0.\lanes, TMP0.\lanes, #3
			
 
				-	ushr		Y_1.\lanes, TMP1.\lanes, #3
			
 
				-	ushr		Y_2.\lanes, TMP2.\lanes, #3
			
 
				-	ushr		Y_3.\lanes, TMP3.\lanes, #3
			
 
				-	sli		Y_0.\lanes, TMP0.\lanes, #(\n - 3)
			
 
				-	sli		Y_1.\lanes, TMP1.\lanes, #(\n - 3)
			
 
				-	sli		Y_2.\lanes, TMP2.\lanes, #(\n - 3)
			
 
				-	sli		Y_3.\lanes, TMP3.\lanes, #(\n - 3)
			
 
				-
			
 
				-	// x ^= k
			
 
				-	eor		X_0.16b, X_0.16b, ROUND_KEY.16b
			
 
				-	eor		X_1.16b, X_1.16b, ROUND_KEY.16b
			
 
				-	eor		X_2.16b, X_2.16b, ROUND_KEY.16b
			
 
				-	eor		X_3.16b, X_3.16b, ROUND_KEY.16b
			
 
				-
			
 
				-	// x -= y
			
 
				-	sub		X_0.\lanes, X_0.\lanes, Y_0.\lanes
			
 
				-	sub		X_1.\lanes, X_1.\lanes, Y_1.\lanes
			
 
				-	sub		X_2.\lanes, X_2.\lanes, Y_2.\lanes
			
 
				-	sub		X_3.\lanes, X_3.\lanes, Y_3.\lanes
			
 
				-
			
 
				-	// x = rol(x, 8)
			
 
				-	tbl		X_0.16b, {X_0.16b}, ROTATE_TABLE.16b
			
 
				-	tbl		X_1.16b, {X_1.16b}, ROTATE_TABLE.16b
			
 
				-	tbl		X_2.16b, {X_2.16b}, ROTATE_TABLE.16b
			
 
				-	tbl		X_3.16b, {X_3.16b}, ROTATE_TABLE.16b
			
 
				-.endm
			
 
				-
			
 
				-.macro _next_xts_tweak	next, cur, tmp, n
			
 
				-.if \n == 64
			
 
				-	/*
			
 
				-	 * Calculate the next tweak by multiplying the current one by x,
			
 
				-	 * modulo p(x) = x^128 + x^7 + x^2 + x + 1.
			
 
				-	 */
			
 
				-	sshr		\tmp\().2d, \cur\().2d, #63
			
 
				-	and		\tmp\().16b, \tmp\().16b, GFMUL_TABLE.16b
			
 
				-	shl		\next\().2d, \cur\().2d, #1
			
 
				-	ext		\tmp\().16b, \tmp\().16b, \tmp\().16b, #8
			
 
				-	eor		\next\().16b, \next\().16b, \tmp\().16b
			
 
				-.else
			
 
				-	/*
			
 
				-	 * Calculate the next two tweaks by multiplying the current ones by x^2,
			
 
				-	 * modulo p(x) = x^64 + x^4 + x^3 + x + 1.
			
 
				-	 */
			
 
				-	ushr		\tmp\().2d, \cur\().2d, #62
			
 
				-	shl		\next\().2d, \cur\().2d, #2
			
 
				-	tbl		\tmp\().16b, {GFMUL_TABLE.16b}, \tmp\().16b
			
 
				-	eor		\next\().16b, \next\().16b, \tmp\().16b
			
 
				-.endif
			
 
				-.endm
			
 
				-
			
 
				-/*
			
 
				- * _speck_xts_crypt() - Speck-XTS encryption/decryption
			
 
				- *
			
 
				- * Encrypt or decrypt NBYTES bytes of data from the SRC buffer to the DST buffer
			
 
				- * using Speck-XTS, specifically the variant with a block size of '2n' and round
			
 
				- * count given by NROUNDS.  The expanded round keys are given in ROUND_KEYS, and
			
 
				- * the current XTS tweak value is given in TWEAK.  It's assumed that NBYTES is a
			
 
				- * nonzero multiple of 128.
			
 
				- */
			
 
				-.macro _speck_xts_crypt	n, lanes, decrypting
			
 
				-
			
 
				-	/*
			
 
				-	 * If decrypting, modify the ROUND_KEYS parameter to point to the last
			
 
				-	 * round key rather than the first, since for decryption the round keys
			
 
				-	 * are used in reverse order.
			
 
				-	 */
			
 
				-.if \decrypting
			
 
				-	mov		NROUNDS, NROUNDS	/* zero the high 32 bits */
			
 
				-.if \n == 64
			
 
				-	add		ROUND_KEYS, ROUND_KEYS, NROUNDS_X, lsl #3
			
 
				-	sub		ROUND_KEYS, ROUND_KEYS, #8
			
 
				-.else
			
 
				-	add		ROUND_KEYS, ROUND_KEYS, NROUNDS_X, lsl #2
			
 
				-	sub		ROUND_KEYS, ROUND_KEYS, #4
			
 
				-.endif
			
 
				-.endif
			
 
				-
			
 
				-	// Load the index vector for tbl-based 8-bit rotates
			
 
				-.if \decrypting
			
 
				-	ldr		ROTATE_TABLE_Q, .Lrol\n\()_8_table
			
 
				-.else
			
 
				-	ldr		ROTATE_TABLE_Q, .Lror\n\()_8_table
			
 
				-.endif
			
 
				-
			
 
				-	// One-time XTS preparation
			
 
				-.if \n == 64
			
 
				-	// Load first tweak
			
 
				-	ld1		{TWEAKV0.16b}, [TWEAK]
			
 
				-
			
 
				-	// Load GF(2^128) multiplication table
			
 
				-	ldr		GFMUL_TABLE_Q, .Lgf128mul_table
			
 
				-.else
			
 
				-	// Load first tweak
			
 
				-	ld1		{TWEAKV0.8b}, [TWEAK]
			
 
				-
			
 
				-	// Load GF(2^64) multiplication table
			
 
				-	ldr		GFMUL_TABLE_Q, .Lgf64mul_table
			
 
				-
			
 
				-	// Calculate second tweak, packing it together with the first
			
 
				-	ushr		TMP0.2d, TWEAKV0.2d, #63
			
 
				-	shl		TMP1.2d, TWEAKV0.2d, #1
			
 
				-	tbl		TMP0.8b, {GFMUL_TABLE.16b}, TMP0.8b
			
 
				-	eor		TMP0.8b, TMP0.8b, TMP1.8b
			
 
				-	mov		TWEAKV0.d[1], TMP0.d[0]
			
 
				-.endif
			
 
				-
			
 
				-.Lnext_128bytes_\@:
			
 
				-
			
 
				-	// Calculate XTS tweaks for next 128 bytes
			
 
				-	_next_xts_tweak	TWEAKV1, TWEAKV0, TMP0, \n
			
 
				-	_next_xts_tweak	TWEAKV2, TWEAKV1, TMP0, \n
			
 
				-	_next_xts_tweak	TWEAKV3, TWEAKV2, TMP0, \n
			
 
				-	_next_xts_tweak	TWEAKV4, TWEAKV3, TMP0, \n
			
 
				-	_next_xts_tweak	TWEAKV5, TWEAKV4, TMP0, \n
			
 
				-	_next_xts_tweak	TWEAKV6, TWEAKV5, TMP0, \n
			
 
				-	_next_xts_tweak	TWEAKV7, TWEAKV6, TMP0, \n
			
 
				-	_next_xts_tweak	TWEAKV_NEXT, TWEAKV7, TMP0, \n
			
 
				-
			
 
				-	// Load the next source blocks into {X,Y}[0-3]
			
 
				-	ld1		{X_0.16b-Y_1.16b}, [SRC], #64
			
 
				-	ld1		{X_2.16b-Y_3.16b}, [SRC], #64
			
 
				-
			
 
				-	// XOR the source blocks with their XTS tweaks
			
 
				-	eor		TMP0.16b, X_0.16b, TWEAKV0.16b
			
 
				-	eor		Y_0.16b,  Y_0.16b, TWEAKV1.16b
			
 
				-	eor		TMP1.16b, X_1.16b, TWEAKV2.16b
			
 
				-	eor		Y_1.16b,  Y_1.16b, TWEAKV3.16b
			
 
				-	eor		TMP2.16b, X_2.16b, TWEAKV4.16b
			
 
				-	eor		Y_2.16b,  Y_2.16b, TWEAKV5.16b
			
 
				-	eor		TMP3.16b, X_3.16b, TWEAKV6.16b
			
 
				-	eor		Y_3.16b,  Y_3.16b, TWEAKV7.16b
			
 
				-
			
 
				-	/*
			
 
				-	 * De-interleave the 'x' and 'y' elements of each block, i.e. make it so
			
 
				-	 * that the X[0-3] registers contain only the second halves of blocks,
			
 
				-	 * and the Y[0-3] registers contain only the first halves of blocks.
			
 
				-	 * (Speck uses the order (y, x) rather than the more intuitive (x, y).)
			
 
				-	 */
			
 
				-	uzp2		X_0.\lanes, TMP0.\lanes, Y_0.\lanes
			
 
				-	uzp1		Y_0.\lanes, TMP0.\lanes, Y_0.\lanes
			
 
				-	uzp2		X_1.\lanes, TMP1.\lanes, Y_1.\lanes
			
 
				-	uzp1		Y_1.\lanes, TMP1.\lanes, Y_1.\lanes
			
 
				-	uzp2		X_2.\lanes, TMP2.\lanes, Y_2.\lanes
			
 
				-	uzp1		Y_2.\lanes, TMP2.\lanes, Y_2.\lanes
			
 
				-	uzp2		X_3.\lanes, TMP3.\lanes, Y_3.\lanes
			
 
				-	uzp1		Y_3.\lanes, TMP3.\lanes, Y_3.\lanes
			
 
				-
			
 
				-	// Do the cipher rounds
			
 
				-	mov		x6, ROUND_KEYS
			
 
				-	mov		w7, NROUNDS
			
 
				-.Lnext_round_\@:
			
 
				-.if \decrypting
			
 
				-	ld1r		{ROUND_KEY.\lanes}, [x6]
			
 
				-	sub		x6, x6, #( \n / 8 )
			
 
				-	_speck_unround_128bytes	\n, \lanes
			
 
				-.else
			
 
				-	ld1r		{ROUND_KEY.\lanes}, [x6], #( \n / 8 )
			
 
				-	_speck_round_128bytes	\n, \lanes
			
 
				-.endif
			
 
				-	subs		w7, w7, #1
			
 
				-	bne		.Lnext_round_\@
			
 
				-
			
 
				-	// Re-interleave the 'x' and 'y' elements of each block
			
 
				-	zip1		TMP0.\lanes, Y_0.\lanes, X_0.\lanes
			
 
				-	zip2		Y_0.\lanes,  Y_0.\lanes, X_0.\lanes
			
 
				-	zip1		TMP1.\lanes, Y_1.\lanes, X_1.\lanes
			
 
				-	zip2		Y_1.\lanes,  Y_1.\lanes, X_1.\lanes
			
 
				-	zip1		TMP2.\lanes, Y_2.\lanes, X_2.\lanes
			
 
				-	zip2		Y_2.\lanes,  Y_2.\lanes, X_2.\lanes
			
 
				-	zip1		TMP3.\lanes, Y_3.\lanes, X_3.\lanes
			
 
				-	zip2		Y_3.\lanes,  Y_3.\lanes, X_3.\lanes
			
 
				-
			
 
				-	// XOR the encrypted/decrypted blocks with the tweaks calculated earlier
			
 
				-	eor		X_0.16b, TMP0.16b, TWEAKV0.16b
			
 
				-	eor		Y_0.16b, Y_0.16b,  TWEAKV1.16b
			
 
				-	eor		X_1.16b, TMP1.16b, TWEAKV2.16b
			
 
				-	eor		Y_1.16b, Y_1.16b,  TWEAKV3.16b
			
 
				-	eor		X_2.16b, TMP2.16b, TWEAKV4.16b
			
 
				-	eor		Y_2.16b, Y_2.16b,  TWEAKV5.16b
			
 
				-	eor		X_3.16b, TMP3.16b, TWEAKV6.16b
			
 
				-	eor		Y_3.16b, Y_3.16b,  TWEAKV7.16b
			
 
				-	mov		TWEAKV0.16b, TWEAKV_NEXT.16b
			
 
				-
			
 
				-	// Store the ciphertext in the destination buffer
			
 
				-	st1		{X_0.16b-Y_1.16b}, [DST], #64
			
 
				-	st1		{X_2.16b-Y_3.16b}, [DST], #64
			
 
				-
			
 
				-	// Continue if there are more 128-byte chunks remaining
			
 
				-	subs		NBYTES, NBYTES, #128
			
 
				-	bne		.Lnext_128bytes_\@
			
 
				-
			
 
				-	// Store the next tweak and return
			
 
				-.if \n == 64
			
 
				-	st1		{TWEAKV_NEXT.16b}, [TWEAK]
			
 
				-.else
			
 
				-	st1		{TWEAKV_NEXT.8b}, [TWEAK]
			
 
				-.endif
			
 
				-	ret
			
 
				-.endm
			
 
				-
			
 
				-ENTRY(speck128_xts_encrypt_neon)
			
 
				-	_speck_xts_crypt	n=64, lanes=2d, decrypting=0
			
 
				-ENDPROC(speck128_xts_encrypt_neon)
			
 
				-
			
 
				-ENTRY(speck128_xts_decrypt_neon)
			
 
				-	_speck_xts_crypt	n=64, lanes=2d, decrypting=1
			
 
				-ENDPROC(speck128_xts_decrypt_neon)
			
 
				-
			
 
				-ENTRY(speck64_xts_encrypt_neon)
			
 
				-	_speck_xts_crypt	n=32, lanes=4s, decrypting=0
			
 
				-ENDPROC(speck64_xts_encrypt_neon)
			
 
				-
			
 
				-ENTRY(speck64_xts_decrypt_neon)
			
 
				-	_speck_xts_crypt	n=32, lanes=4s, decrypting=1
			
 
				-ENDPROC(speck64_xts_decrypt_neon)
			
--- a/arch/arm64/crypto/speck-neon-glue.c
+++ b/arch/arm64/crypto/speck-neon-glue.c
@@ -1,282 +0,0 @@
 
				-// SPDX-License-Identifier: GPL-2.0
			
 
				-/*
			
 
				- * NEON-accelerated implementation of Speck128-XTS and Speck64-XTS
			
 
				- * (64-bit version; based on the 32-bit version)
			
 
				- *
			
 
				- * Copyright (c) 2018 Google, Inc
			
 
				- */
			
 
				-
			
 
				-#include <asm/hwcap.h>
			
 
				-#include <asm/neon.h>
			
 
				-#include <asm/simd.h>
			
 
				-#include <crypto/algapi.h>
			
 
				-#include <crypto/gf128mul.h>
			
 
				-#include <crypto/internal/skcipher.h>
			
 
				-#include <crypto/speck.h>
			
 
				-#include <crypto/xts.h>
			
 
				-#include <linux/kernel.h>
			
 
				-#include <linux/module.h>
			
 
				-
			
 
				-/* The assembly functions only handle multiples of 128 bytes */
			
 
				-#define SPECK_NEON_CHUNK_SIZE	128
			
 
				-
			
 
				-/* Speck128 */
			
 
				-
			
 
				-struct speck128_xts_tfm_ctx {
			
 
				-	struct speck128_tfm_ctx main_key;
			
 
				-	struct speck128_tfm_ctx tweak_key;
			
 
				-};
			
 
				-
			
 
				-asmlinkage void speck128_xts_encrypt_neon(const u64 *round_keys, int nrounds,
			
 
				-					  void *dst, const void *src,
			
 
				-					  unsigned int nbytes, void *tweak);
			
 
				-
			
 
				-asmlinkage void speck128_xts_decrypt_neon(const u64 *round_keys, int nrounds,
			
 
				-					  void *dst, const void *src,
			
 
				-					  unsigned int nbytes, void *tweak);
			
 
				-
			
 
				-typedef void (*speck128_crypt_one_t)(const struct speck128_tfm_ctx *,
			
 
				-				     u8 *, const u8 *);
			
 
				-typedef void (*speck128_xts_crypt_many_t)(const u64 *, int, void *,
			
 
				-					  const void *, unsigned int, void *);
			
 
				-
			
 
				-static __always_inline int
			
 
				-__speck128_xts_crypt(struct skcipher_request *req,
			
 
				-		     speck128_crypt_one_t crypt_one,
			
 
				-		     speck128_xts_crypt_many_t crypt_many)
			
 
				-{
			
 
				-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
			
 
				-	const struct speck128_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
			
 
				-	struct skcipher_walk walk;
			
 
				-	le128 tweak;
			
 
				-	int err;
			
 
				-
			
 
				-	err = skcipher_walk_virt(&walk, req, true);
			
 
				-
			
 
				-	crypto_speck128_encrypt(&ctx->tweak_key, (u8 *)&tweak, walk.iv);
			
 
				-
			
 
				-	while (walk.nbytes > 0) {
			
 
				-		unsigned int nbytes = walk.nbytes;
			
 
				-		u8 *dst = walk.dst.virt.addr;
			
 
				-		const u8 *src = walk.src.virt.addr;
			
 
				-
			
 
				-		if (nbytes >= SPECK_NEON_CHUNK_SIZE && may_use_simd()) {
			
 
				-			unsigned int count;
			
 
				-
			
 
				-			count = round_down(nbytes, SPECK_NEON_CHUNK_SIZE);
			
 
				-			kernel_neon_begin();
			
 
				-			(*crypt_many)(ctx->main_key.round_keys,
			
 
				-				      ctx->main_key.nrounds,
			
 
				-				      dst, src, count, &tweak);
			
 
				-			kernel_neon_end();
			
 
				-			dst += count;
			
 
				-			src += count;
			
 
				-			nbytes -= count;
			
 
				-		}
			
 
				-
			
 
				-		/* Handle any remainder with generic code */
			
 
				-		while (nbytes >= sizeof(tweak)) {
			
 
				-			le128_xor((le128 *)dst, (const le128 *)src, &tweak);
			
 
				-			(*crypt_one)(&ctx->main_key, dst, dst);
			
 
				-			le128_xor((le128 *)dst, (const le128 *)dst, &tweak);
			
 
				-			gf128mul_x_ble(&tweak, &tweak);
			
 
				-
			
 
				-			dst += sizeof(tweak);
			
 
				-			src += sizeof(tweak);
			
 
				-			nbytes -= sizeof(tweak);
			
 
				-		}
			
 
				-		err = skcipher_walk_done(&walk, nbytes);
			
 
				-	}
			
 
				-
			
 
				-	return err;
			
 
				-}
			
 
				-
			
 
				-static int speck128_xts_encrypt(struct skcipher_request *req)
			
 
				-{
			
 
				-	return __speck128_xts_crypt(req, crypto_speck128_encrypt,
			
 
				-				    speck128_xts_encrypt_neon);
			
 
				-}
			
 
				-
			
 
				-static int speck128_xts_decrypt(struct skcipher_request *req)
			
 
				-{
			
 
				-	return __speck128_xts_crypt(req, crypto_speck128_decrypt,
			
 
				-				    speck128_xts_decrypt_neon);
			
 
				-}
			
 
				-
			
 
				-static int speck128_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
			
 
				-			       unsigned int keylen)
			
 
				-{
			
 
				-	struct speck128_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
			
 
				-	int err;
			
 
				-
			
 
				-	err = xts_verify_key(tfm, key, keylen);
			
 
				-	if (err)
			
 
				-		return err;
			
 
				-
			
 
				-	keylen /= 2;
			
 
				-
			
 
				-	err = crypto_speck128_setkey(&ctx->main_key, key, keylen);
			
 
				-	if (err)
			
 
				-		return err;
			
 
				-
			
 
				-	return crypto_speck128_setkey(&ctx->tweak_key, key + keylen, keylen);
			
 
				-}
			
 
				-
			
 
				-/* Speck64 */
			
 
				-
			
 
				-struct speck64_xts_tfm_ctx {
			
 
				-	struct speck64_tfm_ctx main_key;
			
 
				-	struct speck64_tfm_ctx tweak_key;
			
 
				-};
			
 
				-
			
 
				-asmlinkage void speck64_xts_encrypt_neon(const u32 *round_keys, int nrounds,
			
 
				-					 void *dst, const void *src,
			
 
				-					 unsigned int nbytes, void *tweak);
			
 
				-
			
 
				-asmlinkage void speck64_xts_decrypt_neon(const u32 *round_keys, int nrounds,
			
 
				-					 void *dst, const void *src,
			
 
				-					 unsigned int nbytes, void *tweak);
			
 
				-
			
 
				-typedef void (*speck64_crypt_one_t)(const struct speck64_tfm_ctx *,
			
 
				-				    u8 *, const u8 *);
			
 
				-typedef void (*speck64_xts_crypt_many_t)(const u32 *, int, void *,
			
 
				-					 const void *, unsigned int, void *);
			
 
				-
			
 
				-static __always_inline int
			
 
				-__speck64_xts_crypt(struct skcipher_request *req, speck64_crypt_one_t crypt_one,
			
 
				-		    speck64_xts_crypt_many_t crypt_many)
			
 
				-{
			
 
				-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
			
 
				-	const struct speck64_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
			
 
				-	struct skcipher_walk walk;
			
 
				-	__le64 tweak;
			
 
				-	int err;
			
 
				-
			
 
				-	err = skcipher_walk_virt(&walk, req, true);
			
 
				-
			
 
				-	crypto_speck64_encrypt(&ctx->tweak_key, (u8 *)&tweak, walk.iv);
			
 
				-
			
 
				-	while (walk.nbytes > 0) {
			
 
				-		unsigned int nbytes = walk.nbytes;
			
 
				-		u8 *dst = walk.dst.virt.addr;
			
 
				-		const u8 *src = walk.src.virt.addr;
			
 
				-
			
 
				-		if (nbytes >= SPECK_NEON_CHUNK_SIZE && may_use_simd()) {
			
 
				-			unsigned int count;
			
 
				-
			
 
				-			count = round_down(nbytes, SPECK_NEON_CHUNK_SIZE);
			
 
				-			kernel_neon_begin();
			
 
				-			(*crypt_many)(ctx->main_key.round_keys,
			
 
				-				      ctx->main_key.nrounds,
			
 
				-				      dst, src, count, &tweak);
			
 
				-			kernel_neon_end();
			
 
				-			dst += count;
			
 
				-			src += count;
			
 
				-			nbytes -= count;
			
 
				-		}
			
 
				-
			
 
				-		/* Handle any remainder with generic code */
			
 
				-		while (nbytes >= sizeof(tweak)) {
			
 
				-			*(__le64 *)dst = *(__le64 *)src ^ tweak;
			
 
				-			(*crypt_one)(&ctx->main_key, dst, dst);
			
 
				-			*(__le64 *)dst ^= tweak;
			
 
				-			tweak = cpu_to_le64((le64_to_cpu(tweak) << 1) ^
			
 
				-					    ((tweak & cpu_to_le64(1ULL << 63)) ?
			
 
				-					     0x1B : 0));
			
 
				-			dst += sizeof(tweak);
			
 
				-			src += sizeof(tweak);
			
 
				-			nbytes -= sizeof(tweak);
			
 
				-		}
			
 
				-		err = skcipher_walk_done(&walk, nbytes);
			
 
				-	}
			
 
				-
			
 
				-	return err;
			
 
				-}
			
 
				-
			
 
				-static int speck64_xts_encrypt(struct skcipher_request *req)
			
 
				-{
			
 
				-	return __speck64_xts_crypt(req, crypto_speck64_encrypt,
			
 
				-				   speck64_xts_encrypt_neon);
			
 
				-}
			
 
				-
			
 
				-static int speck64_xts_decrypt(struct skcipher_request *req)
			
 
				-{
			
 
				-	return __speck64_xts_crypt(req, crypto_speck64_decrypt,
			
 
				-				   speck64_xts_decrypt_neon);
			
 
				-}
			
 
				-
			
 
				-static int speck64_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
			
 
				-			      unsigned int keylen)
			
 
				-{
			
 
				-	struct speck64_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
			
 
				-	int err;
			
 
				-
			
 
				-	err = xts_verify_key(tfm, key, keylen);
			
 
				-	if (err)
			
 
				-		return err;
			
 
				-
			
 
				-	keylen /= 2;
			
 
				-
			
 
				-	err = crypto_speck64_setkey(&ctx->main_key, key, keylen);
			
 
				-	if (err)
			
 
				-		return err;
			
 
				-
			
 
				-	return crypto_speck64_setkey(&ctx->tweak_key, key + keylen, keylen);
			
 
				-}
			
 
				-
			
 
				-static struct skcipher_alg speck_algs[] = {
			
 
				-	{
			
 
				-		.base.cra_name		= "xts(speck128)",
			
 
				-		.base.cra_driver_name	= "xts-speck128-neon",
			
 
				-		.base.cra_priority	= 300,
			
 
				-		.base.cra_blocksize	= SPECK128_BLOCK_SIZE,
			
 
				-		.base.cra_ctxsize	= sizeof(struct speck128_xts_tfm_ctx),
			
 
				-		.base.cra_alignmask	= 7,
			
 
				-		.base.cra_module	= THIS_MODULE,
			
 
				-		.min_keysize		= 2 * SPECK128_128_KEY_SIZE,
			
 
				-		.max_keysize		= 2 * SPECK128_256_KEY_SIZE,
			
 
				-		.ivsize			= SPECK128_BLOCK_SIZE,
			
 
				-		.walksize		= SPECK_NEON_CHUNK_SIZE,
			
 
				-		.setkey			= speck128_xts_setkey,
			
 
				-		.encrypt		= speck128_xts_encrypt,
			
 
				-		.decrypt		= speck128_xts_decrypt,
			
 
				-	}, {
			
 
				-		.base.cra_name		= "xts(speck64)",
			
 
				-		.base.cra_driver_name	= "xts-speck64-neon",
			
 
				-		.base.cra_priority	= 300,
			
 
				-		.base.cra_blocksize	= SPECK64_BLOCK_SIZE,
			
 
				-		.base.cra_ctxsize	= sizeof(struct speck64_xts_tfm_ctx),
			
 
				-		.base.cra_alignmask	= 7,
			
 
				-		.base.cra_module	= THIS_MODULE,
			
 
				-		.min_keysize		= 2 * SPECK64_96_KEY_SIZE,
			
 
				-		.max_keysize		= 2 * SPECK64_128_KEY_SIZE,
			
 
				-		.ivsize			= SPECK64_BLOCK_SIZE,
			
 
				-		.walksize		= SPECK_NEON_CHUNK_SIZE,
			
 
				-		.setkey			= speck64_xts_setkey,
			
 
				-		.encrypt		= speck64_xts_encrypt,
			
 
				-		.decrypt		= speck64_xts_decrypt,
			
 
				-	}
			
 
				-};
			
 
				-
			
 
				-static int __init speck_neon_module_init(void)
			
 
				-{
			
 
				-	if (!(elf_hwcap & HWCAP_ASIMD))
			
 
				-		return -ENODEV;
			
 
				-	return crypto_register_skciphers(speck_algs, ARRAY_SIZE(speck_algs));
			
 
				-}
			
 
				-
			
 
				-static void __exit speck_neon_module_exit(void)
			
 
				-{
			
 
				-	crypto_unregister_skciphers(speck_algs, ARRAY_SIZE(speck_algs));
			
 
				-}
			
 
				-
			
 
				-module_init(speck_neon_module_init);
			
 
				-module_exit(speck_neon_module_exit);
			
 
				-
			
 
				-MODULE_DESCRIPTION("Speck block cipher (NEON-accelerated)");
			
 
				-MODULE_LICENSE("GPL");
			
 
				-MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>");
			
 
				-MODULE_ALIAS_CRYPTO("xts(speck128)");
			
 
				-MODULE_ALIAS_CRYPTO("xts-speck128-neon");
			
 
				-MODULE_ALIAS_CRYPTO("xts(speck64)");
			
 
				-MODULE_ALIAS_CRYPTO("xts-speck64-neon");
			
--- a/arch/m68k/configs/amiga_defconfig
+++ b/arch/m68k/configs/amiga_defconfig
@@ -621,7 +621,6 @@ CONFIG_CRYPTO_ECDH=m
 
				 CONFIG_CRYPTO_MANAGER=y
			
 
				 CONFIG_CRYPTO_USER=m
			
 
				 CONFIG_CRYPTO_CRYPTD=m
			
 
				-CONFIG_CRYPTO_MCRYPTD=m
			
 
				 CONFIG_CRYPTO_TEST=m
			
 
				 CONFIG_CRYPTO_CHACHA20POLY1305=m
			
 
				 CONFIG_CRYPTO_AEGIS128=m
			
@@ -657,7 +656,6 @@ CONFIG_CRYPTO_SALSA20=m
 
				 CONFIG_CRYPTO_SEED=m
			
 
				 CONFIG_CRYPTO_SERPENT=m
			
 
				 CONFIG_CRYPTO_SM4=m
			
 
				-CONFIG_CRYPTO_SPECK=m
			
 
				 CONFIG_CRYPTO_TEA=m
			
 
				 CONFIG_CRYPTO_TWOFISH=m
			
 
				 CONFIG_CRYPTO_LZO=m
			
--- a/arch/m68k/configs/apollo_defconfig
+++ b/arch/m68k/configs/apollo_defconfig
@@ -578,7 +578,6 @@ CONFIG_CRYPTO_ECDH=m
 
				 CONFIG_CRYPTO_MANAGER=y
			
 
				 CONFIG_CRYPTO_USER=m
			
 
				 CONFIG_CRYPTO_CRYPTD=m
			
 
				-CONFIG_CRYPTO_MCRYPTD=m
			
 
				 CONFIG_CRYPTO_TEST=m
			
 
				 CONFIG_CRYPTO_CHACHA20POLY1305=m
			
 
				 CONFIG_CRYPTO_AEGIS128=m
			
@@ -614,7 +613,6 @@ CONFIG_CRYPTO_SALSA20=m
 
				 CONFIG_CRYPTO_SEED=m
			
 
				 CONFIG_CRYPTO_SERPENT=m
			
 
				 CONFIG_CRYPTO_SM4=m
			
 
				-CONFIG_CRYPTO_SPECK=m
			
 
				 CONFIG_CRYPTO_TEA=m
			
 
				 CONFIG_CRYPTO_TWOFISH=m
			
 
				 CONFIG_CRYPTO_LZO=m
			
--- a/arch/m68k/configs/atari_defconfig
+++ b/arch/m68k/configs/atari_defconfig
@@ -599,7 +599,6 @@ CONFIG_CRYPTO_ECDH=m
 
				 CONFIG_CRYPTO_MANAGER=y
			
 
				 CONFIG_CRYPTO_USER=m
			
 
				 CONFIG_CRYPTO_CRYPTD=m
			
 
				-CONFIG_CRYPTO_MCRYPTD=m
			
 
				 CONFIG_CRYPTO_TEST=m
			
 
				 CONFIG_CRYPTO_CHACHA20POLY1305=m
			
 
				 CONFIG_CRYPTO_AEGIS128=m
			
@@ -635,7 +634,6 @@ CONFIG_CRYPTO_SALSA20=m
 
				 CONFIG_CRYPTO_SEED=m
			
 
				 CONFIG_CRYPTO_SERPENT=m
			
 
				 CONFIG_CRYPTO_SM4=m
			
 
				-CONFIG_CRYPTO_SPECK=m
			
 
				 CONFIG_CRYPTO_TEA=m
			
 
				 CONFIG_CRYPTO_TWOFISH=m
			
 
				 CONFIG_CRYPTO_LZO=m
			
--- a/arch/m68k/configs/bvme6000_defconfig
+++ b/arch/m68k/configs/bvme6000_defconfig
@@ -570,7 +570,6 @@ CONFIG_CRYPTO_ECDH=m
 
				 CONFIG_CRYPTO_MANAGER=y
			
 
				 CONFIG_CRYPTO_USER=m
			
 
				 CONFIG_CRYPTO_CRYPTD=m
			
 
				-CONFIG_CRYPTO_MCRYPTD=m
			
 
				 CONFIG_CRYPTO_TEST=m
			
 
				 CONFIG_CRYPTO_CHACHA20POLY1305=m
			
 
				 CONFIG_CRYPTO_AEGIS128=m
			
@@ -606,7 +605,6 @@ CONFIG_CRYPTO_SALSA20=m
 
				 CONFIG_CRYPTO_SEED=m
			
 
				 CONFIG_CRYPTO_SERPENT=m
			
 
				 CONFIG_CRYPTO_SM4=m
			
 
				-CONFIG_CRYPTO_SPECK=m
			
 
				 CONFIG_CRYPTO_TEA=m
			
 
				 CONFIG_CRYPTO_TWOFISH=m
			
 
				 CONFIG_CRYPTO_LZO=m
			
--- a/arch/m68k/configs/hp300_defconfig
+++ b/arch/m68k/configs/hp300_defconfig
@@ -580,7 +580,6 @@ CONFIG_CRYPTO_ECDH=m
 
				 CONFIG_CRYPTO_MANAGER=y
			
 
				 CONFIG_CRYPTO_USER=m
			
 
				 CONFIG_CRYPTO_CRYPTD=m
			
 
				-CONFIG_CRYPTO_MCRYPTD=m
			
 
				 CONFIG_CRYPTO_TEST=m
			
 
				 CONFIG_CRYPTO_CHACHA20POLY1305=m
			
 
				 CONFIG_CRYPTO_AEGIS128=m
			
@@ -616,7 +615,6 @@ CONFIG_CRYPTO_SALSA20=m
 
				 CONFIG_CRYPTO_SEED=m
			
 
				 CONFIG_CRYPTO_SERPENT=m
			
 
				 CONFIG_CRYPTO_SM4=m
			
 
				-CONFIG_CRYPTO_SPECK=m
			
 
				 CONFIG_CRYPTO_TEA=m
			
 
				 CONFIG_CRYPTO_TWOFISH=m
			
 
				 CONFIG_CRYPTO_LZO=m
			
--- a/arch/m68k/configs/mac_defconfig
+++ b/arch/m68k/configs/mac_defconfig
@@ -602,7 +602,6 @@ CONFIG_CRYPTO_ECDH=m
 
				 CONFIG_CRYPTO_MANAGER=y
			
 
				 CONFIG_CRYPTO_USER=m
			
 
				 CONFIG_CRYPTO_CRYPTD=m
			
 
				-CONFIG_CRYPTO_MCRYPTD=m
			
 
				 CONFIG_CRYPTO_TEST=m
			
 
				 CONFIG_CRYPTO_CHACHA20POLY1305=m
			
 
				 CONFIG_CRYPTO_AEGIS128=m
			
@@ -638,7 +637,6 @@ CONFIG_CRYPTO_SALSA20=m
 
				 CONFIG_CRYPTO_SEED=m
			
 
				 CONFIG_CRYPTO_SERPENT=m
			
 
				 CONFIG_CRYPTO_SM4=m
			
 
				-CONFIG_CRYPTO_SPECK=m
			
 
				 CONFIG_CRYPTO_TEA=m
			
 
				 CONFIG_CRYPTO_TWOFISH=m
			
 
				 CONFIG_CRYPTO_LZO=m
			
--- a/arch/m68k/configs/multi_defconfig
+++ b/arch/m68k/configs/multi_defconfig
@@ -684,7 +684,6 @@ CONFIG_CRYPTO_ECDH=m
 
				 CONFIG_CRYPTO_MANAGER=y
			
 
				 CONFIG_CRYPTO_USER=m
			
 
				 CONFIG_CRYPTO_CRYPTD=m
			
 
				-CONFIG_CRYPTO_MCRYPTD=m
			
 
				 CONFIG_CRYPTO_TEST=m
			
 
				 CONFIG_CRYPTO_CHACHA20POLY1305=m
			
 
				 CONFIG_CRYPTO_AEGIS128=m
			
@@ -720,7 +719,6 @@ CONFIG_CRYPTO_SALSA20=m
 
				 CONFIG_CRYPTO_SEED=m
			
 
				 CONFIG_CRYPTO_SERPENT=m
			
 
				 CONFIG_CRYPTO_SM4=m
			
 
				-CONFIG_CRYPTO_SPECK=m
			
 
				 CONFIG_CRYPTO_TEA=m
			
 
				 CONFIG_CRYPTO_TWOFISH=m
			
 
				 CONFIG_CRYPTO_LZO=m
			
--- a/arch/m68k/configs/mvme147_defconfig
+++ b/arch/m68k/configs/mvme147_defconfig
@@ -570,7 +570,6 @@ CONFIG_CRYPTO_ECDH=m
 
				 CONFIG_CRYPTO_MANAGER=y
			
 
				 CONFIG_CRYPTO_USER=m
			
 
				 CONFIG_CRYPTO_CRYPTD=m
			
 
				-CONFIG_CRYPTO_MCRYPTD=m
			
 
				 CONFIG_CRYPTO_TEST=m
			
 
				 CONFIG_CRYPTO_CHACHA20POLY1305=m
			
 
				 CONFIG_CRYPTO_AEGIS128=m
			
@@ -606,7 +605,6 @@ CONFIG_CRYPTO_SALSA20=m
 
				 CONFIG_CRYPTO_SEED=m
			
 
				 CONFIG_CRYPTO_SERPENT=m
			
 
				 CONFIG_CRYPTO_SM4=m
			
 
				-CONFIG_CRYPTO_SPECK=m
			
 
				 CONFIG_CRYPTO_TEA=m
			
 
				 CONFIG_CRYPTO_TWOFISH=m
			
 
				 CONFIG_CRYPTO_LZO=m
			
--- a/arch/m68k/configs/mvme16x_defconfig
+++ b/arch/m68k/configs/mvme16x_defconfig
@@ -570,7 +570,6 @@ CONFIG_CRYPTO_ECDH=m
 
				 CONFIG_CRYPTO_MANAGER=y
			
 
				 CONFIG_CRYPTO_USER=m
			
 
				 CONFIG_CRYPTO_CRYPTD=m
			
 
				-CONFIG_CRYPTO_MCRYPTD=m
			
 
				 CONFIG_CRYPTO_TEST=m
			
 
				 CONFIG_CRYPTO_CHACHA20POLY1305=m
			
 
				 CONFIG_CRYPTO_AEGIS128=m
			
@@ -606,7 +605,6 @@ CONFIG_CRYPTO_SALSA20=m
 
				 CONFIG_CRYPTO_SEED=m
			
 
				 CONFIG_CRYPTO_SERPENT=m
			
 
				 CONFIG_CRYPTO_SM4=m
			
 
				-CONFIG_CRYPTO_SPECK=m
			
 
				 CONFIG_CRYPTO_TEA=m
			
 
				 CONFIG_CRYPTO_TWOFISH=m
			
 
				 CONFIG_CRYPTO_LZO=m
			
--- a/arch/m68k/configs/q40_defconfig
+++ b/arch/m68k/configs/q40_defconfig
@@ -593,7 +593,6 @@ CONFIG_CRYPTO_ECDH=m
 
				 CONFIG_CRYPTO_MANAGER=y
			
 
				 CONFIG_CRYPTO_USER=m
			
 
				 CONFIG_CRYPTO_CRYPTD=m
			
 
				-CONFIG_CRYPTO_MCRYPTD=m
			
 
				 CONFIG_CRYPTO_TEST=m
			
 
				 CONFIG_CRYPTO_CHACHA20POLY1305=m
			
 
				 CONFIG_CRYPTO_AEGIS128=m
			
@@ -629,7 +628,6 @@ CONFIG_CRYPTO_SALSA20=m
 
				 CONFIG_CRYPTO_SEED=m
			
 
				 CONFIG_CRYPTO_SERPENT=m
			
 
				 CONFIG_CRYPTO_SM4=m
			
 
				-CONFIG_CRYPTO_SPECK=m
			
 
				 CONFIG_CRYPTO_TEA=m
			
 
				 CONFIG_CRYPTO_TWOFISH=m
			
 
				 CONFIG_CRYPTO_LZO=m
			
--- a/arch/m68k/configs/sun3_defconfig
+++ b/arch/m68k/configs/sun3_defconfig
@@ -571,7 +571,6 @@ CONFIG_CRYPTO_ECDH=m
 
				 CONFIG_CRYPTO_MANAGER=y
			
 
				 CONFIG_CRYPTO_USER=m
			
 
				 CONFIG_CRYPTO_CRYPTD=m
			
 
				-CONFIG_CRYPTO_MCRYPTD=m
			
 
				 CONFIG_CRYPTO_TEST=m
			
 
				 CONFIG_CRYPTO_CHACHA20POLY1305=m
			
 
				 CONFIG_CRYPTO_AEGIS128=m
			
@@ -607,7 +606,6 @@ CONFIG_CRYPTO_SALSA20=m
 
				 CONFIG_CRYPTO_SEED=m
			
 
				 CONFIG_CRYPTO_SERPENT=m
			
 
				 CONFIG_CRYPTO_SM4=m
			
 
				-CONFIG_CRYPTO_SPECK=m
			
 
				 CONFIG_CRYPTO_TEA=m
			
 
				 CONFIG_CRYPTO_TWOFISH=m
			
 
				 CONFIG_CRYPTO_LZO=m
			
--- a/arch/m68k/configs/sun3x_defconfig
+++ b/arch/m68k/configs/sun3x_defconfig
@@ -572,7 +572,6 @@ CONFIG_CRYPTO_ECDH=m
 
				 CONFIG_CRYPTO_MANAGER=y
			
 
				 CONFIG_CRYPTO_USER=m
			
 
				 CONFIG_CRYPTO_CRYPTD=m
			
 
				-CONFIG_CRYPTO_MCRYPTD=m
			
 
				 CONFIG_CRYPTO_TEST=m
			
 
				 CONFIG_CRYPTO_CHACHA20POLY1305=m
			
 
				 CONFIG_CRYPTO_AEGIS128=m
			
@@ -608,7 +607,6 @@ CONFIG_CRYPTO_SALSA20=m
 
				 CONFIG_CRYPTO_SEED=m
			
 
				 CONFIG_CRYPTO_SERPENT=m
			
 
				 CONFIG_CRYPTO_SM4=m
			
 
				-CONFIG_CRYPTO_SPECK=m
			
 
				 CONFIG_CRYPTO_TEA=m
			
 
				 CONFIG_CRYPTO_TWOFISH=m
			
 
				 CONFIG_CRYPTO_LZO=m
			
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -668,7 +668,6 @@ CONFIG_CRYPTO_USER=m
 
				 # CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
			
 
				 CONFIG_CRYPTO_PCRYPT=m
			
 
				 CONFIG_CRYPTO_CRYPTD=m
			
 
				-CONFIG_CRYPTO_MCRYPTD=m
			
 
				 CONFIG_CRYPTO_TEST=m
			
 
				 CONFIG_CRYPTO_CHACHA20POLY1305=m
			
 
				 CONFIG_CRYPTO_LRW=m
			
--- a/arch/s390/configs/performance_defconfig
+++ b/arch/s390/configs/performance_defconfig
@@ -610,7 +610,6 @@ CONFIG_CRYPTO_USER=m
 
				 # CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
			
 
				 CONFIG_CRYPTO_PCRYPT=m
			
 
				 CONFIG_CRYPTO_CRYPTD=m
			
 
				-CONFIG_CRYPTO_MCRYPTD=m
			
 
				 CONFIG_CRYPTO_TEST=m
			
 
				 CONFIG_CRYPTO_CHACHA20POLY1305=m
			
 
				 CONFIG_CRYPTO_LRW=m
			
--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -44,7 +44,7 @@ struct s390_aes_ctx {
 
				 	int key_len;
			
 
				 	unsigned long fc;
			
 
				 	union {
			
 
				-		struct crypto_skcipher *blk;
			
 
				+		struct crypto_sync_skcipher *blk;
			
 
				 		struct crypto_cipher *cip;
			
 
				 	} fallback;
			
 
				 };
			
@@ -54,7 +54,7 @@ struct s390_xts_ctx {
 
				 	u8 pcc_key[32];
			
 
				 	int key_len;
			
 
				 	unsigned long fc;
			
 
				-	struct crypto_skcipher *fallback;
			
 
				+	struct crypto_sync_skcipher *fallback;
			
 
				 };
			
 
				 
			
 
				 struct gcm_sg_walk {
			
@@ -184,14 +184,15 @@ static int setkey_fallback_blk(struct crypto_tfm *tfm, const u8 *key,
 
				 	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
			
 
				 	unsigned int ret;
			
 
				 
			
 
				-	crypto_skcipher_clear_flags(sctx->fallback.blk, CRYPTO_TFM_REQ_MASK);
			
 
				-	crypto_skcipher_set_flags(sctx->fallback.blk, tfm->crt_flags &
			
 
				+	crypto_sync_skcipher_clear_flags(sctx->fallback.blk,
			
 
				+					 CRYPTO_TFM_REQ_MASK);
			
 
				+	crypto_sync_skcipher_set_flags(sctx->fallback.blk, tfm->crt_flags &
			
 
				 						      CRYPTO_TFM_REQ_MASK);
			
 
				 
			
 
				-	ret = crypto_skcipher_setkey(sctx->fallback.blk, key, len);
			
 
				+	ret = crypto_sync_skcipher_setkey(sctx->fallback.blk, key, len);
			
 
				 
			
 
				 	tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK;
			
 
				-	tfm->crt_flags |= crypto_skcipher_get_flags(sctx->fallback.blk) &
			
 
				+	tfm->crt_flags |= crypto_sync_skcipher_get_flags(sctx->fallback.blk) &
			
 
				 			  CRYPTO_TFM_RES_MASK;
			
 
				 
			
 
				 	return ret;
			
@@ -204,9 +205,9 @@ static int fallback_blk_dec(struct blkcipher_desc *desc,
 
				 	unsigned int ret;
			
 
				 	struct crypto_blkcipher *tfm = desc->tfm;
			
 
				 	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(tfm);
			
 
				-	SKCIPHER_REQUEST_ON_STACK(req, sctx->fallback.blk);
			
 
				+	SYNC_SKCIPHER_REQUEST_ON_STACK(req, sctx->fallback.blk);
			
 
				 
			
 
				-	skcipher_request_set_tfm(req, sctx->fallback.blk);
			
 
				+	skcipher_request_set_sync_tfm(req, sctx->fallback.blk);
			
 
				 	skcipher_request_set_callback(req, desc->flags, NULL, NULL);
			
 
				 	skcipher_request_set_crypt(req, src, dst, nbytes, desc->info);
			
 
				 
			
@@ -223,9 +224,9 @@ static int fallback_blk_enc(struct blkcipher_desc *desc,
 
				 	unsigned int ret;
			
 
				 	struct crypto_blkcipher *tfm = desc->tfm;
			
 
				 	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(tfm);
			
 
				-	SKCIPHER_REQUEST_ON_STACK(req, sctx->fallback.blk);
			
 
				+	SYNC_SKCIPHER_REQUEST_ON_STACK(req, sctx->fallback.blk);
			
 
				 
			
 
				-	skcipher_request_set_tfm(req, sctx->fallback.blk);
			
 
				+	skcipher_request_set_sync_tfm(req, sctx->fallback.blk);
			
 
				 	skcipher_request_set_callback(req, desc->flags, NULL, NULL);
			
 
				 	skcipher_request_set_crypt(req, src, dst, nbytes, desc->info);
			
 
				 
			
@@ -306,8 +307,7 @@ static int fallback_init_blk(struct crypto_tfm *tfm)
 
				 	const char *name = tfm->__crt_alg->cra_name;
			
 
				 	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
			
 
				 
			
 
				-	sctx->fallback.blk = crypto_alloc_skcipher(name, 0,
			
 
				-						   CRYPTO_ALG_ASYNC |
			
 
				+	sctx->fallback.blk = crypto_alloc_sync_skcipher(name, 0,
			
 
				 						   CRYPTO_ALG_NEED_FALLBACK);
			
 
				 
			
 
				 	if (IS_ERR(sctx->fallback.blk)) {
			
@@ -323,7 +323,7 @@ static void fallback_exit_blk(struct crypto_tfm *tfm)
 
				 {
			
 
				 	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
			
 
				 
			
 
				-	crypto_free_skcipher(sctx->fallback.blk);
			
 
				+	crypto_free_sync_skcipher(sctx->fallback.blk);
			
 
				 }
			
 
				 
			
 
				 static struct crypto_alg ecb_aes_alg = {
			
@@ -453,14 +453,15 @@ static int xts_fallback_setkey(struct crypto_tfm *tfm, const u8 *key,
 
				 	struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
			
 
				 	unsigned int ret;
			
 
				 
			
 
				-	crypto_skcipher_clear_flags(xts_ctx->fallback, CRYPTO_TFM_REQ_MASK);
			
 
				-	crypto_skcipher_set_flags(xts_ctx->fallback, tfm->crt_flags &
			
 
				+	crypto_sync_skcipher_clear_flags(xts_ctx->fallback,
			
 
				+					 CRYPTO_TFM_REQ_MASK);
			
 
				+	crypto_sync_skcipher_set_flags(xts_ctx->fallback, tfm->crt_flags &
			
 
				 						     CRYPTO_TFM_REQ_MASK);
			
 
				 
			
 
				-	ret = crypto_skcipher_setkey(xts_ctx->fallback, key, len);
			
 
				+	ret = crypto_sync_skcipher_setkey(xts_ctx->fallback, key, len);
			
 
				 
			
 
				 	tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK;
			
 
				-	tfm->crt_flags |= crypto_skcipher_get_flags(xts_ctx->fallback) &
			
 
				+	tfm->crt_flags |= crypto_sync_skcipher_get_flags(xts_ctx->fallback) &
			
 
				 			  CRYPTO_TFM_RES_MASK;
			
 
				 
			
 
				 	return ret;
			
@@ -472,10 +473,10 @@ static int xts_fallback_decrypt(struct blkcipher_desc *desc,
 
				 {
			
 
				 	struct crypto_blkcipher *tfm = desc->tfm;
			
 
				 	struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(tfm);
			
 
				-	SKCIPHER_REQUEST_ON_STACK(req, xts_ctx->fallback);
			
 
				+	SYNC_SKCIPHER_REQUEST_ON_STACK(req, xts_ctx->fallback);
			
 
				 	unsigned int ret;
			
 
				 
			
 
				-	skcipher_request_set_tfm(req, xts_ctx->fallback);
			
 
				+	skcipher_request_set_sync_tfm(req, xts_ctx->fallback);
			
 
				 	skcipher_request_set_callback(req, desc->flags, NULL, NULL);
			
 
				 	skcipher_request_set_crypt(req, src, dst, nbytes, desc->info);
			
 
				 
			
@@ -491,10 +492,10 @@ static int xts_fallback_encrypt(struct blkcipher_desc *desc,
 
				 {
			
 
				 	struct crypto_blkcipher *tfm = desc->tfm;
			
 
				 	struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(tfm);
			
 
				-	SKCIPHER_REQUEST_ON_STACK(req, xts_ctx->fallback);
			
 
				+	SYNC_SKCIPHER_REQUEST_ON_STACK(req, xts_ctx->fallback);
			
 
				 	unsigned int ret;
			
 
				 
			
 
				-	skcipher_request_set_tfm(req, xts_ctx->fallback);
			
 
				+	skcipher_request_set_sync_tfm(req, xts_ctx->fallback);
			
 
				 	skcipher_request_set_callback(req, desc->flags, NULL, NULL);
			
 
				 	skcipher_request_set_crypt(req, src, dst, nbytes, desc->info);
			
 
				 
			
@@ -611,8 +612,7 @@ static int xts_fallback_init(struct crypto_tfm *tfm)
 
				 	const char *name = tfm->__crt_alg->cra_name;
			
 
				 	struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
			
 
				 
			
 
				-	xts_ctx->fallback = crypto_alloc_skcipher(name, 0,
			
 
				-						  CRYPTO_ALG_ASYNC |
			
 
				+	xts_ctx->fallback = crypto_alloc_sync_skcipher(name, 0,
			
 
				 						  CRYPTO_ALG_NEED_FALLBACK);
			
 
				 
			
 
				 	if (IS_ERR(xts_ctx->fallback)) {
			
@@ -627,7 +627,7 @@ static void xts_fallback_exit(struct crypto_tfm *tfm)
 
				 {
			
 
				 	struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
			
 
				 
			
 
				-	crypto_free_skcipher(xts_ctx->fallback);
			
 
				+	crypto_free_sync_skcipher(xts_ctx->fallback);
			
 
				 }
			
 
				 
			
 
				 static struct crypto_alg xts_aes_alg = {
			
--- a/arch/s390/defconfig
+++ b/arch/s390/defconfig
@@ -221,7 +221,6 @@ CONFIG_CRYPTO_SALSA20=m
 
				 CONFIG_CRYPTO_SEED=m
			
 
				 CONFIG_CRYPTO_SERPENT=m
			
 
				 CONFIG_CRYPTO_SM4=m
			
 
				-CONFIG_CRYPTO_SPECK=m
			
 
				 CONFIG_CRYPTO_TEA=m
			
 
				 CONFIG_CRYPTO_TWOFISH=m
			
 
				 CONFIG_CRYPTO_DEFLATE=m
			
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -60,9 +60,6 @@ endif
 
				 ifeq ($(avx2_supported),yes)
			
 
				 	obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64) += camellia-aesni-avx2.o
			
 
				 	obj-$(CONFIG_CRYPTO_SERPENT_AVX2_X86_64) += serpent-avx2.o
			
 
				-	obj-$(CONFIG_CRYPTO_SHA1_MB) += sha1-mb/
			
 
				-	obj-$(CONFIG_CRYPTO_SHA256_MB) += sha256-mb/
			
 
				-	obj-$(CONFIG_CRYPTO_SHA512_MB) += sha512-mb/
			
 
				 
			
 
				 	obj-$(CONFIG_CRYPTO_MORUS1280_AVX2) += morus1280-avx2.o
			
 
				 endif
			
@@ -106,7 +103,7 @@ ifeq ($(avx2_supported),yes)
 
				 	morus1280-avx2-y := morus1280-avx2-asm.o morus1280-avx2-glue.o
			
 
				 endif
			
 
				 
			
 
				-aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o
			
 
				+aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o
			
 
				 aesni-intel-$(CONFIG_64BIT) += aesni-intel_avx-x86_64.o aes_ctrby8_avx-x86_64.o
			
 
				 ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
			
 
				 sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o
			
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -102,9 +102,6 @@ asmlinkage void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out,
 
				 asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out,
			
 
				 			      const u8 *in, unsigned int len, u8 *iv);
			
 
				 
			
 
				-int crypto_fpu_init(void);
			
 
				-void crypto_fpu_exit(void);
			
 
				-
			
 
				 #define AVX_GEN2_OPTSIZE 640
			
 
				 #define AVX_GEN4_OPTSIZE 4096
			
 
				 
			
@@ -817,7 +814,7 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req,
 
				 	/* Linearize assoc, if not already linear */
			
 
				 	if (req->src->length >= assoclen && req->src->length &&
			
 
				 		(!PageHighMem(sg_page(req->src)) ||
			
 
				-			req->src->offset + req->src->length < PAGE_SIZE)) {
			
 
				+			req->src->offset + req->src->length <= PAGE_SIZE)) {
			
 
				 		scatterwalk_start(&assoc_sg_walk, req->src);
			
 
				 		assoc = scatterwalk_map(&assoc_sg_walk);
			
 
				 	} else {
			
@@ -1253,22 +1250,6 @@ static struct skcipher_alg aesni_skciphers[] = {
 
				 static
			
 
				 struct simd_skcipher_alg *aesni_simd_skciphers[ARRAY_SIZE(aesni_skciphers)];
			
 
				 
			
 
				-static struct {
			
 
				-	const char *algname;
			
 
				-	const char *drvname;
			
 
				-	const char *basename;
			
 
				-	struct simd_skcipher_alg *simd;
			
 
				-} aesni_simd_skciphers2[] = {
			
 
				-#if (defined(MODULE) && IS_ENABLED(CONFIG_CRYPTO_PCBC)) || \
			
 
				-    IS_BUILTIN(CONFIG_CRYPTO_PCBC)
			
 
				-	{
			
 
				-		.algname	= "pcbc(aes)",
			
 
				-		.drvname	= "pcbc-aes-aesni",
			
 
				-		.basename	= "fpu(pcbc(__aes-aesni))",
			
 
				-	},
			
 
				-#endif
			
 
				-};
			
 
				-
			
 
				 #ifdef CONFIG_X86_64
			
 
				 static int generic_gcmaes_set_key(struct crypto_aead *aead, const u8 *key,
			
 
				 				  unsigned int key_len)
			
@@ -1422,10 +1403,6 @@ static void aesni_free_simds(void)
 
				 	for (i = 0; i < ARRAY_SIZE(aesni_simd_skciphers) &&
			
 
				 		    aesni_simd_skciphers[i]; i++)
			
 
				 		simd_skcipher_free(aesni_simd_skciphers[i]);
			
 
				-
			
 
				-	for (i = 0; i < ARRAY_SIZE(aesni_simd_skciphers2); i++)
			
 
				-		if (aesni_simd_skciphers2[i].simd)
			
 
				-			simd_skcipher_free(aesni_simd_skciphers2[i].simd);
			
 
				 }
			
 
				 
			
 
				 static int __init aesni_init(void)
			
@@ -1469,13 +1446,9 @@ static int __init aesni_init(void)
 
				 #endif
			
 
				 #endif
			
 
				 
			
 
				-	err = crypto_fpu_init();
			
 
				-	if (err)
			
 
				-		return err;
			
 
				-
			
 
				 	err = crypto_register_algs(aesni_algs, ARRAY_SIZE(aesni_algs));
			
 
				 	if (err)
			
 
				-		goto fpu_exit;
			
 
				+		return err;
			
 
				 
			
 
				 	err = crypto_register_skciphers(aesni_skciphers,
			
 
				 					ARRAY_SIZE(aesni_skciphers));
			
@@ -1499,18 +1472,6 @@ static int __init aesni_init(void)
 
				 		aesni_simd_skciphers[i] = simd;
			
 
				 	}
			
 
				 
			
 
				-	for (i = 0; i < ARRAY_SIZE(aesni_simd_skciphers2); i++) {
			
 
				-		algname = aesni_simd_skciphers2[i].algname;
			
 
				-		drvname = aesni_simd_skciphers2[i].drvname;
			
 
				-		basename = aesni_simd_skciphers2[i].basename;
			
 
				-		simd = simd_skcipher_create_compat(algname, drvname, basename);
			
 
				-		err = PTR_ERR(simd);
			
 
				-		if (IS_ERR(simd))
			
 
				-			continue;
			
 
				-
			
 
				-		aesni_simd_skciphers2[i].simd = simd;
			
 
				-	}
			
 
				-
			
 
				 	return 0;
			
 
				 
			
 
				 unregister_simds:
			
@@ -1521,8 +1482,6 @@ unregister_skciphers:
 
				 				    ARRAY_SIZE(aesni_skciphers));
			
 
				 unregister_algs:
			
 
				 	crypto_unregister_algs(aesni_algs, ARRAY_SIZE(aesni_algs));
			
 
				-fpu_exit:
			
 
				-	crypto_fpu_exit();
			
 
				 	return err;
			
 
				 }
			
 
				 
			
@@ -1533,8 +1492,6 @@ static void __exit aesni_exit(void)
 
				 	crypto_unregister_skciphers(aesni_skciphers,
			
 
				 				    ARRAY_SIZE(aesni_skciphers));
			
 
				 	crypto_unregister_algs(aesni_algs, ARRAY_SIZE(aesni_algs));
			
 
				-
			
 
				-	crypto_fpu_exit();
			
 
				 }
			
 
				 
			
 
				 late_initcall(aesni_init);
			
--- a/arch/x86/crypto/fpu.c
+++ b/arch/x86/crypto/fpu.c
@@ -1,207 +0,0 @@
 
				-/*
			
 
				- * FPU: Wrapper for blkcipher touching fpu
			
 
				- *
			
 
				- * Copyright (c) Intel Corp.
			
 
				- *   Author: Huang Ying <ying.huang@intel.com>
			
 
				- *
			
 
				- * This program is free software; you can redistribute it and/or modify it
			
 
				- * under the terms of the GNU General Public License as published by the Free
			
 
				- * Software Foundation; either version 2 of the License, or (at your option)
			
 
				- * any later version.
			
 
				- *
			
 
				- */
			
 
				-
			
 
				-#include <crypto/internal/skcipher.h>
			
 
				-#include <linux/err.h>
			
 
				-#include <linux/init.h>
			
 
				-#include <linux/kernel.h>
			
 
				-#include <linux/module.h>
			
 
				-#include <linux/slab.h>
			
 
				-#include <asm/fpu/api.h>
			
 
				-
			
 
				-struct crypto_fpu_ctx {
			
 
				-	struct crypto_skcipher *child;
			
 
				-};
			
 
				-
			
 
				-static int crypto_fpu_setkey(struct crypto_skcipher *parent, const u8 *key,
			
 
				-			     unsigned int keylen)
			
 
				-{
			
 
				-	struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(parent);
			
 
				-	struct crypto_skcipher *child = ctx->child;
			
 
				-	int err;
			
 
				-
			
 
				-	crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
			
 
				-	crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(parent) &
			
 
				-					 CRYPTO_TFM_REQ_MASK);
			
 
				-	err = crypto_skcipher_setkey(child, key, keylen);
			
 
				-	crypto_skcipher_set_flags(parent, crypto_skcipher_get_flags(child) &
			
 
				-					  CRYPTO_TFM_RES_MASK);
			
 
				-	return err;
			
 
				-}
			
 
				-
			
 
				-static int crypto_fpu_encrypt(struct skcipher_request *req)
			
 
				-{
			
 
				-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
			
 
				-	struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(tfm);
			
 
				-	struct crypto_skcipher *child = ctx->child;
			
 
				-	SKCIPHER_REQUEST_ON_STACK(subreq, child);
			
 
				-	int err;
			
 
				-
			
 
				-	skcipher_request_set_tfm(subreq, child);
			
 
				-	skcipher_request_set_callback(subreq, 0, NULL, NULL);
			
 
				-	skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen,
			
 
				-				   req->iv);
			
 
				-
			
 
				-	kernel_fpu_begin();
			
 
				-	err = crypto_skcipher_encrypt(subreq);
			
 
				-	kernel_fpu_end();
			
 
				-
			
 
				-	skcipher_request_zero(subreq);
			
 
				-	return err;
			
 
				-}
			
 
				-
			
 
				-static int crypto_fpu_decrypt(struct skcipher_request *req)
			
 
				-{
			
 
				-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
			
 
				-	struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(tfm);
			
 
				-	struct crypto_skcipher *child = ctx->child;
			
 
				-	SKCIPHER_REQUEST_ON_STACK(subreq, child);
			
 
				-	int err;
			
 
				-
			
 
				-	skcipher_request_set_tfm(subreq, child);
			
 
				-	skcipher_request_set_callback(subreq, 0, NULL, NULL);
			
 
				-	skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen,
			
 
				-				   req->iv);
			
 
				-
			
 
				-	kernel_fpu_begin();
			
 
				-	err = crypto_skcipher_decrypt(subreq);
			
 
				-	kernel_fpu_end();
			
 
				-
			
 
				-	skcipher_request_zero(subreq);
			
 
				-	return err;
			
 
				-}
			
 
				-
			
 
				-static int crypto_fpu_init_tfm(struct crypto_skcipher *tfm)
			
 
				-{
			
 
				-	struct skcipher_instance *inst = skcipher_alg_instance(tfm);
			
 
				-	struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(tfm);
			
 
				-	struct crypto_skcipher_spawn *spawn;
			
 
				-	struct crypto_skcipher *cipher;
			
 
				-
			
 
				-	spawn = skcipher_instance_ctx(inst);
			
 
				-	cipher = crypto_spawn_skcipher(spawn);
			
 
				-	if (IS_ERR(cipher))
			
 
				-		return PTR_ERR(cipher);
			
 
				-
			
 
				-	ctx->child = cipher;
			
 
				-
			
 
				-	return 0;
			
 
				-}
			
 
				-
			
 
				-static void crypto_fpu_exit_tfm(struct crypto_skcipher *tfm)
			
 
				-{
			
 
				-	struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(tfm);
			
 
				-
			
 
				-	crypto_free_skcipher(ctx->child);
			
 
				-}
			
 
				-
			
 
				-static void crypto_fpu_free(struct skcipher_instance *inst)
			
 
				-{
			
 
				-	crypto_drop_skcipher(skcipher_instance_ctx(inst));
			
 
				-	kfree(inst);
			
 
				-}
			
 
				-
			
 
				-static int crypto_fpu_create(struct crypto_template *tmpl, struct rtattr **tb)
			
 
				-{
			
 
				-	struct crypto_skcipher_spawn *spawn;
			
 
				-	struct skcipher_instance *inst;
			
 
				-	struct crypto_attr_type *algt;
			
 
				-	struct skcipher_alg *alg;
			
 
				-	const char *cipher_name;
			
 
				-	int err;
			
 
				-
			
 
				-	algt = crypto_get_attr_type(tb);
			
 
				-	if (IS_ERR(algt))
			
 
				-		return PTR_ERR(algt);
			
 
				-
			
 
				-	if ((algt->type ^ (CRYPTO_ALG_INTERNAL | CRYPTO_ALG_TYPE_SKCIPHER)) &
			
 
				-	    algt->mask)
			
 
				-		return -EINVAL;
			
 
				-
			
 
				-	if (!(algt->mask & CRYPTO_ALG_INTERNAL))
			
 
				-		return -EINVAL;
			
 
				-
			
 
				-	cipher_name = crypto_attr_alg_name(tb[1]);
			
 
				-	if (IS_ERR(cipher_name))
			
 
				-		return PTR_ERR(cipher_name);
			
 
				-
			
 
				-	inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
			
 
				-	if (!inst)
			
 
				-		return -ENOMEM;
			
 
				-
			
 
				-	spawn = skcipher_instance_ctx(inst);
			
 
				-
			
 
				-	crypto_set_skcipher_spawn(spawn, skcipher_crypto_instance(inst));
			
 
				-	err = crypto_grab_skcipher(spawn, cipher_name, CRYPTO_ALG_INTERNAL,
			
 
				-				   CRYPTO_ALG_INTERNAL | CRYPTO_ALG_ASYNC);
			
 
				-	if (err)
			
 
				-		goto out_free_inst;
			
 
				-
			
 
				-	alg = crypto_skcipher_spawn_alg(spawn);
			
 
				-
			
 
				-	err = crypto_inst_setname(skcipher_crypto_instance(inst), "fpu",
			
 
				-				  &alg->base);
			
 
				-	if (err)
			
 
				-		goto out_drop_skcipher;
			
 
				-
			
 
				-	inst->alg.base.cra_flags = CRYPTO_ALG_INTERNAL;
			
 
				-	inst->alg.base.cra_priority = alg->base.cra_priority;
			
 
				-	inst->alg.base.cra_blocksize = alg->base.cra_blocksize;
			
 
				-	inst->alg.base.cra_alignmask = alg->base.cra_alignmask;
			
 
				-
			
 
				-	inst->alg.ivsize = crypto_skcipher_alg_ivsize(alg);
			
 
				-	inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(alg);
			
 
				-	inst->alg.max_keysize = crypto_skcipher_alg_max_keysize(alg);
			
 
				-
			
 
				-	inst->alg.base.cra_ctxsize = sizeof(struct crypto_fpu_ctx);
			
 
				-
			
 
				-	inst->alg.init = crypto_fpu_init_tfm;
			
 
				-	inst->alg.exit = crypto_fpu_exit_tfm;
			
 
				-
			
 
				-	inst->alg.setkey = crypto_fpu_setkey;
			
 
				-	inst->alg.encrypt = crypto_fpu_encrypt;
			
 
				-	inst->alg.decrypt = crypto_fpu_decrypt;
			
 
				-
			
 
				-	inst->free = crypto_fpu_free;
			
 
				-
			
 
				-	err = skcipher_register_instance(tmpl, inst);
			
 
				-	if (err)
			
 
				-		goto out_drop_skcipher;
			
 
				-
			
 
				-out:
			
 
				-	return err;
			
 
				-
			
 
				-out_drop_skcipher:
			
 
				-	crypto_drop_skcipher(spawn);
			
 
				-out_free_inst:
			
 
				-	kfree(inst);
			
 
				-	goto out;
			
 
				-}
			
 
				-
			
 
				-static struct crypto_template crypto_fpu_tmpl = {
			
 
				-	.name = "fpu",
			
 
				-	.create = crypto_fpu_create,
			
 
				-	.module = THIS_MODULE,
			
 
				-};
			
 
				-
			
 
				-int __init crypto_fpu_init(void)
			
 
				-{
			
 
				-	return crypto_register_template(&crypto_fpu_tmpl);
			
 
				-}
			
 
				-
			
 
				-void crypto_fpu_exit(void)
			
 
				-{
			
 
				-	crypto_unregister_template(&crypto_fpu_tmpl);
			
 
				-}
			
 
				-
			
 
				-MODULE_ALIAS_CRYPTO("fpu");
			
--- a/arch/x86/crypto/sha1-mb/Makefile
+++ b/arch/x86/crypto/sha1-mb/Makefile
@@ -1,14 +0,0 @@
 
				-# SPDX-License-Identifier: GPL-2.0
			
 
				-#
			
 
				-# Arch-specific CryptoAPI modules.
			
 
				-#
			
 
				-
			
 
				-OBJECT_FILES_NON_STANDARD := y
			
 
				-
			
 
				-avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
			
 
				-                                $(comma)4)$(comma)%ymm2,yes,no)
			
 
				-ifeq ($(avx2_supported),yes)
			
 
				-	obj-$(CONFIG_CRYPTO_SHA1_MB) += sha1-mb.o
			
 
				-	sha1-mb-y := sha1_mb.o sha1_mb_mgr_flush_avx2.o \
			
 
				-	     sha1_mb_mgr_init_avx2.o sha1_mb_mgr_submit_avx2.o sha1_x8_avx2.o
			
 
				-endif
			
--- a/arch/x86/crypto/sha1-mb/sha1_mb.c
+++ b/arch/x86/crypto/sha1-mb/sha1_mb.c
@@ -1,1011 +0,0 @@
 
				-/*
			
 
				- * Multi buffer SHA1 algorithm Glue Code
			
 
				- *
			
 
				- * This file is provided under a dual BSD/GPLv2 license.  When using or
			
 
				- * redistributing this file, you may do so under either license.
			
 
				- *
			
 
				- * GPL LICENSE SUMMARY
			
 
				- *
			
 
				- *  Copyright(c) 2014 Intel Corporation.
			
 
				- *
			
 
				- *  This program is free software; you can redistribute it and/or modify
			
 
				- *  it under the terms of version 2 of the GNU General Public License as
			
 
				- *  published by the Free Software Foundation.
			
 
				- *
			
 
				- *  This program is distributed in the hope that it will be useful, but
			
 
				- *  WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
			
 
				- *  General Public License for more details.
			
 
				- *
			
 
				- *  Contact Information:
			
 
				- *	Tim Chen <tim.c.chen@linux.intel.com>
			
 
				- *
			
 
				- *  BSD LICENSE
			
 
				- *
			
 
				- *  Copyright(c) 2014 Intel Corporation.
			
 
				- *
			
 
				- *  Redistribution and use in source and binary forms, with or without
			
 
				- *  modification, are permitted provided that the following conditions
			
 
				- *  are met:
			
 
				- *
			
 
				- *    * Redistributions of source code must retain the above copyright
			
 
				- *      notice, this list of conditions and the following disclaimer.
			
 
				- *    * Redistributions in binary form must reproduce the above copyright
			
 
				- *      notice, this list of conditions and the following disclaimer in
			
 
				- *      the documentation and/or other materials provided with the
			
 
				- *      distribution.
			
 
				- *    * Neither the name of Intel Corporation nor the names of its
			
 
				- *      contributors may be used to endorse or promote products derived
			
 
				- *      from this software without specific prior written permission.
			
 
				- *
			
 
				- *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
			
 
				- *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
			
 
				- *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
			
 
				- *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
			
 
				- *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
			
 
				- *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
			
 
				- *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
			
 
				- *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
			
 
				- *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
			
 
				- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
			
 
				- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
			
 
				- */
			
 
				-
			
 
				-#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
			
 
				-
			
 
				-#include <crypto/internal/hash.h>
			
 
				-#include <linux/init.h>
			
 
				-#include <linux/module.h>
			
 
				-#include <linux/mm.h>
			
 
				-#include <linux/cryptohash.h>
			
 
				-#include <linux/types.h>
			
 
				-#include <linux/list.h>
			
 
				-#include <crypto/scatterwalk.h>
			
 
				-#include <crypto/sha.h>
			
 
				-#include <crypto/mcryptd.h>
			
 
				-#include <crypto/crypto_wq.h>
			
 
				-#include <asm/byteorder.h>
			
 
				-#include <linux/hardirq.h>
			
 
				-#include <asm/fpu/api.h>
			
 
				-#include "sha1_mb_ctx.h"
			
 
				-
			
 
				-#define FLUSH_INTERVAL 1000 /* in usec */
			
 
				-
			
 
				-static struct mcryptd_alg_state sha1_mb_alg_state;
			
 
				-
			
 
				-struct sha1_mb_ctx {
			
 
				-	struct mcryptd_ahash *mcryptd_tfm;
			
 
				-};
			
 
				-
			
 
				-static inline struct mcryptd_hash_request_ctx
			
 
				-		*cast_hash_to_mcryptd_ctx(struct sha1_hash_ctx *hash_ctx)
			
 
				-{
			
 
				-	struct ahash_request *areq;
			
 
				-
			
 
				-	areq = container_of((void *) hash_ctx, struct ahash_request, __ctx);
			
 
				-	return container_of(areq, struct mcryptd_hash_request_ctx, areq);
			
 
				-}
			
 
				-
			
 
				-static inline struct ahash_request
			
 
				-		*cast_mcryptd_ctx_to_req(struct mcryptd_hash_request_ctx *ctx)
			
 
				-{
			
 
				-	return container_of((void *) ctx, struct ahash_request, __ctx);
			
 
				-}
			
 
				-
			
 
				-static void req_ctx_init(struct mcryptd_hash_request_ctx *rctx,
			
 
				-				struct ahash_request *areq)
			
 
				-{
			
 
				-	rctx->flag = HASH_UPDATE;
			
 
				-}
			
 
				-
			
 
				-static asmlinkage void (*sha1_job_mgr_init)(struct sha1_mb_mgr *state);
			
 
				-static asmlinkage struct job_sha1* (*sha1_job_mgr_submit)
			
 
				-			(struct sha1_mb_mgr *state, struct job_sha1 *job);
			
 
				-static asmlinkage struct job_sha1* (*sha1_job_mgr_flush)
			
 
				-						(struct sha1_mb_mgr *state);
			
 
				-static asmlinkage struct job_sha1* (*sha1_job_mgr_get_comp_job)
			
 
				-						(struct sha1_mb_mgr *state);
			
 
				-
			
 
				-static inline uint32_t sha1_pad(uint8_t padblock[SHA1_BLOCK_SIZE * 2],
			
 
				-			 uint64_t total_len)
			
 
				-{
			
 
				-	uint32_t i = total_len & (SHA1_BLOCK_SIZE - 1);
			
 
				-
			
 
				-	memset(&padblock[i], 0, SHA1_BLOCK_SIZE);
			
 
				-	padblock[i] = 0x80;
			
 
				-
			
 
				-	i += ((SHA1_BLOCK_SIZE - 1) &
			
 
				-	      (0 - (total_len + SHA1_PADLENGTHFIELD_SIZE + 1)))
			
 
				-	     + 1 + SHA1_PADLENGTHFIELD_SIZE;
			
 
				-
			
 
				-#if SHA1_PADLENGTHFIELD_SIZE == 16
			
 
				-	*((uint64_t *) &padblock[i - 16]) = 0;
			
 
				-#endif
			
 
				-
			
 
				-	*((uint64_t *) &padblock[i - 8]) = cpu_to_be64(total_len << 3);
			
 
				-
			
 
				-	/* Number of extra blocks to hash */
			
 
				-	return i >> SHA1_LOG2_BLOCK_SIZE;
			
 
				-}
			
 
				-
			
 
				-static struct sha1_hash_ctx *sha1_ctx_mgr_resubmit(struct sha1_ctx_mgr *mgr,
			
 
				-						struct sha1_hash_ctx *ctx)
			
 
				-{
			
 
				-	while (ctx) {
			
 
				-		if (ctx->status & HASH_CTX_STS_COMPLETE) {
			
 
				-			/* Clear PROCESSING bit */
			
 
				-			ctx->status = HASH_CTX_STS_COMPLETE;
			
 
				-			return ctx;
			
 
				-		}
			
 
				-
			
 
				-		/*
			
 
				-		 * If the extra blocks are empty, begin hashing what remains
			
 
				-		 * in the user's buffer.
			
 
				-		 */
			
 
				-		if (ctx->partial_block_buffer_length == 0 &&
			
 
				-		    ctx->incoming_buffer_length) {
			
 
				-
			
 
				-			const void *buffer = ctx->incoming_buffer;
			
 
				-			uint32_t len = ctx->incoming_buffer_length;
			
 
				-			uint32_t copy_len;
			
 
				-
			
 
				-			/*
			
 
				-			 * Only entire blocks can be hashed.
			
 
				-			 * Copy remainder to extra blocks buffer.
			
 
				-			 */
			
 
				-			copy_len = len & (SHA1_BLOCK_SIZE-1);
			
 
				-
			
 
				-			if (copy_len) {
			
 
				-				len -= copy_len;
			
 
				-				memcpy(ctx->partial_block_buffer,
			
 
				-				       ((const char *) buffer + len),
			
 
				-				       copy_len);
			
 
				-				ctx->partial_block_buffer_length = copy_len;
			
 
				-			}
			
 
				-
			
 
				-			ctx->incoming_buffer_length = 0;
			
 
				-
			
 
				-			/* len should be a multiple of the block size now */
			
 
				-			assert((len % SHA1_BLOCK_SIZE) == 0);
			
 
				-
			
 
				-			/* Set len to the number of blocks to be hashed */
			
 
				-			len >>= SHA1_LOG2_BLOCK_SIZE;
			
 
				-
			
 
				-			if (len) {
			
 
				-
			
 
				-				ctx->job.buffer = (uint8_t *) buffer;
			
 
				-				ctx->job.len = len;
			
 
				-				ctx = (struct sha1_hash_ctx *)sha1_job_mgr_submit(&mgr->mgr,
			
 
				-										&ctx->job);
			
 
				-				continue;
			
 
				-			}
			
 
				-		}
			
 
				-
			
 
				-		/*
			
 
				-		 * If the extra blocks are not empty, then we are
			
 
				-		 * either on the last block(s) or we need more
			
 
				-		 * user input before continuing.
			
 
				-		 */
			
 
				-		if (ctx->status & HASH_CTX_STS_LAST) {
			
 
				-
			
 
				-			uint8_t *buf = ctx->partial_block_buffer;
			
 
				-			uint32_t n_extra_blocks =
			
 
				-					sha1_pad(buf, ctx->total_length);
			
 
				-
			
 
				-			ctx->status = (HASH_CTX_STS_PROCESSING |
			
 
				-				       HASH_CTX_STS_COMPLETE);
			
 
				-			ctx->job.buffer = buf;
			
 
				-			ctx->job.len = (uint32_t) n_extra_blocks;
			
 
				-			ctx = (struct sha1_hash_ctx *)
			
 
				-				sha1_job_mgr_submit(&mgr->mgr, &ctx->job);
			
 
				-			continue;
			
 
				-		}
			
 
				-
			
 
				-		ctx->status = HASH_CTX_STS_IDLE;
			
 
				-		return ctx;
			
 
				-	}
			
 
				-
			
 
				-	return NULL;
			
 
				-}
			
 
				-
			
 
				-static struct sha1_hash_ctx
			
 
				-			*sha1_ctx_mgr_get_comp_ctx(struct sha1_ctx_mgr *mgr)
			
 
				-{
			
 
				-	/*
			
 
				-	 * If get_comp_job returns NULL, there are no jobs complete.
			
 
				-	 * If get_comp_job returns a job, verify that it is safe to return to
			
 
				-	 * the user.
			
 
				-	 * If it is not ready, resubmit the job to finish processing.
			
 
				-	 * If sha1_ctx_mgr_resubmit returned a job, it is ready to be returned.
			
 
				-	 * Otherwise, all jobs currently being managed by the hash_ctx_mgr
			
 
				-	 * still need processing.
			
 
				-	 */
			
 
				-	struct sha1_hash_ctx *ctx;
			
 
				-
			
 
				-	ctx = (struct sha1_hash_ctx *) sha1_job_mgr_get_comp_job(&mgr->mgr);
			
 
				-	return sha1_ctx_mgr_resubmit(mgr, ctx);
			
 
				-}
			
 
				-
			
 
				-static void sha1_ctx_mgr_init(struct sha1_ctx_mgr *mgr)
			
 
				-{
			
 
				-	sha1_job_mgr_init(&mgr->mgr);
			
 
				-}
			
 
				-
			
 
				-static struct sha1_hash_ctx *sha1_ctx_mgr_submit(struct sha1_ctx_mgr *mgr,
			
 
				-					  struct sha1_hash_ctx *ctx,
			
 
				-					  const void *buffer,
			
 
				-					  uint32_t len,
			
 
				-					  int flags)
			
 
				-{
			
 
				-	if (flags & ~(HASH_UPDATE | HASH_LAST)) {
			
 
				-		/* User should not pass anything other than UPDATE or LAST */
			
 
				-		ctx->error = HASH_CTX_ERROR_INVALID_FLAGS;
			
 
				-		return ctx;
			
 
				-	}
			
 
				-
			
 
				-	if (ctx->status & HASH_CTX_STS_PROCESSING) {
			
 
				-		/* Cannot submit to a currently processing job. */
			
 
				-		ctx->error = HASH_CTX_ERROR_ALREADY_PROCESSING;
			
 
				-		return ctx;
			
 
				-	}
			
 
				-
			
 
				-	if (ctx->status & HASH_CTX_STS_COMPLETE) {
			
 
				-		/* Cannot update a finished job. */
			
 
				-		ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED;
			
 
				-		return ctx;
			
 
				-	}
			
 
				-
			
 
				-	/*
			
 
				-	 * If we made it here, there were no errors during this call to
			
 
				-	 * submit
			
 
				-	 */
			
 
				-	ctx->error = HASH_CTX_ERROR_NONE;
			
 
				-
			
 
				-	/* Store buffer ptr info from user */
			
 
				-	ctx->incoming_buffer = buffer;
			
 
				-	ctx->incoming_buffer_length = len;
			
 
				-
			
 
				-	/*
			
 
				-	 * Store the user's request flags and mark this ctx as currently
			
 
				-	 * being processed.
			
 
				-	 */
			
 
				-	ctx->status = (flags & HASH_LAST) ?
			
 
				-			(HASH_CTX_STS_PROCESSING | HASH_CTX_STS_LAST) :
			
 
				-			HASH_CTX_STS_PROCESSING;
			
 
				-
			
 
				-	/* Advance byte counter */
			
 
				-	ctx->total_length += len;
			
 
				-
			
 
				-	/*
			
 
				-	 * If there is anything currently buffered in the extra blocks,
			
 
				-	 * append to it until it contains a whole block.
			
 
				-	 * Or if the user's buffer contains less than a whole block,
			
 
				-	 * append as much as possible to the extra block.
			
 
				-	 */
			
 
				-	if (ctx->partial_block_buffer_length || len < SHA1_BLOCK_SIZE) {
			
 
				-		/*
			
 
				-		 * Compute how many bytes to copy from user buffer into
			
 
				-		 * extra block
			
 
				-		 */
			
 
				-		uint32_t copy_len = SHA1_BLOCK_SIZE -
			
 
				-					ctx->partial_block_buffer_length;
			
 
				-		if (len < copy_len)
			
 
				-			copy_len = len;
			
 
				-
			
 
				-		if (copy_len) {
			
 
				-			/* Copy and update relevant pointers and counters */
			
 
				-			memcpy(&ctx->partial_block_buffer[ctx->partial_block_buffer_length],
			
 
				-				buffer, copy_len);
			
 
				-
			
 
				-			ctx->partial_block_buffer_length += copy_len;
			
 
				-			ctx->incoming_buffer = (const void *)
			
 
				-					((const char *)buffer + copy_len);
			
 
				-			ctx->incoming_buffer_length = len - copy_len;
			
 
				-		}
			
 
				-
			
 
				-		/*
			
 
				-		 * The extra block should never contain more than 1 block
			
 
				-		 * here
			
 
				-		 */
			
 
				-		assert(ctx->partial_block_buffer_length <= SHA1_BLOCK_SIZE);
			
 
				-
			
 
				-		/*
			
 
				-		 * If the extra block buffer contains exactly 1 block, it can
			
 
				-		 * be hashed.
			
 
				-		 */
			
 
				-		if (ctx->partial_block_buffer_length >= SHA1_BLOCK_SIZE) {
			
 
				-			ctx->partial_block_buffer_length = 0;
			
 
				-
			
 
				-			ctx->job.buffer = ctx->partial_block_buffer;
			
 
				-			ctx->job.len = 1;
			
 
				-			ctx = (struct sha1_hash_ctx *)
			
 
				-				sha1_job_mgr_submit(&mgr->mgr, &ctx->job);
			
 
				-		}
			
 
				-	}
			
 
				-
			
 
				-	return sha1_ctx_mgr_resubmit(mgr, ctx);
			
 
				-}
			
 
				-
			
 
				-static struct sha1_hash_ctx *sha1_ctx_mgr_flush(struct sha1_ctx_mgr *mgr)
			
 
				-{
			
 
				-	struct sha1_hash_ctx *ctx;
			
 
				-
			
 
				-	while (1) {
			
 
				-		ctx = (struct sha1_hash_ctx *) sha1_job_mgr_flush(&mgr->mgr);
			
 
				-
			
 
				-		/* If flush returned 0, there are no more jobs in flight. */
			
 
				-		if (!ctx)
			
 
				-			return NULL;
			
 
				-
			
 
				-		/*
			
 
				-		 * If flush returned a job, resubmit the job to finish
			
 
				-		 * processing.
			
 
				-		 */
			
 
				-		ctx = sha1_ctx_mgr_resubmit(mgr, ctx);
			
 
				-
			
 
				-		/*
			
 
				-		 * If sha1_ctx_mgr_resubmit returned a job, it is ready to be
			
 
				-		 * returned. Otherwise, all jobs currently being managed by the
			
 
				-		 * sha1_ctx_mgr still need processing. Loop.
			
 
				-		 */
			
 
				-		if (ctx)
			
 
				-			return ctx;
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-static int sha1_mb_init(struct ahash_request *areq)
			
 
				-{
			
 
				-	struct sha1_hash_ctx *sctx = ahash_request_ctx(areq);
			
 
				-
			
 
				-	hash_ctx_init(sctx);
			
 
				-	sctx->job.result_digest[0] = SHA1_H0;
			
 
				-	sctx->job.result_digest[1] = SHA1_H1;
			
 
				-	sctx->job.result_digest[2] = SHA1_H2;
			
 
				-	sctx->job.result_digest[3] = SHA1_H3;
			
 
				-	sctx->job.result_digest[4] = SHA1_H4;
			
 
				-	sctx->total_length = 0;
			
 
				-	sctx->partial_block_buffer_length = 0;
			
 
				-	sctx->status = HASH_CTX_STS_IDLE;
			
 
				-
			
 
				-	return 0;
			
 
				-}
			
 
				-
			
 
				-static int sha1_mb_set_results(struct mcryptd_hash_request_ctx *rctx)
			
 
				-{
			
 
				-	int	i;
			
 
				-	struct	sha1_hash_ctx *sctx = ahash_request_ctx(&rctx->areq);
			
 
				-	__be32	*dst = (__be32 *) rctx->out;
			
 
				-
			
 
				-	for (i = 0; i < 5; ++i)
			
 
				-		dst[i] = cpu_to_be32(sctx->job.result_digest[i]);
			
 
				-
			
 
				-	return 0;
			
 
				-}
			
 
				-
			
 
				-static int sha_finish_walk(struct mcryptd_hash_request_ctx **ret_rctx,
			
 
				-			struct mcryptd_alg_cstate *cstate, bool flush)
			
 
				-{
			
 
				-	int	flag = HASH_UPDATE;
			
 
				-	int	nbytes, err = 0;
			
 
				-	struct mcryptd_hash_request_ctx *rctx = *ret_rctx;
			
 
				-	struct sha1_hash_ctx *sha_ctx;
			
 
				-
			
 
				-	/* more work ? */
			
 
				-	while (!(rctx->flag & HASH_DONE)) {
			
 
				-		nbytes = crypto_ahash_walk_done(&rctx->walk, 0);
			
 
				-		if (nbytes < 0) {
			
 
				-			err = nbytes;
			
 
				-			goto out;
			
 
				-		}
			
 
				-		/* check if the walk is done */
			
 
				-		if (crypto_ahash_walk_last(&rctx->walk)) {
			
 
				-			rctx->flag |= HASH_DONE;
			
 
				-			if (rctx->flag & HASH_FINAL)
			
 
				-				flag |= HASH_LAST;
			
 
				-
			
 
				-		}
			
 
				-		sha_ctx = (struct sha1_hash_ctx *)
			
 
				-						ahash_request_ctx(&rctx->areq);
			
 
				-		kernel_fpu_begin();
			
 
				-		sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx,
			
 
				-						rctx->walk.data, nbytes, flag);
			
 
				-		if (!sha_ctx) {
			
 
				-			if (flush)
			
 
				-				sha_ctx = sha1_ctx_mgr_flush(cstate->mgr);
			
 
				-		}
			
 
				-		kernel_fpu_end();
			
 
				-		if (sha_ctx)
			
 
				-			rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
			
 
				-		else {
			
 
				-			rctx = NULL;
			
 
				-			goto out;
			
 
				-		}
			
 
				-	}
			
 
				-
			
 
				-	/* copy the results */
			
 
				-	if (rctx->flag & HASH_FINAL)
			
 
				-		sha1_mb_set_results(rctx);
			
 
				-
			
 
				-out:
			
 
				-	*ret_rctx = rctx;
			
 
				-	return err;
			
 
				-}
			
 
				-
			
 
				-static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx,
			
 
				-			    struct mcryptd_alg_cstate *cstate,
			
 
				-			    int err)
			
 
				-{
			
 
				-	struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
			
 
				-	struct sha1_hash_ctx *sha_ctx;
			
 
				-	struct mcryptd_hash_request_ctx *req_ctx;
			
 
				-	int ret;
			
 
				-
			
 
				-	/* remove from work list */
			
 
				-	spin_lock(&cstate->work_lock);
			
 
				-	list_del(&rctx->waiter);
			
 
				-	spin_unlock(&cstate->work_lock);
			
 
				-
			
 
				-	if (irqs_disabled())
			
 
				-		rctx->complete(&req->base, err);
			
 
				-	else {
			
 
				-		local_bh_disable();
			
 
				-		rctx->complete(&req->base, err);
			
 
				-		local_bh_enable();
			
 
				-	}
			
 
				-
			
 
				-	/* check to see if there are other jobs that are done */
			
 
				-	sha_ctx = sha1_ctx_mgr_get_comp_ctx(cstate->mgr);
			
 
				-	while (sha_ctx) {
			
 
				-		req_ctx = cast_hash_to_mcryptd_ctx(sha_ctx);
			
 
				-		ret = sha_finish_walk(&req_ctx, cstate, false);
			
 
				-		if (req_ctx) {
			
 
				-			spin_lock(&cstate->work_lock);
			
 
				-			list_del(&req_ctx->waiter);
			
 
				-			spin_unlock(&cstate->work_lock);
			
 
				-
			
 
				-			req = cast_mcryptd_ctx_to_req(req_ctx);
			
 
				-			if (irqs_disabled())
			
 
				-				req_ctx->complete(&req->base, ret);
			
 
				-			else {
			
 
				-				local_bh_disable();
			
 
				-				req_ctx->complete(&req->base, ret);
			
 
				-				local_bh_enable();
			
 
				-			}
			
 
				-		}
			
 
				-		sha_ctx = sha1_ctx_mgr_get_comp_ctx(cstate->mgr);
			
 
				-	}
			
 
				-
			
 
				-	return 0;
			
 
				-}
			
 
				-
			
 
				-static void sha1_mb_add_list(struct mcryptd_hash_request_ctx *rctx,
			
 
				-			     struct mcryptd_alg_cstate *cstate)
			
 
				-{
			
 
				-	unsigned long next_flush;
			
 
				-	unsigned long delay = usecs_to_jiffies(FLUSH_INTERVAL);
			
 
				-
			
 
				-	/* initialize tag */
			
 
				-	rctx->tag.arrival = jiffies;    /* tag the arrival time */
			
 
				-	rctx->tag.seq_num = cstate->next_seq_num++;
			
 
				-	next_flush = rctx->tag.arrival + delay;
			
 
				-	rctx->tag.expire = next_flush;
			
 
				-
			
 
				-	spin_lock(&cstate->work_lock);
			
 
				-	list_add_tail(&rctx->waiter, &cstate->work_list);
			
 
				-	spin_unlock(&cstate->work_lock);
			
 
				-
			
 
				-	mcryptd_arm_flusher(cstate, delay);
			
 
				-}
			
 
				-
			
 
				-static int sha1_mb_update(struct ahash_request *areq)
			
 
				-{
			
 
				-	struct mcryptd_hash_request_ctx *rctx =
			
 
				-		container_of(areq, struct mcryptd_hash_request_ctx, areq);
			
 
				-	struct mcryptd_alg_cstate *cstate =
			
 
				-				this_cpu_ptr(sha1_mb_alg_state.alg_cstate);
			
 
				-
			
 
				-	struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
			
 
				-	struct sha1_hash_ctx *sha_ctx;
			
 
				-	int ret = 0, nbytes;
			
 
				-
			
 
				-
			
 
				-	/* sanity check */
			
 
				-	if (rctx->tag.cpu != smp_processor_id()) {
			
 
				-		pr_err("mcryptd error: cpu clash\n");
			
 
				-		goto done;
			
 
				-	}
			
 
				-
			
 
				-	/* need to init context */
			
 
				-	req_ctx_init(rctx, areq);
			
 
				-
			
 
				-	nbytes = crypto_ahash_walk_first(req, &rctx->walk);
			
 
				-
			
 
				-	if (nbytes < 0) {
			
 
				-		ret = nbytes;
			
 
				-		goto done;
			
 
				-	}
			
 
				-
			
 
				-	if (crypto_ahash_walk_last(&rctx->walk))
			
 
				-		rctx->flag |= HASH_DONE;
			
 
				-
			
 
				-	/* submit */
			
 
				-	sha_ctx = (struct sha1_hash_ctx *) ahash_request_ctx(areq);
			
 
				-	sha1_mb_add_list(rctx, cstate);
			
 
				-	kernel_fpu_begin();
			
 
				-	sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data,
			
 
				-							nbytes, HASH_UPDATE);
			
 
				-	kernel_fpu_end();
			
 
				-
			
 
				-	/* check if anything is returned */
			
 
				-	if (!sha_ctx)
			
 
				-		return -EINPROGRESS;
			
 
				-
			
 
				-	if (sha_ctx->error) {
			
 
				-		ret = sha_ctx->error;
			
 
				-		rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
			
 
				-		goto done;
			
 
				-	}
			
 
				-
			
 
				-	rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
			
 
				-	ret = sha_finish_walk(&rctx, cstate, false);
			
 
				-
			
 
				-	if (!rctx)
			
 
				-		return -EINPROGRESS;
			
 
				-done:
			
 
				-	sha_complete_job(rctx, cstate, ret);
			
 
				-	return ret;
			
 
				-}
			
 
				-
			
 
				-static int sha1_mb_finup(struct ahash_request *areq)
			
 
				-{
			
 
				-	struct mcryptd_hash_request_ctx *rctx =
			
 
				-		container_of(areq, struct mcryptd_hash_request_ctx, areq);
			
 
				-	struct mcryptd_alg_cstate *cstate =
			
 
				-				this_cpu_ptr(sha1_mb_alg_state.alg_cstate);
			
 
				-
			
 
				-	struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
			
 
				-	struct sha1_hash_ctx *sha_ctx;
			
 
				-	int ret = 0, flag = HASH_UPDATE, nbytes;
			
 
				-
			
 
				-	/* sanity check */
			
 
				-	if (rctx->tag.cpu != smp_processor_id()) {
			
 
				-		pr_err("mcryptd error: cpu clash\n");
			
 
				-		goto done;
			
 
				-	}
			
 
				-
			
 
				-	/* need to init context */
			
 
				-	req_ctx_init(rctx, areq);
			
 
				-
			
 
				-	nbytes = crypto_ahash_walk_first(req, &rctx->walk);
			
 
				-
			
 
				-	if (nbytes < 0) {
			
 
				-		ret = nbytes;
			
 
				-		goto done;
			
 
				-	}
			
 
				-
			
 
				-	if (crypto_ahash_walk_last(&rctx->walk)) {
			
 
				-		rctx->flag |= HASH_DONE;
			
 
				-		flag = HASH_LAST;
			
 
				-	}
			
 
				-
			
 
				-	/* submit */
			
 
				-	rctx->flag |= HASH_FINAL;
			
 
				-	sha_ctx = (struct sha1_hash_ctx *) ahash_request_ctx(areq);
			
 
				-	sha1_mb_add_list(rctx, cstate);
			
 
				-
			
 
				-	kernel_fpu_begin();
			
 
				-	sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data,
			
 
				-								nbytes, flag);
			
 
				-	kernel_fpu_end();
			
 
				-
			
 
				-	/* check if anything is returned */
			
 
				-	if (!sha_ctx)
			
 
				-		return -EINPROGRESS;
			
 
				-
			
 
				-	if (sha_ctx->error) {
			
 
				-		ret = sha_ctx->error;
			
 
				-		goto done;
			
 
				-	}
			
 
				-
			
 
				-	rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
			
 
				-	ret = sha_finish_walk(&rctx, cstate, false);
			
 
				-	if (!rctx)
			
 
				-		return -EINPROGRESS;
			
 
				-done:
			
 
				-	sha_complete_job(rctx, cstate, ret);
			
 
				-	return ret;
			
 
				-}
			
 
				-
			
 
				-static int sha1_mb_final(struct ahash_request *areq)
			
 
				-{
			
 
				-	struct mcryptd_hash_request_ctx *rctx =
			
 
				-		container_of(areq, struct mcryptd_hash_request_ctx, areq);
			
 
				-	struct mcryptd_alg_cstate *cstate =
			
 
				-				this_cpu_ptr(sha1_mb_alg_state.alg_cstate);
			
 
				-
			
 
				-	struct sha1_hash_ctx *sha_ctx;
			
 
				-	int ret = 0;
			
 
				-	u8 data;
			
 
				-
			
 
				-	/* sanity check */
			
 
				-	if (rctx->tag.cpu != smp_processor_id()) {
			
 
				-		pr_err("mcryptd error: cpu clash\n");
			
 
				-		goto done;
			
 
				-	}
			
 
				-
			
 
				-	/* need to init context */
			
 
				-	req_ctx_init(rctx, areq);
			
 
				-
			
 
				-	rctx->flag |= HASH_DONE | HASH_FINAL;
			
 
				-
			
 
				-	sha_ctx = (struct sha1_hash_ctx *) ahash_request_ctx(areq);
			
 
				-	/* flag HASH_FINAL and 0 data size */
			
 
				-	sha1_mb_add_list(rctx, cstate);
			
 
				-	kernel_fpu_begin();
			
 
				-	sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, &data, 0,
			
 
				-								HASH_LAST);
			
 
				-	kernel_fpu_end();
			
 
				-
			
 
				-	/* check if anything is returned */
			
 
				-	if (!sha_ctx)
			
 
				-		return -EINPROGRESS;
			
 
				-
			
 
				-	if (sha_ctx->error) {
			
 
				-		ret = sha_ctx->error;
			
 
				-		rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
			
 
				-		goto done;
			
 
				-	}
			
 
				-
			
 
				-	rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
			
 
				-	ret = sha_finish_walk(&rctx, cstate, false);
			
 
				-	if (!rctx)
			
 
				-		return -EINPROGRESS;
			
 
				-done:
			
 
				-	sha_complete_job(rctx, cstate, ret);
			
 
				-	return ret;
			
 
				-}
			
 
				-
			
 
				-static int sha1_mb_export(struct ahash_request *areq, void *out)
			
 
				-{
			
 
				-	struct sha1_hash_ctx *sctx = ahash_request_ctx(areq);
			
 
				-
			
 
				-	memcpy(out, sctx, sizeof(*sctx));
			
 
				-
			
 
				-	return 0;
			
 
				-}
			
 
				-
			
 
				-static int sha1_mb_import(struct ahash_request *areq, const void *in)
			
 
				-{
			
 
				-	struct sha1_hash_ctx *sctx = ahash_request_ctx(areq);
			
 
				-
			
 
				-	memcpy(sctx, in, sizeof(*sctx));
			
 
				-
			
 
				-	return 0;
			
 
				-}
			
 
				-
			
 
				-static int sha1_mb_async_init_tfm(struct crypto_tfm *tfm)
			
 
				-{
			
 
				-	struct mcryptd_ahash *mcryptd_tfm;
			
 
				-	struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm);
			
 
				-	struct mcryptd_hash_ctx *mctx;
			
 
				-
			
 
				-	mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha1-mb",
			
 
				-						CRYPTO_ALG_INTERNAL,
			
 
				-						CRYPTO_ALG_INTERNAL);
			
 
				-	if (IS_ERR(mcryptd_tfm))
			
 
				-		return PTR_ERR(mcryptd_tfm);
			
 
				-	mctx = crypto_ahash_ctx(&mcryptd_tfm->base);
			
 
				-	mctx->alg_state = &sha1_mb_alg_state;
			
 
				-	ctx->mcryptd_tfm = mcryptd_tfm;
			
 
				-	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
			
 
				-				sizeof(struct ahash_request) +
			
 
				-				crypto_ahash_reqsize(&mcryptd_tfm->base));
			
 
				-
			
 
				-	return 0;
			
 
				-}
			
 
				-
			
 
				-static void sha1_mb_async_exit_tfm(struct crypto_tfm *tfm)
			
 
				-{
			
 
				-	struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm);
			
 
				-
			
 
				-	mcryptd_free_ahash(ctx->mcryptd_tfm);
			
 
				-}
			
 
				-
			
 
				-static int sha1_mb_areq_init_tfm(struct crypto_tfm *tfm)
			
 
				-{
			
 
				-	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
			
 
				-				sizeof(struct ahash_request) +
			
 
				-				sizeof(struct sha1_hash_ctx));
			
 
				-
			
 
				-	return 0;
			
 
				-}
			
 
				-
			
 
				-static void sha1_mb_areq_exit_tfm(struct crypto_tfm *tfm)
			
 
				-{
			
 
				-	struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm);
			
 
				-
			
 
				-	mcryptd_free_ahash(ctx->mcryptd_tfm);
			
 
				-}
			
 
				-
			
 
				-static struct ahash_alg sha1_mb_areq_alg = {
			
 
				-	.init		=	sha1_mb_init,
			
 
				-	.update		=	sha1_mb_update,
			
 
				-	.final		=	sha1_mb_final,
			
 
				-	.finup		=	sha1_mb_finup,
			
 
				-	.export		=	sha1_mb_export,
			
 
				-	.import		=	sha1_mb_import,
			
 
				-	.halg		=	{
			
 
				-		.digestsize	=	SHA1_DIGEST_SIZE,
			
 
				-		.statesize	=	sizeof(struct sha1_hash_ctx),
			
 
				-		.base		=	{
			
 
				-			.cra_name	 = "__sha1-mb",
			
 
				-			.cra_driver_name = "__intel_sha1-mb",
			
 
				-			.cra_priority	 = 100,
			
 
				-			/*
			
 
				-			 * use ASYNC flag as some buffers in multi-buffer
			
 
				-			 * algo may not have completed before hashing thread
			
 
				-			 * sleep
			
 
				-			 */
			
 
				-			.cra_flags	= CRYPTO_ALG_ASYNC |
			
 
				-					  CRYPTO_ALG_INTERNAL,
			
 
				-			.cra_blocksize	= SHA1_BLOCK_SIZE,
			
 
				-			.cra_module	= THIS_MODULE,
			
 
				-			.cra_list	= LIST_HEAD_INIT
			
 
				-					(sha1_mb_areq_alg.halg.base.cra_list),
			
 
				-			.cra_init	= sha1_mb_areq_init_tfm,
			
 
				-			.cra_exit	= sha1_mb_areq_exit_tfm,
			
 
				-			.cra_ctxsize	= sizeof(struct sha1_hash_ctx),
			
 
				-		}
			
 
				-	}
			
 
				-};
			
 
				-
			
 
				-static int sha1_mb_async_init(struct ahash_request *req)
			
 
				-{
			
 
				-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
			
 
				-	struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm);
			
 
				-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
			
 
				-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
			
 
				-
			
 
				-	memcpy(mcryptd_req, req, sizeof(*req));
			
 
				-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
			
 
				-	return crypto_ahash_init(mcryptd_req);
			
 
				-}
			
 
				-
			
 
				-static int sha1_mb_async_update(struct ahash_request *req)
			
 
				-{
			
 
				-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
			
 
				-
			
 
				-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
			
 
				-	struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm);
			
 
				-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
			
 
				-
			
 
				-	memcpy(mcryptd_req, req, sizeof(*req));
			
 
				-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
			
 
				-	return crypto_ahash_update(mcryptd_req);
			
 
				-}
			
 
				-
			
 
				-static int sha1_mb_async_finup(struct ahash_request *req)
			
 
				-{
			
 
				-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
			
 
				-
			
 
				-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
			
 
				-	struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm);
			
 
				-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
			
 
				-
			
 
				-	memcpy(mcryptd_req, req, sizeof(*req));
			
 
				-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
			
 
				-	return crypto_ahash_finup(mcryptd_req);
			
 
				-}
			
 
				-
			
 
				-static int sha1_mb_async_final(struct ahash_request *req)
			
 
				-{
			
 
				-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
			
 
				-
			
 
				-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
			
 
				-	struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm);
			
 
				-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
			
 
				-
			
 
				-	memcpy(mcryptd_req, req, sizeof(*req));
			
 
				-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
			
 
				-	return crypto_ahash_final(mcryptd_req);
			
 
				-}
			
 
				-
			
 
				-static int sha1_mb_async_digest(struct ahash_request *req)
			
 
				-{
			
 
				-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
			
 
				-	struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm);
			
 
				-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
			
 
				-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
			
 
				-
			
 
				-	memcpy(mcryptd_req, req, sizeof(*req));
			
 
				-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
			
 
				-	return crypto_ahash_digest(mcryptd_req);
			
 
				-}
			
 
				-
			
 
				-static int sha1_mb_async_export(struct ahash_request *req, void *out)
			
 
				-{
			
 
				-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
			
 
				-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
			
 
				-	struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm);
			
 
				-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
			
 
				-
			
 
				-	memcpy(mcryptd_req, req, sizeof(*req));
			
 
				-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
			
 
				-	return crypto_ahash_export(mcryptd_req, out);
			
 
				-}
			
 
				-
			
 
				-static int sha1_mb_async_import(struct ahash_request *req, const void *in)
			
 
				-{
			
 
				-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
			
 
				-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
			
 
				-	struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm);
			
 
				-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
			
 
				-	struct crypto_ahash *child = mcryptd_ahash_child(mcryptd_tfm);
			
 
				-	struct mcryptd_hash_request_ctx *rctx;
			
 
				-	struct ahash_request *areq;
			
 
				-
			
 
				-	memcpy(mcryptd_req, req, sizeof(*req));
			
 
				-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
			
 
				-	rctx = ahash_request_ctx(mcryptd_req);
			
 
				-	areq = &rctx->areq;
			
 
				-
			
 
				-	ahash_request_set_tfm(areq, child);
			
 
				-	ahash_request_set_callback(areq, CRYPTO_TFM_REQ_MAY_SLEEP,
			
 
				-					rctx->complete, req);
			
 
				-
			
 
				-	return crypto_ahash_import(mcryptd_req, in);
			
 
				-}
			
 
				-
			
 
				-static struct ahash_alg sha1_mb_async_alg = {
			
 
				-	.init           = sha1_mb_async_init,
			
 
				-	.update         = sha1_mb_async_update,
			
 
				-	.final          = sha1_mb_async_final,
			
 
				-	.finup          = sha1_mb_async_finup,
			
 
				-	.digest         = sha1_mb_async_digest,
			
 
				-	.export		= sha1_mb_async_export,
			
 
				-	.import		= sha1_mb_async_import,
			
 
				-	.halg = {
			
 
				-		.digestsize     = SHA1_DIGEST_SIZE,
			
 
				-		.statesize	= sizeof(struct sha1_hash_ctx),
			
 
				-		.base = {
			
 
				-			.cra_name               = "sha1",
			
 
				-			.cra_driver_name        = "sha1_mb",
			
 
				-			/*
			
 
				-			 * Low priority, since with few concurrent hash requests
			
 
				-			 * this is extremely slow due to the flush delay.  Users
			
 
				-			 * whose workloads would benefit from this can request
			
 
				-			 * it explicitly by driver name, or can increase its
			
 
				-			 * priority at runtime using NETLINK_CRYPTO.
			
 
				-			 */
			
 
				-			.cra_priority           = 50,
			
 
				-			.cra_flags              = CRYPTO_ALG_ASYNC,
			
 
				-			.cra_blocksize          = SHA1_BLOCK_SIZE,
			
 
				-			.cra_module             = THIS_MODULE,
			
 
				-			.cra_list               = LIST_HEAD_INIT(sha1_mb_async_alg.halg.base.cra_list),
			
 
				-			.cra_init               = sha1_mb_async_init_tfm,
			
 
				-			.cra_exit               = sha1_mb_async_exit_tfm,
			
 
				-			.cra_ctxsize		= sizeof(struct sha1_mb_ctx),
			
 
				-			.cra_alignmask		= 0,
			
 
				-		},
			
 
				-	},
			
 
				-};
			
 
				-
			
 
				-static unsigned long sha1_mb_flusher(struct mcryptd_alg_cstate *cstate)
			
 
				-{
			
 
				-	struct mcryptd_hash_request_ctx *rctx;
			
 
				-	unsigned long cur_time;
			
 
				-	unsigned long next_flush = 0;
			
 
				-	struct sha1_hash_ctx *sha_ctx;
			
 
				-
			
 
				-
			
 
				-	cur_time = jiffies;
			
 
				-
			
 
				-	while (!list_empty(&cstate->work_list)) {
			
 
				-		rctx = list_entry(cstate->work_list.next,
			
 
				-				struct mcryptd_hash_request_ctx, waiter);
			
 
				-		if (time_before(cur_time, rctx->tag.expire))
			
 
				-			break;
			
 
				-		kernel_fpu_begin();
			
 
				-		sha_ctx = (struct sha1_hash_ctx *)
			
 
				-					sha1_ctx_mgr_flush(cstate->mgr);
			
 
				-		kernel_fpu_end();
			
 
				-		if (!sha_ctx) {
			
 
				-			pr_err("sha1_mb error: nothing got flushed for non-empty list\n");
			
 
				-			break;
			
 
				-		}
			
 
				-		rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
			
 
				-		sha_finish_walk(&rctx, cstate, true);
			
 
				-		sha_complete_job(rctx, cstate, 0);
			
 
				-	}
			
 
				-
			
 
				-	if (!list_empty(&cstate->work_list)) {
			
 
				-		rctx = list_entry(cstate->work_list.next,
			
 
				-				struct mcryptd_hash_request_ctx, waiter);
			
 
				-		/* get the hash context and then flush time */
			
 
				-		next_flush = rctx->tag.expire;
			
 
				-		mcryptd_arm_flusher(cstate, get_delay(next_flush));
			
 
				-	}
			
 
				-	return next_flush;
			
 
				-}
			
 
				-
			
 
				-static int __init sha1_mb_mod_init(void)
			
 
				-{
			
 
				-
			
 
				-	int cpu;
			
 
				-	int err;
			
 
				-	struct mcryptd_alg_cstate *cpu_state;
			
 
				-
			
 
				-	/* check for dependent cpu features */
			
 
				-	if (!boot_cpu_has(X86_FEATURE_AVX2) ||
			
 
				-	    !boot_cpu_has(X86_FEATURE_BMI2))
			
 
				-		return -ENODEV;
			
 
				-
			
 
				-	/* initialize multibuffer structures */
			
 
				-	sha1_mb_alg_state.alg_cstate = alloc_percpu(struct mcryptd_alg_cstate);
			
 
				-
			
 
				-	sha1_job_mgr_init = sha1_mb_mgr_init_avx2;
			
 
				-	sha1_job_mgr_submit = sha1_mb_mgr_submit_avx2;
			
 
				-	sha1_job_mgr_flush = sha1_mb_mgr_flush_avx2;
			
 
				-	sha1_job_mgr_get_comp_job = sha1_mb_mgr_get_comp_job_avx2;
			
 
				-
			
 
				-	if (!sha1_mb_alg_state.alg_cstate)
			
 
				-		return -ENOMEM;
			
 
				-	for_each_possible_cpu(cpu) {
			
 
				-		cpu_state = per_cpu_ptr(sha1_mb_alg_state.alg_cstate, cpu);
			
 
				-		cpu_state->next_flush = 0;
			
 
				-		cpu_state->next_seq_num = 0;
			
 
				-		cpu_state->flusher_engaged = false;
			
 
				-		INIT_DELAYED_WORK(&cpu_state->flush, mcryptd_flusher);
			
 
				-		cpu_state->cpu = cpu;
			
 
				-		cpu_state->alg_state = &sha1_mb_alg_state;
			
 
				-		cpu_state->mgr = kzalloc(sizeof(struct sha1_ctx_mgr),
			
 
				-					GFP_KERNEL);
			
 
				-		if (!cpu_state->mgr)
			
 
				-			goto err2;
			
 
				-		sha1_ctx_mgr_init(cpu_state->mgr);
			
 
				-		INIT_LIST_HEAD(&cpu_state->work_list);
			
 
				-		spin_lock_init(&cpu_state->work_lock);
			
 
				-	}
			
 
				-	sha1_mb_alg_state.flusher = &sha1_mb_flusher;
			
 
				-
			
 
				-	err = crypto_register_ahash(&sha1_mb_areq_alg);
			
 
				-	if (err)
			
 
				-		goto err2;
			
 
				-	err = crypto_register_ahash(&sha1_mb_async_alg);
			
 
				-	if (err)
			
 
				-		goto err1;
			
 
				-
			
 
				-
			
 
				-	return 0;
			
 
				-err1:
			
 
				-	crypto_unregister_ahash(&sha1_mb_areq_alg);
			
 
				-err2:
			
 
				-	for_each_possible_cpu(cpu) {
			
 
				-		cpu_state = per_cpu_ptr(sha1_mb_alg_state.alg_cstate, cpu);
			
 
				-		kfree(cpu_state->mgr);
			
 
				-	}
			
 
				-	free_percpu(sha1_mb_alg_state.alg_cstate);
			
 
				-	return -ENODEV;
			
 
				-}
			
 
				-
			
 
				-static void __exit sha1_mb_mod_fini(void)
			
 
				-{
			
 
				-	int cpu;
			
 
				-	struct mcryptd_alg_cstate *cpu_state;
			
 
				-
			
 
				-	crypto_unregister_ahash(&sha1_mb_async_alg);
			
 
				-	crypto_unregister_ahash(&sha1_mb_areq_alg);
			
 
				-	for_each_possible_cpu(cpu) {
			
 
				-		cpu_state = per_cpu_ptr(sha1_mb_alg_state.alg_cstate, cpu);
			
 
				-		kfree(cpu_state->mgr);
			
 
				-	}
			
 
				-	free_percpu(sha1_mb_alg_state.alg_cstate);
			
 
				-}
			
 
				-
			
 
				-module_init(sha1_mb_mod_init);
			
 
				-module_exit(sha1_mb_mod_fini);
			
 
				-
			
 
				-MODULE_LICENSE("GPL");
			
 
				-MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, multi buffer accelerated");
			
 
				-
			
 
				-MODULE_ALIAS_CRYPTO("sha1");
			
--- a/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h
+++ b/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h
@@ -1,134 +0,0 @@
 
				-/*
			
 
				- * Header file for multi buffer SHA context
			
 
				- *
			
 
				- * This file is provided under a dual BSD/GPLv2 license.  When using or
			
 
				- * redistributing this file, you may do so under either license.
			
 
				- *
			
 
				- * GPL LICENSE SUMMARY
			
 
				- *
			
 
				- *  Copyright(c) 2014 Intel Corporation.
			
 
				- *
			
 
				- *  This program is free software; you can redistribute it and/or modify
			
 
				- *  it under the terms of version 2 of the GNU General Public License as
			
 
				- *  published by the Free Software Foundation.
			
 
				- *
			
 
				- *  This program is distributed in the hope that it will be useful, but
			
 
				- *  WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
			
 
				- *  General Public License for more details.
			
 
				- *
			
 
				- *  Contact Information:
			
 
				- *	Tim Chen <tim.c.chen@linux.intel.com>
			
 
				- *
			
 
				- *  BSD LICENSE
			
 
				- *
			
 
				- *  Copyright(c) 2014 Intel Corporation.
			
 
				- *
			
 
				- *  Redistribution and use in source and binary forms, with or without
			
 
				- *  modification, are permitted provided that the following conditions
			
 
				- *  are met:
			
 
				- *
			
 
				- *    * Redistributions of source code must retain the above copyright
			
 
				- *      notice, this list of conditions and the following disclaimer.
			
 
				- *    * Redistributions in binary form must reproduce the above copyright
			
 
				- *      notice, this list of conditions and the following disclaimer in
			
 
				- *      the documentation and/or other materials provided with the
			
 
				- *      distribution.
			
 
				- *    * Neither the name of Intel Corporation nor the names of its
			
 
				- *      contributors may be used to endorse or promote products derived
			
 
				- *      from this software without specific prior written permission.
			
 
				- *
			
 
				- *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
			
 
				- *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
			
 
				- *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
			
 
				- *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
			
 
				- *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
			
 
				- *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
			
 
				- *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
			
 
				- *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
			
 
				- *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
			
 
				- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
			
 
				- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
			
 
				- */
			
 
				-
			
 
				-#ifndef _SHA_MB_CTX_INTERNAL_H
			
 
				-#define _SHA_MB_CTX_INTERNAL_H
			
 
				-
			
 
				-#include "sha1_mb_mgr.h"
			
 
				-
			
 
				-#define HASH_UPDATE          0x00
			
 
				-#define HASH_LAST            0x01
			
 
				-#define HASH_DONE	     0x02
			
 
				-#define HASH_FINAL	     0x04
			
 
				-
			
 
				-#define HASH_CTX_STS_IDLE       0x00
			
 
				-#define HASH_CTX_STS_PROCESSING 0x01
			
 
				-#define HASH_CTX_STS_LAST       0x02
			
 
				-#define HASH_CTX_STS_COMPLETE   0x04
			
 
				-
			
 
				-enum hash_ctx_error {
			
 
				-	HASH_CTX_ERROR_NONE               =  0,
			
 
				-	HASH_CTX_ERROR_INVALID_FLAGS      = -1,
			
 
				-	HASH_CTX_ERROR_ALREADY_PROCESSING = -2,
			
 
				-	HASH_CTX_ERROR_ALREADY_COMPLETED  = -3,
			
 
				-
			
 
				-#ifdef HASH_CTX_DEBUG
			
 
				-	HASH_CTX_ERROR_DEBUG_DIGEST_MISMATCH = -4,
			
 
				-#endif
			
 
				-};
			
 
				-
			
 
				-
			
 
				-#define hash_ctx_user_data(ctx)  ((ctx)->user_data)
			
 
				-#define hash_ctx_digest(ctx)     ((ctx)->job.result_digest)
			
 
				-#define hash_ctx_processing(ctx) ((ctx)->status & HASH_CTX_STS_PROCESSING)
			
 
				-#define hash_ctx_complete(ctx)   ((ctx)->status == HASH_CTX_STS_COMPLETE)
			
 
				-#define hash_ctx_status(ctx)     ((ctx)->status)
			
 
				-#define hash_ctx_error(ctx)      ((ctx)->error)
			
 
				-#define hash_ctx_init(ctx) \
			
 
				-	do { \
			
 
				-		(ctx)->error = HASH_CTX_ERROR_NONE; \
			
 
				-		(ctx)->status = HASH_CTX_STS_COMPLETE; \
			
 
				-	} while (0)
			
 
				-
			
 
				-
			
 
				-/* Hash Constants and Typedefs */
			
 
				-#define SHA1_DIGEST_LENGTH          5
			
 
				-#define SHA1_LOG2_BLOCK_SIZE        6
			
 
				-
			
 
				-#define SHA1_PADLENGTHFIELD_SIZE    8
			
 
				-
			
 
				-#ifdef SHA_MB_DEBUG
			
 
				-#define assert(expr) \
			
 
				-do { \
			
 
				-	if (unlikely(!(expr))) { \
			
 
				-		printk(KERN_ERR "Assertion failed! %s,%s,%s,line=%d\n", \
			
 
				-		#expr, __FILE__, __func__, __LINE__); \
			
 
				-	} \
			
 
				-} while (0)
			
 
				-#else
			
 
				-#define assert(expr) do {} while (0)
			
 
				-#endif
			
 
				-
			
 
				-struct sha1_ctx_mgr {
			
 
				-	struct sha1_mb_mgr mgr;
			
 
				-};
			
 
				-
			
 
				-/* typedef struct sha1_ctx_mgr sha1_ctx_mgr; */
			
 
				-
			
 
				-struct sha1_hash_ctx {
			
 
				-	/* Must be at struct offset 0 */
			
 
				-	struct job_sha1       job;
			
 
				-	/* status flag */
			
 
				-	int status;
			
 
				-	/* error flag */
			
 
				-	int error;
			
 
				-
			
 
				-	uint64_t	total_length;
			
 
				-	const void	*incoming_buffer;
			
 
				-	uint32_t	incoming_buffer_length;
			
 
				-	uint8_t		partial_block_buffer[SHA1_BLOCK_SIZE * 2];
			
 
				-	uint32_t	partial_block_buffer_length;
			
 
				-	void		*user_data;
			
 
				-};
			
 
				-
			
 
				-#endif
			
--- a/arch/x86/crypto/sha1-mb/sha1_mb_mgr.h
+++ b/arch/x86/crypto/sha1-mb/sha1_mb_mgr.h
@@ -1,110 +0,0 @@
 
				-/*
			
 
				- * Header file for multi buffer SHA1 algorithm manager
			
 
				- *
			
 
				- * This file is provided under a dual BSD/GPLv2 license.  When using or
			
 
				- * redistributing this file, you may do so under either license.
			
 
				- *
			
 
				- * GPL LICENSE SUMMARY
			
 
				- *
			
 
				- *  Copyright(c) 2014 Intel Corporation.
			
 
				- *
			
 
				- *  This program is free software; you can redistribute it and/or modify
			
 
				- *  it under the terms of version 2 of the GNU General Public License as
			
 
				- *  published by the Free Software Foundation.
			
 
				- *
			
 
				- *  This program is distributed in the hope that it will be useful, but
			
 
				- *  WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
			
 
				- *  General Public License for more details.
			
 
				- *
			
 
				- *  Contact Information:
			
 
				- *      James Guilford <james.guilford@intel.com>
			
 
				- *	Tim Chen <tim.c.chen@linux.intel.com>
			
 
				- *
			
 
				- *  BSD LICENSE
			
 
				- *
			
 
				- *  Copyright(c) 2014 Intel Corporation.
			
 
				- *
			
 
				- *  Redistribution and use in source and binary forms, with or without
			
 
				- *  modification, are permitted provided that the following conditions
			
 
				- *  are met:
			
 
				- *
			
 
				- *    * Redistributions of source code must retain the above copyright
			
 
				- *      notice, this list of conditions and the following disclaimer.
			
 
				- *    * Redistributions in binary form must reproduce the above copyright
			
 
				- *      notice, this list of conditions and the following disclaimer in
			
 
				- *      the documentation and/or other materials provided with the
			
 
				- *      distribution.
			
 
				- *    * Neither the name of Intel Corporation nor the names of its
			
 
				- *      contributors may be used to endorse or promote products derived
			
 
				- *      from this software without specific prior written permission.
			
 
				- *
			
 
				- *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
			
 
				- *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
			
 
				- *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
			
 
				- *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
			
 
				- *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
			
 
				- *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
			
 
				- *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
			
 
				- *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
			
 
				- *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
			
 
				- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
			
 
				- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
			
 
				- */
			
 
				-#ifndef __SHA_MB_MGR_H
			
 
				-#define __SHA_MB_MGR_H
			
 
				-
			
 
				-
			
 
				-#include <linux/types.h>
			
 
				-
			
 
				-#define NUM_SHA1_DIGEST_WORDS 5
			
 
				-
			
 
				-enum job_sts {	STS_UNKNOWN = 0,
			
 
				-		STS_BEING_PROCESSED = 1,
			
 
				-		STS_COMPLETED = 2,
			
 
				-		STS_INTERNAL_ERROR = 3,
			
 
				-		STS_ERROR = 4
			
 
				-};
			
 
				-
			
 
				-struct job_sha1 {
			
 
				-	u8	*buffer;
			
 
				-	u32	len;
			
 
				-	u32	result_digest[NUM_SHA1_DIGEST_WORDS] __aligned(32);
			
 
				-	enum	job_sts status;
			
 
				-	void	*user_data;
			
 
				-};
			
 
				-
			
 
				-/* SHA1 out-of-order scheduler */
			
 
				-
			
 
				-/* typedef uint32_t sha1_digest_array[5][8]; */
			
 
				-
			
 
				-struct sha1_args_x8 {
			
 
				-	uint32_t	digest[5][8];
			
 
				-	uint8_t		*data_ptr[8];
			
 
				-};
			
 
				-
			
 
				-struct sha1_lane_data {
			
 
				-	struct job_sha1 *job_in_lane;
			
 
				-};
			
 
				-
			
 
				-struct sha1_mb_mgr {
			
 
				-	struct sha1_args_x8 args;
			
 
				-
			
 
				-	uint32_t lens[8];
			
 
				-
			
 
				-	/* each byte is index (0...7) of unused lanes */
			
 
				-	uint64_t unused_lanes;
			
 
				-	/* byte 4 is set to FF as a flag */
			
 
				-	struct sha1_lane_data ldata[8];
			
 
				-};
			
 
				-
			
 
				-
			
 
				-#define SHA1_MB_MGR_NUM_LANES_AVX2 8
			
 
				-
			
 
				-void sha1_mb_mgr_init_avx2(struct sha1_mb_mgr *state);
			
 
				-struct job_sha1 *sha1_mb_mgr_submit_avx2(struct sha1_mb_mgr *state,
			
 
				-					 struct job_sha1 *job);
			
 
				-struct job_sha1 *sha1_mb_mgr_flush_avx2(struct sha1_mb_mgr *state);
			
 
				-struct job_sha1 *sha1_mb_mgr_get_comp_job_avx2(struct sha1_mb_mgr *state);
			
 
				-
			
 
				-#endif
			
--- a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_datastruct.S
+++ b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_datastruct.S
@@ -1,287 +0,0 @@
 
				-/*
			
 
				- * Header file for multi buffer SHA1 algorithm data structure
			
 
				- *
			
 
				- * This file is provided under a dual BSD/GPLv2 license.  When using or
			
 
				- * redistributing this file, you may do so under either license.
			
 
				- *
			
 
				- * GPL LICENSE SUMMARY
			
 
				- *
			
 
				- *  Copyright(c) 2014 Intel Corporation.
			
 
				- *
			
 
				- *  This program is free software; you can redistribute it and/or modify
			
 
				- *  it under the terms of version 2 of the GNU General Public License as
			
 
				- *  published by the Free Software Foundation.
			
 
				- *
			
 
				- *  This program is distributed in the hope that it will be useful, but
			
 
				- *  WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
			
 
				- *  General Public License for more details.
			
 
				- *
			
 
				- *  Contact Information:
			
 
				- *      James Guilford <james.guilford@intel.com>
			
 
				- *	Tim Chen <tim.c.chen@linux.intel.com>
			
 
				- *
			
 
				- *  BSD LICENSE
			
 
				- *
			
 
				- *  Copyright(c) 2014 Intel Corporation.
			
 
				- *
			
 
				- *  Redistribution and use in source and binary forms, with or without
			
 
				- *  modification, are permitted provided that the following conditions
			
 
				- *  are met:
			
 
				- *
			
 
				- *    * Redistributions of source code must retain the above copyright
			
 
				- *      notice, this list of conditions and the following disclaimer.
			
 
				- *    * Redistributions in binary form must reproduce the above copyright
			
 
				- *      notice, this list of conditions and the following disclaimer in
			
 
				- *      the documentation and/or other materials provided with the
			
 
				- *      distribution.
			
 
				- *    * Neither the name of Intel Corporation nor the names of its
			
 
				- *      contributors may be used to endorse or promote products derived
			
 
				- *      from this software without specific prior written permission.
			
 
				- *
			
 
				- *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
			
 
				- *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
			
 
				- *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
			
 
				- *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
			
 
				- *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
			
 
				- *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
			
 
				- *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
			
 
				- *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
			
 
				- *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
			
 
				- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
			
 
				- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
			
 
				- */
			
 
				-
			
 
				-# Macros for defining data structures
			
 
				-
			
 
				-# Usage example
			
 
				-
			
 
				-#START_FIELDS	# JOB_AES
			
 
				-###	name		size	align
			
 
				-#FIELD	_plaintext,	8,	8	# pointer to plaintext
			
 
				-#FIELD	_ciphertext,	8,	8	# pointer to ciphertext
			
 
				-#FIELD	_IV,		16,	8	# IV
			
 
				-#FIELD	_keys,		8,	8	# pointer to keys
			
 
				-#FIELD	_len,		4,	4	# length in bytes
			
 
				-#FIELD	_status,	4,	4	# status enumeration
			
 
				-#FIELD	_user_data,	8,	8	# pointer to user data
			
 
				-#UNION  _union,         size1,  align1, \
			
 
				-#	                size2,  align2, \
			
 
				-#	                size3,  align3, \
			
 
				-#	                ...
			
 
				-#END_FIELDS
			
 
				-#%assign _JOB_AES_size	_FIELD_OFFSET
			
 
				-#%assign _JOB_AES_align	_STRUCT_ALIGN
			
 
				-
			
 
				-#########################################################################
			
 
				-
			
 
				-# Alternate "struc-like" syntax:
			
 
				-#	STRUCT job_aes2
			
 
				-#	RES_Q	.plaintext,	1
			
 
				-#	RES_Q	.ciphertext,	1
			
 
				-#	RES_DQ	.IV,		1
			
 
				-#	RES_B	.nested,	_JOB_AES_SIZE, _JOB_AES_ALIGN
			
 
				-#	RES_U	.union,		size1, align1, \
			
 
				-#				size2, align2, \
			
 
				-#				...
			
 
				-#	ENDSTRUCT
			
 
				-#	# Following only needed if nesting
			
 
				-#	%assign job_aes2_size	_FIELD_OFFSET
			
 
				-#	%assign job_aes2_align	_STRUCT_ALIGN
			
 
				-#
			
 
				-# RES_* macros take a name, a count and an optional alignment.
			
 
				-# The count in in terms of the base size of the macro, and the
			
 
				-# default alignment is the base size.
			
 
				-# The macros are:
			
 
				-# Macro    Base size
			
 
				-# RES_B	    1
			
 
				-# RES_W	    2
			
 
				-# RES_D     4
			
 
				-# RES_Q     8
			
 
				-# RES_DQ   16
			
 
				-# RES_Y    32
			
 
				-# RES_Z    64
			
 
				-#
			
 
				-# RES_U defines a union. It's arguments are a name and two or more
			
 
				-# pairs of "size, alignment"
			
 
				-#
			
 
				-# The two assigns are only needed if this structure is being nested
			
 
				-# within another. Even if the assigns are not done, one can still use
			
 
				-# STRUCT_NAME_size as the size of the structure.
			
 
				-#
			
 
				-# Note that for nesting, you still need to assign to STRUCT_NAME_size.
			
 
				-#
			
 
				-# The differences between this and using "struc" directly are that each
			
 
				-# type is implicitly aligned to its natural length (although this can be
			
 
				-# over-ridden with an explicit third parameter), and that the structure
			
 
				-# is padded at the end to its overall alignment.
			
 
				-#
			
 
				-
			
 
				-#########################################################################
			
 
				-
			
 
				-#ifndef _SHA1_MB_MGR_DATASTRUCT_ASM_
			
 
				-#define _SHA1_MB_MGR_DATASTRUCT_ASM_
			
 
				-
			
 
				-## START_FIELDS
			
 
				-.macro START_FIELDS
			
 
				- _FIELD_OFFSET = 0
			
 
				- _STRUCT_ALIGN = 0
			
 
				-.endm
			
 
				-
			
 
				-## FIELD name size align
			
 
				-.macro FIELD name size align
			
 
				- _FIELD_OFFSET = (_FIELD_OFFSET + (\align) - 1) & (~ ((\align)-1))
			
 
				- \name	= _FIELD_OFFSET
			
 
				- _FIELD_OFFSET = _FIELD_OFFSET + (\size)
			
 
				-.if (\align > _STRUCT_ALIGN)
			
 
				- _STRUCT_ALIGN = \align
			
 
				-.endif
			
 
				-.endm
			
 
				-
			
 
				-## END_FIELDS
			
 
				-.macro END_FIELDS
			
 
				- _FIELD_OFFSET = (_FIELD_OFFSET + _STRUCT_ALIGN-1) & (~ (_STRUCT_ALIGN-1))
			
 
				-.endm
			
 
				-
			
 
				-########################################################################
			
 
				-
			
 
				-.macro STRUCT p1
			
 
				-START_FIELDS
			
 
				-.struc \p1
			
 
				-.endm
			
 
				-
			
 
				-.macro ENDSTRUCT
			
 
				- tmp = _FIELD_OFFSET
			
 
				- END_FIELDS
			
 
				- tmp = (_FIELD_OFFSET - %%tmp)
			
 
				-.if (tmp > 0)
			
 
				-	.lcomm	tmp
			
 
				-.endif
			
 
				-.endstruc
			
 
				-.endm
			
 
				-
			
 
				-## RES_int name size align
			
 
				-.macro RES_int p1 p2 p3
			
 
				- name = \p1
			
 
				- size = \p2
			
 
				- align = .\p3
			
 
				-
			
 
				- _FIELD_OFFSET = (_FIELD_OFFSET + (align) - 1) & (~ ((align)-1))
			
 
				-.align align
			
 
				-.lcomm name size
			
 
				- _FIELD_OFFSET = _FIELD_OFFSET + (size)
			
 
				-.if (align > _STRUCT_ALIGN)
			
 
				- _STRUCT_ALIGN = align
			
 
				-.endif
			
 
				-.endm
			
 
				-
			
 
				-
			
 
				-
			
 
				-# macro RES_B name, size [, align]
			
 
				-.macro RES_B _name, _size, _align=1
			
 
				-RES_int _name _size _align
			
 
				-.endm
			
 
				-
			
 
				-# macro RES_W name, size [, align]
			
 
				-.macro RES_W _name, _size, _align=2
			
 
				-RES_int _name 2*(_size) _align
			
 
				-.endm
			
 
				-
			
 
				-# macro RES_D name, size [, align]
			
 
				-.macro RES_D _name, _size, _align=4
			
 
				-RES_int _name 4*(_size) _align
			
 
				-.endm
			
 
				-
			
 
				-# macro RES_Q name, size [, align]
			
 
				-.macro RES_Q _name, _size, _align=8
			
 
				-RES_int _name 8*(_size) _align
			
 
				-.endm
			
 
				-
			
 
				-# macro RES_DQ name, size [, align]
			
 
				-.macro RES_DQ _name, _size, _align=16
			
 
				-RES_int _name 16*(_size) _align
			
 
				-.endm
			
 
				-
			
 
				-# macro RES_Y name, size [, align]
			
 
				-.macro RES_Y _name, _size, _align=32
			
 
				-RES_int _name 32*(_size) _align
			
 
				-.endm
			
 
				-
			
 
				-# macro RES_Z name, size [, align]
			
 
				-.macro RES_Z _name, _size, _align=64
			
 
				-RES_int _name 64*(_size) _align
			
 
				-.endm
			
 
				-
			
 
				-
			
 
				-#endif
			
 
				-
			
 
				-########################################################################
			
 
				-#### Define constants
			
 
				-########################################################################
			
 
				-
			
 
				-########################################################################
			
 
				-#### Define SHA1 Out Of Order Data Structures
			
 
				-########################################################################
			
 
				-
			
 
				-START_FIELDS    # LANE_DATA
			
 
				-###     name            size    align
			
 
				-FIELD   _job_in_lane,   8,      8       # pointer to job object
			
 
				-END_FIELDS
			
 
				-
			
 
				-_LANE_DATA_size = _FIELD_OFFSET
			
 
				-_LANE_DATA_align = _STRUCT_ALIGN
			
 
				-
			
 
				-########################################################################
			
 
				-
			
 
				-START_FIELDS    # SHA1_ARGS_X8
			
 
				-###     name            size    align
			
 
				-FIELD   _digest,        4*5*8,  16      # transposed digest
			
 
				-FIELD   _data_ptr,      8*8,    8       # array of pointers to data
			
 
				-END_FIELDS
			
 
				-
			
 
				-_SHA1_ARGS_X4_size =     _FIELD_OFFSET
			
 
				-_SHA1_ARGS_X4_align =    _STRUCT_ALIGN
			
 
				-_SHA1_ARGS_X8_size =     _FIELD_OFFSET
			
 
				-_SHA1_ARGS_X8_align =    _STRUCT_ALIGN
			
 
				-
			
 
				-########################################################################
			
 
				-
			
 
				-START_FIELDS    # MB_MGR
			
 
				-###     name            size    align
			
 
				-FIELD   _args,          _SHA1_ARGS_X4_size, _SHA1_ARGS_X4_align
			
 
				-FIELD   _lens,          4*8,    8
			
 
				-FIELD   _unused_lanes,  8,      8
			
 
				-FIELD   _ldata,         _LANE_DATA_size*8, _LANE_DATA_align
			
 
				-END_FIELDS
			
 
				-
			
 
				-_MB_MGR_size =   _FIELD_OFFSET
			
 
				-_MB_MGR_align =  _STRUCT_ALIGN
			
 
				-
			
 
				-_args_digest    =     _args + _digest
			
 
				-_args_data_ptr  =     _args + _data_ptr
			
 
				-
			
 
				-
			
 
				-########################################################################
			
 
				-#### Define constants
			
 
				-########################################################################
			
 
				-
			
 
				-#define STS_UNKNOWN             0
			
 
				-#define STS_BEING_PROCESSED     1
			
 
				-#define STS_COMPLETED           2
			
 
				-
			
 
				-########################################################################
			
 
				-#### Define JOB_SHA1 structure
			
 
				-########################################################################
			
 
				-
			
 
				-START_FIELDS    # JOB_SHA1
			
 
				-
			
 
				-###     name                            size    align
			
 
				-FIELD   _buffer,                        8,      8       # pointer to buffer
			
 
				-FIELD   _len,                           4,      4       # length in bytes
			
 
				-FIELD   _result_digest,                 5*4,    32      # Digest (output)
			
 
				-FIELD   _status,                        4,      4
			
 
				-FIELD   _user_data,                     8,      8
			
 
				-END_FIELDS
			
 
				-
			
 
				-_JOB_SHA1_size =  _FIELD_OFFSET
			
 
				-_JOB_SHA1_align = _STRUCT_ALIGN
			
--- a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S
+++ b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S
@@ -1,304 +0,0 @@
 
				-/*
			
 
				- * Flush routine for SHA1 multibuffer
			
 
				- *
			
 
				- * This file is provided under a dual BSD/GPLv2 license.  When using or
			
 
				- * redistributing this file, you may do so under either license.
			
 
				- *
			
 
				- * GPL LICENSE SUMMARY
			
 
				- *
			
 
				- *  Copyright(c) 2014 Intel Corporation.
			
 
				- *
			
 
				- *  This program is free software; you can redistribute it and/or modify
			
 
				- *  it under the terms of version 2 of the GNU General Public License as
			
 
				- *  published by the Free Software Foundation.
			
 
				- *
			
 
				- *  This program is distributed in the hope that it will be useful, but
			
 
				- *  WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
			
 
				- *  General Public License for more details.
			
 
				- *
			
 
				- *  Contact Information:
			
 
				- *      James Guilford <james.guilford@intel.com>
			
 
				- *	Tim Chen <tim.c.chen@linux.intel.com>
			
 
				- *
			
 
				- *  BSD LICENSE
			
 
				- *
			
 
				- *  Copyright(c) 2014 Intel Corporation.
			
 
				- *
			
 
				- *  Redistribution and use in source and binary forms, with or without
			
 
				- *  modification, are permitted provided that the following conditions
			
 
				- *  are met:
			
 
				- *
			
 
				- *    * Redistributions of source code must retain the above copyright
			
 
				- *      notice, this list of conditions and the following disclaimer.
			
 
				- *    * Redistributions in binary form must reproduce the above copyright
			
 
				- *      notice, this list of conditions and the following disclaimer in
			
 
				- *      the documentation and/or other materials provided with the
			
 
				- *      distribution.
			
 
				- *    * Neither the name of Intel Corporation nor the names of its
			
 
				- *      contributors may be used to endorse or promote products derived
			
 
				- *      from this software without specific prior written permission.
			
 
				- *
			
 
				- *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
			
 
				- *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
			
 
				- *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
			
 
				- *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
			
 
				- *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
			
 
				- *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
			
 
				- *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
			
 
				- *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
			
 
				- *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
			
 
				- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
			
 
				- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
			
 
				- */
			
 
				-#include <linux/linkage.h>
			
 
				-#include <asm/frame.h>
			
 
				-#include "sha1_mb_mgr_datastruct.S"
			
 
				-
			
 
				-
			
 
				-.extern sha1_x8_avx2
			
 
				-
			
 
				-# LINUX register definitions
			
 
				-#define arg1    %rdi
			
 
				-#define arg2    %rsi
			
 
				-
			
 
				-# Common definitions
			
 
				-#define state   arg1
			
 
				-#define job     arg2
			
 
				-#define len2    arg2
			
 
				-
			
 
				-# idx must be a register not clobbered by sha1_x8_avx2
			
 
				-#define idx		%r8
			
 
				-#define DWORD_idx	%r8d
			
 
				-
			
 
				-#define unused_lanes    %rbx
			
 
				-#define lane_data       %rbx
			
 
				-#define tmp2            %rbx
			
 
				-#define tmp2_w		%ebx
			
 
				-
			
 
				-#define job_rax         %rax
			
 
				-#define tmp1            %rax
			
 
				-#define size_offset     %rax
			
 
				-#define tmp             %rax
			
 
				-#define start_offset    %rax
			
 
				-
			
 
				-#define tmp3            %arg1
			
 
				-
			
 
				-#define extra_blocks    %arg2
			
 
				-#define p               %arg2
			
 
				-
			
 
				-.macro LABEL prefix n
			
 
				-\prefix\n\():
			
 
				-.endm
			
 
				-
			
 
				-.macro JNE_SKIP i
			
 
				-jne     skip_\i
			
 
				-.endm
			
 
				-
			
 
				-.altmacro
			
 
				-.macro SET_OFFSET _offset
			
 
				-offset = \_offset
			
 
				-.endm
			
 
				-.noaltmacro
			
 
				-
			
 
				-# JOB* sha1_mb_mgr_flush_avx2(MB_MGR *state)
			
 
				-# arg 1 : rcx : state
			
 
				-ENTRY(sha1_mb_mgr_flush_avx2)
			
 
				-	FRAME_BEGIN
			
 
				-	push	%rbx
			
 
				-
			
 
				-	# If bit (32+3) is set, then all lanes are empty
			
 
				-	mov     _unused_lanes(state), unused_lanes
			
 
				-	bt      $32+3, unused_lanes
			
 
				-	jc      return_null
			
 
				-
			
 
				-	# find a lane with a non-null job
			
 
				-	xor     idx, idx
			
 
				-	offset = (_ldata + 1 * _LANE_DATA_size + _job_in_lane)
			
 
				-	cmpq    $0, offset(state)
			
 
				-	cmovne  one(%rip), idx
			
 
				-	offset = (_ldata + 2 * _LANE_DATA_size + _job_in_lane)
			
 
				-	cmpq    $0, offset(state)
			
 
				-	cmovne  two(%rip), idx
			
 
				-	offset = (_ldata + 3 * _LANE_DATA_size + _job_in_lane)
			
 
				-	cmpq    $0, offset(state)
			
 
				-	cmovne  three(%rip), idx
			
 
				-	offset = (_ldata + 4 * _LANE_DATA_size + _job_in_lane)
			
 
				-	cmpq    $0, offset(state)
			
 
				-	cmovne  four(%rip), idx
			
 
				-	offset = (_ldata + 5 * _LANE_DATA_size + _job_in_lane)
			
 
				-	cmpq    $0, offset(state)
			
 
				-	cmovne  five(%rip), idx
			
 
				-	offset = (_ldata + 6 * _LANE_DATA_size + _job_in_lane)
			
 
				-	cmpq    $0, offset(state)
			
 
				-	cmovne  six(%rip), idx
			
 
				-	offset = (_ldata + 7 * _LANE_DATA_size + _job_in_lane)
			
 
				-	cmpq    $0, offset(state)
			
 
				-	cmovne  seven(%rip), idx
			
 
				-
			
 
				-	# copy idx to empty lanes
			
 
				-copy_lane_data:
			
 
				-	offset =  (_args + _data_ptr)
			
 
				-	mov     offset(state,idx,8), tmp
			
 
				-
			
 
				-	I = 0
			
 
				-.rep 8
			
 
				-	offset =  (_ldata + I * _LANE_DATA_size + _job_in_lane)
			
 
				-	cmpq    $0, offset(state)
			
 
				-.altmacro
			
 
				-	JNE_SKIP %I
			
 
				-	offset =  (_args + _data_ptr + 8*I)
			
 
				-	mov     tmp, offset(state)
			
 
				-	offset =  (_lens + 4*I)
			
 
				-	movl    $0xFFFFFFFF, offset(state)
			
 
				-LABEL skip_ %I
			
 
				-	I = (I+1)
			
 
				-.noaltmacro
			
 
				-.endr
			
 
				-
			
 
				-	# Find min length
			
 
				-	vmovdqu _lens+0*16(state), %xmm0
			
 
				-	vmovdqu _lens+1*16(state), %xmm1
			
 
				-
			
 
				-	vpminud %xmm1, %xmm0, %xmm2     # xmm2 has {D,C,B,A}
			
 
				-	vpalignr $8, %xmm2, %xmm3, %xmm3   # xmm3 has {x,x,D,C}
			
 
				-	vpminud %xmm3, %xmm2, %xmm2        # xmm2 has {x,x,E,F}
			
 
				-	vpalignr $4, %xmm2, %xmm3, %xmm3    # xmm3 has {x,x,x,E}
			
 
				-	vpminud %xmm3, %xmm2, %xmm2        # xmm2 has min value in low dword
			
 
				-
			
 
				-	vmovd   %xmm2, DWORD_idx
			
 
				-	mov	idx, len2
			
 
				-	and	$0xF, idx
			
 
				-	shr	$4, len2
			
 
				-	jz	len_is_0
			
 
				-
			
 
				-	vpand   clear_low_nibble(%rip), %xmm2, %xmm2
			
 
				-	vpshufd $0, %xmm2, %xmm2
			
 
				-
			
 
				-	vpsubd  %xmm2, %xmm0, %xmm0
			
 
				-	vpsubd  %xmm2, %xmm1, %xmm1
			
 
				-
			
 
				-	vmovdqu %xmm0, _lens+0*16(state)
			
 
				-	vmovdqu %xmm1, _lens+1*16(state)
			
 
				-
			
 
				-	# "state" and "args" are the same address, arg1
			
 
				-	# len is arg2
			
 
				-	call	sha1_x8_avx2
			
 
				-	# state and idx are intact
			
 
				-
			
 
				-
			
 
				-len_is_0:
			
 
				-	# process completed job "idx"
			
 
				-	imul    $_LANE_DATA_size, idx, lane_data
			
 
				-	lea     _ldata(state, lane_data), lane_data
			
 
				-
			
 
				-	mov     _job_in_lane(lane_data), job_rax
			
 
				-	movq    $0, _job_in_lane(lane_data)
			
 
				-	movl    $STS_COMPLETED, _status(job_rax)
			
 
				-	mov     _unused_lanes(state), unused_lanes
			
 
				-	shl     $4, unused_lanes
			
 
				-	or      idx, unused_lanes
			
 
				-	mov     unused_lanes, _unused_lanes(state)
			
 
				-
			
 
				-	movl	$0xFFFFFFFF, _lens(state, idx, 4)
			
 
				-
			
 
				-	vmovd    _args_digest(state , idx, 4) , %xmm0
			
 
				-	vpinsrd  $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0
			
 
				-	vpinsrd  $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0
			
 
				-	vpinsrd  $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0
			
 
				-	movl    _args_digest+4*32(state, idx, 4), tmp2_w
			
 
				-
			
 
				-	vmovdqu  %xmm0, _result_digest(job_rax)
			
 
				-	offset =  (_result_digest + 1*16)
			
 
				-	mov     tmp2_w, offset(job_rax)
			
 
				-
			
 
				-return:
			
 
				-	pop	%rbx
			
 
				-	FRAME_END
			
 
				-	ret
			
 
				-
			
 
				-return_null:
			
 
				-	xor     job_rax, job_rax
			
 
				-	jmp     return
			
 
				-ENDPROC(sha1_mb_mgr_flush_avx2)
			
 
				-
			
 
				-
			
 
				-#################################################################
			
 
				-
			
 
				-.align 16
			
 
				-ENTRY(sha1_mb_mgr_get_comp_job_avx2)
			
 
				-	push    %rbx
			
 
				-
			
 
				-	## if bit 32+3 is set, then all lanes are empty
			
 
				-	mov     _unused_lanes(state), unused_lanes
			
 
				-	bt      $(32+3), unused_lanes
			
 
				-	jc      .return_null
			
 
				-
			
 
				-	# Find min length
			
 
				-	vmovdqu _lens(state), %xmm0
			
 
				-	vmovdqu _lens+1*16(state), %xmm1
			
 
				-
			
 
				-	vpminud %xmm1, %xmm0, %xmm2        # xmm2 has {D,C,B,A}
			
 
				-	vpalignr $8, %xmm2, %xmm3, %xmm3   # xmm3 has {x,x,D,C}
			
 
				-	vpminud %xmm3, %xmm2, %xmm2        # xmm2 has {x,x,E,F}
			
 
				-	vpalignr $4, %xmm2, %xmm3, %xmm3    # xmm3 has {x,x,x,E}
			
 
				-	vpminud %xmm3, %xmm2, %xmm2        # xmm2 has min value in low dword
			
 
				-
			
 
				-	vmovd   %xmm2, DWORD_idx
			
 
				-	test    $~0xF, idx
			
 
				-	jnz     .return_null
			
 
				-
			
 
				-	# process completed job "idx"
			
 
				-	imul    $_LANE_DATA_size, idx, lane_data
			
 
				-	lea     _ldata(state, lane_data), lane_data
			
 
				-
			
 
				-	mov     _job_in_lane(lane_data), job_rax
			
 
				-	movq    $0,  _job_in_lane(lane_data)
			
 
				-	movl    $STS_COMPLETED, _status(job_rax)
			
 
				-	mov     _unused_lanes(state), unused_lanes
			
 
				-	shl     $4, unused_lanes
			
 
				-	or      idx, unused_lanes
			
 
				-	mov     unused_lanes, _unused_lanes(state)
			
 
				-
			
 
				-	movl    $0xFFFFFFFF, _lens(state,  idx, 4)
			
 
				-
			
 
				-	vmovd   _args_digest(state, idx, 4), %xmm0
			
 
				-	vpinsrd $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0
			
 
				-	vpinsrd $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0
			
 
				-	vpinsrd $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0
			
 
				-	movl    _args_digest+4*32(state, idx, 4), tmp2_w
			
 
				-
			
 
				-	vmovdqu %xmm0, _result_digest(job_rax)
			
 
				-	movl    tmp2_w, _result_digest+1*16(job_rax)
			
 
				-
			
 
				-	pop     %rbx
			
 
				-
			
 
				-	ret
			
 
				-
			
 
				-.return_null:
			
 
				-	xor     job_rax, job_rax
			
 
				-	pop     %rbx
			
 
				-	ret
			
 
				-ENDPROC(sha1_mb_mgr_get_comp_job_avx2)
			
 
				-
			
 
				-.section	.rodata.cst16.clear_low_nibble, "aM", @progbits, 16
			
 
				-.align 16
			
 
				-clear_low_nibble:
			
 
				-.octa	0x000000000000000000000000FFFFFFF0
			
 
				-
			
 
				-.section	.rodata.cst8, "aM", @progbits, 8
			
 
				-.align 8
			
 
				-one:
			
 
				-.quad  1
			
 
				-two:
			
 
				-.quad  2
			
 
				-three:
			
 
				-.quad  3
			
 
				-four:
			
 
				-.quad  4
			
 
				-five:
			
 
				-.quad  5
			
 
				-six:
			
 
				-.quad  6
			
 
				-seven:
			
 
				-.quad  7
			
--- a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_init_avx2.c
+++ b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_init_avx2.c
@@ -1,64 +0,0 @@
 
				-/*
			
 
				- * Initialization code for multi buffer SHA1 algorithm for AVX2
			
 
				- *
			
 
				- * This file is provided under a dual BSD/GPLv2 license.  When using or
			
 
				- * redistributing this file, you may do so under either license.
			
 
				- *
			
 
				- * GPL LICENSE SUMMARY
			
 
				- *
			
 
				- *  Copyright(c) 2014 Intel Corporation.
			
 
				- *
			
 
				- *  This program is free software; you can redistribute it and/or modify
			
 
				- *  it under the terms of version 2 of the GNU General Public License as
			
 
				- *  published by the Free Software Foundation.
			
 
				- *
			
 
				- *  This program is distributed in the hope that it will be useful, but
			
 
				- *  WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
			
 
				- *  General Public License for more details.
			
 
				- *
			
 
				- *  Contact Information:
			
 
				- *	Tim Chen <tim.c.chen@linux.intel.com>
			
 
				- *
			
 
				- *  BSD LICENSE
			
 
				- *
			
 
				- *  Copyright(c) 2014 Intel Corporation.
			
 
				- *
			
 
				- *  Redistribution and use in source and binary forms, with or without
			
 
				- *  modification, are permitted provided that the following conditions
			
 
				- *  are met:
			
 
				- *
			
 
				- *    * Redistributions of source code must retain the above copyright
			
 
				- *      notice, this list of conditions and the following disclaimer.
			
 
				- *    * Redistributions in binary form must reproduce the above copyright
			
 
				- *      notice, this list of conditions and the following disclaimer in
			
 
				- *      the documentation and/or other materials provided with the
			
 
				- *      distribution.
			
 
				- *    * Neither the name of Intel Corporation nor the names of its
			
 
				- *      contributors may be used to endorse or promote products derived
			
 
				- *      from this software without specific prior written permission.
			
 
				- *
			
 
				- *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
			
 
				- *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
			
 
				- *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
			
 
				- *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
			
 
				- *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
			
 
				- *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
			
 
				- *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
			
 
				- *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
			
 
				- *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
			
 
				- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
			
 
				- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
			
 
				- */
			
 
				-
			
 
				-#include "sha1_mb_mgr.h"
			
 
				-
			
 
				-void sha1_mb_mgr_init_avx2(struct sha1_mb_mgr *state)
			
 
				-{
			
 
				-	unsigned int j;
			
 
				-	state->unused_lanes = 0xF76543210ULL;
			
 
				-	for (j = 0; j < 8; j++) {
			
 
				-		state->lens[j] = 0xFFFFFFFF;
			
 
				-		state->ldata[j].job_in_lane = NULL;
			
 
				-	}
			
 
				-}
			
--- a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S
+++ b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S
@@ -1,209 +0,0 @@
 
				-/*
			
 
				- * Buffer submit code for multi buffer SHA1 algorithm
			
 
				- *
			
 
				- * This file is provided under a dual BSD/GPLv2 license.  When using or
			
 
				- * redistributing this file, you may do so under either license.
			
 
				- *
			
 
				- * GPL LICENSE SUMMARY
			
 
				- *
			
 
				- *  Copyright(c) 2014 Intel Corporation.
			
 
				- *
			
 
				- *  This program is free software; you can redistribute it and/or modify
			
 
				- *  it under the terms of version 2 of the GNU General Public License as
			
 
				- *  published by the Free Software Foundation.
			
 
				- *
			
 
				- *  This program is distributed in the hope that it will be useful, but
			
 
				- *  WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
			
 
				- *  General Public License for more details.
			
 
				- *
			
 
				- *  Contact Information:
			
 
				- *      James Guilford <james.guilford@intel.com>
			
 
				- *	Tim Chen <tim.c.chen@linux.intel.com>
			
 
				- *
			
 
				- *  BSD LICENSE
			
 
				- *
			
 
				- *  Copyright(c) 2014 Intel Corporation.
			
 
				- *
			
 
				- *  Redistribution and use in source and binary forms, with or without
			
 
				- *  modification, are permitted provided that the following conditions
			
 
				- *  are met:
			
 
				- *
			
 
				- *    * Redistributions of source code must retain the above copyright
			
 
				- *      notice, this list of conditions and the following disclaimer.
			
 
				- *    * Redistributions in binary form must reproduce the above copyright
			
 
				- *      notice, this list of conditions and the following disclaimer in
			
 
				- *      the documentation and/or other materials provided with the
			
 
				- *      distribution.
			
 
				- *    * Neither the name of Intel Corporation nor the names of its
			
 
				- *      contributors may be used to endorse or promote products derived
			
 
				- *      from this software without specific prior written permission.
			
 
				- *
			
 
				- *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
			
 
				- *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
			
 
				- *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
			
 
				- *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
			
 
				- *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
			
 
				- *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
			
 
				- *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
			
 
				- *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
			
 
				- *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
			
 
				- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
			
 
				- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
			
 
				- */
			
 
				-
			
 
				-#include <linux/linkage.h>
			
 
				-#include <asm/frame.h>
			
 
				-#include "sha1_mb_mgr_datastruct.S"
			
 
				-
			
 
				-
			
 
				-.extern sha1_x8_avx
			
 
				-
			
 
				-# LINUX register definitions
			
 
				-arg1    = %rdi
			
 
				-arg2    = %rsi
			
 
				-size_offset	= %rcx
			
 
				-tmp2		= %rcx
			
 
				-extra_blocks	= %rdx
			
 
				-
			
 
				-# Common definitions
			
 
				-#define state   arg1
			
 
				-#define job     %rsi
			
 
				-#define len2    arg2
			
 
				-#define p2      arg2
			
 
				-
			
 
				-# idx must be a register not clobberred by sha1_x8_avx2
			
 
				-idx		= %r8
			
 
				-DWORD_idx	= %r8d
			
 
				-last_len	= %r8
			
 
				-
			
 
				-p               = %r11
			
 
				-start_offset    = %r11
			
 
				-
			
 
				-unused_lanes    = %rbx
			
 
				-BYTE_unused_lanes = %bl
			
 
				-
			
 
				-job_rax         = %rax
			
 
				-len             = %rax
			
 
				-DWORD_len	= %eax
			
 
				-
			
 
				-lane            = %r12
			
 
				-tmp3            = %r12
			
 
				-
			
 
				-tmp             = %r9
			
 
				-DWORD_tmp	= %r9d
			
 
				-
			
 
				-lane_data       = %r10
			
 
				-
			
 
				-# JOB* submit_mb_mgr_submit_avx2(MB_MGR *state, job_sha1 *job)
			
 
				-# arg 1 : rcx : state
			
 
				-# arg 2 : rdx : job
			
 
				-ENTRY(sha1_mb_mgr_submit_avx2)
			
 
				-	FRAME_BEGIN
			
 
				-	push	%rbx
			
 
				-	push	%r12
			
 
				-
			
 
				-	mov     _unused_lanes(state), unused_lanes
			
 
				-	mov	unused_lanes, lane
			
 
				-	and	$0xF, lane
			
 
				-	shr     $4, unused_lanes
			
 
				-	imul    $_LANE_DATA_size, lane, lane_data
			
 
				-	movl    $STS_BEING_PROCESSED, _status(job)
			
 
				-	lea     _ldata(state, lane_data), lane_data
			
 
				-	mov     unused_lanes, _unused_lanes(state)
			
 
				-	movl    _len(job),  DWORD_len
			
 
				-
			
 
				-	mov	job, _job_in_lane(lane_data)
			
 
				-	shl	$4, len
			
 
				-	or	lane, len
			
 
				-
			
 
				-	movl    DWORD_len,  _lens(state , lane, 4)
			
 
				-
			
 
				-	# Load digest words from result_digest
			
 
				-	vmovdqu	_result_digest(job), %xmm0
			
 
				-	mov	_result_digest+1*16(job), DWORD_tmp
			
 
				-	vmovd    %xmm0, _args_digest(state, lane, 4)
			
 
				-	vpextrd  $1, %xmm0, _args_digest+1*32(state , lane, 4)
			
 
				-	vpextrd  $2, %xmm0, _args_digest+2*32(state , lane, 4)
			
 
				-	vpextrd  $3, %xmm0, _args_digest+3*32(state , lane, 4)
			
 
				-	movl    DWORD_tmp, _args_digest+4*32(state , lane, 4)
			
 
				-
			
 
				-	mov     _buffer(job), p
			
 
				-	mov     p, _args_data_ptr(state, lane, 8)
			
 
				-
			
 
				-	cmp     $0xF, unused_lanes
			
 
				-	jne     return_null
			
 
				-
			
 
				-start_loop:
			
 
				-	# Find min length
			
 
				-	vmovdqa _lens(state), %xmm0
			
 
				-	vmovdqa _lens+1*16(state), %xmm1
			
 
				-
			
 
				-	vpminud %xmm1, %xmm0, %xmm2        # xmm2 has {D,C,B,A}
			
 
				-	vpalignr $8, %xmm2, %xmm3, %xmm3   # xmm3 has {x,x,D,C}
			
 
				-	vpminud %xmm3, %xmm2, %xmm2        # xmm2 has {x,x,E,F}
			
 
				-	vpalignr $4, %xmm2, %xmm3, %xmm3   # xmm3 has {x,x,x,E}
			
 
				-	vpminud %xmm3, %xmm2, %xmm2        # xmm2 has min value in low dword
			
 
				-
			
 
				-	vmovd   %xmm2, DWORD_idx
			
 
				-	mov    idx, len2
			
 
				-	and    $0xF, idx
			
 
				-	shr    $4, len2
			
 
				-	jz     len_is_0
			
 
				-
			
 
				-	vpand   clear_low_nibble(%rip), %xmm2, %xmm2
			
 
				-	vpshufd $0, %xmm2, %xmm2
			
 
				-
			
 
				-	vpsubd  %xmm2, %xmm0, %xmm0
			
 
				-	vpsubd  %xmm2, %xmm1, %xmm1
			
 
				-
			
 
				-	vmovdqa %xmm0, _lens + 0*16(state)
			
 
				-	vmovdqa %xmm1, _lens + 1*16(state)
			
 
				-
			
 
				-
			
 
				-	# "state" and "args" are the same address, arg1
			
 
				-	# len is arg2
			
 
				-	call    sha1_x8_avx2
			
 
				-
			
 
				-	# state and idx are intact
			
 
				-
			
 
				-len_is_0:
			
 
				-	# process completed job "idx"
			
 
				-	imul    $_LANE_DATA_size, idx, lane_data
			
 
				-	lea     _ldata(state, lane_data), lane_data
			
 
				-
			
 
				-	mov     _job_in_lane(lane_data), job_rax
			
 
				-	mov     _unused_lanes(state), unused_lanes
			
 
				-	movq    $0, _job_in_lane(lane_data)
			
 
				-	movl    $STS_COMPLETED, _status(job_rax)
			
 
				-	shl     $4, unused_lanes
			
 
				-	or      idx, unused_lanes
			
 
				-	mov     unused_lanes, _unused_lanes(state)
			
 
				-
			
 
				-	movl	$0xFFFFFFFF, _lens(state, idx, 4)
			
 
				-
			
 
				-	vmovd    _args_digest(state, idx, 4), %xmm0
			
 
				-	vpinsrd  $1, _args_digest+1*32(state , idx, 4), %xmm0, %xmm0
			
 
				-	vpinsrd  $2, _args_digest+2*32(state , idx, 4), %xmm0, %xmm0
			
 
				-	vpinsrd  $3, _args_digest+3*32(state , idx, 4), %xmm0, %xmm0
			
 
				-	movl     _args_digest+4*32(state, idx, 4), DWORD_tmp
			
 
				-
			
 
				-	vmovdqu  %xmm0, _result_digest(job_rax)
			
 
				-	movl    DWORD_tmp, _result_digest+1*16(job_rax)
			
 
				-
			
 
				-return:
			
 
				-	pop	%r12
			
 
				-	pop	%rbx
			
 
				-	FRAME_END
			
 
				-	ret
			
 
				-
			
 
				-return_null:
			
 
				-	xor     job_rax, job_rax
			
 
				-	jmp     return
			
 
				-
			
 
				-ENDPROC(sha1_mb_mgr_submit_avx2)
			
 
				-
			
 
				-.section	.rodata.cst16.clear_low_nibble, "aM", @progbits, 16
			
 
				-.align 16
			
 
				-clear_low_nibble:
			
 
				-	.octa	0x000000000000000000000000FFFFFFF0
			
--- a/arch/x86/crypto/sha1-mb/sha1_x8_avx2.S
+++ b/arch/x86/crypto/sha1-mb/sha1_x8_avx2.S
@@ -1,492 +0,0 @@
 
				-/*
			
 
				- * Multi-buffer SHA1 algorithm hash compute routine
			
 
				- *
			
 
				- * This file is provided under a dual BSD/GPLv2 license.  When using or
			
 
				- * redistributing this file, you may do so under either license.
			
 
				- *
			
 
				- * GPL LICENSE SUMMARY
			
 
				- *
			
 
				- *  Copyright(c) 2014 Intel Corporation.
			
 
				- *
			
 
				- *  This program is free software; you can redistribute it and/or modify
			
 
				- *  it under the terms of version 2 of the GNU General Public License as
			
 
				- *  published by the Free Software Foundation.
			
 
				- *
			
 
				- *  This program is distributed in the hope that it will be useful, but
			
 
				- *  WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
			
 
				- *  General Public License for more details.
			
 
				- *
			
 
				- *  Contact Information:
			
 
				- *      James Guilford <james.guilford@intel.com>
			
 
				- *	Tim Chen <tim.c.chen@linux.intel.com>
			
 
				- *
			
 
				- *  BSD LICENSE
			
 
				- *
			
 
				- *  Copyright(c) 2014 Intel Corporation.
			
 
				- *
			
 
				- *  Redistribution and use in source and binary forms, with or without
			
 
				- *  modification, are permitted provided that the following conditions
			
 
				- *  are met:
			
 
				- *
			
 
				- *    * Redistributions of source code must retain the above copyright
			
 
				- *      notice, this list of conditions and the following disclaimer.
			
 
				- *    * Redistributions in binary form must reproduce the above copyright
			
 
				- *      notice, this list of conditions and the following disclaimer in
			
 
				- *      the documentation and/or other materials provided with the
			
 
				- *      distribution.
			
 
				- *    * Neither the name of Intel Corporation nor the names of its
			
 
				- *      contributors may be used to endorse or promote products derived
			
 
				- *      from this software without specific prior written permission.
			
 
				- *
			
 
				- *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
			
 
				- *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
			
 
				- *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
			
 
				- *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
			
 
				- *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
			
 
				- *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
			
 
				- *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
			
 
				- *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
			
 
				- *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
			
 
				- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
			
 
				- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
			
 
				- */
			
 
				-
			
 
				-#include <linux/linkage.h>
			
 
				-#include "sha1_mb_mgr_datastruct.S"
			
 
				-
			
 
				-## code to compute oct SHA1 using SSE-256
			
 
				-## outer calling routine takes care of save and restore of XMM registers
			
 
				-
			
 
				-## Function clobbers: rax, rcx, rdx,   rbx, rsi, rdi, r9-r15# ymm0-15
			
 
				-##
			
 
				-## Linux clobbers:    rax rbx rcx rdx rsi            r9 r10 r11 r12 r13 r14 r15
			
 
				-## Linux preserves:                       rdi rbp r8
			
 
				-##
			
 
				-## clobbers ymm0-15
			
 
				-
			
 
				-
			
 
				-# TRANSPOSE8 r0, r1, r2, r3, r4, r5, r6, r7, t0, t1
			
 
				-# "transpose" data in {r0...r7} using temps {t0...t1}
			
 
				-# Input looks like: {r0 r1 r2 r3 r4 r5 r6 r7}
			
 
				-# r0 = {a7 a6 a5 a4   a3 a2 a1 a0}
			
 
				-# r1 = {b7 b6 b5 b4   b3 b2 b1 b0}
			
 
				-# r2 = {c7 c6 c5 c4   c3 c2 c1 c0}
			
 
				-# r3 = {d7 d6 d5 d4   d3 d2 d1 d0}
			
 
				-# r4 = {e7 e6 e5 e4   e3 e2 e1 e0}
			
 
				-# r5 = {f7 f6 f5 f4   f3 f2 f1 f0}
			
 
				-# r6 = {g7 g6 g5 g4   g3 g2 g1 g0}
			
 
				-# r7 = {h7 h6 h5 h4   h3 h2 h1 h0}
			
 
				-#
			
 
				-# Output looks like: {r0 r1 r2 r3 r4 r5 r6 r7}
			
 
				-# r0 = {h0 g0 f0 e0   d0 c0 b0 a0}
			
 
				-# r1 = {h1 g1 f1 e1   d1 c1 b1 a1}
			
 
				-# r2 = {h2 g2 f2 e2   d2 c2 b2 a2}
			
 
				-# r3 = {h3 g3 f3 e3   d3 c3 b3 a3}
			
 
				-# r4 = {h4 g4 f4 e4   d4 c4 b4 a4}
			
 
				-# r5 = {h5 g5 f5 e5   d5 c5 b5 a5}
			
 
				-# r6 = {h6 g6 f6 e6   d6 c6 b6 a6}
			
 
				-# r7 = {h7 g7 f7 e7   d7 c7 b7 a7}
			
 
				-#
			
 
				-
			
 
				-.macro TRANSPOSE8 r0 r1 r2 r3 r4 r5 r6 r7 t0 t1
			
 
				-	# process top half (r0..r3) {a...d}
			
 
				-	vshufps  $0x44, \r1, \r0, \t0 # t0 = {b5 b4 a5 a4   b1 b0 a1 a0}
			
 
				-	vshufps  $0xEE, \r1, \r0, \r0 # r0 = {b7 b6 a7 a6   b3 b2 a3 a2}
			
 
				-	vshufps  $0x44, \r3, \r2, \t1 # t1 = {d5 d4 c5 c4   d1 d0 c1 c0}
			
 
				-	vshufps  $0xEE, \r3, \r2, \r2 # r2 = {d7 d6 c7 c6   d3 d2 c3 c2}
			
 
				-	vshufps  $0xDD, \t1, \t0, \r3 # r3 = {d5 c5 b5 a5   d1 c1 b1 a1}
			
 
				-	vshufps  $0x88, \r2, \r0, \r1 # r1 = {d6 c6 b6 a6   d2 c2 b2 a2}
			
 
				-	vshufps  $0xDD, \r2, \r0, \r0 # r0 = {d7 c7 b7 a7   d3 c3 b3 a3}
			
 
				-	vshufps  $0x88, \t1, \t0, \t0 # t0 = {d4 c4 b4 a4   d0 c0 b0 a0}
			
 
				-
			
 
				-	# use r2 in place of t0
			
 
				-	# process bottom half (r4..r7) {e...h}
			
 
				-	vshufps  $0x44, \r5, \r4, \r2 # r2 = {f5 f4 e5 e4   f1 f0 e1 e0}
			
 
				-	vshufps  $0xEE, \r5, \r4, \r4 # r4 = {f7 f6 e7 e6   f3 f2 e3 e2}
			
 
				-	vshufps  $0x44, \r7, \r6, \t1 # t1 = {h5 h4 g5 g4   h1 h0 g1 g0}
			
 
				-	vshufps  $0xEE, \r7, \r6, \r6 # r6 = {h7 h6 g7 g6   h3 h2 g3 g2}
			
 
				-	vshufps  $0xDD, \t1, \r2, \r7 # r7 = {h5 g5 f5 e5   h1 g1 f1 e1}
			
 
				-	vshufps  $0x88, \r6, \r4, \r5 # r5 = {h6 g6 f6 e6   h2 g2 f2 e2}
			
 
				-	vshufps  $0xDD, \r6, \r4, \r4 # r4 = {h7 g7 f7 e7   h3 g3 f3 e3}
			
 
				-	vshufps  $0x88, \t1, \r2, \t1 # t1 = {h4 g4 f4 e4   h0 g0 f0 e0}
			
 
				-
			
 
				-	vperm2f128      $0x13, \r1, \r5, \r6  # h6...a6
			
 
				-	vperm2f128      $0x02, \r1, \r5, \r2  # h2...a2
			
 
				-	vperm2f128      $0x13, \r3, \r7, \r5  # h5...a5
			
 
				-	vperm2f128      $0x02, \r3, \r7, \r1  # h1...a1
			
 
				-	vperm2f128      $0x13, \r0, \r4, \r7  # h7...a7
			
 
				-	vperm2f128      $0x02, \r0, \r4, \r3  # h3...a3
			
 
				-	vperm2f128      $0x13, \t0, \t1, \r4  # h4...a4
			
 
				-	vperm2f128      $0x02, \t0, \t1, \r0  # h0...a0
			
 
				-
			
 
				-.endm
			
 
				-##
			
 
				-## Magic functions defined in FIPS 180-1
			
 
				-##
			
 
				-# macro MAGIC_F0 F,B,C,D,T   ## F = (D ^ (B & (C ^ D)))
			
 
				-.macro MAGIC_F0 regF regB regC regD regT
			
 
				-    vpxor \regD, \regC, \regF
			
 
				-    vpand \regB, \regF, \regF
			
 
				-    vpxor \regD, \regF, \regF
			
 
				-.endm
			
 
				-
			
 
				-# macro MAGIC_F1 F,B,C,D,T   ## F = (B ^ C ^ D)
			
 
				-.macro MAGIC_F1 regF regB regC regD regT
			
 
				-    vpxor  \regC, \regD, \regF
			
 
				-    vpxor  \regB, \regF, \regF
			
 
				-.endm
			
 
				-
			
 
				-# macro MAGIC_F2 F,B,C,D,T   ## F = ((B & C) | (B & D) | (C & D))
			
 
				-.macro MAGIC_F2 regF regB regC regD regT
			
 
				-    vpor  \regC, \regB, \regF
			
 
				-    vpand \regC, \regB, \regT
			
 
				-    vpand \regD, \regF, \regF
			
 
				-    vpor  \regT, \regF, \regF
			
 
				-.endm
			
 
				-
			
 
				-# macro MAGIC_F3 F,B,C,D,T   ## F = (B ^ C ^ D)
			
 
				-.macro MAGIC_F3 regF regB regC regD regT
			
 
				-    MAGIC_F1 \regF,\regB,\regC,\regD,\regT
			
 
				-.endm
			
 
				-
			
 
				-# PROLD reg, imm, tmp
			
 
				-.macro PROLD reg imm tmp
			
 
				-	vpsrld  $(32-\imm), \reg, \tmp
			
 
				-	vpslld  $\imm, \reg, \reg
			
 
				-	vpor    \tmp, \reg, \reg
			
 
				-.endm
			
 
				-
			
 
				-.macro PROLD_nd reg imm tmp src
			
 
				-	vpsrld  $(32-\imm), \src, \tmp
			
 
				-	vpslld  $\imm, \src, \reg
			
 
				-	vpor	\tmp, \reg, \reg
			
 
				-.endm
			
 
				-
			
 
				-.macro SHA1_STEP_00_15 regA regB regC regD regE regT regF memW immCNT MAGIC
			
 
				-	vpaddd	\immCNT, \regE, \regE
			
 
				-	vpaddd	\memW*32(%rsp), \regE, \regE
			
 
				-	PROLD_nd \regT, 5, \regF, \regA
			
 
				-	vpaddd	\regT, \regE, \regE
			
 
				-	\MAGIC  \regF, \regB, \regC, \regD, \regT
			
 
				-        PROLD   \regB, 30, \regT
			
 
				-        vpaddd  \regF, \regE, \regE
			
 
				-.endm
			
 
				-
			
 
				-.macro SHA1_STEP_16_79 regA regB regC regD regE regT regF memW immCNT MAGIC
			
 
				-	vpaddd	\immCNT, \regE, \regE
			
 
				-	offset = ((\memW - 14) & 15) * 32
			
 
				-	vmovdqu offset(%rsp), W14
			
 
				-	vpxor	W14, W16, W16
			
 
				-	offset = ((\memW -  8) & 15) * 32
			
 
				-	vpxor	offset(%rsp), W16, W16
			
 
				-	offset = ((\memW -  3) & 15) * 32
			
 
				-	vpxor	offset(%rsp), W16, W16
			
 
				-	vpsrld	$(32-1), W16, \regF
			
 
				-	vpslld	$1, W16, W16
			
 
				-	vpor	W16, \regF, \regF
			
 
				-
			
 
				-	ROTATE_W
			
 
				-
			
 
				-	offset = ((\memW - 0) & 15) * 32
			
 
				-	vmovdqu	\regF, offset(%rsp)
			
 
				-	vpaddd	\regF, \regE, \regE
			
 
				-	PROLD_nd \regT, 5, \regF, \regA
			
 
				-	vpaddd	\regT, \regE, \regE
			
 
				-	\MAGIC \regF,\regB,\regC,\regD,\regT      ## FUN  = MAGIC_Fi(B,C,D)
			
 
				-	PROLD   \regB,30, \regT
			
 
				-	vpaddd  \regF, \regE, \regE
			
 
				-.endm
			
 
				-
			
 
				-########################################################################
			
 
				-########################################################################
			
 
				-########################################################################
			
 
				-
			
 
				-## FRAMESZ plus pushes must be an odd multiple of 8
			
 
				-YMM_SAVE = (15-15)*32
			
 
				-FRAMESZ = 32*16 + YMM_SAVE
			
 
				-_YMM  =   FRAMESZ - YMM_SAVE
			
 
				-
			
 
				-#define VMOVPS   vmovups
			
 
				-
			
 
				-IDX  = %rax
			
 
				-inp0 = %r9
			
 
				-inp1 = %r10
			
 
				-inp2 = %r11
			
 
				-inp3 = %r12
			
 
				-inp4 = %r13
			
 
				-inp5 = %r14
			
 
				-inp6 = %r15
			
 
				-inp7 = %rcx
			
 
				-arg1 = %rdi
			
 
				-arg2 = %rsi
			
 
				-RSP_SAVE = %rdx
			
 
				-
			
 
				-# ymm0 A
			
 
				-# ymm1 B
			
 
				-# ymm2 C
			
 
				-# ymm3 D
			
 
				-# ymm4 E
			
 
				-# ymm5         F       AA
			
 
				-# ymm6         T0      BB
			
 
				-# ymm7         T1      CC
			
 
				-# ymm8         T2      DD
			
 
				-# ymm9         T3      EE
			
 
				-# ymm10                T4      TMP
			
 
				-# ymm11                T5      FUN
			
 
				-# ymm12                T6      K
			
 
				-# ymm13                T7      W14
			
 
				-# ymm14                T8      W15
			
 
				-# ymm15                T9      W16
			
 
				-
			
 
				-
			
 
				-A  =     %ymm0
			
 
				-B  =     %ymm1
			
 
				-C  =     %ymm2
			
 
				-D  =     %ymm3
			
 
				-E  =     %ymm4
			
 
				-F  =     %ymm5
			
 
				-T0 =	 %ymm6
			
 
				-T1 =     %ymm7
			
 
				-T2 =     %ymm8
			
 
				-T3 =     %ymm9
			
 
				-T4 =     %ymm10
			
 
				-T5 =     %ymm11
			
 
				-T6 =     %ymm12
			
 
				-T7 =     %ymm13
			
 
				-T8  =     %ymm14
			
 
				-T9  =     %ymm15
			
 
				-
			
 
				-AA  =     %ymm5
			
 
				-BB  =     %ymm6
			
 
				-CC  =     %ymm7
			
 
				-DD  =     %ymm8
			
 
				-EE  =     %ymm9
			
 
				-TMP =     %ymm10
			
 
				-FUN =     %ymm11
			
 
				-K   =     %ymm12
			
 
				-W14 =     %ymm13
			
 
				-W15 =     %ymm14
			
 
				-W16 =     %ymm15
			
 
				-
			
 
				-.macro ROTATE_ARGS
			
 
				- TMP_ = E
			
 
				- E = D
			
 
				- D = C
			
 
				- C = B
			
 
				- B = A
			
 
				- A = TMP_
			
 
				-.endm
			
 
				-
			
 
				-.macro ROTATE_W
			
 
				-TMP_  = W16
			
 
				-W16  = W15
			
 
				-W15  = W14
			
 
				-W14  = TMP_
			
 
				-.endm
			
 
				-
			
 
				-# 8 streams x 5 32bit words per digest x 4 bytes per word
			
 
				-#define DIGEST_SIZE (8*5*4)
			
 
				-
			
 
				-.align 32
			
 
				-
			
 
				-# void sha1_x8_avx2(void **input_data, UINT128 *digest, UINT32 size)
			
 
				-# arg 1 : pointer to array[4] of pointer to input data
			
 
				-# arg 2 : size (in blocks) ;; assumed to be >= 1
			
 
				-#
			
 
				-ENTRY(sha1_x8_avx2)
			
 
				-
			
 
				-	# save callee-saved clobbered registers to comply with C function ABI
			
 
				-	push	%r12
			
 
				-	push	%r13
			
 
				-	push	%r14
			
 
				-	push	%r15
			
 
				-
			
 
				-	#save rsp
			
 
				-	mov	%rsp, RSP_SAVE
			
 
				-	sub     $FRAMESZ, %rsp
			
 
				-
			
 
				-	#align rsp to 32 Bytes
			
 
				-	and	$~0x1F, %rsp
			
 
				-
			
 
				-	## Initialize digests
			
 
				-	vmovdqu  0*32(arg1), A
			
 
				-	vmovdqu  1*32(arg1), B
			
 
				-	vmovdqu  2*32(arg1), C
			
 
				-	vmovdqu  3*32(arg1), D
			
 
				-	vmovdqu  4*32(arg1), E
			
 
				-
			
 
				-	## transpose input onto stack
			
 
				-	mov     _data_ptr+0*8(arg1),inp0
			
 
				-	mov     _data_ptr+1*8(arg1),inp1
			
 
				-	mov     _data_ptr+2*8(arg1),inp2
			
 
				-	mov     _data_ptr+3*8(arg1),inp3
			
 
				-	mov     _data_ptr+4*8(arg1),inp4
			
 
				-	mov     _data_ptr+5*8(arg1),inp5
			
 
				-	mov     _data_ptr+6*8(arg1),inp6
			
 
				-	mov     _data_ptr+7*8(arg1),inp7
			
 
				-
			
 
				-	xor     IDX, IDX
			
 
				-lloop:
			
 
				-	vmovdqu  PSHUFFLE_BYTE_FLIP_MASK(%rip), F
			
 
				-	I=0
			
 
				-.rep 2
			
 
				-	VMOVPS   (inp0, IDX), T0
			
 
				-	VMOVPS   (inp1, IDX), T1
			
 
				-	VMOVPS   (inp2, IDX), T2
			
 
				-	VMOVPS   (inp3, IDX), T3
			
 
				-	VMOVPS   (inp4, IDX), T4
			
 
				-	VMOVPS   (inp5, IDX), T5
			
 
				-	VMOVPS   (inp6, IDX), T6
			
 
				-	VMOVPS   (inp7, IDX), T7
			
 
				-
			
 
				-	TRANSPOSE8       T0, T1, T2, T3, T4, T5, T6, T7, T8, T9
			
 
				-	vpshufb  F, T0, T0
			
 
				-	vmovdqu  T0, (I*8)*32(%rsp)
			
 
				-	vpshufb  F, T1, T1
			
 
				-	vmovdqu  T1, (I*8+1)*32(%rsp)
			
 
				-	vpshufb  F, T2, T2
			
 
				-	vmovdqu  T2, (I*8+2)*32(%rsp)
			
 
				-	vpshufb  F, T3, T3
			
 
				-	vmovdqu  T3, (I*8+3)*32(%rsp)
			
 
				-	vpshufb  F, T4, T4
			
 
				-	vmovdqu  T4, (I*8+4)*32(%rsp)
			
 
				-	vpshufb  F, T5, T5
			
 
				-	vmovdqu  T5, (I*8+5)*32(%rsp)
			
 
				-	vpshufb  F, T6, T6
			
 
				-	vmovdqu  T6, (I*8+6)*32(%rsp)
			
 
				-	vpshufb  F, T7, T7
			
 
				-	vmovdqu  T7, (I*8+7)*32(%rsp)
			
 
				-	add     $32, IDX
			
 
				-	I = (I+1)
			
 
				-.endr
			
 
				-	# save old digests
			
 
				-	vmovdqu  A,AA
			
 
				-	vmovdqu  B,BB
			
 
				-	vmovdqu  C,CC
			
 
				-	vmovdqu  D,DD
			
 
				-	vmovdqu  E,EE
			
 
				-
			
 
				-##
			
 
				-## perform 0-79 steps
			
 
				-##
			
 
				-	vmovdqu  K00_19(%rip), K
			
 
				-## do rounds 0...15
			
 
				-	I = 0
			
 
				-.rep 16
			
 
				-	SHA1_STEP_00_15 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0
			
 
				-	ROTATE_ARGS
			
 
				-	I = (I+1)
			
 
				-.endr
			
 
				-
			
 
				-## do rounds 16...19
			
 
				-	vmovdqu  ((16 - 16) & 15) * 32 (%rsp), W16
			
 
				-	vmovdqu  ((16 - 15) & 15) * 32 (%rsp), W15
			
 
				-.rep 4
			
 
				-	SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0
			
 
				-	ROTATE_ARGS
			
 
				-	I = (I+1)
			
 
				-.endr
			
 
				-
			
 
				-## do rounds 20...39
			
 
				-	vmovdqu  K20_39(%rip), K
			
 
				-.rep 20
			
 
				-	SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F1
			
 
				-	ROTATE_ARGS
			
 
				-	I = (I+1)
			
 
				-.endr
			
 
				-
			
 
				-## do rounds 40...59
			
 
				-	vmovdqu  K40_59(%rip), K
			
 
				-.rep 20
			
 
				-	SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F2
			
 
				-	ROTATE_ARGS
			
 
				-	I = (I+1)
			
 
				-.endr
			
 
				-
			
 
				-## do rounds 60...79
			
 
				-	vmovdqu  K60_79(%rip), K
			
 
				-.rep 20
			
 
				-	SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F3
			
 
				-	ROTATE_ARGS
			
 
				-	I = (I+1)
			
 
				-.endr
			
 
				-
			
 
				-	vpaddd   AA,A,A
			
 
				-	vpaddd   BB,B,B
			
 
				-	vpaddd   CC,C,C
			
 
				-	vpaddd   DD,D,D
			
 
				-	vpaddd   EE,E,E
			
 
				-
			
 
				-	sub     $1, arg2
			
 
				-	jne     lloop
			
 
				-
			
 
				-	# write out digests
			
 
				-	vmovdqu  A, 0*32(arg1)
			
 
				-	vmovdqu  B, 1*32(arg1)
			
 
				-	vmovdqu  C, 2*32(arg1)
			
 
				-	vmovdqu  D, 3*32(arg1)
			
 
				-	vmovdqu  E, 4*32(arg1)
			
 
				-
			
 
				-	# update input pointers
			
 
				-	add     IDX, inp0
			
 
				-	add     IDX, inp1
			
 
				-	add     IDX, inp2
			
 
				-	add     IDX, inp3
			
 
				-	add     IDX, inp4
			
 
				-	add     IDX, inp5
			
 
				-	add     IDX, inp6
			
 
				-	add     IDX, inp7
			
 
				-	mov     inp0, _data_ptr (arg1)
			
 
				-	mov     inp1, _data_ptr + 1*8(arg1)
			
 
				-	mov     inp2, _data_ptr + 2*8(arg1)
			
 
				-	mov     inp3, _data_ptr + 3*8(arg1)
			
 
				-	mov     inp4, _data_ptr + 4*8(arg1)
			
 
				-	mov     inp5, _data_ptr + 5*8(arg1)
			
 
				-	mov     inp6, _data_ptr + 6*8(arg1)
			
 
				-	mov     inp7, _data_ptr + 7*8(arg1)
			
 
				-
			
 
				-	################
			
 
				-	## Postamble
			
 
				-
			
 
				-	mov     RSP_SAVE, %rsp
			
 
				-
			
 
				-	# restore callee-saved clobbered registers
			
 
				-	pop	%r15
			
 
				-	pop	%r14
			
 
				-	pop	%r13
			
 
				-	pop	%r12
			
 
				-
			
 
				-	ret
			
 
				-ENDPROC(sha1_x8_avx2)
			
 
				-
			
 
				-
			
 
				-.section	.rodata.cst32.K00_19, "aM", @progbits, 32
			
 
				-.align 32
			
 
				-K00_19:
			
 
				-.octa 0x5A8279995A8279995A8279995A827999
			
 
				-.octa 0x5A8279995A8279995A8279995A827999
			
 
				-
			
 
				-.section	.rodata.cst32.K20_39, "aM", @progbits, 32
			
 
				-.align 32
			
 
				-K20_39:
			
 
				-.octa 0x6ED9EBA16ED9EBA16ED9EBA16ED9EBA1
			
 
				-.octa 0x6ED9EBA16ED9EBA16ED9EBA16ED9EBA1
			
 
				-
			
 
				-.section	.rodata.cst32.K40_59, "aM", @progbits, 32
			
 
				-.align 32
			
 
				-K40_59:
			
 
				-.octa 0x8F1BBCDC8F1BBCDC8F1BBCDC8F1BBCDC
			
 
				-.octa 0x8F1BBCDC8F1BBCDC8F1BBCDC8F1BBCDC
			
 
				-
			
 
				-.section	.rodata.cst32.K60_79, "aM", @progbits, 32
			
 
				-.align 32
			
 
				-K60_79:
			
 
				-.octa 0xCA62C1D6CA62C1D6CA62C1D6CA62C1D6
			
 
				-.octa 0xCA62C1D6CA62C1D6CA62C1D6CA62C1D6
			
 
				-
			
 
				-.section	.rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32
			
 
				-.align 32
			
 
				-PSHUFFLE_BYTE_FLIP_MASK:
			
 
				-.octa 0x0c0d0e0f08090a0b0405060700010203
			
 
				-.octa 0x0c0d0e0f08090a0b0405060700010203
			
--- a/arch/x86/crypto/sha256-mb/Makefile
+++ b/arch/x86/crypto/sha256-mb/Makefile
@@ -1,14 +0,0 @@
 
				-# SPDX-License-Identifier: GPL-2.0
			
 
				-#
			
 
				-# Arch-specific CryptoAPI modules.
			
 
				-#
			
 
				-
			
 
				-OBJECT_FILES_NON_STANDARD := y
			
 
				-
			
 
				-avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
			
 
				-                                $(comma)4)$(comma)%ymm2,yes,no)
			
 
				-ifeq ($(avx2_supported),yes)
			
 
				-	obj-$(CONFIG_CRYPTO_SHA256_MB) += sha256-mb.o
			
 
				-	sha256-mb-y := sha256_mb.o sha256_mb_mgr_flush_avx2.o \
			
 
				-	     sha256_mb_mgr_init_avx2.o sha256_mb_mgr_submit_avx2.o sha256_x8_avx2.o
			
 
				-endif
			
--- a/arch/x86/crypto/sha256-mb/sha256_mb.c
+++ b/arch/x86/crypto/sha256-mb/sha256_mb.c
@@ -1,1013 +0,0 @@
 
				-/*
			
 
				- * Multi buffer SHA256 algorithm Glue Code
			
 
				- *
			
 
				- * This file is provided under a dual BSD/GPLv2 license.  When using or
			
 
				- * redistributing this file, you may do so under either license.
			
 
				- *
			
 
				- * GPL LICENSE SUMMARY
			
 
				- *
			
 
				- *  Copyright(c) 2016 Intel Corporation.
			
 
				- *
			
 
				- *  This program is free software; you can redistribute it and/or modify
			
 
				- *  it under the terms of version 2 of the GNU General Public License as
			
 
				- *  published by the Free Software Foundation.
			
 
				- *
			
 
				- *  This program is distributed in the hope that it will be useful, but
			
 
				- *  WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
			
 
				- *  General Public License for more details.
			
 
				- *
			
 
				- *  Contact Information:
			
 
				- *	Megha Dey <megha.dey@linux.intel.com>
			
 
				- *
			
 
				- *  BSD LICENSE
			
 
				- *
			
 
				- *  Copyright(c) 2016 Intel Corporation.
			
 
				- *
			
 
				- *  Redistribution and use in source and binary forms, with or without
			
 
				- *  modification, are permitted provided that the following conditions
			
 
				- *  are met:
			
 
				- *
			
 
				- *    * Redistributions of source code must retain the above copyright
			
 
				- *      notice, this list of conditions and the following disclaimer.
			
 
				- *    * Redistributions in binary form must reproduce the above copyright
			
 
				- *      notice, this list of conditions and the following disclaimer in
			
 
				- *      the documentation and/or other materials provided with the
			
 
				- *      distribution.
			
 
				- *    * Neither the name of Intel Corporation nor the names of its
			
 
				- *      contributors may be used to endorse or promote products derived
			
 
				- *      from this software without specific prior written permission.
			
 
				- *
			
 
				- *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
			
 
				- *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
			
 
				- *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
			
 
				- *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
			
 
				- *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
			
 
				- *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
			
 
				- *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
			
 
				- *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
			
 
				- *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
			
 
				- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
			
 
				- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
			
 
				- */
			
 
				-
			
 
				-#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
			
 
				-
			
 
				-#include <crypto/internal/hash.h>
			
 
				-#include <linux/init.h>
			
 
				-#include <linux/module.h>
			
 
				-#include <linux/mm.h>
			
 
				-#include <linux/cryptohash.h>
			
 
				-#include <linux/types.h>
			
 
				-#include <linux/list.h>
			
 
				-#include <crypto/scatterwalk.h>
			
 
				-#include <crypto/sha.h>
			
 
				-#include <crypto/mcryptd.h>
			
 
				-#include <crypto/crypto_wq.h>
			
 
				-#include <asm/byteorder.h>
			
 
				-#include <linux/hardirq.h>
			
 
				-#include <asm/fpu/api.h>
			
 
				-#include "sha256_mb_ctx.h"
			
 
				-
			
 
				-#define FLUSH_INTERVAL 1000 /* in usec */
			
 
				-
			
 
				-static struct mcryptd_alg_state sha256_mb_alg_state;
			
 
				-
			
 
				-struct sha256_mb_ctx {
			
 
				-	struct mcryptd_ahash *mcryptd_tfm;
			
 
				-};
			
 
				-
			
 
				-static inline struct mcryptd_hash_request_ctx
			
 
				-		*cast_hash_to_mcryptd_ctx(struct sha256_hash_ctx *hash_ctx)
			
 
				-{
			
 
				-	struct ahash_request *areq;
			
 
				-
			
 
				-	areq = container_of((void *) hash_ctx, struct ahash_request, __ctx);
			
 
				-	return container_of(areq, struct mcryptd_hash_request_ctx, areq);
			
 
				-}
			
 
				-
			
 
				-static inline struct ahash_request
			
 
				-		*cast_mcryptd_ctx_to_req(struct mcryptd_hash_request_ctx *ctx)
			
 
				-{
			
 
				-	return container_of((void *) ctx, struct ahash_request, __ctx);
			
 
				-}
			
 
				-
			
 
				-static void req_ctx_init(struct mcryptd_hash_request_ctx *rctx,
			
 
				-				struct ahash_request *areq)
			
 
				-{
			
 
				-	rctx->flag = HASH_UPDATE;
			
 
				-}
			
 
				-
			
 
				-static asmlinkage void (*sha256_job_mgr_init)(struct sha256_mb_mgr *state);
			
 
				-static asmlinkage struct job_sha256* (*sha256_job_mgr_submit)
			
 
				-			(struct sha256_mb_mgr *state, struct job_sha256 *job);
			
 
				-static asmlinkage struct job_sha256* (*sha256_job_mgr_flush)
			
 
				-			(struct sha256_mb_mgr *state);
			
 
				-static asmlinkage struct job_sha256* (*sha256_job_mgr_get_comp_job)
			
 
				-			(struct sha256_mb_mgr *state);
			
 
				-
			
 
				-inline uint32_t sha256_pad(uint8_t padblock[SHA256_BLOCK_SIZE * 2],
			
 
				-			 uint64_t total_len)
			
 
				-{
			
 
				-	uint32_t i = total_len & (SHA256_BLOCK_SIZE - 1);
			
 
				-
			
 
				-	memset(&padblock[i], 0, SHA256_BLOCK_SIZE);
			
 
				-	padblock[i] = 0x80;
			
 
				-
			
 
				-	i += ((SHA256_BLOCK_SIZE - 1) &
			
 
				-	      (0 - (total_len + SHA256_PADLENGTHFIELD_SIZE + 1)))
			
 
				-	     + 1 + SHA256_PADLENGTHFIELD_SIZE;
			
 
				-
			
 
				-#if SHA256_PADLENGTHFIELD_SIZE == 16
			
 
				-	*((uint64_t *) &padblock[i - 16]) = 0;
			
 
				-#endif
			
 
				-
			
 
				-	*((uint64_t *) &padblock[i - 8]) = cpu_to_be64(total_len << 3);
			
 
				-
			
 
				-	/* Number of extra blocks to hash */
			
 
				-	return i >> SHA256_LOG2_BLOCK_SIZE;
			
 
				-}
			
 
				-
			
 
				-static struct sha256_hash_ctx
			
 
				-		*sha256_ctx_mgr_resubmit(struct sha256_ctx_mgr *mgr,
			
 
				-					struct sha256_hash_ctx *ctx)
			
 
				-{
			
 
				-	while (ctx) {
			
 
				-		if (ctx->status & HASH_CTX_STS_COMPLETE) {
			
 
				-			/* Clear PROCESSING bit */
			
 
				-			ctx->status = HASH_CTX_STS_COMPLETE;
			
 
				-			return ctx;
			
 
				-		}
			
 
				-
			
 
				-		/*
			
 
				-		 * If the extra blocks are empty, begin hashing what remains
			
 
				-		 * in the user's buffer.
			
 
				-		 */
			
 
				-		if (ctx->partial_block_buffer_length == 0 &&
			
 
				-		    ctx->incoming_buffer_length) {
			
 
				-
			
 
				-			const void *buffer = ctx->incoming_buffer;
			
 
				-			uint32_t len = ctx->incoming_buffer_length;
			
 
				-			uint32_t copy_len;
			
 
				-
			
 
				-			/*
			
 
				-			 * Only entire blocks can be hashed.
			
 
				-			 * Copy remainder to extra blocks buffer.
			
 
				-			 */
			
 
				-			copy_len = len & (SHA256_BLOCK_SIZE-1);
			
 
				-
			
 
				-			if (copy_len) {
			
 
				-				len -= copy_len;
			
 
				-				memcpy(ctx->partial_block_buffer,
			
 
				-				       ((const char *) buffer + len),
			
 
				-				       copy_len);
			
 
				-				ctx->partial_block_buffer_length = copy_len;
			
 
				-			}
			
 
				-
			
 
				-			ctx->incoming_buffer_length = 0;
			
 
				-
			
 
				-			/* len should be a multiple of the block size now */
			
 
				-			assert((len % SHA256_BLOCK_SIZE) == 0);
			
 
				-
			
 
				-			/* Set len to the number of blocks to be hashed */
			
 
				-			len >>= SHA256_LOG2_BLOCK_SIZE;
			
 
				-
			
 
				-			if (len) {
			
 
				-
			
 
				-				ctx->job.buffer = (uint8_t *) buffer;
			
 
				-				ctx->job.len = len;
			
 
				-				ctx = (struct sha256_hash_ctx *)
			
 
				-				sha256_job_mgr_submit(&mgr->mgr, &ctx->job);
			
 
				-				continue;
			
 
				-			}
			
 
				-		}
			
 
				-
			
 
				-		/*
			
 
				-		 * If the extra blocks are not empty, then we are
			
 
				-		 * either on the last block(s) or we need more
			
 
				-		 * user input before continuing.
			
 
				-		 */
			
 
				-		if (ctx->status & HASH_CTX_STS_LAST) {
			
 
				-
			
 
				-			uint8_t *buf = ctx->partial_block_buffer;
			
 
				-			uint32_t n_extra_blocks =
			
 
				-				sha256_pad(buf, ctx->total_length);
			
 
				-
			
 
				-			ctx->status = (HASH_CTX_STS_PROCESSING |
			
 
				-				       HASH_CTX_STS_COMPLETE);
			
 
				-			ctx->job.buffer = buf;
			
 
				-			ctx->job.len = (uint32_t) n_extra_blocks;
			
 
				-			ctx = (struct sha256_hash_ctx *)
			
 
				-				sha256_job_mgr_submit(&mgr->mgr, &ctx->job);
			
 
				-			continue;
			
 
				-		}
			
 
				-
			
 
				-		ctx->status = HASH_CTX_STS_IDLE;
			
 
				-		return ctx;
			
 
				-	}
			
 
				-
			
 
				-	return NULL;
			
 
				-}
			
 
				-
			
 
				-static struct sha256_hash_ctx
			
 
				-		*sha256_ctx_mgr_get_comp_ctx(struct sha256_ctx_mgr *mgr)
			
 
				-{
			
 
				-	/*
			
 
				-	 * If get_comp_job returns NULL, there are no jobs complete.
			
 
				-	 * If get_comp_job returns a job, verify that it is safe to return to
			
 
				-	 * the user. If it is not ready, resubmit the job to finish processing.
			
 
				-	 * If sha256_ctx_mgr_resubmit returned a job, it is ready to be
			
 
				-	 * returned. Otherwise, all jobs currently being managed by the
			
 
				-	 * hash_ctx_mgr still need processing.
			
 
				-	 */
			
 
				-	struct sha256_hash_ctx *ctx;
			
 
				-
			
 
				-	ctx = (struct sha256_hash_ctx *) sha256_job_mgr_get_comp_job(&mgr->mgr);
			
 
				-	return sha256_ctx_mgr_resubmit(mgr, ctx);
			
 
				-}
			
 
				-
			
 
				-static void sha256_ctx_mgr_init(struct sha256_ctx_mgr *mgr)
			
 
				-{
			
 
				-	sha256_job_mgr_init(&mgr->mgr);
			
 
				-}
			
 
				-
			
 
				-static struct sha256_hash_ctx *sha256_ctx_mgr_submit(struct sha256_ctx_mgr *mgr,
			
 
				-					  struct sha256_hash_ctx *ctx,
			
 
				-					  const void *buffer,
			
 
				-					  uint32_t len,
			
 
				-					  int flags)
			
 
				-{
			
 
				-	if (flags & ~(HASH_UPDATE | HASH_LAST)) {
			
 
				-		/* User should not pass anything other than UPDATE or LAST */
			
 
				-		ctx->error = HASH_CTX_ERROR_INVALID_FLAGS;
			
 
				-		return ctx;
			
 
				-	}
			
 
				-
			
 
				-	if (ctx->status & HASH_CTX_STS_PROCESSING) {
			
 
				-		/* Cannot submit to a currently processing job. */
			
 
				-		ctx->error = HASH_CTX_ERROR_ALREADY_PROCESSING;
			
 
				-		return ctx;
			
 
				-	}
			
 
				-
			
 
				-	if (ctx->status & HASH_CTX_STS_COMPLETE) {
			
 
				-		/* Cannot update a finished job. */
			
 
				-		ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED;
			
 
				-		return ctx;
			
 
				-	}
			
 
				-
			
 
				-	/* If we made it here, there was no error during this call to submit */
			
 
				-	ctx->error = HASH_CTX_ERROR_NONE;
			
 
				-
			
 
				-	/* Store buffer ptr info from user */
			
 
				-	ctx->incoming_buffer = buffer;
			
 
				-	ctx->incoming_buffer_length = len;
			
 
				-
			
 
				-	/*
			
 
				-	 * Store the user's request flags and mark this ctx as currently
			
 
				-	 * being processed.
			
 
				-	 */
			
 
				-	ctx->status = (flags & HASH_LAST) ?
			
 
				-			(HASH_CTX_STS_PROCESSING | HASH_CTX_STS_LAST) :
			
 
				-			HASH_CTX_STS_PROCESSING;
			
 
				-
			
 
				-	/* Advance byte counter */
			
 
				-	ctx->total_length += len;
			
 
				-
			
 
				-	/*
			
 
				-	 * If there is anything currently buffered in the extra blocks,
			
 
				-	 * append to it until it contains a whole block.
			
 
				-	 * Or if the user's buffer contains less than a whole block,
			
 
				-	 * append as much as possible to the extra block.
			
 
				-	 */
			
 
				-	if (ctx->partial_block_buffer_length || len < SHA256_BLOCK_SIZE) {
			
 
				-		/*
			
 
				-		 * Compute how many bytes to copy from user buffer into
			
 
				-		 * extra block
			
 
				-		 */
			
 
				-		uint32_t copy_len = SHA256_BLOCK_SIZE -
			
 
				-					ctx->partial_block_buffer_length;
			
 
				-		if (len < copy_len)
			
 
				-			copy_len = len;
			
 
				-
			
 
				-		if (copy_len) {
			
 
				-			/* Copy and update relevant pointers and counters */
			
 
				-			memcpy(
			
 
				-		&ctx->partial_block_buffer[ctx->partial_block_buffer_length],
			
 
				-				buffer, copy_len);
			
 
				-
			
 
				-			ctx->partial_block_buffer_length += copy_len;
			
 
				-			ctx->incoming_buffer = (const void *)
			
 
				-					((const char *)buffer + copy_len);
			
 
				-			ctx->incoming_buffer_length = len - copy_len;
			
 
				-		}
			
 
				-
			
 
				-		/* The extra block should never contain more than 1 block */
			
 
				-		assert(ctx->partial_block_buffer_length <= SHA256_BLOCK_SIZE);
			
 
				-
			
 
				-		/*
			
 
				-		 * If the extra block buffer contains exactly 1 block,
			
 
				-		 * it can be hashed.
			
 
				-		 */
			
 
				-		if (ctx->partial_block_buffer_length >= SHA256_BLOCK_SIZE) {
			
 
				-			ctx->partial_block_buffer_length = 0;
			
 
				-
			
 
				-			ctx->job.buffer = ctx->partial_block_buffer;
			
 
				-			ctx->job.len = 1;
			
 
				-			ctx = (struct sha256_hash_ctx *)
			
 
				-				sha256_job_mgr_submit(&mgr->mgr, &ctx->job);
			
 
				-		}
			
 
				-	}
			
 
				-
			
 
				-	return sha256_ctx_mgr_resubmit(mgr, ctx);
			
 
				-}
			
 
				-
			
 
				-static struct sha256_hash_ctx *sha256_ctx_mgr_flush(struct sha256_ctx_mgr *mgr)
			
 
				-{
			
 
				-	struct sha256_hash_ctx *ctx;
			
 
				-
			
 
				-	while (1) {
			
 
				-		ctx = (struct sha256_hash_ctx *)
			
 
				-					sha256_job_mgr_flush(&mgr->mgr);
			
 
				-
			
 
				-		/* If flush returned 0, there are no more jobs in flight. */
			
 
				-		if (!ctx)
			
 
				-			return NULL;
			
 
				-
			
 
				-		/*
			
 
				-		 * If flush returned a job, resubmit the job to finish
			
 
				-		 * processing.
			
 
				-		 */
			
 
				-		ctx = sha256_ctx_mgr_resubmit(mgr, ctx);
			
 
				-
			
 
				-		/*
			
 
				-		 * If sha256_ctx_mgr_resubmit returned a job, it is ready to
			
 
				-		 * be returned. Otherwise, all jobs currently being managed by
			
 
				-		 * the sha256_ctx_mgr still need processing. Loop.
			
 
				-		 */
			
 
				-		if (ctx)
			
 
				-			return ctx;
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-static int sha256_mb_init(struct ahash_request *areq)
			
 
				-{
			
 
				-	struct sha256_hash_ctx *sctx = ahash_request_ctx(areq);
			
 
				-
			
 
				-	hash_ctx_init(sctx);
			
 
				-	sctx->job.result_digest[0] = SHA256_H0;
			
 
				-	sctx->job.result_digest[1] = SHA256_H1;
			
 
				-	sctx->job.result_digest[2] = SHA256_H2;
			
 
				-	sctx->job.result_digest[3] = SHA256_H3;
			
 
				-	sctx->job.result_digest[4] = SHA256_H4;
			
 
				-	sctx->job.result_digest[5] = SHA256_H5;
			
 
				-	sctx->job.result_digest[6] = SHA256_H6;
			
 
				-	sctx->job.result_digest[7] = SHA256_H7;
			
 
				-	sctx->total_length = 0;
			
 
				-	sctx->partial_block_buffer_length = 0;
			
 
				-	sctx->status = HASH_CTX_STS_IDLE;
			
 
				-
			
 
				-	return 0;
			
 
				-}
			
 
				-
			
 
				-static int sha256_mb_set_results(struct mcryptd_hash_request_ctx *rctx)
			
 
				-{
			
 
				-	int	i;
			
 
				-	struct	sha256_hash_ctx *sctx = ahash_request_ctx(&rctx->areq);
			
 
				-	__be32	*dst = (__be32 *) rctx->out;
			
 
				-
			
 
				-	for (i = 0; i < 8; ++i)
			
 
				-		dst[i] = cpu_to_be32(sctx->job.result_digest[i]);
			
 
				-
			
 
				-	return 0;
			
 
				-}
			
 
				-
			
 
				-static int sha_finish_walk(struct mcryptd_hash_request_ctx **ret_rctx,
			
 
				-			struct mcryptd_alg_cstate *cstate, bool flush)
			
 
				-{
			
 
				-	int	flag = HASH_UPDATE;
			
 
				-	int	nbytes, err = 0;
			
 
				-	struct mcryptd_hash_request_ctx *rctx = *ret_rctx;
			
 
				-	struct sha256_hash_ctx *sha_ctx;
			
 
				-
			
 
				-	/* more work ? */
			
 
				-	while (!(rctx->flag & HASH_DONE)) {
			
 
				-		nbytes = crypto_ahash_walk_done(&rctx->walk, 0);
			
 
				-		if (nbytes < 0) {
			
 
				-			err = nbytes;
			
 
				-			goto out;
			
 
				-		}
			
 
				-		/* check if the walk is done */
			
 
				-		if (crypto_ahash_walk_last(&rctx->walk)) {
			
 
				-			rctx->flag |= HASH_DONE;
			
 
				-			if (rctx->flag & HASH_FINAL)
			
 
				-				flag |= HASH_LAST;
			
 
				-
			
 
				-		}
			
 
				-		sha_ctx = (struct sha256_hash_ctx *)
			
 
				-						ahash_request_ctx(&rctx->areq);
			
 
				-		kernel_fpu_begin();
			
 
				-		sha_ctx = sha256_ctx_mgr_submit(cstate->mgr, sha_ctx,
			
 
				-						rctx->walk.data, nbytes, flag);
			
 
				-		if (!sha_ctx) {
			
 
				-			if (flush)
			
 
				-				sha_ctx = sha256_ctx_mgr_flush(cstate->mgr);
			
 
				-		}
			
 
				-		kernel_fpu_end();
			
 
				-		if (sha_ctx)
			
 
				-			rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
			
 
				-		else {
			
 
				-			rctx = NULL;
			
 
				-			goto out;
			
 
				-		}
			
 
				-	}
			
 
				-
			
 
				-	/* copy the results */
			
 
				-	if (rctx->flag & HASH_FINAL)
			
 
				-		sha256_mb_set_results(rctx);
			
 
				-
			
 
				-out:
			
 
				-	*ret_rctx = rctx;
			
 
				-	return err;
			
 
				-}
			
 
				-
			
 
				-static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx,
			
 
				-			    struct mcryptd_alg_cstate *cstate,
			
 
				-			    int err)
			
 
				-{
			
 
				-	struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
			
 
				-	struct sha256_hash_ctx *sha_ctx;
			
 
				-	struct mcryptd_hash_request_ctx *req_ctx;
			
 
				-	int ret;
			
 
				-
			
 
				-	/* remove from work list */
			
 
				-	spin_lock(&cstate->work_lock);
			
 
				-	list_del(&rctx->waiter);
			
 
				-	spin_unlock(&cstate->work_lock);
			
 
				-
			
 
				-	if (irqs_disabled())
			
 
				-		rctx->complete(&req->base, err);
			
 
				-	else {
			
 
				-		local_bh_disable();
			
 
				-		rctx->complete(&req->base, err);
			
 
				-		local_bh_enable();
			
 
				-	}
			
 
				-
			
 
				-	/* check to see if there are other jobs that are done */
			
 
				-	sha_ctx = sha256_ctx_mgr_get_comp_ctx(cstate->mgr);
			
 
				-	while (sha_ctx) {
			
 
				-		req_ctx = cast_hash_to_mcryptd_ctx(sha_ctx);
			
 
				-		ret = sha_finish_walk(&req_ctx, cstate, false);
			
 
				-		if (req_ctx) {
			
 
				-			spin_lock(&cstate->work_lock);
			
 
				-			list_del(&req_ctx->waiter);
			
 
				-			spin_unlock(&cstate->work_lock);
			
 
				-
			
 
				-			req = cast_mcryptd_ctx_to_req(req_ctx);
			
 
				-			if (irqs_disabled())
			
 
				-				req_ctx->complete(&req->base, ret);
			
 
				-			else {
			
 
				-				local_bh_disable();
			
 
				-				req_ctx->complete(&req->base, ret);
			
 
				-				local_bh_enable();
			
 
				-			}
			
 
				-		}
			
 
				-		sha_ctx = sha256_ctx_mgr_get_comp_ctx(cstate->mgr);
			
 
				-	}
			
 
				-
			
 
				-	return 0;
			
 
				-}
			
 
				-
			
 
				-static void sha256_mb_add_list(struct mcryptd_hash_request_ctx *rctx,
			
 
				-			     struct mcryptd_alg_cstate *cstate)
			
 
				-{
			
 
				-	unsigned long next_flush;
			
 
				-	unsigned long delay = usecs_to_jiffies(FLUSH_INTERVAL);
			
 
				-
			
 
				-	/* initialize tag */
			
 
				-	rctx->tag.arrival = jiffies;    /* tag the arrival time */
			
 
				-	rctx->tag.seq_num = cstate->next_seq_num++;
			
 
				-	next_flush = rctx->tag.arrival + delay;
			
 
				-	rctx->tag.expire = next_flush;
			
 
				-
			
 
				-	spin_lock(&cstate->work_lock);
			
 
				-	list_add_tail(&rctx->waiter, &cstate->work_list);
			
 
				-	spin_unlock(&cstate->work_lock);
			
 
				-
			
 
				-	mcryptd_arm_flusher(cstate, delay);
			
 
				-}
			
 
				-
			
 
				-static int sha256_mb_update(struct ahash_request *areq)
			
 
				-{
			
 
				-	struct mcryptd_hash_request_ctx *rctx =
			
 
				-		container_of(areq, struct mcryptd_hash_request_ctx, areq);
			
 
				-	struct mcryptd_alg_cstate *cstate =
			
 
				-				this_cpu_ptr(sha256_mb_alg_state.alg_cstate);
			
 
				-
			
 
				-	struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
			
 
				-	struct sha256_hash_ctx *sha_ctx;
			
 
				-	int ret = 0, nbytes;
			
 
				-
			
 
				-	/* sanity check */
			
 
				-	if (rctx->tag.cpu != smp_processor_id()) {
			
 
				-		pr_err("mcryptd error: cpu clash\n");
			
 
				-		goto done;
			
 
				-	}
			
 
				-
			
 
				-	/* need to init context */
			
 
				-	req_ctx_init(rctx, areq);
			
 
				-
			
 
				-	nbytes = crypto_ahash_walk_first(req, &rctx->walk);
			
 
				-
			
 
				-	if (nbytes < 0) {
			
 
				-		ret = nbytes;
			
 
				-		goto done;
			
 
				-	}
			
 
				-
			
 
				-	if (crypto_ahash_walk_last(&rctx->walk))
			
 
				-		rctx->flag |= HASH_DONE;
			
 
				-
			
 
				-	/* submit */
			
 
				-	sha_ctx = (struct sha256_hash_ctx *) ahash_request_ctx(areq);
			
 
				-	sha256_mb_add_list(rctx, cstate);
			
 
				-	kernel_fpu_begin();
			
 
				-	sha_ctx = sha256_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data,
			
 
				-							nbytes, HASH_UPDATE);
			
 
				-	kernel_fpu_end();
			
 
				-
			
 
				-	/* check if anything is returned */
			
 
				-	if (!sha_ctx)
			
 
				-		return -EINPROGRESS;
			
 
				-
			
 
				-	if (sha_ctx->error) {
			
 
				-		ret = sha_ctx->error;
			
 
				-		rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
			
 
				-		goto done;
			
 
				-	}
			
 
				-
			
 
				-	rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
			
 
				-	ret = sha_finish_walk(&rctx, cstate, false);
			
 
				-
			
 
				-	if (!rctx)
			
 
				-		return -EINPROGRESS;
			
 
				-done:
			
 
				-	sha_complete_job(rctx, cstate, ret);
			
 
				-	return ret;
			
 
				-}
			
 
				-
			
 
				-static int sha256_mb_finup(struct ahash_request *areq)
			
 
				-{
			
 
				-	struct mcryptd_hash_request_ctx *rctx =
			
 
				-		container_of(areq, struct mcryptd_hash_request_ctx, areq);
			
 
				-	struct mcryptd_alg_cstate *cstate =
			
 
				-				this_cpu_ptr(sha256_mb_alg_state.alg_cstate);
			
 
				-
			
 
				-	struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
			
 
				-	struct sha256_hash_ctx *sha_ctx;
			
 
				-	int ret = 0, flag = HASH_UPDATE, nbytes;
			
 
				-
			
 
				-	/* sanity check */
			
 
				-	if (rctx->tag.cpu != smp_processor_id()) {
			
 
				-		pr_err("mcryptd error: cpu clash\n");
			
 
				-		goto done;
			
 
				-	}
			
 
				-
			
 
				-	/* need to init context */
			
 
				-	req_ctx_init(rctx, areq);
			
 
				-
			
 
				-	nbytes = crypto_ahash_walk_first(req, &rctx->walk);
			
 
				-
			
 
				-	if (nbytes < 0) {
			
 
				-		ret = nbytes;
			
 
				-		goto done;
			
 
				-	}
			
 
				-
			
 
				-	if (crypto_ahash_walk_last(&rctx->walk)) {
			
 
				-		rctx->flag |= HASH_DONE;
			
 
				-		flag = HASH_LAST;
			
 
				-	}
			
 
				-
			
 
				-	/* submit */
			
 
				-	rctx->flag |= HASH_FINAL;
			
 
				-	sha_ctx = (struct sha256_hash_ctx *) ahash_request_ctx(areq);
			
 
				-	sha256_mb_add_list(rctx, cstate);
			
 
				-
			
 
				-	kernel_fpu_begin();
			
 
				-	sha_ctx = sha256_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data,
			
 
				-								nbytes, flag);
			
 
				-	kernel_fpu_end();
			
 
				-
			
 
				-	/* check if anything is returned */
			
 
				-	if (!sha_ctx)
			
 
				-		return -EINPROGRESS;
			
 
				-
			
 
				-	if (sha_ctx->error) {
			
 
				-		ret = sha_ctx->error;
			
 
				-		goto done;
			
 
				-	}
			
 
				-
			
 
				-	rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
			
 
				-	ret = sha_finish_walk(&rctx, cstate, false);
			
 
				-	if (!rctx)
			
 
				-		return -EINPROGRESS;
			
 
				-done:
			
 
				-	sha_complete_job(rctx, cstate, ret);
			
 
				-	return ret;
			
 
				-}
			
 
				-
			
 
				-static int sha256_mb_final(struct ahash_request *areq)
			
 
				-{
			
 
				-	struct mcryptd_hash_request_ctx *rctx =
			
 
				-			container_of(areq, struct mcryptd_hash_request_ctx,
			
 
				-			areq);
			
 
				-	struct mcryptd_alg_cstate *cstate =
			
 
				-				this_cpu_ptr(sha256_mb_alg_state.alg_cstate);
			
 
				-
			
 
				-	struct sha256_hash_ctx *sha_ctx;
			
 
				-	int ret = 0;
			
 
				-	u8 data;
			
 
				-
			
 
				-	/* sanity check */
			
 
				-	if (rctx->tag.cpu != smp_processor_id()) {
			
 
				-		pr_err("mcryptd error: cpu clash\n");
			
 
				-		goto done;
			
 
				-	}
			
 
				-
			
 
				-	/* need to init context */
			
 
				-	req_ctx_init(rctx, areq);
			
 
				-
			
 
				-	rctx->flag |= HASH_DONE | HASH_FINAL;
			
 
				-
			
 
				-	sha_ctx = (struct sha256_hash_ctx *) ahash_request_ctx(areq);
			
 
				-	/* flag HASH_FINAL and 0 data size */
			
 
				-	sha256_mb_add_list(rctx, cstate);
			
 
				-	kernel_fpu_begin();
			
 
				-	sha_ctx = sha256_ctx_mgr_submit(cstate->mgr, sha_ctx, &data, 0,
			
 
				-								HASH_LAST);
			
 
				-	kernel_fpu_end();
			
 
				-
			
 
				-	/* check if anything is returned */
			
 
				-	if (!sha_ctx)
			
 
				-		return -EINPROGRESS;
			
 
				-
			
 
				-	if (sha_ctx->error) {
			
 
				-		ret = sha_ctx->error;
			
 
				-		rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
			
 
				-		goto done;
			
 
				-	}
			
 
				-
			
 
				-	rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
			
 
				-	ret = sha_finish_walk(&rctx, cstate, false);
			
 
				-	if (!rctx)
			
 
				-		return -EINPROGRESS;
			
 
				-done:
			
 
				-	sha_complete_job(rctx, cstate, ret);
			
 
				-	return ret;
			
 
				-}
			
 
				-
			
 
				-static int sha256_mb_export(struct ahash_request *areq, void *out)
			
 
				-{
			
 
				-	struct sha256_hash_ctx *sctx = ahash_request_ctx(areq);
			
 
				-
			
 
				-	memcpy(out, sctx, sizeof(*sctx));
			
 
				-
			
 
				-	return 0;
			
 
				-}
			
 
				-
			
 
				-static int sha256_mb_import(struct ahash_request *areq, const void *in)
			
 
				-{
			
 
				-	struct sha256_hash_ctx *sctx = ahash_request_ctx(areq);
			
 
				-
			
 
				-	memcpy(sctx, in, sizeof(*sctx));
			
 
				-
			
 
				-	return 0;
			
 
				-}
			
 
				-
			
 
				-static int sha256_mb_async_init_tfm(struct crypto_tfm *tfm)
			
 
				-{
			
 
				-	struct mcryptd_ahash *mcryptd_tfm;
			
 
				-	struct sha256_mb_ctx *ctx = crypto_tfm_ctx(tfm);
			
 
				-	struct mcryptd_hash_ctx *mctx;
			
 
				-
			
 
				-	mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha256-mb",
			
 
				-						CRYPTO_ALG_INTERNAL,
			
 
				-						CRYPTO_ALG_INTERNAL);
			
 
				-	if (IS_ERR(mcryptd_tfm))
			
 
				-		return PTR_ERR(mcryptd_tfm);
			
 
				-	mctx = crypto_ahash_ctx(&mcryptd_tfm->base);
			
 
				-	mctx->alg_state = &sha256_mb_alg_state;
			
 
				-	ctx->mcryptd_tfm = mcryptd_tfm;
			
 
				-	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
			
 
				-				sizeof(struct ahash_request) +
			
 
				-				crypto_ahash_reqsize(&mcryptd_tfm->base));
			
 
				-
			
 
				-	return 0;
			
 
				-}
			
 
				-
			
 
				-static void sha256_mb_async_exit_tfm(struct crypto_tfm *tfm)
			
 
				-{
			
 
				-	struct sha256_mb_ctx *ctx = crypto_tfm_ctx(tfm);
			
 
				-
			
 
				-	mcryptd_free_ahash(ctx->mcryptd_tfm);
			
 
				-}
			
 
				-
			
 
				-static int sha256_mb_areq_init_tfm(struct crypto_tfm *tfm)
			
 
				-{
			
 
				-	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
			
 
				-				sizeof(struct ahash_request) +
			
 
				-				sizeof(struct sha256_hash_ctx));
			
 
				-
			
 
				-	return 0;
			
 
				-}
			
 
				-
			
 
				-static void sha256_mb_areq_exit_tfm(struct crypto_tfm *tfm)
			
 
				-{
			
 
				-	struct sha256_mb_ctx *ctx = crypto_tfm_ctx(tfm);
			
 
				-
			
 
				-	mcryptd_free_ahash(ctx->mcryptd_tfm);
			
 
				-}
			
 
				-
			
 
				-static struct ahash_alg sha256_mb_areq_alg = {
			
 
				-	.init		=	sha256_mb_init,
			
 
				-	.update		=	sha256_mb_update,
			
 
				-	.final		=	sha256_mb_final,
			
 
				-	.finup		=	sha256_mb_finup,
			
 
				-	.export		=	sha256_mb_export,
			
 
				-	.import		=	sha256_mb_import,
			
 
				-	.halg		=	{
			
 
				-	.digestsize	=	SHA256_DIGEST_SIZE,
			
 
				-	.statesize	=	sizeof(struct sha256_hash_ctx),
			
 
				-		.base		=	{
			
 
				-			.cra_name	 = "__sha256-mb",
			
 
				-			.cra_driver_name = "__intel_sha256-mb",
			
 
				-			.cra_priority	 = 100,
			
 
				-			/*
			
 
				-			 * use ASYNC flag as some buffers in multi-buffer
			
 
				-			 * algo may not have completed before hashing thread
			
 
				-			 * sleep
			
 
				-			 */
			
 
				-			.cra_flags	= CRYPTO_ALG_ASYNC |
			
 
				-					  CRYPTO_ALG_INTERNAL,
			
 
				-			.cra_blocksize	= SHA256_BLOCK_SIZE,
			
 
				-			.cra_module	= THIS_MODULE,
			
 
				-			.cra_list	= LIST_HEAD_INIT
			
 
				-					(sha256_mb_areq_alg.halg.base.cra_list),
			
 
				-			.cra_init	= sha256_mb_areq_init_tfm,
			
 
				-			.cra_exit	= sha256_mb_areq_exit_tfm,
			
 
				-			.cra_ctxsize	= sizeof(struct sha256_hash_ctx),
			
 
				-		}
			
 
				-	}
			
 
				-};
			
 
				-
			
 
				-static int sha256_mb_async_init(struct ahash_request *req)
			
 
				-{
			
 
				-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
			
 
				-	struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm);
			
 
				-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
			
 
				-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
			
 
				-
			
 
				-	memcpy(mcryptd_req, req, sizeof(*req));
			
 
				-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
			
 
				-	return crypto_ahash_init(mcryptd_req);
			
 
				-}
			
 
				-
			
 
				-static int sha256_mb_async_update(struct ahash_request *req)
			
 
				-{
			
 
				-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
			
 
				-
			
 
				-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
			
 
				-	struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm);
			
 
				-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
			
 
				-
			
 
				-	memcpy(mcryptd_req, req, sizeof(*req));
			
 
				-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
			
 
				-	return crypto_ahash_update(mcryptd_req);
			
 
				-}
			
 
				-
			
 
				-static int sha256_mb_async_finup(struct ahash_request *req)
			
 
				-{
			
 
				-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
			
 
				-
			
 
				-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
			
 
				-	struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm);
			
 
				-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
			
 
				-
			
 
				-	memcpy(mcryptd_req, req, sizeof(*req));
			
 
				-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
			
 
				-	return crypto_ahash_finup(mcryptd_req);
			
 
				-}
			
 
				-
			
 
				-static int sha256_mb_async_final(struct ahash_request *req)
			
 
				-{
			
 
				-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
			
 
				-
			
 
				-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
			
 
				-	struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm);
			
 
				-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
			
 
				-
			
 
				-	memcpy(mcryptd_req, req, sizeof(*req));
			
 
				-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
			
 
				-	return crypto_ahash_final(mcryptd_req);
			
 
				-}
			
 
				-
			
 
				-static int sha256_mb_async_digest(struct ahash_request *req)
			
 
				-{
			
 
				-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
			
 
				-	struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm);
			
 
				-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
			
 
				-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
			
 
				-
			
 
				-	memcpy(mcryptd_req, req, sizeof(*req));
			
 
				-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
			
 
				-	return crypto_ahash_digest(mcryptd_req);
			
 
				-}
			
 
				-
			
 
				-static int sha256_mb_async_export(struct ahash_request *req, void *out)
			
 
				-{
			
 
				-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
			
 
				-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
			
 
				-	struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm);
			
 
				-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
			
 
				-
			
 
				-	memcpy(mcryptd_req, req, sizeof(*req));
			
 
				-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
			
 
				-	return crypto_ahash_export(mcryptd_req, out);
			
 
				-}
			
 
				-
			
 
				-static int sha256_mb_async_import(struct ahash_request *req, const void *in)
			
 
				-{
			
 
				-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
			
 
				-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
			
 
				-	struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm);
			
 
				-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
			
 
				-	struct crypto_ahash *child = mcryptd_ahash_child(mcryptd_tfm);
			
 
				-	struct mcryptd_hash_request_ctx *rctx;
			
 
				-	struct ahash_request *areq;
			
 
				-
			
 
				-	memcpy(mcryptd_req, req, sizeof(*req));
			
 
				-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
			
 
				-	rctx = ahash_request_ctx(mcryptd_req);
			
 
				-	areq = &rctx->areq;
			
 
				-
			
 
				-	ahash_request_set_tfm(areq, child);
			
 
				-	ahash_request_set_callback(areq, CRYPTO_TFM_REQ_MAY_SLEEP,
			
 
				-					rctx->complete, req);
			
 
				-
			
 
				-	return crypto_ahash_import(mcryptd_req, in);
			
 
				-}
			
 
				-
			
 
				-static struct ahash_alg sha256_mb_async_alg = {
			
 
				-	.init           = sha256_mb_async_init,
			
 
				-	.update         = sha256_mb_async_update,
			
 
				-	.final          = sha256_mb_async_final,
			
 
				-	.finup          = sha256_mb_async_finup,
			
 
				-	.export         = sha256_mb_async_export,
			
 
				-	.import         = sha256_mb_async_import,
			
 
				-	.digest         = sha256_mb_async_digest,
			
 
				-	.halg = {
			
 
				-		.digestsize     = SHA256_DIGEST_SIZE,
			
 
				-		.statesize      = sizeof(struct sha256_hash_ctx),
			
 
				-		.base = {
			
 
				-			.cra_name               = "sha256",
			
 
				-			.cra_driver_name        = "sha256_mb",
			
 
				-			/*
			
 
				-			 * Low priority, since with few concurrent hash requests
			
 
				-			 * this is extremely slow due to the flush delay.  Users
			
 
				-			 * whose workloads would benefit from this can request
			
 
				-			 * it explicitly by driver name, or can increase its
			
 
				-			 * priority at runtime using NETLINK_CRYPTO.
			
 
				-			 */
			
 
				-			.cra_priority           = 50,
			
 
				-			.cra_flags              = CRYPTO_ALG_ASYNC,
			
 
				-			.cra_blocksize          = SHA256_BLOCK_SIZE,
			
 
				-			.cra_module             = THIS_MODULE,
			
 
				-			.cra_list               = LIST_HEAD_INIT
			
 
				-				(sha256_mb_async_alg.halg.base.cra_list),
			
 
				-			.cra_init               = sha256_mb_async_init_tfm,
			
 
				-			.cra_exit               = sha256_mb_async_exit_tfm,
			
 
				-			.cra_ctxsize		= sizeof(struct sha256_mb_ctx),
			
 
				-			.cra_alignmask		= 0,
			
 
				-		},
			
 
				-	},
			
 
				-};
			
 
				-
			
 
				-static unsigned long sha256_mb_flusher(struct mcryptd_alg_cstate *cstate)
			
 
				-{
			
 
				-	struct mcryptd_hash_request_ctx *rctx;
			
 
				-	unsigned long cur_time;
			
 
				-	unsigned long next_flush = 0;
			
 
				-	struct sha256_hash_ctx *sha_ctx;
			
 
				-
			
 
				-
			
 
				-	cur_time = jiffies;
			
 
				-
			
 
				-	while (!list_empty(&cstate->work_list)) {
			
 
				-		rctx = list_entry(cstate->work_list.next,
			
 
				-				struct mcryptd_hash_request_ctx, waiter);
			
 
				-		if (time_before(cur_time, rctx->tag.expire))
			
 
				-			break;
			
 
				-		kernel_fpu_begin();
			
 
				-		sha_ctx = (struct sha256_hash_ctx *)
			
 
				-					sha256_ctx_mgr_flush(cstate->mgr);
			
 
				-		kernel_fpu_end();
			
 
				-		if (!sha_ctx) {
			
 
				-			pr_err("sha256_mb error: nothing got"
			
 
				-					" flushed for non-empty list\n");
			
 
				-			break;
			
 
				-		}
			
 
				-		rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
			
 
				-		sha_finish_walk(&rctx, cstate, true);
			
 
				-		sha_complete_job(rctx, cstate, 0);
			
 
				-	}
			
 
				-
			
 
				-	if (!list_empty(&cstate->work_list)) {
			
 
				-		rctx = list_entry(cstate->work_list.next,
			
 
				-				struct mcryptd_hash_request_ctx, waiter);
			
 
				-		/* get the hash context and then flush time */
			
 
				-		next_flush = rctx->tag.expire;
			
 
				-		mcryptd_arm_flusher(cstate, get_delay(next_flush));
			
 
				-	}
			
 
				-	return next_flush;
			
 
				-}
			
 
				-
			
 
				-static int __init sha256_mb_mod_init(void)
			
 
				-{
			
 
				-
			
 
				-	int cpu;
			
 
				-	int err;
			
 
				-	struct mcryptd_alg_cstate *cpu_state;
			
 
				-
			
 
				-	/* check for dependent cpu features */
			
 
				-	if (!boot_cpu_has(X86_FEATURE_AVX2) ||
			
 
				-	    !boot_cpu_has(X86_FEATURE_BMI2))
			
 
				-		return -ENODEV;
			
 
				-
			
 
				-	/* initialize multibuffer structures */
			
 
				-	sha256_mb_alg_state.alg_cstate = alloc_percpu
			
 
				-						(struct mcryptd_alg_cstate);
			
 
				-
			
 
				-	sha256_job_mgr_init = sha256_mb_mgr_init_avx2;
			
 
				-	sha256_job_mgr_submit = sha256_mb_mgr_submit_avx2;
			
 
				-	sha256_job_mgr_flush = sha256_mb_mgr_flush_avx2;
			
 
				-	sha256_job_mgr_get_comp_job = sha256_mb_mgr_get_comp_job_avx2;
			
 
				-
			
 
				-	if (!sha256_mb_alg_state.alg_cstate)
			
 
				-		return -ENOMEM;
			
 
				-	for_each_possible_cpu(cpu) {
			
 
				-		cpu_state = per_cpu_ptr(sha256_mb_alg_state.alg_cstate, cpu);
			
 
				-		cpu_state->next_flush = 0;
			
 
				-		cpu_state->next_seq_num = 0;
			
 
				-		cpu_state->flusher_engaged = false;
			
 
				-		INIT_DELAYED_WORK(&cpu_state->flush, mcryptd_flusher);
			
 
				-		cpu_state->cpu = cpu;
			
 
				-		cpu_state->alg_state = &sha256_mb_alg_state;
			
 
				-		cpu_state->mgr = kzalloc(sizeof(struct sha256_ctx_mgr),
			
 
				-					GFP_KERNEL);
			
 
				-		if (!cpu_state->mgr)
			
 
				-			goto err2;
			
 
				-		sha256_ctx_mgr_init(cpu_state->mgr);
			
 
				-		INIT_LIST_HEAD(&cpu_state->work_list);
			
 
				-		spin_lock_init(&cpu_state->work_lock);
			
 
				-	}
			
 
				-	sha256_mb_alg_state.flusher = &sha256_mb_flusher;
			
 
				-
			
 
				-	err = crypto_register_ahash(&sha256_mb_areq_alg);
			
 
				-	if (err)
			
 
				-		goto err2;
			
 
				-	err = crypto_register_ahash(&sha256_mb_async_alg);
			
 
				-	if (err)
			
 
				-		goto err1;
			
 
				-
			
 
				-
			
 
				-	return 0;
			
 
				-err1:
			
 
				-	crypto_unregister_ahash(&sha256_mb_areq_alg);
			
 
				-err2:
			
 
				-	for_each_possible_cpu(cpu) {
			
 
				-		cpu_state = per_cpu_ptr(sha256_mb_alg_state.alg_cstate, cpu);
			
 
				-		kfree(cpu_state->mgr);
			
 
				-	}
			
 
				-	free_percpu(sha256_mb_alg_state.alg_cstate);
			
 
				-	return -ENODEV;
			
 
				-}
			
 
				-
			
 
				-static void __exit sha256_mb_mod_fini(void)
			
 
				-{
			
 
				-	int cpu;
			
 
				-	struct mcryptd_alg_cstate *cpu_state;
			
 
				-
			
 
				-	crypto_unregister_ahash(&sha256_mb_async_alg);
			
 
				-	crypto_unregister_ahash(&sha256_mb_areq_alg);
			
 
				-	for_each_possible_cpu(cpu) {
			
 
				-		cpu_state = per_cpu_ptr(sha256_mb_alg_state.alg_cstate, cpu);
			
 
				-		kfree(cpu_state->mgr);
			
 
				-	}
			
 
				-	free_percpu(sha256_mb_alg_state.alg_cstate);
			
 
				-}
			
 
				-
			
 
				-module_init(sha256_mb_mod_init);
			
 
				-module_exit(sha256_mb_mod_fini);
			
 
				-
			
 
				-MODULE_LICENSE("GPL");
			
 
				-MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm, multi buffer accelerated");
			
 
				-
			
 
				-MODULE_ALIAS_CRYPTO("sha256");
			
--- a/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h
+++ b/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h
@@ -1,134 +0,0 @@
 
				-/*
			
 
				- * Header file for multi buffer SHA256 context
			
 
				- *
			
 
				- * This file is provided under a dual BSD/GPLv2 license.  When using or
			
 
				- * redistributing this file, you may do so under either license.
			
 
				- *
			
 
				- * GPL LICENSE SUMMARY
			
 
				- *
			
 
				- *  Copyright(c) 2016 Intel Corporation.
			
 
				- *
			
 
				- *  This program is free software; you can redistribute it and/or modify
			
 
				- *  it under the terms of version 2 of the GNU General Public License as
			
 
				- *  published by the Free Software Foundation.
			
 
				- *
			
 
				- *  This program is distributed in the hope that it will be useful, but
			
 
				- *  WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
			
 
				- *  General Public License for more details.
			
 
				- *
			
 
				- *  Contact Information:
			
 
				- *	Megha Dey <megha.dey@linux.intel.com>
			
 
				- *
			
 
				- *  BSD LICENSE
			
 
				- *
			
 
				- *  Copyright(c) 2016 Intel Corporation.
			
 
				- *
			
 
				- *  Redistribution and use in source and binary forms, with or without
			
 
				- *  modification, are permitted provided that the following conditions
			
 
				- *  are met:
			
 
				- *
			
 
				- *    * Redistributions of source code must retain the above copyright
			
 
				- *      notice, this list of conditions and the following disclaimer.
			
 
				- *    * Redistributions in binary form must reproduce the above copyright
			
 
				- *      notice, this list of conditions and the following disclaimer in
			
 
				- *      the documentation and/or other materials provided with the
			
 
				- *      distribution.
			
 
				- *    * Neither the name of Intel Corporation nor the names of its
			
 
				- *      contributors may be used to endorse or promote products derived
			
 
				- *      from this software without specific prior written permission.
			
 
				- *
			
 
				- *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
			
 
				- *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
			
 
				- *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
			
 
				- *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
			
 
				- *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
			
 
				- *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
			
 
				- *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
			
 
				- *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
			
 
				- *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
			
 
				- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
			
 
				- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
			
 
				- */
			
 
				-
			
 
				-#ifndef _SHA_MB_CTX_INTERNAL_H
			
 
				-#define _SHA_MB_CTX_INTERNAL_H
			
 
				-
			
 
				-#include "sha256_mb_mgr.h"
			
 
				-
			
 
				-#define HASH_UPDATE          0x00
			
 
				-#define HASH_LAST            0x01
			
 
				-#define HASH_DONE	     0x02
			
 
				-#define HASH_FINAL	     0x04
			
 
				-
			
 
				-#define HASH_CTX_STS_IDLE       0x00
			
 
				-#define HASH_CTX_STS_PROCESSING 0x01
			
 
				-#define HASH_CTX_STS_LAST       0x02
			
 
				-#define HASH_CTX_STS_COMPLETE   0x04
			
 
				-
			
 
				-enum hash_ctx_error {
			
 
				-	HASH_CTX_ERROR_NONE               =  0,
			
 
				-	HASH_CTX_ERROR_INVALID_FLAGS      = -1,
			
 
				-	HASH_CTX_ERROR_ALREADY_PROCESSING = -2,
			
 
				-	HASH_CTX_ERROR_ALREADY_COMPLETED  = -3,
			
 
				-
			
 
				-#ifdef HASH_CTX_DEBUG
			
 
				-	HASH_CTX_ERROR_DEBUG_DIGEST_MISMATCH = -4,
			
 
				-#endif
			
 
				-};
			
 
				-
			
 
				-
			
 
				-#define hash_ctx_user_data(ctx)  ((ctx)->user_data)
			
 
				-#define hash_ctx_digest(ctx)     ((ctx)->job.result_digest)
			
 
				-#define hash_ctx_processing(ctx) ((ctx)->status & HASH_CTX_STS_PROCESSING)
			
 
				-#define hash_ctx_complete(ctx)   ((ctx)->status == HASH_CTX_STS_COMPLETE)
			
 
				-#define hash_ctx_status(ctx)     ((ctx)->status)
			
 
				-#define hash_ctx_error(ctx)      ((ctx)->error)
			
 
				-#define hash_ctx_init(ctx) \
			
 
				-	do { \
			
 
				-		(ctx)->error = HASH_CTX_ERROR_NONE; \
			
 
				-		(ctx)->status = HASH_CTX_STS_COMPLETE; \
			
 
				-	} while (0)
			
 
				-
			
 
				-
			
 
				-/* Hash Constants and Typedefs */
			
 
				-#define SHA256_DIGEST_LENGTH        8
			
 
				-#define SHA256_LOG2_BLOCK_SIZE        6
			
 
				-
			
 
				-#define SHA256_PADLENGTHFIELD_SIZE    8
			
 
				-
			
 
				-#ifdef SHA_MB_DEBUG
			
 
				-#define assert(expr) \
			
 
				-do { \
			
 
				-	if (unlikely(!(expr))) { \
			
 
				-		printk(KERN_ERR "Assertion failed! %s,%s,%s,line=%d\n", \
			
 
				-		#expr, __FILE__, __func__, __LINE__); \
			
 
				-	} \
			
 
				-} while (0)
			
 
				-#else
			
 
				-#define assert(expr) do {} while (0)
			
 
				-#endif
			
 
				-
			
 
				-struct sha256_ctx_mgr {
			
 
				-	struct sha256_mb_mgr mgr;
			
 
				-};
			
 
				-
			
 
				-/* typedef struct sha256_ctx_mgr sha256_ctx_mgr; */
			
 
				-
			
 
				-struct sha256_hash_ctx {
			
 
				-	/* Must be at struct offset 0 */
			
 
				-	struct job_sha256       job;
			
 
				-	/* status flag */
			
 
				-	int status;
			
 
				-	/* error flag */
			
 
				-	int error;
			
 
				-
			
 
				-	uint64_t	total_length;
			
 
				-	const void	*incoming_buffer;
			
 
				-	uint32_t	incoming_buffer_length;
			
 
				-	uint8_t		partial_block_buffer[SHA256_BLOCK_SIZE * 2];
			
 
				-	uint32_t	partial_block_buffer_length;
			
 
				-	void		*user_data;
			
 
				-};
			
 
				-
			
 
				-#endif
			
--- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr.h
+++ b/arch/x86/crypto/sha256-mb/sha256_mb_mgr.h
@@ -1,108 +0,0 @@
 
				-/*
			
 
				- * Header file for multi buffer SHA256 algorithm manager
			
 
				- *
			
 
				- * This file is provided under a dual BSD/GPLv2 license.  When using or
			
 
				- * redistributing this file, you may do so under either license.
			
 
				- *
			
 
				- * GPL LICENSE SUMMARY
			
 
				- *
			
 
				- *  Copyright(c) 2016 Intel Corporation.
			
 
				- *
			
 
				- *  This program is free software; you can redistribute it and/or modify
			
 
				- *  it under the terms of version 2 of the GNU General Public License as
			
 
				- *  published by the Free Software Foundation.
			
 
				- *
			
 
				- *  This program is distributed in the hope that it will be useful, but
			
 
				- *  WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
			
 
				- *  General Public License for more details.
			
 
				- *
			
 
				- *  Contact Information:
			
 
				- *	Megha Dey <megha.dey@linux.intel.com>
			
 
				- *
			
 
				- *  BSD LICENSE
			
 
				- *
			
 
				- *  Copyright(c) 2016 Intel Corporation.
			
 
				- *
			
 
				- *  Redistribution and use in source and binary forms, with or without
			
 
				- *  modification, are permitted provided that the following conditions
			
 
				- *  are met:
			
 
				- *
			
 
				- *    * Redistributions of source code must retain the above copyright
			
 
				- *      notice, this list of conditions and the following disclaimer.
			
 
				- *    * Redistributions in binary form must reproduce the above copyright
			
 
				- *      notice, this list of conditions and the following disclaimer in
			
 
				- *      the documentation and/or other materials provided with the
			
 
				- *      distribution.
			
 
				- *    * Neither the name of Intel Corporation nor the names of its
			
 
				- *      contributors may be used to endorse or promote products derived
			
 
				- *      from this software without specific prior written permission.
			
 
				- *
			
 
				- *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
			
 
				- *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
			
 
				- *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
			
 
				- *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
			
 
				- *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
			
 
				- *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
			
 
				- *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
			
 
				- *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
			
 
				- *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
			
 
				- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
			
 
				- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
			
 
				- */
			
 
				-#ifndef __SHA_MB_MGR_H
			
 
				-#define __SHA_MB_MGR_H
			
 
				-
			
 
				-#include <linux/types.h>
			
 
				-
			
 
				-#define NUM_SHA256_DIGEST_WORDS 8
			
 
				-
			
 
				-enum job_sts {	STS_UNKNOWN = 0,
			
 
				-		STS_BEING_PROCESSED = 1,
			
 
				-		STS_COMPLETED = 2,
			
 
				-		STS_INTERNAL_ERROR = 3,
			
 
				-		STS_ERROR = 4
			
 
				-};
			
 
				-
			
 
				-struct job_sha256 {
			
 
				-	u8	*buffer;
			
 
				-	u32	len;
			
 
				-	u32	result_digest[NUM_SHA256_DIGEST_WORDS] __aligned(32);
			
 
				-	enum	job_sts status;
			
 
				-	void	*user_data;
			
 
				-};
			
 
				-
			
 
				-/* SHA256 out-of-order scheduler */
			
 
				-
			
 
				-/* typedef uint32_t sha8_digest_array[8][8]; */
			
 
				-
			
 
				-struct sha256_args_x8 {
			
 
				-	uint32_t	digest[8][8];
			
 
				-	uint8_t		*data_ptr[8];
			
 
				-};
			
 
				-
			
 
				-struct sha256_lane_data {
			
 
				-	struct job_sha256 *job_in_lane;
			
 
				-};
			
 
				-
			
 
				-struct sha256_mb_mgr {
			
 
				-	struct sha256_args_x8 args;
			
 
				-
			
 
				-	uint32_t lens[8];
			
 
				-
			
 
				-	/* each byte is index (0...7) of unused lanes */
			
 
				-	uint64_t unused_lanes;
			
 
				-	/* byte 4 is set to FF as a flag */
			
 
				-	struct sha256_lane_data ldata[8];
			
 
				-};
			
 
				-
			
 
				-
			
 
				-#define SHA256_MB_MGR_NUM_LANES_AVX2 8
			
 
				-
			
 
				-void sha256_mb_mgr_init_avx2(struct sha256_mb_mgr *state);
			
 
				-struct job_sha256 *sha256_mb_mgr_submit_avx2(struct sha256_mb_mgr *state,
			
 
				-					 struct job_sha256 *job);
			
 
				-struct job_sha256 *sha256_mb_mgr_flush_avx2(struct sha256_mb_mgr *state);
			
 
				-struct job_sha256 *sha256_mb_mgr_get_comp_job_avx2(struct sha256_mb_mgr *state);
			
 
				-
			
 
				-#endif
			
--- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_datastruct.S
+++ b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_datastruct.S
@@ -1,304 +0,0 @@
 
				-/*
			
 
				- * Header file for multi buffer SHA256 algorithm data structure
			
 
				- *
			
 
				- * This file is provided under a dual BSD/GPLv2 license.  When using or
			
 
				- * redistributing this file, you may do so under either license.
			
 
				- *
			
 
				- * GPL LICENSE SUMMARY
			
 
				- *
			
 
				- * Copyright(c) 2016 Intel Corporation.
			
 
				- *
			
 
				- * This program is free software; you can redistribute it and/or modify
			
 
				- * it under the terms of version 2 of the GNU General Public License as
			
 
				- * published by the Free Software Foundation.
			
 
				- *
			
 
				- * This program is distributed in the hope that it will be useful, but
			
 
				- * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
			
 
				- * General Public License for more details.
			
 
				- *
			
 
				- * Contact Information:
			
 
				- *     Megha Dey <megha.dey@linux.intel.com>
			
 
				- *
			
 
				- * BSD LICENSE
			
 
				- *
			
 
				- * Copyright(c) 2016 Intel Corporation.
			
 
				- *
			
 
				- * Redistribution and use in source and binary forms, with or without
			
 
				- * modification, are permitted provided that the following conditions
			
 
				- * are met:
			
 
				- *
			
 
				- *   * Redistributions of source code must retain the above copyright
			
 
				- *     notice, this list of conditions and the following disclaimer.
			
 
				- *   * Redistributions in binary form must reproduce the above copyright
			
 
				- *     notice, this list of conditions and the following disclaimer in
			
 
				- *     the documentation and/or other materials provided with the
			
 
				- *     distribution.
			
 
				- *   * Neither the name of Intel Corporation nor the names of its
			
 
				- *     contributors may be used to endorse or promote products derived
			
 
				- *     from this software without specific prior written permission.
			
 
				- *
			
 
				- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
			
 
				- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
			
 
				- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
			
 
				- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
			
 
				- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
			
 
				- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
			
 
				- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
			
 
				- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
			
 
				- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
			
 
				- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
			
 
				- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
			
 
				- */
			
 
				-
			
 
				-# Macros for defining data structures
			
 
				-
			
 
				-# Usage example
			
 
				-
			
 
				-#START_FIELDS	# JOB_AES
			
 
				-###	name		size	align
			
 
				-#FIELD	_plaintext,	8,	8	# pointer to plaintext
			
 
				-#FIELD	_ciphertext,	8,	8	# pointer to ciphertext
			
 
				-#FIELD	_IV,		16,	8	# IV
			
 
				-#FIELD	_keys,		8,	8	# pointer to keys
			
 
				-#FIELD	_len,		4,	4	# length in bytes
			
 
				-#FIELD	_status,	4,	4	# status enumeration
			
 
				-#FIELD	_user_data,	8,	8	# pointer to user data
			
 
				-#UNION  _union,         size1,  align1, \
			
 
				-#	                size2,  align2, \
			
 
				-#	                size3,  align3, \
			
 
				-#	                ...
			
 
				-#END_FIELDS
			
 
				-#%assign _JOB_AES_size	_FIELD_OFFSET
			
 
				-#%assign _JOB_AES_align	_STRUCT_ALIGN
			
 
				-
			
 
				-#########################################################################
			
 
				-
			
 
				-# Alternate "struc-like" syntax:
			
 
				-#	STRUCT job_aes2
			
 
				-#	RES_Q	.plaintext,	1
			
 
				-#	RES_Q	.ciphertext, 	1
			
 
				-#	RES_DQ	.IV,		1
			
 
				-#	RES_B	.nested,	_JOB_AES_SIZE, _JOB_AES_ALIGN
			
 
				-#	RES_U	.union,		size1, align1, \
			
 
				-#				size2, align2, \
			
 
				-#				...
			
 
				-#	ENDSTRUCT
			
 
				-#	# Following only needed if nesting
			
 
				-#	%assign job_aes2_size	_FIELD_OFFSET
			
 
				-#	%assign job_aes2_align	_STRUCT_ALIGN
			
 
				-#
			
 
				-# RES_* macros take a name, a count and an optional alignment.
			
 
				-# The count in in terms of the base size of the macro, and the
			
 
				-# default alignment is the base size.
			
 
				-# The macros are:
			
 
				-# Macro    Base size
			
 
				-# RES_B	    1
			
 
				-# RES_W	    2
			
 
				-# RES_D     4
			
 
				-# RES_Q     8
			
 
				-# RES_DQ   16
			
 
				-# RES_Y    32
			
 
				-# RES_Z    64
			
 
				-#
			
 
				-# RES_U defines a union. It's arguments are a name and two or more
			
 
				-# pairs of "size, alignment"
			
 
				-#
			
 
				-# The two assigns are only needed if this structure is being nested
			
 
				-# within another. Even if the assigns are not done, one can still use
			
 
				-# STRUCT_NAME_size as the size of the structure.
			
 
				-#
			
 
				-# Note that for nesting, you still need to assign to STRUCT_NAME_size.
			
 
				-#
			
 
				-# The differences between this and using "struc" directly are that each
			
 
				-# type is implicitly aligned to its natural length (although this can be
			
 
				-# over-ridden with an explicit third parameter), and that the structure
			
 
				-# is padded at the end to its overall alignment.
			
 
				-#
			
 
				-
			
 
				-#########################################################################
			
 
				-
			
 
				-#ifndef _DATASTRUCT_ASM_
			
 
				-#define _DATASTRUCT_ASM_
			
 
				-
			
 
				-#define SZ8			8*SHA256_DIGEST_WORD_SIZE
			
 
				-#define ROUNDS			64*SZ8
			
 
				-#define PTR_SZ                  8
			
 
				-#define SHA256_DIGEST_WORD_SIZE 4
			
 
				-#define MAX_SHA256_LANES        8
			
 
				-#define SHA256_DIGEST_WORDS 8
			
 
				-#define SHA256_DIGEST_ROW_SIZE  (MAX_SHA256_LANES * SHA256_DIGEST_WORD_SIZE)
			
 
				-#define SHA256_DIGEST_SIZE      (SHA256_DIGEST_ROW_SIZE * SHA256_DIGEST_WORDS)
			
 
				-#define SHA256_BLK_SZ           64
			
 
				-
			
 
				-# START_FIELDS
			
 
				-.macro START_FIELDS
			
 
				- _FIELD_OFFSET = 0
			
 
				- _STRUCT_ALIGN = 0
			
 
				-.endm
			
 
				-
			
 
				-# FIELD name size align
			
 
				-.macro FIELD name size align
			
 
				- _FIELD_OFFSET = (_FIELD_OFFSET + (\align) - 1) & (~ ((\align)-1))
			
 
				- \name	= _FIELD_OFFSET
			
 
				- _FIELD_OFFSET = _FIELD_OFFSET + (\size)
			
 
				-.if (\align > _STRUCT_ALIGN)
			
 
				- _STRUCT_ALIGN = \align
			
 
				-.endif
			
 
				-.endm
			
 
				-
			
 
				-# END_FIELDS
			
 
				-.macro END_FIELDS
			
 
				- _FIELD_OFFSET = (_FIELD_OFFSET + _STRUCT_ALIGN-1) & (~ (_STRUCT_ALIGN-1))
			
 
				-.endm
			
 
				-
			
 
				-########################################################################
			
 
				-
			
 
				-.macro STRUCT p1
			
 
				-START_FIELDS
			
 
				-.struc \p1
			
 
				-.endm
			
 
				-
			
 
				-.macro ENDSTRUCT
			
 
				- tmp = _FIELD_OFFSET
			
 
				- END_FIELDS
			
 
				- tmp = (_FIELD_OFFSET - %%tmp)
			
 
				-.if (tmp > 0)
			
 
				-	.lcomm	tmp
			
 
				-.endif
			
 
				-.endstruc
			
 
				-.endm
			
 
				-
			
 
				-## RES_int name size align
			
 
				-.macro RES_int p1 p2 p3
			
 
				- name = \p1
			
 
				- size = \p2
			
 
				- align = .\p3
			
 
				-
			
 
				- _FIELD_OFFSET = (_FIELD_OFFSET + (align) - 1) & (~ ((align)-1))
			
 
				-.align align
			
 
				-.lcomm name size
			
 
				- _FIELD_OFFSET = _FIELD_OFFSET + (size)
			
 
				-.if (align > _STRUCT_ALIGN)
			
 
				- _STRUCT_ALIGN = align
			
 
				-.endif
			
 
				-.endm
			
 
				-
			
 
				-# macro RES_B name, size [, align]
			
 
				-.macro RES_B _name, _size, _align=1
			
 
				-RES_int _name _size _align
			
 
				-.endm
			
 
				-
			
 
				-# macro RES_W name, size [, align]
			
 
				-.macro RES_W _name, _size, _align=2
			
 
				-RES_int _name 2*(_size) _align
			
 
				-.endm
			
 
				-
			
 
				-# macro RES_D name, size [, align]
			
 
				-.macro RES_D _name, _size, _align=4
			
 
				-RES_int _name 4*(_size) _align
			
 
				-.endm
			
 
				-
			
 
				-# macro RES_Q name, size [, align]
			
 
				-.macro RES_Q _name, _size, _align=8
			
 
				-RES_int _name 8*(_size) _align
			
 
				-.endm
			
 
				-
			
 
				-# macro RES_DQ name, size [, align]
			
 
				-.macro RES_DQ _name, _size, _align=16
			
 
				-RES_int _name 16*(_size) _align
			
 
				-.endm
			
 
				-
			
 
				-# macro RES_Y name, size [, align]
			
 
				-.macro RES_Y _name, _size, _align=32
			
 
				-RES_int _name 32*(_size) _align
			
 
				-.endm
			
 
				-
			
 
				-# macro RES_Z name, size [, align]
			
 
				-.macro RES_Z _name, _size, _align=64
			
 
				-RES_int _name 64*(_size) _align
			
 
				-.endm
			
 
				-
			
 
				-#endif
			
 
				-
			
 
				-
			
 
				-########################################################################
			
 
				-#### Define SHA256 Out Of Order Data Structures
			
 
				-########################################################################
			
 
				-
			
 
				-START_FIELDS    # LANE_DATA
			
 
				-###     name            size    align
			
 
				-FIELD   _job_in_lane,   8,      8       # pointer to job object
			
 
				-END_FIELDS
			
 
				-
			
 
				- _LANE_DATA_size = _FIELD_OFFSET
			
 
				- _LANE_DATA_align = _STRUCT_ALIGN
			
 
				-
			
 
				-########################################################################
			
 
				-
			
 
				-START_FIELDS    # SHA256_ARGS_X4
			
 
				-###     name            size    align
			
 
				-FIELD   _digest,        4*8*8,  4       # transposed digest
			
 
				-FIELD   _data_ptr,      8*8,    8       # array of pointers to data
			
 
				-END_FIELDS
			
 
				-
			
 
				- _SHA256_ARGS_X4_size  =  _FIELD_OFFSET
			
 
				- _SHA256_ARGS_X4_align = _STRUCT_ALIGN
			
 
				- _SHA256_ARGS_X8_size  =	_FIELD_OFFSET
			
 
				- _SHA256_ARGS_X8_align =	_STRUCT_ALIGN
			
 
				-
			
 
				-#######################################################################
			
 
				-
			
 
				-START_FIELDS    # MB_MGR
			
 
				-###     name            size    align
			
 
				-FIELD   _args,          _SHA256_ARGS_X4_size, _SHA256_ARGS_X4_align
			
 
				-FIELD   _lens,          4*8,    8
			
 
				-FIELD   _unused_lanes,  8,      8
			
 
				-FIELD   _ldata,         _LANE_DATA_size*8, _LANE_DATA_align
			
 
				-END_FIELDS
			
 
				-
			
 
				- _MB_MGR_size  =  _FIELD_OFFSET
			
 
				- _MB_MGR_align =  _STRUCT_ALIGN
			
 
				-
			
 
				-_args_digest   =     _args + _digest
			
 
				-_args_data_ptr =     _args + _data_ptr
			
 
				-
			
 
				-#######################################################################
			
 
				-
			
 
				-START_FIELDS    #STACK_FRAME
			
 
				-###     name            size    align
			
 
				-FIELD   _data,		16*SZ8,   1       # transposed digest
			
 
				-FIELD   _digest,         8*SZ8,   1       # array of pointers to data
			
 
				-FIELD   _ytmp,           4*SZ8,   1
			
 
				-FIELD   _rsp,            8,       1
			
 
				-END_FIELDS
			
 
				-
			
 
				- _STACK_FRAME_size  =  _FIELD_OFFSET
			
 
				- _STACK_FRAME_align =  _STRUCT_ALIGN
			
 
				-
			
 
				-#######################################################################
			
 
				-
			
 
				-########################################################################
			
 
				-#### Define constants
			
 
				-########################################################################
			
 
				-
			
 
				-#define STS_UNKNOWN             0
			
 
				-#define STS_BEING_PROCESSED     1
			
 
				-#define STS_COMPLETED           2
			
 
				-
			
 
				-########################################################################
			
 
				-#### Define JOB_SHA256 structure
			
 
				-########################################################################
			
 
				-
			
 
				-START_FIELDS    # JOB_SHA256
			
 
				-
			
 
				-###     name                            size    align
			
 
				-FIELD   _buffer,                        8,      8       # pointer to buffer
			
 
				-FIELD   _len,                           8,      8       # length in bytes
			
 
				-FIELD   _result_digest,                 8*4,    32      # Digest (output)
			
 
				-FIELD   _status,                        4,      4
			
 
				-FIELD   _user_data,                     8,      8
			
 
				-END_FIELDS
			
 
				-
			
 
				- _JOB_SHA256_size = _FIELD_OFFSET
			
 
				- _JOB_SHA256_align = _STRUCT_ALIGN
			
--- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S
+++ b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S
@@ -1,307 +0,0 @@
 
				-/*
			
 
				- * Flush routine for SHA256 multibuffer
			
 
				- *
			
 
				- * This file is provided under a dual BSD/GPLv2 license.  When using or
			
 
				- * redistributing this file, you may do so under either license.
			
 
				- *
			
 
				- * GPL LICENSE SUMMARY
			
 
				- *
			
 
				- *  Copyright(c) 2016 Intel Corporation.
			
 
				- *
			
 
				- *  This program is free software; you can redistribute it and/or modify
			
 
				- *  it under the terms of version 2 of the GNU General Public License as
			
 
				- *  published by the Free Software Foundation.
			
 
				- *
			
 
				- *  This program is distributed in the hope that it will be useful, but
			
 
				- *  WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
			
 
				- *  General Public License for more details.
			
 
				- *
			
 
				- *  Contact Information:
			
 
				- *      Megha Dey <megha.dey@linux.intel.com>
			
 
				- *
			
 
				- *  BSD LICENSE
			
 
				- *
			
 
				- *  Copyright(c) 2016 Intel Corporation.
			
 
				- *
			
 
				- *  Redistribution and use in source and binary forms, with or without
			
 
				- *  modification, are permitted provided that the following conditions
			
 
				- *  are met:
			
 
				- *
			
 
				- *    * Redistributions of source code must retain the above copyright
			
 
				- *      notice, this list of conditions and the following disclaimer.
			
 
				- *    * Redistributions in binary form must reproduce the above copyright
			
 
				- *      notice, this list of conditions and the following disclaimer in
			
 
				- *      the documentation and/or other materials provided with the
			
 
				- *      distribution.
			
 
				- *    * Neither the name of Intel Corporation nor the names of its
			
 
				- *      contributors may be used to endorse or promote products derived
			
 
				- *      from this software without specific prior written permission.
			
 
				- *
			
 
				- *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
			
 
				- *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
			
 
				- *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
			
 
				- *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
			
 
				- *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
			
 
				- *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
			
 
				- *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
			
 
				- *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
			
 
				- *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
			
 
				- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
			
 
				- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
			
 
				- */
			
 
				-#include <linux/linkage.h>
			
 
				-#include <asm/frame.h>
			
 
				-#include "sha256_mb_mgr_datastruct.S"
			
 
				-
			
 
				-.extern sha256_x8_avx2
			
 
				-
			
 
				-#LINUX register definitions
			
 
				-#define arg1	%rdi
			
 
				-#define arg2	%rsi
			
 
				-
			
 
				-# Common register definitions
			
 
				-#define state	arg1
			
 
				-#define job	arg2
			
 
				-#define len2	arg2
			
 
				-
			
 
				-# idx must be a register not clobberred by sha1_mult
			
 
				-#define idx		%r8
			
 
				-#define DWORD_idx	%r8d
			
 
				-
			
 
				-#define unused_lanes	%rbx
			
 
				-#define lane_data	%rbx
			
 
				-#define tmp2		%rbx
			
 
				-#define tmp2_w		%ebx
			
 
				-
			
 
				-#define job_rax		%rax
			
 
				-#define tmp1		%rax
			
 
				-#define size_offset	%rax
			
 
				-#define tmp		%rax
			
 
				-#define start_offset	%rax
			
 
				-
			
 
				-#define tmp3		%arg1
			
 
				-
			
 
				-#define extra_blocks	%arg2
			
 
				-#define p		%arg2
			
 
				-
			
 
				-.macro LABEL prefix n
			
 
				-\prefix\n\():
			
 
				-.endm
			
 
				-
			
 
				-.macro JNE_SKIP i
			
 
				-jne     skip_\i
			
 
				-.endm
			
 
				-
			
 
				-.altmacro
			
 
				-.macro SET_OFFSET _offset
			
 
				-offset = \_offset
			
 
				-.endm
			
 
				-.noaltmacro
			
 
				-
			
 
				-# JOB_SHA256* sha256_mb_mgr_flush_avx2(MB_MGR *state)
			
 
				-# arg 1 : rcx : state
			
 
				-ENTRY(sha256_mb_mgr_flush_avx2)
			
 
				-	FRAME_BEGIN
			
 
				-        push    %rbx
			
 
				-
			
 
				-	# If bit (32+3) is set, then all lanes are empty
			
 
				-	mov	_unused_lanes(state), unused_lanes
			
 
				-	bt	$32+3, unused_lanes
			
 
				-	jc	return_null
			
 
				-
			
 
				-	# find a lane with a non-null job
			
 
				-	xor	idx, idx
			
 
				-	offset = (_ldata + 1 * _LANE_DATA_size + _job_in_lane)
			
 
				-	cmpq	$0, offset(state)
			
 
				-	cmovne	one(%rip), idx
			
 
				-	offset = (_ldata + 2 * _LANE_DATA_size + _job_in_lane)
			
 
				-	cmpq	$0, offset(state)
			
 
				-	cmovne	two(%rip), idx
			
 
				-	offset = (_ldata + 3 * _LANE_DATA_size + _job_in_lane)
			
 
				-	cmpq	$0, offset(state)
			
 
				-	cmovne	three(%rip), idx
			
 
				-	offset = (_ldata + 4 * _LANE_DATA_size + _job_in_lane)
			
 
				-	cmpq	$0, offset(state)
			
 
				-	cmovne	four(%rip), idx
			
 
				-	offset = (_ldata + 5 * _LANE_DATA_size + _job_in_lane)
			
 
				-	cmpq	$0, offset(state)
			
 
				-	cmovne	five(%rip), idx
			
 
				-	offset = (_ldata + 6 * _LANE_DATA_size + _job_in_lane)
			
 
				-	cmpq	$0, offset(state)
			
 
				-	cmovne	six(%rip), idx
			
 
				-	offset = (_ldata + 7 * _LANE_DATA_size + _job_in_lane)
			
 
				-	cmpq	$0, offset(state)
			
 
				-	cmovne	seven(%rip), idx
			
 
				-
			
 
				-	# copy idx to empty lanes
			
 
				-copy_lane_data:
			
 
				-	offset =  (_args + _data_ptr)
			
 
				-	mov	offset(state,idx,8), tmp
			
 
				-
			
 
				-	I = 0
			
 
				-.rep 8
			
 
				-	offset = (_ldata + I * _LANE_DATA_size + _job_in_lane)
			
 
				-	cmpq	$0, offset(state)
			
 
				-.altmacro
			
 
				-	JNE_SKIP %I
			
 
				-	offset =  (_args + _data_ptr + 8*I)
			
 
				-	mov	tmp, offset(state)
			
 
				-	offset =  (_lens + 4*I)
			
 
				-	movl	$0xFFFFFFFF, offset(state)
			
 
				-LABEL skip_ %I
			
 
				-	I = (I+1)
			
 
				-.noaltmacro
			
 
				-.endr
			
 
				-
			
 
				-	# Find min length
			
 
				-	vmovdqu _lens+0*16(state), %xmm0
			
 
				-	vmovdqu _lens+1*16(state), %xmm1
			
 
				-
			
 
				-	vpminud %xmm1, %xmm0, %xmm2		# xmm2 has {D,C,B,A}
			
 
				-	vpalignr $8, %xmm2, %xmm3, %xmm3	# xmm3 has {x,x,D,C}
			
 
				-	vpminud %xmm3, %xmm2, %xmm2		# xmm2 has {x,x,E,F}
			
 
				-	vpalignr $4, %xmm2, %xmm3, %xmm3	# xmm3 has {x,x,x,E}
			
 
				-	vpminud %xmm3, %xmm2, %xmm2		# xmm2 has min val in low dword
			
 
				-
			
 
				-	vmovd	%xmm2, DWORD_idx
			
 
				-	mov	idx, len2
			
 
				-	and	$0xF, idx
			
 
				-	shr	$4, len2
			
 
				-	jz	len_is_0
			
 
				-
			
 
				-	vpand	clear_low_nibble(%rip), %xmm2, %xmm2
			
 
				-	vpshufd	$0, %xmm2, %xmm2
			
 
				-
			
 
				-	vpsubd	%xmm2, %xmm0, %xmm0
			
 
				-	vpsubd	%xmm2, %xmm1, %xmm1
			
 
				-
			
 
				-	vmovdqu	%xmm0, _lens+0*16(state)
			
 
				-	vmovdqu	%xmm1, _lens+1*16(state)
			
 
				-
			
 
				-	# "state" and "args" are the same address, arg1
			
 
				-	# len is arg2
			
 
				-	call	sha256_x8_avx2
			
 
				-	# state and idx are intact
			
 
				-
			
 
				-len_is_0:
			
 
				-	# process completed job "idx"
			
 
				-	imul	$_LANE_DATA_size, idx, lane_data
			
 
				-	lea	_ldata(state, lane_data), lane_data
			
 
				-
			
 
				-	mov	_job_in_lane(lane_data), job_rax
			
 
				-	movq	$0, _job_in_lane(lane_data)
			
 
				-	movl	$STS_COMPLETED, _status(job_rax)
			
 
				-	mov	_unused_lanes(state), unused_lanes
			
 
				-	shl	$4, unused_lanes
			
 
				-	or	idx, unused_lanes
			
 
				-
			
 
				-	mov	unused_lanes, _unused_lanes(state)
			
 
				-	movl	$0xFFFFFFFF, _lens(state,idx,4)
			
 
				-
			
 
				-	vmovd	_args_digest(state , idx, 4) , %xmm0
			
 
				-	vpinsrd	$1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0
			
 
				-	vpinsrd	$2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0
			
 
				-	vpinsrd	$3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0
			
 
				-	vmovd	_args_digest+4*32(state, idx, 4), %xmm1
			
 
				-	vpinsrd	$1, _args_digest+5*32(state, idx, 4), %xmm1, %xmm1
			
 
				-	vpinsrd	$2, _args_digest+6*32(state, idx, 4), %xmm1, %xmm1
			
 
				-	vpinsrd	$3, _args_digest+7*32(state, idx, 4), %xmm1, %xmm1
			
 
				-
			
 
				-	vmovdqu	%xmm0, _result_digest(job_rax)
			
 
				-	offset =  (_result_digest + 1*16)
			
 
				-	vmovdqu	%xmm1, offset(job_rax)
			
 
				-
			
 
				-return:
			
 
				-	pop     %rbx
			
 
				-	FRAME_END
			
 
				-	ret
			
 
				-
			
 
				-return_null:
			
 
				-	xor	job_rax, job_rax
			
 
				-	jmp	return
			
 
				-ENDPROC(sha256_mb_mgr_flush_avx2)
			
 
				-
			
 
				-##############################################################################
			
 
				-
			
 
				-.align 16
			
 
				-ENTRY(sha256_mb_mgr_get_comp_job_avx2)
			
 
				-	push	%rbx
			
 
				-
			
 
				-	## if bit 32+3 is set, then all lanes are empty
			
 
				-	mov	_unused_lanes(state), unused_lanes
			
 
				-	bt	$(32+3), unused_lanes
			
 
				-	jc	.return_null
			
 
				-
			
 
				-	# Find min length
			
 
				-	vmovdqu	_lens(state), %xmm0
			
 
				-	vmovdqu	_lens+1*16(state), %xmm1
			
 
				-
			
 
				-	vpminud	%xmm1, %xmm0, %xmm2		# xmm2 has {D,C,B,A}
			
 
				-	vpalignr $8, %xmm2, %xmm3, %xmm3	# xmm3 has {x,x,D,C}
			
 
				-	vpminud	%xmm3, %xmm2, %xmm2		# xmm2 has {x,x,E,F}
			
 
				-	vpalignr $4, %xmm2, %xmm3, %xmm3	# xmm3 has {x,x,x,E}
			
 
				-	vpminud	%xmm3, %xmm2, %xmm2		# xmm2 has min val in low dword
			
 
				-
			
 
				-	vmovd	%xmm2, DWORD_idx
			
 
				-	test	$~0xF, idx
			
 
				-	jnz	.return_null
			
 
				-
			
 
				-	# process completed job "idx"
			
 
				-	imul	$_LANE_DATA_size, idx, lane_data
			
 
				-	lea	_ldata(state, lane_data), lane_data
			
 
				-
			
 
				-	mov	_job_in_lane(lane_data), job_rax
			
 
				-	movq	$0,  _job_in_lane(lane_data)
			
 
				-	movl	$STS_COMPLETED, _status(job_rax)
			
 
				-	mov	_unused_lanes(state), unused_lanes
			
 
				-	shl	$4, unused_lanes
			
 
				-	or	idx, unused_lanes
			
 
				-	mov	unused_lanes, _unused_lanes(state)
			
 
				-
			
 
				-	movl	$0xFFFFFFFF, _lens(state,  idx, 4)
			
 
				-
			
 
				-	vmovd	_args_digest(state, idx, 4), %xmm0
			
 
				-	vpinsrd	$1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0
			
 
				-	vpinsrd	$2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0
			
 
				-	vpinsrd	$3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0
			
 
				-	vmovd	_args_digest+4*32(state, idx, 4), %xmm1
			
 
				-	vpinsrd	$1, _args_digest+5*32(state, idx, 4), %xmm1, %xmm1
			
 
				-	vpinsrd	$2, _args_digest+6*32(state, idx, 4), %xmm1, %xmm1
			
 
				-	vpinsrd	$3, _args_digest+7*32(state, idx, 4), %xmm1, %xmm1
			
 
				-
			
 
				-        vmovdqu %xmm0, _result_digest(job_rax)
			
 
				-        offset =  (_result_digest + 1*16)
			
 
				-        vmovdqu %xmm1, offset(job_rax)
			
 
				-
			
 
				-	pop	%rbx
			
 
				-
			
 
				-	ret
			
 
				-
			
 
				-.return_null:
			
 
				-	xor	job_rax, job_rax
			
 
				-	pop	%rbx
			
 
				-	ret
			
 
				-ENDPROC(sha256_mb_mgr_get_comp_job_avx2)
			
 
				-
			
 
				-.section	.rodata.cst16.clear_low_nibble, "aM", @progbits, 16
			
 
				-.align 16
			
 
				-clear_low_nibble:
			
 
				-.octa	0x000000000000000000000000FFFFFFF0
			
 
				-
			
 
				-.section	.rodata.cst8, "aM", @progbits, 8
			
 
				-.align 8
			
 
				-one:
			
 
				-.quad	1
			
 
				-two:
			
 
				-.quad	2
			
 
				-three:
			
 
				-.quad	3
			
 
				-four:
			
 
				-.quad	4
			
 
				-five:
			
 
				-.quad	5
			
 
				-six:
			
 
				-.quad	6
			
 
				-seven:
			
 
				-.quad  7
			
--- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_init_avx2.c
+++ b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_init_avx2.c
@@ -1,65 +0,0 @@
 
				-/*
			
 
				- * Initialization code for multi buffer SHA256 algorithm for AVX2
			
 
				- *
			
 
				- * This file is provided under a dual BSD/GPLv2 license.  When using or
			
 
				- * redistributing this file, you may do so under either license.
			
 
				- *
			
 
				- * GPL LICENSE SUMMARY
			
 
				- *
			
 
				- *  Copyright(c) 2016 Intel Corporation.
			
 
				- *
			
 
				- *  This program is free software; you can redistribute it and/or modify
			
 
				- *  it under the terms of version 2 of the GNU General Public License as
			
 
				- *  published by the Free Software Foundation.
			
 
				- *
			
 
				- *  This program is distributed in the hope that it will be useful, but
			
 
				- *  WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
			
 
				- *  General Public License for more details.
			
 
				- *
			
 
				- *  Contact Information:
			
 
				- *      Megha Dey <megha.dey@linux.intel.com>
			
 
				- *
			
 
				- *  BSD LICENSE
			
 
				- *
			
 
				- *  Copyright(c) 2016 Intel Corporation.
			
 
				- *
			
 
				- *  Redistribution and use in source and binary forms, with or without
			
 
				- *  modification, are permitted provided that the following conditions
			
 
				- *  are met:
			
 
				- *
			
 
				- *    * Redistributions of source code must retain the above copyright
			
 
				- *      notice, this list of conditions and the following disclaimer.
			
 
				- *    * Redistributions in binary form must reproduce the above copyright
			
 
				- *      notice, this list of conditions and the following disclaimer in
			
 
				- *      the documentation and/or other materials provided with the
			
 
				- *      distribution.
			
 
				- *    * Neither the name of Intel Corporation nor the names of its
			
 
				- *      contributors may be used to endorse or promote products derived
			
 
				- *      from this software without specific prior written permission.
			
 
				- *
			
 
				- *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
			
 
				- *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
			
 
				- *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
			
 
				- *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
			
 
				- *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
			
 
				- *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
			
 
				- *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
			
 
				- *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
			
 
				- *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
			
 
				- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
			
 
				- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
			
 
				- */
			
 
				-
			
 
				-#include "sha256_mb_mgr.h"
			
 
				-
			
 
				-void sha256_mb_mgr_init_avx2(struct sha256_mb_mgr *state)
			
 
				-{
			
 
				-	unsigned int j;
			
 
				-
			
 
				-	state->unused_lanes = 0xF76543210ULL;
			
 
				-	for (j = 0; j < 8; j++) {
			
 
				-		state->lens[j] = 0xFFFFFFFF;
			
 
				-		state->ldata[j].job_in_lane = NULL;
			
 
				-	}
			
 
				-}
			
--- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S
+++ b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S
@@ -1,214 +0,0 @@
 
				-/*
			
 
				- * Buffer submit code for multi buffer SHA256 algorithm
			
 
				- *
			
 
				- * This file is provided under a dual BSD/GPLv2 license.  When using or
			
 
				- * redistributing this file, you may do so under either license.
			
 
				- *
			
 
				- * GPL LICENSE SUMMARY
			
 
				- *
			
 
				- *  Copyright(c) 2016 Intel Corporation.
			
 
				- *
			
 
				- *  This program is free software; you can redistribute it and/or modify
			
 
				- *  it under the terms of version 2 of the GNU General Public License as
			
 
				- *  published by the Free Software Foundation.
			
 
				- *
			
 
				- *  This program is distributed in the hope that it will be useful, but
			
 
				- *  WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
			
 
				- *  General Public License for more details.
			
 
				- *
			
 
				- *  Contact Information:
			
 
				- *      Megha Dey <megha.dey@linux.intel.com>
			
 
				- *
			
 
				- *  BSD LICENSE
			
 
				- *
			
 
				- *  Copyright(c) 2016 Intel Corporation.
			
 
				- *
			
 
				- *  Redistribution and use in source and binary forms, with or without
			
 
				- *  modification, are permitted provided that the following conditions
			
 
				- *  are met:
			
 
				- *
			
 
				- *    * Redistributions of source code must retain the above copyright
			
 
				- *      notice, this list of conditions and the following disclaimer.
			
 
				- *    * Redistributions in binary form must reproduce the above copyright
			
 
				- *      notice, this list of conditions and the following disclaimer in
			
 
				- *      the documentation and/or other materials provided with the
			
 
				- *      distribution.
			
 
				- *    * Neither the name of Intel Corporation nor the names of its
			
 
				- *      contributors may be used to endorse or promote products derived
			
 
				- *      from this software without specific prior written permission.
			
 
				- *
			
 
				- *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
			
 
				- *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
			
 
				- *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
			
 
				- *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
			
 
				- *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
			
 
				- *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
			
 
				- *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
			
 
				- *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
			
 
				- *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
			
 
				- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
			
 
				- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
			
 
				- */
			
 
				-
			
 
				-#include <linux/linkage.h>
			
 
				-#include <asm/frame.h>
			
 
				-#include "sha256_mb_mgr_datastruct.S"
			
 
				-
			
 
				-.extern sha256_x8_avx2
			
 
				-
			
 
				-# LINUX register definitions
			
 
				-arg1		= %rdi
			
 
				-arg2		= %rsi
			
 
				-size_offset	= %rcx
			
 
				-tmp2		= %rcx
			
 
				-extra_blocks	= %rdx
			
 
				-
			
 
				-# Common definitions
			
 
				-#define state	arg1
			
 
				-#define job	%rsi
			
 
				-#define len2	arg2
			
 
				-#define p2	arg2
			
 
				-
			
 
				-# idx must be a register not clobberred by sha1_x8_avx2
			
 
				-idx		= %r8
			
 
				-DWORD_idx	= %r8d
			
 
				-last_len	= %r8
			
 
				-
			
 
				-p		= %r11
			
 
				-start_offset	= %r11
			
 
				-
			
 
				-unused_lanes	= %rbx
			
 
				-BYTE_unused_lanes = %bl
			
 
				-
			
 
				-job_rax		= %rax
			
 
				-len		= %rax
			
 
				-DWORD_len	= %eax
			
 
				-
			
 
				-lane		= %r12
			
 
				-tmp3		= %r12
			
 
				-
			
 
				-tmp		= %r9
			
 
				-DWORD_tmp	= %r9d
			
 
				-
			
 
				-lane_data	= %r10
			
 
				-
			
 
				-# JOB* sha256_mb_mgr_submit_avx2(MB_MGR *state, JOB_SHA256 *job)
			
 
				-# arg 1 : rcx : state
			
 
				-# arg 2 : rdx : job
			
 
				-ENTRY(sha256_mb_mgr_submit_avx2)
			
 
				-	FRAME_BEGIN
			
 
				-	push	%rbx
			
 
				-	push	%r12
			
 
				-
			
 
				-	mov	_unused_lanes(state), unused_lanes
			
 
				-	mov	unused_lanes, lane
			
 
				-	and	$0xF, lane
			
 
				-	shr	$4, unused_lanes
			
 
				-	imul	$_LANE_DATA_size, lane, lane_data
			
 
				-	movl	$STS_BEING_PROCESSED, _status(job)
			
 
				-	lea	_ldata(state, lane_data), lane_data
			
 
				-	mov	unused_lanes, _unused_lanes(state)
			
 
				-	movl	_len(job),  DWORD_len
			
 
				-
			
 
				-	mov	job, _job_in_lane(lane_data)
			
 
				-	shl	$4, len
			
 
				-	or	lane, len
			
 
				-
			
 
				-	movl	DWORD_len,  _lens(state , lane, 4)
			
 
				-
			
 
				-	# Load digest words from result_digest
			
 
				-	vmovdqu	_result_digest(job), %xmm0
			
 
				-	vmovdqu	_result_digest+1*16(job), %xmm1
			
 
				-	vmovd	%xmm0, _args_digest(state, lane, 4)
			
 
				-	vpextrd	$1, %xmm0, _args_digest+1*32(state , lane, 4)
			
 
				-	vpextrd	$2, %xmm0, _args_digest+2*32(state , lane, 4)
			
 
				-	vpextrd	$3, %xmm0, _args_digest+3*32(state , lane, 4)
			
 
				-	vmovd	%xmm1, _args_digest+4*32(state , lane, 4)
			
 
				-
			
 
				-	vpextrd	$1, %xmm1, _args_digest+5*32(state , lane, 4)
			
 
				-	vpextrd	$2, %xmm1, _args_digest+6*32(state , lane, 4)
			
 
				-	vpextrd	$3, %xmm1, _args_digest+7*32(state , lane, 4)
			
 
				-
			
 
				-	mov	_buffer(job), p
			
 
				-	mov	p, _args_data_ptr(state, lane, 8)
			
 
				-
			
 
				-	cmp	$0xF, unused_lanes
			
 
				-	jne	return_null
			
 
				-
			
 
				-start_loop:
			
 
				-	# Find min length
			
 
				-	vmovdqa	_lens(state), %xmm0
			
 
				-	vmovdqa	_lens+1*16(state), %xmm1
			
 
				-
			
 
				-	vpminud	%xmm1, %xmm0, %xmm2		# xmm2 has {D,C,B,A}
			
 
				-	vpalignr $8, %xmm2, %xmm3, %xmm3	# xmm3 has {x,x,D,C}
			
 
				-	vpminud	%xmm3, %xmm2, %xmm2		# xmm2 has {x,x,E,F}
			
 
				-	vpalignr $4, %xmm2, %xmm3, %xmm3	# xmm3 has {x,x,x,E}
			
 
				-	vpminud	%xmm3, %xmm2, %xmm2		# xmm2 has min val in low dword
			
 
				-
			
 
				-	vmovd	%xmm2, DWORD_idx
			
 
				-	mov	idx, len2
			
 
				-	and	$0xF, idx
			
 
				-	shr	$4, len2
			
 
				-	jz	len_is_0
			
 
				-
			
 
				-	vpand	clear_low_nibble(%rip), %xmm2, %xmm2
			
 
				-	vpshufd	$0, %xmm2, %xmm2
			
 
				-
			
 
				-	vpsubd	%xmm2, %xmm0, %xmm0
			
 
				-	vpsubd	%xmm2, %xmm1, %xmm1
			
 
				-
			
 
				-	vmovdqa	%xmm0, _lens + 0*16(state)
			
 
				-	vmovdqa	%xmm1, _lens + 1*16(state)
			
 
				-
			
 
				-	# "state" and "args" are the same address, arg1
			
 
				-	# len is arg2
			
 
				-	call	sha256_x8_avx2
			
 
				-
			
 
				-	# state and idx are intact
			
 
				-
			
 
				-len_is_0:
			
 
				-	# process completed job "idx"
			
 
				-	imul	$_LANE_DATA_size, idx, lane_data
			
 
				-	lea	_ldata(state, lane_data), lane_data
			
 
				-
			
 
				-	mov	_job_in_lane(lane_data), job_rax
			
 
				-	mov	_unused_lanes(state), unused_lanes
			
 
				-	movq	$0, _job_in_lane(lane_data)
			
 
				-	movl	$STS_COMPLETED, _status(job_rax)
			
 
				-	shl	$4, unused_lanes
			
 
				-	or	idx, unused_lanes
			
 
				-	mov	unused_lanes, _unused_lanes(state)
			
 
				-
			
 
				-	movl	$0xFFFFFFFF, _lens(state,idx,4)
			
 
				-
			
 
				-	vmovd	_args_digest(state, idx, 4), %xmm0
			
 
				-	vpinsrd	$1, _args_digest+1*32(state , idx, 4), %xmm0, %xmm0
			
 
				-	vpinsrd	$2, _args_digest+2*32(state , idx, 4), %xmm0, %xmm0
			
 
				-	vpinsrd	$3, _args_digest+3*32(state , idx, 4), %xmm0, %xmm0
			
 
				-	vmovd	_args_digest+4*32(state, idx, 4), %xmm1
			
 
				-
			
 
				-	vpinsrd	$1, _args_digest+5*32(state , idx, 4), %xmm1, %xmm1
			
 
				-	vpinsrd	$2, _args_digest+6*32(state , idx, 4), %xmm1, %xmm1
			
 
				-	vpinsrd	$3, _args_digest+7*32(state , idx, 4), %xmm1, %xmm1
			
 
				-
			
 
				-	vmovdqu	%xmm0, _result_digest(job_rax)
			
 
				-	vmovdqu	%xmm1, _result_digest+1*16(job_rax)
			
 
				-
			
 
				-return:
			
 
				-	pop     %r12
			
 
				-        pop     %rbx
			
 
				-        FRAME_END
			
 
				-	ret
			
 
				-
			
 
				-return_null:
			
 
				-	xor	job_rax, job_rax
			
 
				-	jmp	return
			
 
				-
			
 
				-ENDPROC(sha256_mb_mgr_submit_avx2)
			
 
				-
			
 
				-.section	.rodata.cst16.clear_low_nibble, "aM", @progbits, 16
			
 
				-.align 16
			
 
				-clear_low_nibble:
			
 
				-	.octa	0x000000000000000000000000FFFFFFF0
			
--- a/arch/x86/crypto/sha256-mb/sha256_x8_avx2.S
+++ b/arch/x86/crypto/sha256-mb/sha256_x8_avx2.S
@@ -1,598 +0,0 @@
 
				-/*
			
 
				- * Multi-buffer SHA256 algorithm hash compute routine
			
 
				- *
			
 
				- * This file is provided under a dual BSD/GPLv2 license.  When using or
			
 
				- * redistributing this file, you may do so under either license.
			
 
				- *
			
 
				- * GPL LICENSE SUMMARY
			
 
				- *
			
 
				- *  Copyright(c) 2016 Intel Corporation.
			
 
				- *
			
 
				- *  This program is free software; you can redistribute it and/or modify
			
 
				- *  it under the terms of version 2 of the GNU General Public License as
			
 
				- *  published by the Free Software Foundation.
			
 
				- *
			
 
				- *  This program is distributed in the hope that it will be useful, but
			
 
				- *  WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
			
 
				- *  General Public License for more details.
			
 
				- *
			
 
				- *  Contact Information:
			
 
				- *	Megha Dey <megha.dey@linux.intel.com>
			
 
				- *
			
 
				- *  BSD LICENSE
			
 
				- *
			
 
				- *  Copyright(c) 2016 Intel Corporation.
			
 
				- *
			
 
				- *  Redistribution and use in source and binary forms, with or without
			
 
				- *  modification, are permitted provided that the following conditions
			
 
				- *  are met:
			
 
				- *
			
 
				- *    * Redistributions of source code must retain the above copyright
			
 
				- *      notice, this list of conditions and the following disclaimer.
			
 
				- *    * Redistributions in binary form must reproduce the above copyright
			
 
				- *      notice, this list of conditions and the following disclaimer in
			
 
				- *      the documentation and/or other materials provided with the
			
 
				- *      distribution.
			
 
				- *    * Neither the name of Intel Corporation nor the names of its
			
 
				- *      contributors may be used to endorse or promote products derived
			
 
				- *      from this software without specific prior written permission.
			
 
				- *
			
 
				- *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
			
 
				- *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
			
 
				- *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
			
 
				- *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
			
 
				- *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
			
 
				- *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
			
 
				- *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
			
 
				- *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
			
 
				- *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
			
 
				- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
			
 
				- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
			
 
				- */
			
 
				-
			
 
				-#include <linux/linkage.h>
			
 
				-#include "sha256_mb_mgr_datastruct.S"
			
 
				-
			
 
				-## code to compute oct SHA256 using SSE-256
			
 
				-## outer calling routine takes care of save and restore of XMM registers
			
 
				-## Logic designed/laid out by JDG
			
 
				-
			
 
				-## Function clobbers: rax, rcx, rdx,   rbx, rsi, rdi, r9-r15; %ymm0-15
			
 
				-## Linux clobbers:    rax rbx rcx rdx rsi            r9 r10 r11 r12 r13 r14 r15
			
 
				-## Linux preserves:                       rdi rbp r8
			
 
				-##
			
 
				-## clobbers %ymm0-15
			
 
				-
			
 
				-arg1 = %rdi
			
 
				-arg2 = %rsi
			
 
				-reg3 = %rcx
			
 
				-reg4 = %rdx
			
 
				-
			
 
				-# Common definitions
			
 
				-STATE = arg1
			
 
				-INP_SIZE = arg2
			
 
				-
			
 
				-IDX = %rax
			
 
				-ROUND = %rbx
			
 
				-TBL = reg3
			
 
				-
			
 
				-inp0 = %r9
			
 
				-inp1 = %r10
			
 
				-inp2 = %r11
			
 
				-inp3 = %r12
			
 
				-inp4 = %r13
			
 
				-inp5 = %r14
			
 
				-inp6 = %r15
			
 
				-inp7 = reg4
			
 
				-
			
 
				-a = %ymm0
			
 
				-b = %ymm1
			
 
				-c = %ymm2
			
 
				-d = %ymm3
			
 
				-e = %ymm4
			
 
				-f = %ymm5
			
 
				-g = %ymm6
			
 
				-h = %ymm7
			
 
				-
			
 
				-T1 = %ymm8
			
 
				-
			
 
				-a0 = %ymm12
			
 
				-a1 = %ymm13
			
 
				-a2 = %ymm14
			
 
				-TMP = %ymm15
			
 
				-TMP0 = %ymm6
			
 
				-TMP1 = %ymm7
			
 
				-
			
 
				-TT0 = %ymm8
			
 
				-TT1 = %ymm9
			
 
				-TT2 = %ymm10
			
 
				-TT3 = %ymm11
			
 
				-TT4 = %ymm12
			
 
				-TT5 = %ymm13
			
 
				-TT6 = %ymm14
			
 
				-TT7 = %ymm15
			
 
				-
			
 
				-# Define stack usage
			
 
				-
			
 
				-# Assume stack aligned to 32 bytes before call
			
 
				-# Therefore FRAMESZ mod 32 must be 32-8 = 24
			
 
				-
			
 
				-#define FRAMESZ	0x388
			
 
				-
			
 
				-#define VMOVPS	vmovups
			
 
				-
			
 
				-# TRANSPOSE8 r0, r1, r2, r3, r4, r5, r6, r7, t0, t1
			
 
				-# "transpose" data in {r0...r7} using temps {t0...t1}
			
 
				-# Input looks like: {r0 r1 r2 r3 r4 r5 r6 r7}
			
 
				-# r0 = {a7 a6 a5 a4   a3 a2 a1 a0}
			
 
				-# r1 = {b7 b6 b5 b4   b3 b2 b1 b0}
			
 
				-# r2 = {c7 c6 c5 c4   c3 c2 c1 c0}
			
 
				-# r3 = {d7 d6 d5 d4   d3 d2 d1 d0}
			
 
				-# r4 = {e7 e6 e5 e4   e3 e2 e1 e0}
			
 
				-# r5 = {f7 f6 f5 f4   f3 f2 f1 f0}
			
 
				-# r6 = {g7 g6 g5 g4   g3 g2 g1 g0}
			
 
				-# r7 = {h7 h6 h5 h4   h3 h2 h1 h0}
			
 
				-#
			
 
				-# Output looks like: {r0 r1 r2 r3 r4 r5 r6 r7}
			
 
				-# r0 = {h0 g0 f0 e0   d0 c0 b0 a0}
			
 
				-# r1 = {h1 g1 f1 e1   d1 c1 b1 a1}
			
 
				-# r2 = {h2 g2 f2 e2   d2 c2 b2 a2}
			
 
				-# r3 = {h3 g3 f3 e3   d3 c3 b3 a3}
			
 
				-# r4 = {h4 g4 f4 e4   d4 c4 b4 a4}
			
 
				-# r5 = {h5 g5 f5 e5   d5 c5 b5 a5}
			
 
				-# r6 = {h6 g6 f6 e6   d6 c6 b6 a6}
			
 
				-# r7 = {h7 g7 f7 e7   d7 c7 b7 a7}
			
 
				-#
			
 
				-
			
 
				-.macro TRANSPOSE8 r0 r1 r2 r3 r4 r5 r6 r7 t0 t1
			
 
				-	# process top half (r0..r3) {a...d}
			
 
				-	vshufps	$0x44, \r1, \r0, \t0 # t0 = {b5 b4 a5 a4   b1 b0 a1 a0}
			
 
				-	vshufps	$0xEE, \r1, \r0, \r0 # r0 = {b7 b6 a7 a6   b3 b2 a3 a2}
			
 
				-	vshufps	$0x44, \r3, \r2, \t1 # t1 = {d5 d4 c5 c4   d1 d0 c1 c0}
			
 
				-	vshufps	$0xEE, \r3, \r2, \r2 # r2 = {d7 d6 c7 c6   d3 d2 c3 c2}
			
 
				-	vshufps	$0xDD, \t1, \t0, \r3 # r3 = {d5 c5 b5 a5   d1 c1 b1 a1}
			
 
				-	vshufps	$0x88, \r2, \r0, \r1 # r1 = {d6 c6 b6 a6   d2 c2 b2 a2}
			
 
				-	vshufps	$0xDD, \r2, \r0, \r0 # r0 = {d7 c7 b7 a7   d3 c3 b3 a3}
			
 
				-	vshufps	$0x88, \t1, \t0, \t0 # t0 = {d4 c4 b4 a4   d0 c0 b0 a0}
			
 
				-
			
 
				-	# use r2 in place of t0
			
 
				-	# process bottom half (r4..r7) {e...h}
			
 
				-	vshufps	$0x44, \r5, \r4, \r2 # r2 = {f5 f4 e5 e4   f1 f0 e1 e0}
			
 
				-	vshufps	$0xEE, \r5, \r4, \r4 # r4 = {f7 f6 e7 e6   f3 f2 e3 e2}
			
 
				-	vshufps	$0x44, \r7, \r6, \t1 # t1 = {h5 h4 g5 g4   h1 h0 g1 g0}
			
 
				-	vshufps	$0xEE, \r7, \r6, \r6 # r6 = {h7 h6 g7 g6   h3 h2 g3 g2}
			
 
				-	vshufps	$0xDD, \t1, \r2, \r7 # r7 = {h5 g5 f5 e5   h1 g1 f1 e1}
			
 
				-	vshufps	$0x88, \r6, \r4, \r5 # r5 = {h6 g6 f6 e6   h2 g2 f2 e2}
			
 
				-	vshufps	$0xDD, \r6, \r4, \r4 # r4 = {h7 g7 f7 e7   h3 g3 f3 e3}
			
 
				-	vshufps	$0x88, \t1, \r2, \t1 # t1 = {h4 g4 f4 e4   h0 g0 f0 e0}
			
 
				-
			
 
				-	vperm2f128	$0x13, \r1, \r5, \r6  # h6...a6
			
 
				-	vperm2f128	$0x02, \r1, \r5, \r2  # h2...a2
			
 
				-	vperm2f128	$0x13, \r3, \r7, \r5  # h5...a5
			
 
				-	vperm2f128	$0x02, \r3, \r7, \r1  # h1...a1
			
 
				-	vperm2f128	$0x13, \r0, \r4, \r7  # h7...a7
			
 
				-	vperm2f128	$0x02, \r0, \r4, \r3  # h3...a3
			
 
				-	vperm2f128	$0x13, \t0, \t1, \r4  # h4...a4
			
 
				-	vperm2f128	$0x02, \t0, \t1, \r0  # h0...a0
			
 
				-
			
 
				-.endm
			
 
				-
			
 
				-.macro ROTATE_ARGS
			
 
				-TMP_ = h
			
 
				-h = g
			
 
				-g = f
			
 
				-f = e
			
 
				-e = d
			
 
				-d = c
			
 
				-c = b
			
 
				-b = a
			
 
				-a = TMP_
			
 
				-.endm
			
 
				-
			
 
				-.macro _PRORD reg imm tmp
			
 
				-	vpslld	$(32-\imm),\reg,\tmp
			
 
				-	vpsrld	$\imm,\reg, \reg
			
 
				-	vpor	\tmp,\reg, \reg
			
 
				-.endm
			
 
				-
			
 
				-# PRORD_nd reg, imm, tmp, src
			
 
				-.macro _PRORD_nd reg imm tmp src
			
 
				-	vpslld	$(32-\imm), \src, \tmp
			
 
				-	vpsrld	$\imm, \src, \reg
			
 
				-	vpor	\tmp, \reg, \reg
			
 
				-.endm
			
 
				-
			
 
				-# PRORD dst/src, amt
			
 
				-.macro PRORD reg imm
			
 
				-	_PRORD	\reg,\imm,TMP
			
 
				-.endm
			
 
				-
			
 
				-# PRORD_nd dst, src, amt
			
 
				-.macro PRORD_nd reg tmp imm
			
 
				-	_PRORD_nd	\reg, \imm, TMP, \tmp
			
 
				-.endm
			
 
				-
			
 
				-# arguments passed implicitly in preprocessor symbols i, a...h
			
 
				-.macro ROUND_00_15 _T1 i
			
 
				-	PRORD_nd	a0,e,5	# sig1: a0 = (e >> 5)
			
 
				-
			
 
				-	vpxor	g, f, a2	# ch: a2 = f^g
			
 
				-	vpand	e,a2, a2	# ch: a2 = (f^g)&e
			
 
				-	vpxor	g, a2, a2	# a2 = ch
			
 
				-
			
 
				-	PRORD_nd	a1,e,25	# sig1: a1 = (e >> 25)
			
 
				-
			
 
				-	vmovdqu	\_T1,(SZ8*(\i & 0xf))(%rsp)
			
 
				-	vpaddd	(TBL,ROUND,1), \_T1, \_T1	# T1 = W + K
			
 
				-	vpxor	e,a0, a0	# sig1: a0 = e ^ (e >> 5)
			
 
				-	PRORD	a0, 6		# sig1: a0 = (e >> 6) ^ (e >> 11)
			
 
				-	vpaddd	a2, h, h	# h = h + ch
			
 
				-	PRORD_nd	a2,a,11	# sig0: a2 = (a >> 11)
			
 
				-	vpaddd	\_T1,h, h 	# h = h + ch + W + K
			
 
				-	vpxor	a1, a0, a0	# a0 = sigma1
			
 
				-	PRORD_nd	a1,a,22	# sig0: a1 = (a >> 22)
			
 
				-	vpxor	c, a, \_T1	# maj: T1 = a^c
			
 
				-	add	$SZ8, ROUND	# ROUND++
			
 
				-	vpand	b, \_T1, \_T1	# maj: T1 = (a^c)&b
			
 
				-	vpaddd	a0, h, h
			
 
				-	vpaddd	h, d, d
			
 
				-	vpxor	a, a2, a2	# sig0: a2 = a ^ (a >> 11)
			
 
				-	PRORD	a2,2		# sig0: a2 = (a >> 2) ^ (a >> 13)
			
 
				-	vpxor	a1, a2, a2	# a2 = sig0
			
 
				-	vpand	c, a, a1	# maj: a1 = a&c
			
 
				-	vpor	\_T1, a1, a1 	# a1 = maj
			
 
				-	vpaddd	a1, h, h	# h = h + ch + W + K + maj
			
 
				-	vpaddd	a2, h, h	# h = h + ch + W + K + maj + sigma0
			
 
				-	ROTATE_ARGS
			
 
				-.endm
			
 
				-
			
 
				-# arguments passed implicitly in preprocessor symbols i, a...h
			
 
				-.macro ROUND_16_XX _T1 i
			
 
				-	vmovdqu	(SZ8*((\i-15)&0xf))(%rsp), \_T1
			
 
				-	vmovdqu	(SZ8*((\i-2)&0xf))(%rsp), a1
			
 
				-	vmovdqu	\_T1, a0
			
 
				-	PRORD	\_T1,11
			
 
				-	vmovdqu	a1, a2
			
 
				-	PRORD	a1,2
			
 
				-	vpxor	a0, \_T1, \_T1
			
 
				-	PRORD	\_T1, 7
			
 
				-	vpxor	a2, a1, a1
			
 
				-	PRORD	a1, 17
			
 
				-	vpsrld	$3, a0, a0
			
 
				-	vpxor	a0, \_T1, \_T1
			
 
				-	vpsrld	$10, a2, a2
			
 
				-	vpxor	a2, a1, a1
			
 
				-	vpaddd	(SZ8*((\i-16)&0xf))(%rsp), \_T1, \_T1
			
 
				-	vpaddd	(SZ8*((\i-7)&0xf))(%rsp), a1, a1
			
 
				-	vpaddd	a1, \_T1, \_T1
			
 
				-
			
 
				-	ROUND_00_15 \_T1,\i
			
 
				-.endm
			
 
				-
			
 
				-# SHA256_ARGS:
			
 
				-#   UINT128 digest[8];  // transposed digests
			
 
				-#   UINT8  *data_ptr[4];
			
 
				-
			
 
				-# void sha256_x8_avx2(SHA256_ARGS *args, UINT64 bytes);
			
 
				-# arg 1 : STATE : pointer to array of pointers to input data
			
 
				-# arg 2 : INP_SIZE  : size of input in blocks
			
 
				-	# general registers preserved in outer calling routine
			
 
				-	# outer calling routine saves all the XMM registers
			
 
				-	# save rsp, allocate 32-byte aligned for local variables
			
 
				-ENTRY(sha256_x8_avx2)
			
 
				-
			
 
				-	# save callee-saved clobbered registers to comply with C function ABI
			
 
				-	push    %r12
			
 
				-	push    %r13
			
 
				-	push    %r14
			
 
				-	push    %r15
			
 
				-
			
 
				-	mov	%rsp, IDX
			
 
				-	sub	$FRAMESZ, %rsp
			
 
				-	and	$~0x1F, %rsp
			
 
				-	mov	IDX, _rsp(%rsp)
			
 
				-
			
 
				-	# Load the pre-transposed incoming digest.
			
 
				-	vmovdqu	0*SHA256_DIGEST_ROW_SIZE(STATE),a
			
 
				-	vmovdqu	1*SHA256_DIGEST_ROW_SIZE(STATE),b
			
 
				-	vmovdqu	2*SHA256_DIGEST_ROW_SIZE(STATE),c
			
 
				-	vmovdqu	3*SHA256_DIGEST_ROW_SIZE(STATE),d
			
 
				-	vmovdqu	4*SHA256_DIGEST_ROW_SIZE(STATE),e
			
 
				-	vmovdqu	5*SHA256_DIGEST_ROW_SIZE(STATE),f
			
 
				-	vmovdqu	6*SHA256_DIGEST_ROW_SIZE(STATE),g
			
 
				-	vmovdqu	7*SHA256_DIGEST_ROW_SIZE(STATE),h
			
 
				-
			
 
				-	lea	K256_8(%rip),TBL
			
 
				-
			
 
				-	# load the address of each of the 4 message lanes
			
 
				-	# getting ready to transpose input onto stack
			
 
				-	mov	_args_data_ptr+0*PTR_SZ(STATE),inp0
			
 
				-	mov	_args_data_ptr+1*PTR_SZ(STATE),inp1
			
 
				-	mov	_args_data_ptr+2*PTR_SZ(STATE),inp2
			
 
				-	mov	_args_data_ptr+3*PTR_SZ(STATE),inp3
			
 
				-	mov	_args_data_ptr+4*PTR_SZ(STATE),inp4
			
 
				-	mov	_args_data_ptr+5*PTR_SZ(STATE),inp5
			
 
				-	mov	_args_data_ptr+6*PTR_SZ(STATE),inp6
			
 
				-	mov	_args_data_ptr+7*PTR_SZ(STATE),inp7
			
 
				-
			
 
				-	xor	IDX, IDX
			
 
				-lloop:
			
 
				-	xor	ROUND, ROUND
			
 
				-
			
 
				-	# save old digest
			
 
				-	vmovdqu	a, _digest(%rsp)
			
 
				-	vmovdqu	b, _digest+1*SZ8(%rsp)
			
 
				-	vmovdqu	c, _digest+2*SZ8(%rsp)
			
 
				-	vmovdqu	d, _digest+3*SZ8(%rsp)
			
 
				-	vmovdqu	e, _digest+4*SZ8(%rsp)
			
 
				-	vmovdqu	f, _digest+5*SZ8(%rsp)
			
 
				-	vmovdqu	g, _digest+6*SZ8(%rsp)
			
 
				-	vmovdqu	h, _digest+7*SZ8(%rsp)
			
 
				-	i = 0
			
 
				-.rep 2
			
 
				-	VMOVPS	i*32(inp0, IDX), TT0
			
 
				-	VMOVPS	i*32(inp1, IDX), TT1
			
 
				-	VMOVPS	i*32(inp2, IDX), TT2
			
 
				-	VMOVPS	i*32(inp3, IDX), TT3
			
 
				-	VMOVPS	i*32(inp4, IDX), TT4
			
 
				-	VMOVPS	i*32(inp5, IDX), TT5
			
 
				-	VMOVPS	i*32(inp6, IDX), TT6
			
 
				-	VMOVPS	i*32(inp7, IDX), TT7
			
 
				-	vmovdqu	g, _ytmp(%rsp)
			
 
				-	vmovdqu	h, _ytmp+1*SZ8(%rsp)
			
 
				-	TRANSPOSE8	TT0, TT1, TT2, TT3, TT4, TT5, TT6, TT7,   TMP0, TMP1
			
 
				-	vmovdqu	PSHUFFLE_BYTE_FLIP_MASK(%rip), TMP1
			
 
				-	vmovdqu	_ytmp(%rsp), g
			
 
				-	vpshufb	TMP1, TT0, TT0
			
 
				-	vpshufb	TMP1, TT1, TT1
			
 
				-	vpshufb	TMP1, TT2, TT2
			
 
				-	vpshufb	TMP1, TT3, TT3
			
 
				-	vpshufb	TMP1, TT4, TT4
			
 
				-	vpshufb	TMP1, TT5, TT5
			
 
				-	vpshufb	TMP1, TT6, TT6
			
 
				-	vpshufb	TMP1, TT7, TT7
			
 
				-	vmovdqu	_ytmp+1*SZ8(%rsp), h
			
 
				-	vmovdqu	TT4, _ytmp(%rsp)
			
 
				-	vmovdqu	TT5, _ytmp+1*SZ8(%rsp)
			
 
				-	vmovdqu	TT6, _ytmp+2*SZ8(%rsp)
			
 
				-	vmovdqu	TT7, _ytmp+3*SZ8(%rsp)
			
 
				-	ROUND_00_15	TT0,(i*8+0)
			
 
				-	vmovdqu	_ytmp(%rsp), TT0
			
 
				-	ROUND_00_15	TT1,(i*8+1)
			
 
				-	vmovdqu	_ytmp+1*SZ8(%rsp), TT1
			
 
				-	ROUND_00_15	TT2,(i*8+2)
			
 
				-	vmovdqu	_ytmp+2*SZ8(%rsp), TT2
			
 
				-	ROUND_00_15	TT3,(i*8+3)
			
 
				-	vmovdqu	_ytmp+3*SZ8(%rsp), TT3
			
 
				-	ROUND_00_15	TT0,(i*8+4)
			
 
				-	ROUND_00_15	TT1,(i*8+5)
			
 
				-	ROUND_00_15	TT2,(i*8+6)
			
 
				-	ROUND_00_15	TT3,(i*8+7)
			
 
				-	i = (i+1)
			
 
				-.endr
			
 
				-	add	$64, IDX
			
 
				-	i = (i*8)
			
 
				-
			
 
				-	jmp	Lrounds_16_xx
			
 
				-.align 16
			
 
				-Lrounds_16_xx:
			
 
				-.rep 16
			
 
				-	ROUND_16_XX	T1, i
			
 
				-	i = (i+1)
			
 
				-.endr
			
 
				-
			
 
				-	cmp	$ROUNDS,ROUND
			
 
				-	jb	Lrounds_16_xx
			
 
				-
			
 
				-	# add old digest
			
 
				-	vpaddd	_digest+0*SZ8(%rsp), a, a
			
 
				-	vpaddd	_digest+1*SZ8(%rsp), b, b
			
 
				-	vpaddd	_digest+2*SZ8(%rsp), c, c
			
 
				-	vpaddd	_digest+3*SZ8(%rsp), d, d
			
 
				-	vpaddd	_digest+4*SZ8(%rsp), e, e
			
 
				-	vpaddd	_digest+5*SZ8(%rsp), f, f
			
 
				-	vpaddd	_digest+6*SZ8(%rsp), g, g
			
 
				-	vpaddd	_digest+7*SZ8(%rsp), h, h
			
 
				-
			
 
				-	sub	$1, INP_SIZE  # unit is blocks
			
 
				-	jne	lloop
			
 
				-
			
 
				-	# write back to memory (state object) the transposed digest
			
 
				-	vmovdqu	a, 0*SHA256_DIGEST_ROW_SIZE(STATE)
			
 
				-	vmovdqu	b, 1*SHA256_DIGEST_ROW_SIZE(STATE)
			
 
				-	vmovdqu	c, 2*SHA256_DIGEST_ROW_SIZE(STATE)
			
 
				-	vmovdqu	d, 3*SHA256_DIGEST_ROW_SIZE(STATE)
			
 
				-	vmovdqu	e, 4*SHA256_DIGEST_ROW_SIZE(STATE)
			
 
				-	vmovdqu	f, 5*SHA256_DIGEST_ROW_SIZE(STATE)
			
 
				-	vmovdqu	g, 6*SHA256_DIGEST_ROW_SIZE(STATE)
			
 
				-	vmovdqu	h, 7*SHA256_DIGEST_ROW_SIZE(STATE)
			
 
				-
			
 
				-	# update input pointers
			
 
				-	add	IDX, inp0
			
 
				-	mov	inp0, _args_data_ptr+0*8(STATE)
			
 
				-	add	IDX, inp1
			
 
				-	mov	inp1, _args_data_ptr+1*8(STATE)
			
 
				-	add	IDX, inp2
			
 
				-	mov	inp2, _args_data_ptr+2*8(STATE)
			
 
				-	add	IDX, inp3
			
 
				-	mov	inp3, _args_data_ptr+3*8(STATE)
			
 
				-	add	IDX, inp4
			
 
				-	mov	inp4, _args_data_ptr+4*8(STATE)
			
 
				-	add	IDX, inp5
			
 
				-	mov	inp5, _args_data_ptr+5*8(STATE)
			
 
				-	add	IDX, inp6
			
 
				-	mov	inp6, _args_data_ptr+6*8(STATE)
			
 
				-	add	IDX, inp7
			
 
				-	mov	inp7, _args_data_ptr+7*8(STATE)
			
 
				-
			
 
				-	# Postamble
			
 
				-	mov	_rsp(%rsp), %rsp
			
 
				-
			
 
				-	# restore callee-saved clobbered registers
			
 
				-	pop     %r15
			
 
				-	pop     %r14
			
 
				-	pop     %r13
			
 
				-	pop     %r12
			
 
				-
			
 
				-	ret
			
 
				-ENDPROC(sha256_x8_avx2)
			
 
				-
			
 
				-.section	.rodata.K256_8, "a", @progbits
			
 
				-.align 64
			
 
				-K256_8:
			
 
				-	.octa	0x428a2f98428a2f98428a2f98428a2f98
			
 
				-	.octa	0x428a2f98428a2f98428a2f98428a2f98
			
 
				-	.octa	0x71374491713744917137449171374491
			
 
				-	.octa	0x71374491713744917137449171374491
			
 
				-	.octa	0xb5c0fbcfb5c0fbcfb5c0fbcfb5c0fbcf
			
 
				-	.octa	0xb5c0fbcfb5c0fbcfb5c0fbcfb5c0fbcf
			
 
				-	.octa	0xe9b5dba5e9b5dba5e9b5dba5e9b5dba5
			
 
				-	.octa	0xe9b5dba5e9b5dba5e9b5dba5e9b5dba5
			
 
				-	.octa	0x3956c25b3956c25b3956c25b3956c25b
			
 
				-	.octa	0x3956c25b3956c25b3956c25b3956c25b
			
 
				-	.octa	0x59f111f159f111f159f111f159f111f1
			
 
				-	.octa	0x59f111f159f111f159f111f159f111f1
			
 
				-	.octa	0x923f82a4923f82a4923f82a4923f82a4
			
 
				-	.octa	0x923f82a4923f82a4923f82a4923f82a4
			
 
				-	.octa	0xab1c5ed5ab1c5ed5ab1c5ed5ab1c5ed5
			
 
				-	.octa	0xab1c5ed5ab1c5ed5ab1c5ed5ab1c5ed5
			
 
				-	.octa	0xd807aa98d807aa98d807aa98d807aa98
			
 
				-	.octa	0xd807aa98d807aa98d807aa98d807aa98
			
 
				-	.octa	0x12835b0112835b0112835b0112835b01
			
 
				-	.octa	0x12835b0112835b0112835b0112835b01
			
 
				-	.octa	0x243185be243185be243185be243185be
			
 
				-	.octa	0x243185be243185be243185be243185be
			
 
				-	.octa	0x550c7dc3550c7dc3550c7dc3550c7dc3
			
 
				-	.octa	0x550c7dc3550c7dc3550c7dc3550c7dc3
			
 
				-	.octa	0x72be5d7472be5d7472be5d7472be5d74
			
 
				-	.octa	0x72be5d7472be5d7472be5d7472be5d74
			
 
				-	.octa	0x80deb1fe80deb1fe80deb1fe80deb1fe
			
 
				-	.octa	0x80deb1fe80deb1fe80deb1fe80deb1fe
			
 
				-	.octa	0x9bdc06a79bdc06a79bdc06a79bdc06a7
			
 
				-	.octa	0x9bdc06a79bdc06a79bdc06a79bdc06a7
			
 
				-	.octa	0xc19bf174c19bf174c19bf174c19bf174
			
 
				-	.octa	0xc19bf174c19bf174c19bf174c19bf174
			
 
				-	.octa	0xe49b69c1e49b69c1e49b69c1e49b69c1
			
 
				-	.octa	0xe49b69c1e49b69c1e49b69c1e49b69c1
			
 
				-	.octa	0xefbe4786efbe4786efbe4786efbe4786
			
 
				-	.octa	0xefbe4786efbe4786efbe4786efbe4786
			
 
				-	.octa	0x0fc19dc60fc19dc60fc19dc60fc19dc6
			
 
				-	.octa	0x0fc19dc60fc19dc60fc19dc60fc19dc6
			
 
				-	.octa	0x240ca1cc240ca1cc240ca1cc240ca1cc
			
 
				-	.octa	0x240ca1cc240ca1cc240ca1cc240ca1cc
			
 
				-	.octa	0x2de92c6f2de92c6f2de92c6f2de92c6f
			
 
				-	.octa	0x2de92c6f2de92c6f2de92c6f2de92c6f
			
 
				-	.octa	0x4a7484aa4a7484aa4a7484aa4a7484aa
			
 
				-	.octa	0x4a7484aa4a7484aa4a7484aa4a7484aa
			
 
				-	.octa	0x5cb0a9dc5cb0a9dc5cb0a9dc5cb0a9dc
			
 
				-	.octa	0x5cb0a9dc5cb0a9dc5cb0a9dc5cb0a9dc
			
 
				-	.octa	0x76f988da76f988da76f988da76f988da
			
 
				-	.octa	0x76f988da76f988da76f988da76f988da
			
 
				-	.octa	0x983e5152983e5152983e5152983e5152
			
 
				-	.octa	0x983e5152983e5152983e5152983e5152
			
 
				-	.octa	0xa831c66da831c66da831c66da831c66d
			
 
				-	.octa	0xa831c66da831c66da831c66da831c66d
			
 
				-	.octa	0xb00327c8b00327c8b00327c8b00327c8
			
 
				-	.octa	0xb00327c8b00327c8b00327c8b00327c8
			
 
				-	.octa	0xbf597fc7bf597fc7bf597fc7bf597fc7
			
 
				-	.octa	0xbf597fc7bf597fc7bf597fc7bf597fc7
			
 
				-	.octa	0xc6e00bf3c6e00bf3c6e00bf3c6e00bf3
			
 
				-	.octa	0xc6e00bf3c6e00bf3c6e00bf3c6e00bf3
			
 
				-	.octa	0xd5a79147d5a79147d5a79147d5a79147
			
 
				-	.octa	0xd5a79147d5a79147d5a79147d5a79147
			
 
				-	.octa	0x06ca635106ca635106ca635106ca6351
			
 
				-	.octa	0x06ca635106ca635106ca635106ca6351
			
 
				-	.octa	0x14292967142929671429296714292967
			
 
				-	.octa	0x14292967142929671429296714292967
			
 
				-	.octa	0x27b70a8527b70a8527b70a8527b70a85
			
 
				-	.octa	0x27b70a8527b70a8527b70a8527b70a85
			
 
				-	.octa	0x2e1b21382e1b21382e1b21382e1b2138
			
 
				-	.octa	0x2e1b21382e1b21382e1b21382e1b2138
			
 
				-	.octa	0x4d2c6dfc4d2c6dfc4d2c6dfc4d2c6dfc
			
 
				-	.octa	0x4d2c6dfc4d2c6dfc4d2c6dfc4d2c6dfc
			
 
				-	.octa	0x53380d1353380d1353380d1353380d13
			
 
				-	.octa	0x53380d1353380d1353380d1353380d13
			
 
				-	.octa	0x650a7354650a7354650a7354650a7354
			
 
				-	.octa	0x650a7354650a7354650a7354650a7354
			
 
				-	.octa	0x766a0abb766a0abb766a0abb766a0abb
			
 
				-	.octa	0x766a0abb766a0abb766a0abb766a0abb
			
 
				-	.octa	0x81c2c92e81c2c92e81c2c92e81c2c92e
			
 
				-	.octa	0x81c2c92e81c2c92e81c2c92e81c2c92e
			
 
				-	.octa	0x92722c8592722c8592722c8592722c85
			
 
				-	.octa	0x92722c8592722c8592722c8592722c85
			
 
				-	.octa	0xa2bfe8a1a2bfe8a1a2bfe8a1a2bfe8a1
			
 
				-	.octa	0xa2bfe8a1a2bfe8a1a2bfe8a1a2bfe8a1
			
 
				-	.octa	0xa81a664ba81a664ba81a664ba81a664b
			
 
				-	.octa	0xa81a664ba81a664ba81a664ba81a664b
			
 
				-	.octa	0xc24b8b70c24b8b70c24b8b70c24b8b70
			
 
				-	.octa	0xc24b8b70c24b8b70c24b8b70c24b8b70
			
 
				-	.octa	0xc76c51a3c76c51a3c76c51a3c76c51a3
			
 
				-	.octa	0xc76c51a3c76c51a3c76c51a3c76c51a3
			
 
				-	.octa	0xd192e819d192e819d192e819d192e819
			
 
				-	.octa	0xd192e819d192e819d192e819d192e819
			
 
				-	.octa	0xd6990624d6990624d6990624d6990624
			
 
				-	.octa	0xd6990624d6990624d6990624d6990624
			
 
				-	.octa	0xf40e3585f40e3585f40e3585f40e3585
			
 
				-	.octa	0xf40e3585f40e3585f40e3585f40e3585
			
 
				-	.octa	0x106aa070106aa070106aa070106aa070
			
 
				-	.octa	0x106aa070106aa070106aa070106aa070
			
 
				-	.octa	0x19a4c11619a4c11619a4c11619a4c116
			
 
				-	.octa	0x19a4c11619a4c11619a4c11619a4c116
			
 
				-	.octa	0x1e376c081e376c081e376c081e376c08
			
 
				-	.octa	0x1e376c081e376c081e376c081e376c08
			
 
				-	.octa	0x2748774c2748774c2748774c2748774c
			
 
				-	.octa	0x2748774c2748774c2748774c2748774c
			
 
				-	.octa	0x34b0bcb534b0bcb534b0bcb534b0bcb5
			
 
				-	.octa	0x34b0bcb534b0bcb534b0bcb534b0bcb5
			
 
				-	.octa	0x391c0cb3391c0cb3391c0cb3391c0cb3
			
 
				-	.octa	0x391c0cb3391c0cb3391c0cb3391c0cb3
			
 
				-	.octa	0x4ed8aa4a4ed8aa4a4ed8aa4a4ed8aa4a
			
 
				-	.octa	0x4ed8aa4a4ed8aa4a4ed8aa4a4ed8aa4a
			
 
				-	.octa	0x5b9cca4f5b9cca4f5b9cca4f5b9cca4f
			
 
				-	.octa	0x5b9cca4f5b9cca4f5b9cca4f5b9cca4f
			
 
				-	.octa	0x682e6ff3682e6ff3682e6ff3682e6ff3
			
 
				-	.octa	0x682e6ff3682e6ff3682e6ff3682e6ff3
			
 
				-	.octa	0x748f82ee748f82ee748f82ee748f82ee
			
 
				-	.octa	0x748f82ee748f82ee748f82ee748f82ee
			
 
				-	.octa	0x78a5636f78a5636f78a5636f78a5636f
			
 
				-	.octa	0x78a5636f78a5636f78a5636f78a5636f
			
 
				-	.octa	0x84c8781484c8781484c8781484c87814
			
 
				-	.octa	0x84c8781484c8781484c8781484c87814
			
 
				-	.octa	0x8cc702088cc702088cc702088cc70208
			
 
				-	.octa	0x8cc702088cc702088cc702088cc70208
			
 
				-	.octa	0x90befffa90befffa90befffa90befffa
			
 
				-	.octa	0x90befffa90befffa90befffa90befffa
			
 
				-	.octa	0xa4506ceba4506ceba4506ceba4506ceb
			
 
				-	.octa	0xa4506ceba4506ceba4506ceba4506ceb
			
 
				-	.octa	0xbef9a3f7bef9a3f7bef9a3f7bef9a3f7
			
 
				-	.octa	0xbef9a3f7bef9a3f7bef9a3f7bef9a3f7
			
 
				-	.octa	0xc67178f2c67178f2c67178f2c67178f2
			
 
				-	.octa	0xc67178f2c67178f2c67178f2c67178f2
			
 
				-
			
 
				-.section	.rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32
			
 
				-.align 32
			
 
				-PSHUFFLE_BYTE_FLIP_MASK:
			
 
				-.octa 0x0c0d0e0f08090a0b0405060700010203
			
 
				-.octa 0x0c0d0e0f08090a0b0405060700010203
			
 
				-
			
 
				-.section	.rodata.cst256.K256, "aM", @progbits, 256
			
 
				-.align 64
			
 
				-.global K256
			
 
				-K256:
			
 
				-	.int	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
			
 
				-	.int	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
			
 
				-	.int	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
			
 
				-	.int	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
			
 
				-	.int	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
			
 
				-	.int	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
			
 
				-	.int	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
			
 
				-	.int	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
			
 
				-	.int	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
			
 
				-	.int	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
			
 
				-	.int	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
			
 
				-	.int	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
			
 
				-	.int	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
			
 
				-	.int	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
			
 
				-	.int	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
			
 
				-	.int	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
			
--- a/arch/x86/crypto/sha512-mb/Makefile
+++ b/arch/x86/crypto/sha512-mb/Makefile
@@ -1,12 +0,0 @@
 
				-# SPDX-License-Identifier: GPL-2.0
			
 
				-#
			
 
				-# Arch-specific CryptoAPI modules.
			
 
				-#
			
 
				-
			
 
				-avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
			
 
				-                                $(comma)4)$(comma)%ymm2,yes,no)
			
 
				-ifeq ($(avx2_supported),yes)
			
 
				-	obj-$(CONFIG_CRYPTO_SHA512_MB) += sha512-mb.o
			
 
				-	sha512-mb-y := sha512_mb.o sha512_mb_mgr_flush_avx2.o \
			
 
				-	     sha512_mb_mgr_init_avx2.o sha512_mb_mgr_submit_avx2.o sha512_x4_avx2.o
			
 
				-endif
			
--- a/arch/x86/crypto/sha512-mb/sha512_mb.c
+++ b/arch/x86/crypto/sha512-mb/sha512_mb.c
@@ -1,1047 +0,0 @@
 
				-/*
			
 
				- * Multi buffer SHA512 algorithm Glue Code
			
 
				- *
			
 
				- * This file is provided under a dual BSD/GPLv2 license.  When using or
			
 
				- * redistributing this file, you may do so under either license.
			
 
				- *
			
 
				- * GPL LICENSE SUMMARY
			
 
				- *
			
 
				- * Copyright(c) 2016 Intel Corporation.
			
 
				- *
			
 
				- * This program is free software; you can redistribute it and/or modify
			
 
				- * it under the terms of version 2 of the GNU General Public License as
			
 
				- * published by the Free Software Foundation.
			
 
				- *
			
 
				- * This program is distributed in the hope that it will be useful, but
			
 
				- * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
			
 
				- * General Public License for more details.
			
 
				- *
			
 
				- * Contact Information:
			
 
				- *	Megha Dey <megha.dey@linux.intel.com>
			
 
				- *
			
 
				- * BSD LICENSE
			
 
				- *
			
 
				- * Copyright(c) 2016 Intel Corporation.
			
 
				- *
			
 
				- * Redistribution and use in source and binary forms, with or without
			
 
				- * modification, are permitted provided that the following conditions
			
 
				- * are met:
			
 
				- *
			
 
				- *   * Redistributions of source code must retain the above copyright
			
 
				- *     notice, this list of conditions and the following disclaimer.
			
 
				- *   * Redistributions in binary form must reproduce the above copyright
			
 
				- *     notice, this list of conditions and the following disclaimer in
			
 
				- *     the documentation and/or other materials provided with the
			
 
				- *     distribution.
			
 
				- *   * Neither the name of Intel Corporation nor the names of its
			
 
				- *     contributors may be used to endorse or promote products derived
			
 
				- *     from this software without specific prior written permission.
			
 
				- *
			
 
				- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
			
 
				- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
			
 
				- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
			
 
				- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
			
 
				- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
			
 
				- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
			
 
				- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
			
 
				- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
			
 
				- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
			
 
				- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
			
 
				- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
			
 
				- */
			
 
				-
			
 
				-#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
			
 
				-
			
 
				-#include <crypto/internal/hash.h>
			
 
				-#include <linux/init.h>
			
 
				-#include <linux/module.h>
			
 
				-#include <linux/mm.h>
			
 
				-#include <linux/cryptohash.h>
			
 
				-#include <linux/types.h>
			
 
				-#include <linux/list.h>
			
 
				-#include <crypto/scatterwalk.h>
			
 
				-#include <crypto/sha.h>
			
 
				-#include <crypto/mcryptd.h>
			
 
				-#include <crypto/crypto_wq.h>
			
 
				-#include <asm/byteorder.h>
			
 
				-#include <linux/hardirq.h>
			
 
				-#include <asm/fpu/api.h>
			
 
				-#include "sha512_mb_ctx.h"
			
 
				-
			
 
				-#define FLUSH_INTERVAL 1000 /* in usec */
			
 
				-
			
 
				-static struct mcryptd_alg_state sha512_mb_alg_state;
			
 
				-
			
 
				-struct sha512_mb_ctx {
			
 
				-	struct mcryptd_ahash *mcryptd_tfm;
			
 
				-};
			
 
				-
			
 
				-static inline struct mcryptd_hash_request_ctx
			
 
				-		*cast_hash_to_mcryptd_ctx(struct sha512_hash_ctx *hash_ctx)
			
 
				-{
			
 
				-	struct ahash_request *areq;
			
 
				-
			
 
				-	areq = container_of((void *) hash_ctx, struct ahash_request, __ctx);
			
 
				-	return container_of(areq, struct mcryptd_hash_request_ctx, areq);
			
 
				-}
			
 
				-
			
 
				-static inline struct ahash_request
			
 
				-		*cast_mcryptd_ctx_to_req(struct mcryptd_hash_request_ctx *ctx)
			
 
				-{
			
 
				-	return container_of((void *) ctx, struct ahash_request, __ctx);
			
 
				-}
			
 
				-
			
 
				-static void req_ctx_init(struct mcryptd_hash_request_ctx *rctx,
			
 
				-				struct ahash_request *areq)
			
 
				-{
			
 
				-	rctx->flag = HASH_UPDATE;
			
 
				-}
			
 
				-
			
 
				-static asmlinkage void (*sha512_job_mgr_init)(struct sha512_mb_mgr *state);
			
 
				-static asmlinkage struct job_sha512* (*sha512_job_mgr_submit)
			
 
				-						(struct sha512_mb_mgr *state,
			
 
				-						struct job_sha512 *job);
			
 
				-static asmlinkage struct job_sha512* (*sha512_job_mgr_flush)
			
 
				-						(struct sha512_mb_mgr *state);
			
 
				-static asmlinkage struct job_sha512* (*sha512_job_mgr_get_comp_job)
			
 
				-						(struct sha512_mb_mgr *state);
			
 
				-
			
 
				-inline uint32_t sha512_pad(uint8_t padblock[SHA512_BLOCK_SIZE * 2],
			
 
				-			 uint64_t total_len)
			
 
				-{
			
 
				-	uint32_t i = total_len & (SHA512_BLOCK_SIZE - 1);
			
 
				-
			
 
				-	memset(&padblock[i], 0, SHA512_BLOCK_SIZE);
			
 
				-	padblock[i] = 0x80;
			
 
				-
			
 
				-	i += ((SHA512_BLOCK_SIZE - 1) &
			
 
				-	      (0 - (total_len + SHA512_PADLENGTHFIELD_SIZE + 1)))
			
 
				-	     + 1 + SHA512_PADLENGTHFIELD_SIZE;
			
 
				-
			
 
				-#if SHA512_PADLENGTHFIELD_SIZE == 16
			
 
				-	*((uint64_t *) &padblock[i - 16]) = 0;
			
 
				-#endif
			
 
				-
			
 
				-	*((uint64_t *) &padblock[i - 8]) = cpu_to_be64(total_len << 3);
			
 
				-
			
 
				-	/* Number of extra blocks to hash */
			
 
				-	return i >> SHA512_LOG2_BLOCK_SIZE;
			
 
				-}
			
 
				-
			
 
				-static struct sha512_hash_ctx *sha512_ctx_mgr_resubmit
			
 
				-		(struct sha512_ctx_mgr *mgr, struct sha512_hash_ctx *ctx)
			
 
				-{
			
 
				-	while (ctx) {
			
 
				-		if (ctx->status & HASH_CTX_STS_COMPLETE) {
			
 
				-			/* Clear PROCESSING bit */
			
 
				-			ctx->status = HASH_CTX_STS_COMPLETE;
			
 
				-			return ctx;
			
 
				-		}
			
 
				-
			
 
				-		/*
			
 
				-		 * If the extra blocks are empty, begin hashing what remains
			
 
				-		 * in the user's buffer.
			
 
				-		 */
			
 
				-		if (ctx->partial_block_buffer_length == 0 &&
			
 
				-		    ctx->incoming_buffer_length) {
			
 
				-
			
 
				-			const void *buffer = ctx->incoming_buffer;
			
 
				-			uint32_t len = ctx->incoming_buffer_length;
			
 
				-			uint32_t copy_len;
			
 
				-
			
 
				-			/*
			
 
				-			 * Only entire blocks can be hashed.
			
 
				-			 * Copy remainder to extra blocks buffer.
			
 
				-			 */
			
 
				-			copy_len = len & (SHA512_BLOCK_SIZE-1);
			
 
				-
			
 
				-			if (copy_len) {
			
 
				-				len -= copy_len;
			
 
				-				memcpy(ctx->partial_block_buffer,
			
 
				-				       ((const char *) buffer + len),
			
 
				-				       copy_len);
			
 
				-				ctx->partial_block_buffer_length = copy_len;
			
 
				-			}
			
 
				-
			
 
				-			ctx->incoming_buffer_length = 0;
			
 
				-
			
 
				-			/* len should be a multiple of the block size now */
			
 
				-			assert((len % SHA512_BLOCK_SIZE) == 0);
			
 
				-
			
 
				-			/* Set len to the number of blocks to be hashed */
			
 
				-			len >>= SHA512_LOG2_BLOCK_SIZE;
			
 
				-
			
 
				-			if (len) {
			
 
				-
			
 
				-				ctx->job.buffer = (uint8_t *) buffer;
			
 
				-				ctx->job.len = len;
			
 
				-				ctx = (struct sha512_hash_ctx *)
			
 
				-					sha512_job_mgr_submit(&mgr->mgr,
			
 
				-					&ctx->job);
			
 
				-				continue;
			
 
				-			}
			
 
				-		}
			
 
				-
			
 
				-		/*
			
 
				-		 * If the extra blocks are not empty, then we are
			
 
				-		 * either on the last block(s) or we need more
			
 
				-		 * user input before continuing.
			
 
				-		 */
			
 
				-		if (ctx->status & HASH_CTX_STS_LAST) {
			
 
				-
			
 
				-			uint8_t *buf = ctx->partial_block_buffer;
			
 
				-			uint32_t n_extra_blocks =
			
 
				-					sha512_pad(buf, ctx->total_length);
			
 
				-
			
 
				-			ctx->status = (HASH_CTX_STS_PROCESSING |
			
 
				-				       HASH_CTX_STS_COMPLETE);
			
 
				-			ctx->job.buffer = buf;
			
 
				-			ctx->job.len = (uint32_t) n_extra_blocks;
			
 
				-			ctx = (struct sha512_hash_ctx *)
			
 
				-				sha512_job_mgr_submit(&mgr->mgr, &ctx->job);
			
 
				-			continue;
			
 
				-		}
			
 
				-
			
 
				-		if (ctx)
			
 
				-			ctx->status = HASH_CTX_STS_IDLE;
			
 
				-		return ctx;
			
 
				-	}
			
 
				-
			
 
				-	return NULL;
			
 
				-}
			
 
				-
			
 
				-static struct sha512_hash_ctx
			
 
				-		*sha512_ctx_mgr_get_comp_ctx(struct mcryptd_alg_cstate *cstate)
			
 
				-{
			
 
				-	/*
			
 
				-	 * If get_comp_job returns NULL, there are no jobs complete.
			
 
				-	 * If get_comp_job returns a job, verify that it is safe to return to
			
 
				-	 * the user.
			
 
				-	 * If it is not ready, resubmit the job to finish processing.
			
 
				-	 * If sha512_ctx_mgr_resubmit returned a job, it is ready to be
			
 
				-	 * returned.
			
 
				-	 * Otherwise, all jobs currently being managed by the hash_ctx_mgr
			
 
				-	 * still need processing.
			
 
				-	 */
			
 
				-	struct sha512_ctx_mgr *mgr;
			
 
				-	struct sha512_hash_ctx *ctx;
			
 
				-	unsigned long flags;
			
 
				-
			
 
				-	mgr = cstate->mgr;
			
 
				-	spin_lock_irqsave(&cstate->work_lock, flags);
			
 
				-	ctx = (struct sha512_hash_ctx *)
			
 
				-				sha512_job_mgr_get_comp_job(&mgr->mgr);
			
 
				-	ctx = sha512_ctx_mgr_resubmit(mgr, ctx);
			
 
				-	spin_unlock_irqrestore(&cstate->work_lock, flags);
			
 
				-	return ctx;
			
 
				-}
			
 
				-
			
 
				-static void sha512_ctx_mgr_init(struct sha512_ctx_mgr *mgr)
			
 
				-{
			
 
				-	sha512_job_mgr_init(&mgr->mgr);
			
 
				-}
			
 
				-
			
 
				-static struct sha512_hash_ctx
			
 
				-			*sha512_ctx_mgr_submit(struct mcryptd_alg_cstate *cstate,
			
 
				-					  struct sha512_hash_ctx *ctx,
			
 
				-					  const void *buffer,
			
 
				-					  uint32_t len,
			
 
				-					  int flags)
			
 
				-{
			
 
				-	struct sha512_ctx_mgr *mgr;
			
 
				-	unsigned long irqflags;
			
 
				-
			
 
				-	mgr = cstate->mgr;
			
 
				-	spin_lock_irqsave(&cstate->work_lock, irqflags);
			
 
				-	if (flags & ~(HASH_UPDATE | HASH_LAST)) {
			
 
				-		/* User should not pass anything other than UPDATE or LAST */
			
 
				-		ctx->error = HASH_CTX_ERROR_INVALID_FLAGS;
			
 
				-		goto unlock;
			
 
				-	}
			
 
				-
			
 
				-	if (ctx->status & HASH_CTX_STS_PROCESSING) {
			
 
				-		/* Cannot submit to a currently processing job. */
			
 
				-		ctx->error = HASH_CTX_ERROR_ALREADY_PROCESSING;
			
 
				-		goto unlock;
			
 
				-	}
			
 
				-
			
 
				-	if (ctx->status & HASH_CTX_STS_COMPLETE) {
			
 
				-		/* Cannot update a finished job. */
			
 
				-		ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED;
			
 
				-		goto unlock;
			
 
				-	}
			
 
				-
			
 
				-	/*
			
 
				-	 * If we made it here, there were no errors during this call to
			
 
				-	 * submit
			
 
				-	 */
			
 
				-	ctx->error = HASH_CTX_ERROR_NONE;
			
 
				-
			
 
				-	/* Store buffer ptr info from user */
			
 
				-	ctx->incoming_buffer = buffer;
			
 
				-	ctx->incoming_buffer_length = len;
			
 
				-
			
 
				-	/*
			
 
				-	 * Store the user's request flags and mark this ctx as currently being
			
 
				-	 * processed.
			
 
				-	 */
			
 
				-	ctx->status = (flags & HASH_LAST) ?
			
 
				-			(HASH_CTX_STS_PROCESSING | HASH_CTX_STS_LAST) :
			
 
				-			HASH_CTX_STS_PROCESSING;
			
 
				-
			
 
				-	/* Advance byte counter */
			
 
				-	ctx->total_length += len;
			
 
				-
			
 
				-	/*
			
 
				-	 * If there is anything currently buffered in the extra blocks,
			
 
				-	 * append to it until it contains a whole block.
			
 
				-	 * Or if the user's buffer contains less than a whole block,
			
 
				-	 * append as much as possible to the extra block.
			
 
				-	 */
			
 
				-	if (ctx->partial_block_buffer_length || len < SHA512_BLOCK_SIZE) {
			
 
				-		/* Compute how many bytes to copy from user buffer into extra
			
 
				-		 * block
			
 
				-		 */
			
 
				-		uint32_t copy_len = SHA512_BLOCK_SIZE -
			
 
				-					ctx->partial_block_buffer_length;
			
 
				-		if (len < copy_len)
			
 
				-			copy_len = len;
			
 
				-
			
 
				-		if (copy_len) {
			
 
				-			/* Copy and update relevant pointers and counters */
			
 
				-			memcpy
			
 
				-		(&ctx->partial_block_buffer[ctx->partial_block_buffer_length],
			
 
				-				buffer, copy_len);
			
 
				-
			
 
				-			ctx->partial_block_buffer_length += copy_len;
			
 
				-			ctx->incoming_buffer = (const void *)
			
 
				-					((const char *)buffer + copy_len);
			
 
				-			ctx->incoming_buffer_length = len - copy_len;
			
 
				-		}
			
 
				-
			
 
				-		/* The extra block should never contain more than 1 block
			
 
				-		 * here
			
 
				-		 */
			
 
				-		assert(ctx->partial_block_buffer_length <= SHA512_BLOCK_SIZE);
			
 
				-
			
 
				-		/* If the extra block buffer contains exactly 1 block, it can
			
 
				-		 * be hashed.
			
 
				-		 */
			
 
				-		if (ctx->partial_block_buffer_length >= SHA512_BLOCK_SIZE) {
			
 
				-			ctx->partial_block_buffer_length = 0;
			
 
				-
			
 
				-			ctx->job.buffer = ctx->partial_block_buffer;
			
 
				-			ctx->job.len = 1;
			
 
				-			ctx = (struct sha512_hash_ctx *)
			
 
				-				sha512_job_mgr_submit(&mgr->mgr, &ctx->job);
			
 
				-		}
			
 
				-	}
			
 
				-
			
 
				-	ctx = sha512_ctx_mgr_resubmit(mgr, ctx);
			
 
				-unlock:
			
 
				-	spin_unlock_irqrestore(&cstate->work_lock, irqflags);
			
 
				-	return ctx;
			
 
				-}
			
 
				-
			
 
				-static struct sha512_hash_ctx *sha512_ctx_mgr_flush(struct mcryptd_alg_cstate *cstate)
			
 
				-{
			
 
				-	struct sha512_ctx_mgr *mgr;
			
 
				-	struct sha512_hash_ctx *ctx;
			
 
				-	unsigned long flags;
			
 
				-
			
 
				-	mgr = cstate->mgr;
			
 
				-	spin_lock_irqsave(&cstate->work_lock, flags);
			
 
				-	while (1) {
			
 
				-		ctx = (struct sha512_hash_ctx *)
			
 
				-					sha512_job_mgr_flush(&mgr->mgr);
			
 
				-
			
 
				-		/* If flush returned 0, there are no more jobs in flight. */
			
 
				-		if (!ctx)
			
 
				-			break;
			
 
				-
			
 
				-		/*
			
 
				-		 * If flush returned a job, resubmit the job to finish
			
 
				-		 * processing.
			
 
				-		 */
			
 
				-		ctx = sha512_ctx_mgr_resubmit(mgr, ctx);
			
 
				-
			
 
				-		/*
			
 
				-		 * If sha512_ctx_mgr_resubmit returned a job, it is ready to
			
 
				-		 * be returned. Otherwise, all jobs currently being managed by
			
 
				-		 * the sha512_ctx_mgr still need processing. Loop.
			
 
				-		 */
			
 
				-		if (ctx)
			
 
				-			break;
			
 
				-	}
			
 
				-	spin_unlock_irqrestore(&cstate->work_lock, flags);
			
 
				-	return ctx;
			
 
				-}
			
 
				-
			
 
				-static int sha512_mb_init(struct ahash_request *areq)
			
 
				-{
			
 
				-	struct sha512_hash_ctx *sctx = ahash_request_ctx(areq);
			
 
				-
			
 
				-	hash_ctx_init(sctx);
			
 
				-	sctx->job.result_digest[0] = SHA512_H0;
			
 
				-	sctx->job.result_digest[1] = SHA512_H1;
			
 
				-	sctx->job.result_digest[2] = SHA512_H2;
			
 
				-	sctx->job.result_digest[3] = SHA512_H3;
			
 
				-	sctx->job.result_digest[4] = SHA512_H4;
			
 
				-	sctx->job.result_digest[5] = SHA512_H5;
			
 
				-	sctx->job.result_digest[6] = SHA512_H6;
			
 
				-	sctx->job.result_digest[7] = SHA512_H7;
			
 
				-	sctx->total_length = 0;
			
 
				-	sctx->partial_block_buffer_length = 0;
			
 
				-	sctx->status = HASH_CTX_STS_IDLE;
			
 
				-
			
 
				-	return 0;
			
 
				-}
			
 
				-
			
 
				-static int sha512_mb_set_results(struct mcryptd_hash_request_ctx *rctx)
			
 
				-{
			
 
				-	int	i;
			
 
				-	struct	sha512_hash_ctx *sctx = ahash_request_ctx(&rctx->areq);
			
 
				-	__be64	*dst = (__be64 *) rctx->out;
			
 
				-
			
 
				-	for (i = 0; i < 8; ++i)
			
 
				-		dst[i] = cpu_to_be64(sctx->job.result_digest[i]);
			
 
				-
			
 
				-	return 0;
			
 
				-}
			
 
				-
			
 
				-static int sha_finish_walk(struct mcryptd_hash_request_ctx **ret_rctx,
			
 
				-			struct mcryptd_alg_cstate *cstate, bool flush)
			
 
				-{
			
 
				-	int	flag = HASH_UPDATE;
			
 
				-	int	nbytes, err = 0;
			
 
				-	struct mcryptd_hash_request_ctx *rctx = *ret_rctx;
			
 
				-	struct sha512_hash_ctx *sha_ctx;
			
 
				-
			
 
				-	/* more work ? */
			
 
				-	while (!(rctx->flag & HASH_DONE)) {
			
 
				-		nbytes = crypto_ahash_walk_done(&rctx->walk, 0);
			
 
				-		if (nbytes < 0) {
			
 
				-			err = nbytes;
			
 
				-			goto out;
			
 
				-		}
			
 
				-		/* check if the walk is done */
			
 
				-		if (crypto_ahash_walk_last(&rctx->walk)) {
			
 
				-			rctx->flag |= HASH_DONE;
			
 
				-			if (rctx->flag & HASH_FINAL)
			
 
				-				flag |= HASH_LAST;
			
 
				-
			
 
				-		}
			
 
				-		sha_ctx = (struct sha512_hash_ctx *)
			
 
				-						ahash_request_ctx(&rctx->areq);
			
 
				-		kernel_fpu_begin();
			
 
				-		sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx,
			
 
				-						rctx->walk.data, nbytes, flag);
			
 
				-		if (!sha_ctx) {
			
 
				-			if (flush)
			
 
				-				sha_ctx = sha512_ctx_mgr_flush(cstate);
			
 
				-		}
			
 
				-		kernel_fpu_end();
			
 
				-		if (sha_ctx)
			
 
				-			rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
			
 
				-		else {
			
 
				-			rctx = NULL;
			
 
				-			goto out;
			
 
				-		}
			
 
				-	}
			
 
				-
			
 
				-	/* copy the results */
			
 
				-	if (rctx->flag & HASH_FINAL)
			
 
				-		sha512_mb_set_results(rctx);
			
 
				-
			
 
				-out:
			
 
				-	*ret_rctx = rctx;
			
 
				-	return err;
			
 
				-}
			
 
				-
			
 
				-static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx,
			
 
				-			    struct mcryptd_alg_cstate *cstate,
			
 
				-			    int err)
			
 
				-{
			
 
				-	struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
			
 
				-	struct sha512_hash_ctx *sha_ctx;
			
 
				-	struct mcryptd_hash_request_ctx *req_ctx;
			
 
				-	int ret;
			
 
				-	unsigned long flags;
			
 
				-
			
 
				-	/* remove from work list */
			
 
				-	spin_lock_irqsave(&cstate->work_lock, flags);
			
 
				-	list_del(&rctx->waiter);
			
 
				-	spin_unlock_irqrestore(&cstate->work_lock, flags);
			
 
				-
			
 
				-	if (irqs_disabled())
			
 
				-		rctx->complete(&req->base, err);
			
 
				-	else {
			
 
				-		local_bh_disable();
			
 
				-		rctx->complete(&req->base, err);
			
 
				-		local_bh_enable();
			
 
				-	}
			
 
				-
			
 
				-	/* check to see if there are other jobs that are done */
			
 
				-	sha_ctx = sha512_ctx_mgr_get_comp_ctx(cstate);
			
 
				-	while (sha_ctx) {
			
 
				-		req_ctx = cast_hash_to_mcryptd_ctx(sha_ctx);
			
 
				-		ret = sha_finish_walk(&req_ctx, cstate, false);
			
 
				-		if (req_ctx) {
			
 
				-			spin_lock_irqsave(&cstate->work_lock, flags);
			
 
				-			list_del(&req_ctx->waiter);
			
 
				-			spin_unlock_irqrestore(&cstate->work_lock, flags);
			
 
				-
			
 
				-			req = cast_mcryptd_ctx_to_req(req_ctx);
			
 
				-			if (irqs_disabled())
			
 
				-				req_ctx->complete(&req->base, ret);
			
 
				-			else {
			
 
				-				local_bh_disable();
			
 
				-				req_ctx->complete(&req->base, ret);
			
 
				-				local_bh_enable();
			
 
				-			}
			
 
				-		}
			
 
				-		sha_ctx = sha512_ctx_mgr_get_comp_ctx(cstate);
			
 
				-	}
			
 
				-
			
 
				-	return 0;
			
 
				-}
			
 
				-
			
 
				-static void sha512_mb_add_list(struct mcryptd_hash_request_ctx *rctx,
			
 
				-			     struct mcryptd_alg_cstate *cstate)
			
 
				-{
			
 
				-	unsigned long next_flush;
			
 
				-	unsigned long delay = usecs_to_jiffies(FLUSH_INTERVAL);
			
 
				-	unsigned long flags;
			
 
				-
			
 
				-	/* initialize tag */
			
 
				-	rctx->tag.arrival = jiffies;    /* tag the arrival time */
			
 
				-	rctx->tag.seq_num = cstate->next_seq_num++;
			
 
				-	next_flush = rctx->tag.arrival + delay;
			
 
				-	rctx->tag.expire = next_flush;
			
 
				-
			
 
				-	spin_lock_irqsave(&cstate->work_lock, flags);
			
 
				-	list_add_tail(&rctx->waiter, &cstate->work_list);
			
 
				-	spin_unlock_irqrestore(&cstate->work_lock, flags);
			
 
				-
			
 
				-	mcryptd_arm_flusher(cstate, delay);
			
 
				-}
			
 
				-
			
 
				-static int sha512_mb_update(struct ahash_request *areq)
			
 
				-{
			
 
				-	struct mcryptd_hash_request_ctx *rctx =
			
 
				-			container_of(areq, struct mcryptd_hash_request_ctx,
			
 
				-									areq);
			
 
				-	struct mcryptd_alg_cstate *cstate =
			
 
				-				this_cpu_ptr(sha512_mb_alg_state.alg_cstate);
			
 
				-
			
 
				-	struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
			
 
				-	struct sha512_hash_ctx *sha_ctx;
			
 
				-	int ret = 0, nbytes;
			
 
				-
			
 
				-
			
 
				-	/* sanity check */
			
 
				-	if (rctx->tag.cpu != smp_processor_id()) {
			
 
				-		pr_err("mcryptd error: cpu clash\n");
			
 
				-		goto done;
			
 
				-	}
			
 
				-
			
 
				-	/* need to init context */
			
 
				-	req_ctx_init(rctx, areq);
			
 
				-
			
 
				-	nbytes = crypto_ahash_walk_first(req, &rctx->walk);
			
 
				-
			
 
				-	if (nbytes < 0) {
			
 
				-		ret = nbytes;
			
 
				-		goto done;
			
 
				-	}
			
 
				-
			
 
				-	if (crypto_ahash_walk_last(&rctx->walk))
			
 
				-		rctx->flag |= HASH_DONE;
			
 
				-
			
 
				-	/* submit */
			
 
				-	sha_ctx = (struct sha512_hash_ctx *) ahash_request_ctx(areq);
			
 
				-	sha512_mb_add_list(rctx, cstate);
			
 
				-	kernel_fpu_begin();
			
 
				-	sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, rctx->walk.data,
			
 
				-							nbytes, HASH_UPDATE);
			
 
				-	kernel_fpu_end();
			
 
				-
			
 
				-	/* check if anything is returned */
			
 
				-	if (!sha_ctx)
			
 
				-		return -EINPROGRESS;
			
 
				-
			
 
				-	if (sha_ctx->error) {
			
 
				-		ret = sha_ctx->error;
			
 
				-		rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
			
 
				-		goto done;
			
 
				-	}
			
 
				-
			
 
				-	rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
			
 
				-	ret = sha_finish_walk(&rctx, cstate, false);
			
 
				-
			
 
				-	if (!rctx)
			
 
				-		return -EINPROGRESS;
			
 
				-done:
			
 
				-	sha_complete_job(rctx, cstate, ret);
			
 
				-	return ret;
			
 
				-}
			
 
				-
			
 
				-static int sha512_mb_finup(struct ahash_request *areq)
			
 
				-{
			
 
				-	struct mcryptd_hash_request_ctx *rctx =
			
 
				-			container_of(areq, struct mcryptd_hash_request_ctx,
			
 
				-									areq);
			
 
				-	struct mcryptd_alg_cstate *cstate =
			
 
				-				this_cpu_ptr(sha512_mb_alg_state.alg_cstate);
			
 
				-
			
 
				-	struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
			
 
				-	struct sha512_hash_ctx *sha_ctx;
			
 
				-	int ret = 0, flag = HASH_UPDATE, nbytes;
			
 
				-
			
 
				-	/* sanity check */
			
 
				-	if (rctx->tag.cpu != smp_processor_id()) {
			
 
				-		pr_err("mcryptd error: cpu clash\n");
			
 
				-		goto done;
			
 
				-	}
			
 
				-
			
 
				-	/* need to init context */
			
 
				-	req_ctx_init(rctx, areq);
			
 
				-
			
 
				-	nbytes = crypto_ahash_walk_first(req, &rctx->walk);
			
 
				-
			
 
				-	if (nbytes < 0) {
			
 
				-		ret = nbytes;
			
 
				-		goto done;
			
 
				-	}
			
 
				-
			
 
				-	if (crypto_ahash_walk_last(&rctx->walk)) {
			
 
				-		rctx->flag |= HASH_DONE;
			
 
				-		flag = HASH_LAST;
			
 
				-	}
			
 
				-
			
 
				-	/* submit */
			
 
				-	rctx->flag |= HASH_FINAL;
			
 
				-	sha_ctx = (struct sha512_hash_ctx *) ahash_request_ctx(areq);
			
 
				-	sha512_mb_add_list(rctx, cstate);
			
 
				-
			
 
				-	kernel_fpu_begin();
			
 
				-	sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, rctx->walk.data,
			
 
				-								nbytes, flag);
			
 
				-	kernel_fpu_end();
			
 
				-
			
 
				-	/* check if anything is returned */
			
 
				-	if (!sha_ctx)
			
 
				-		return -EINPROGRESS;
			
 
				-
			
 
				-	if (sha_ctx->error) {
			
 
				-		ret = sha_ctx->error;
			
 
				-		goto done;
			
 
				-	}
			
 
				-
			
 
				-	rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
			
 
				-	ret = sha_finish_walk(&rctx, cstate, false);
			
 
				-	if (!rctx)
			
 
				-		return -EINPROGRESS;
			
 
				-done:
			
 
				-	sha_complete_job(rctx, cstate, ret);
			
 
				-	return ret;
			
 
				-}
			
 
				-
			
 
				-static int sha512_mb_final(struct ahash_request *areq)
			
 
				-{
			
 
				-	struct mcryptd_hash_request_ctx *rctx =
			
 
				-			container_of(areq, struct mcryptd_hash_request_ctx,
			
 
				-									areq);
			
 
				-	struct mcryptd_alg_cstate *cstate =
			
 
				-				this_cpu_ptr(sha512_mb_alg_state.alg_cstate);
			
 
				-
			
 
				-	struct sha512_hash_ctx *sha_ctx;
			
 
				-	int ret = 0;
			
 
				-	u8 data;
			
 
				-
			
 
				-	/* sanity check */
			
 
				-	if (rctx->tag.cpu != smp_processor_id()) {
			
 
				-		pr_err("mcryptd error: cpu clash\n");
			
 
				-		goto done;
			
 
				-	}
			
 
				-
			
 
				-	/* need to init context */
			
 
				-	req_ctx_init(rctx, areq);
			
 
				-
			
 
				-	rctx->flag |= HASH_DONE | HASH_FINAL;
			
 
				-
			
 
				-	sha_ctx = (struct sha512_hash_ctx *) ahash_request_ctx(areq);
			
 
				-	/* flag HASH_FINAL and 0 data size */
			
 
				-	sha512_mb_add_list(rctx, cstate);
			
 
				-	kernel_fpu_begin();
			
 
				-	sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, &data, 0, HASH_LAST);
			
 
				-	kernel_fpu_end();
			
 
				-
			
 
				-	/* check if anything is returned */
			
 
				-	if (!sha_ctx)
			
 
				-		return -EINPROGRESS;
			
 
				-
			
 
				-	if (sha_ctx->error) {
			
 
				-		ret = sha_ctx->error;
			
 
				-		rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
			
 
				-		goto done;
			
 
				-	}
			
 
				-
			
 
				-	rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
			
 
				-	ret = sha_finish_walk(&rctx, cstate, false);
			
 
				-	if (!rctx)
			
 
				-		return -EINPROGRESS;
			
 
				-done:
			
 
				-	sha_complete_job(rctx, cstate, ret);
			
 
				-	return ret;
			
 
				-}
			
 
				-
			
 
				-static int sha512_mb_export(struct ahash_request *areq, void *out)
			
 
				-{
			
 
				-	struct sha512_hash_ctx *sctx = ahash_request_ctx(areq);
			
 
				-
			
 
				-	memcpy(out, sctx, sizeof(*sctx));
			
 
				-
			
 
				-	return 0;
			
 
				-}
			
 
				-
			
 
				-static int sha512_mb_import(struct ahash_request *areq, const void *in)
			
 
				-{
			
 
				-	struct sha512_hash_ctx *sctx = ahash_request_ctx(areq);
			
 
				-
			
 
				-	memcpy(sctx, in, sizeof(*sctx));
			
 
				-
			
 
				-	return 0;
			
 
				-}
			
 
				-
			
 
				-static int sha512_mb_async_init_tfm(struct crypto_tfm *tfm)
			
 
				-{
			
 
				-	struct mcryptd_ahash *mcryptd_tfm;
			
 
				-	struct sha512_mb_ctx *ctx = crypto_tfm_ctx(tfm);
			
 
				-	struct mcryptd_hash_ctx *mctx;
			
 
				-
			
 
				-	mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha512-mb",
			
 
				-						CRYPTO_ALG_INTERNAL,
			
 
				-						CRYPTO_ALG_INTERNAL);
			
 
				-	if (IS_ERR(mcryptd_tfm))
			
 
				-		return PTR_ERR(mcryptd_tfm);
			
 
				-	mctx = crypto_ahash_ctx(&mcryptd_tfm->base);
			
 
				-	mctx->alg_state = &sha512_mb_alg_state;
			
 
				-	ctx->mcryptd_tfm = mcryptd_tfm;
			
 
				-	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
			
 
				-				sizeof(struct ahash_request) +
			
 
				-				crypto_ahash_reqsize(&mcryptd_tfm->base));
			
 
				-
			
 
				-	return 0;
			
 
				-}
			
 
				-
			
 
				-static void sha512_mb_async_exit_tfm(struct crypto_tfm *tfm)
			
 
				-{
			
 
				-	struct sha512_mb_ctx *ctx = crypto_tfm_ctx(tfm);
			
 
				-
			
 
				-	mcryptd_free_ahash(ctx->mcryptd_tfm);
			
 
				-}
			
 
				-
			
 
				-static int sha512_mb_areq_init_tfm(struct crypto_tfm *tfm)
			
 
				-{
			
 
				-	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
			
 
				-				sizeof(struct ahash_request) +
			
 
				-				sizeof(struct sha512_hash_ctx));
			
 
				-
			
 
				-	return 0;
			
 
				-}
			
 
				-
			
 
				-static void sha512_mb_areq_exit_tfm(struct crypto_tfm *tfm)
			
 
				-{
			
 
				-	struct sha512_mb_ctx *ctx = crypto_tfm_ctx(tfm);
			
 
				-
			
 
				-	mcryptd_free_ahash(ctx->mcryptd_tfm);
			
 
				-}
			
 
				-
			
 
				-static struct ahash_alg sha512_mb_areq_alg = {
			
 
				-	.init		=	sha512_mb_init,
			
 
				-	.update		=	sha512_mb_update,
			
 
				-	.final		=	sha512_mb_final,
			
 
				-	.finup		=	sha512_mb_finup,
			
 
				-	.export		=	sha512_mb_export,
			
 
				-	.import		=	sha512_mb_import,
			
 
				-	.halg		=	{
			
 
				-	.digestsize	=	SHA512_DIGEST_SIZE,
			
 
				-	.statesize	=	sizeof(struct sha512_hash_ctx),
			
 
				-	.base		=	{
			
 
				-			.cra_name	 = "__sha512-mb",
			
 
				-			.cra_driver_name = "__intel_sha512-mb",
			
 
				-			.cra_priority	 = 100,
			
 
				-			/*
			
 
				-			 * use ASYNC flag as some buffers in multi-buffer
			
 
				-			 * algo may not have completed before hashing thread
			
 
				-			 * sleep
			
 
				-			 */
			
 
				-			.cra_flags	= CRYPTO_ALG_ASYNC |
			
 
				-					  CRYPTO_ALG_INTERNAL,
			
 
				-			.cra_blocksize	= SHA512_BLOCK_SIZE,
			
 
				-			.cra_module	= THIS_MODULE,
			
 
				-			.cra_list	= LIST_HEAD_INIT
			
 
				-					(sha512_mb_areq_alg.halg.base.cra_list),
			
 
				-			.cra_init	= sha512_mb_areq_init_tfm,
			
 
				-			.cra_exit	= sha512_mb_areq_exit_tfm,
			
 
				-			.cra_ctxsize	= sizeof(struct sha512_hash_ctx),
			
 
				-		}
			
 
				-	}
			
 
				-};
			
 
				-
			
 
				-static int sha512_mb_async_init(struct ahash_request *req)
			
 
				-{
			
 
				-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
			
 
				-	struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
			
 
				-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
			
 
				-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
			
 
				-
			
 
				-	memcpy(mcryptd_req, req, sizeof(*req));
			
 
				-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
			
 
				-	return crypto_ahash_init(mcryptd_req);
			
 
				-}
			
 
				-
			
 
				-static int sha512_mb_async_update(struct ahash_request *req)
			
 
				-{
			
 
				-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
			
 
				-
			
 
				-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
			
 
				-	struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
			
 
				-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
			
 
				-
			
 
				-	memcpy(mcryptd_req, req, sizeof(*req));
			
 
				-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
			
 
				-	return crypto_ahash_update(mcryptd_req);
			
 
				-}
			
 
				-
			
 
				-static int sha512_mb_async_finup(struct ahash_request *req)
			
 
				-{
			
 
				-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
			
 
				-
			
 
				-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
			
 
				-	struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
			
 
				-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
			
 
				-
			
 
				-	memcpy(mcryptd_req, req, sizeof(*req));
			
 
				-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
			
 
				-	return crypto_ahash_finup(mcryptd_req);
			
 
				-}
			
 
				-
			
 
				-static int sha512_mb_async_final(struct ahash_request *req)
			
 
				-{
			
 
				-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
			
 
				-
			
 
				-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
			
 
				-	struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
			
 
				-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
			
 
				-
			
 
				-	memcpy(mcryptd_req, req, sizeof(*req));
			
 
				-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
			
 
				-	return crypto_ahash_final(mcryptd_req);
			
 
				-}
			
 
				-
			
 
				-static int sha512_mb_async_digest(struct ahash_request *req)
			
 
				-{
			
 
				-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
			
 
				-	struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
			
 
				-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
			
 
				-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
			
 
				-
			
 
				-	memcpy(mcryptd_req, req, sizeof(*req));
			
 
				-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
			
 
				-	return crypto_ahash_digest(mcryptd_req);
			
 
				-}
			
 
				-
			
 
				-static int sha512_mb_async_export(struct ahash_request *req, void *out)
			
 
				-{
			
 
				-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
			
 
				-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
			
 
				-	struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
			
 
				-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
			
 
				-
			
 
				-	memcpy(mcryptd_req, req, sizeof(*req));
			
 
				-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
			
 
				-	return crypto_ahash_export(mcryptd_req, out);
			
 
				-}
			
 
				-
			
 
				-static int sha512_mb_async_import(struct ahash_request *req, const void *in)
			
 
				-{
			
 
				-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
			
 
				-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
			
 
				-	struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
			
 
				-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
			
 
				-	struct crypto_ahash *child = mcryptd_ahash_child(mcryptd_tfm);
			
 
				-	struct mcryptd_hash_request_ctx *rctx;
			
 
				-	struct ahash_request *areq;
			
 
				-
			
 
				-	memcpy(mcryptd_req, req, sizeof(*req));
			
 
				-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
			
 
				-	rctx = ahash_request_ctx(mcryptd_req);
			
 
				-
			
 
				-	areq = &rctx->areq;
			
 
				-
			
 
				-	ahash_request_set_tfm(areq, child);
			
 
				-	ahash_request_set_callback(areq, CRYPTO_TFM_REQ_MAY_SLEEP,
			
 
				-					rctx->complete, req);
			
 
				-
			
 
				-	return crypto_ahash_import(mcryptd_req, in);
			
 
				-}
			
 
				-
			
 
				-static struct ahash_alg sha512_mb_async_alg = {
			
 
				-	.init           = sha512_mb_async_init,
			
 
				-	.update         = sha512_mb_async_update,
			
 
				-	.final          = sha512_mb_async_final,
			
 
				-	.finup          = sha512_mb_async_finup,
			
 
				-	.digest         = sha512_mb_async_digest,
			
 
				-	.export		= sha512_mb_async_export,
			
 
				-	.import		= sha512_mb_async_import,
			
 
				-	.halg = {
			
 
				-		.digestsize     = SHA512_DIGEST_SIZE,
			
 
				-		.statesize      = sizeof(struct sha512_hash_ctx),
			
 
				-		.base = {
			
 
				-			.cra_name               = "sha512",
			
 
				-			.cra_driver_name        = "sha512_mb",
			
 
				-			/*
			
 
				-			 * Low priority, since with few concurrent hash requests
			
 
				-			 * this is extremely slow due to the flush delay.  Users
			
 
				-			 * whose workloads would benefit from this can request
			
 
				-			 * it explicitly by driver name, or can increase its
			
 
				-			 * priority at runtime using NETLINK_CRYPTO.
			
 
				-			 */
			
 
				-			.cra_priority           = 50,
			
 
				-			.cra_flags              = CRYPTO_ALG_ASYNC,
			
 
				-			.cra_blocksize          = SHA512_BLOCK_SIZE,
			
 
				-			.cra_module             = THIS_MODULE,
			
 
				-			.cra_list               = LIST_HEAD_INIT
			
 
				-				(sha512_mb_async_alg.halg.base.cra_list),
			
 
				-			.cra_init               = sha512_mb_async_init_tfm,
			
 
				-			.cra_exit               = sha512_mb_async_exit_tfm,
			
 
				-			.cra_ctxsize		= sizeof(struct sha512_mb_ctx),
			
 
				-			.cra_alignmask		= 0,
			
 
				-		},
			
 
				-	},
			
 
				-};
			
 
				-
			
 
				-static unsigned long sha512_mb_flusher(struct mcryptd_alg_cstate *cstate)
			
 
				-{
			
 
				-	struct mcryptd_hash_request_ctx *rctx;
			
 
				-	unsigned long cur_time;
			
 
				-	unsigned long next_flush = 0;
			
 
				-	struct sha512_hash_ctx *sha_ctx;
			
 
				-
			
 
				-
			
 
				-	cur_time = jiffies;
			
 
				-
			
 
				-	while (!list_empty(&cstate->work_list)) {
			
 
				-		rctx = list_entry(cstate->work_list.next,
			
 
				-				struct mcryptd_hash_request_ctx, waiter);
			
 
				-		if time_before(cur_time, rctx->tag.expire)
			
 
				-			break;
			
 
				-		kernel_fpu_begin();
			
 
				-		sha_ctx = (struct sha512_hash_ctx *)
			
 
				-					sha512_ctx_mgr_flush(cstate);
			
 
				-		kernel_fpu_end();
			
 
				-		if (!sha_ctx) {
			
 
				-			pr_err("sha512_mb error: nothing got flushed for"
			
 
				-							" non-empty list\n");
			
 
				-			break;
			
 
				-		}
			
 
				-		rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
			
 
				-		sha_finish_walk(&rctx, cstate, true);
			
 
				-		sha_complete_job(rctx, cstate, 0);
			
 
				-	}
			
 
				-
			
 
				-	if (!list_empty(&cstate->work_list)) {
			
 
				-		rctx = list_entry(cstate->work_list.next,
			
 
				-				struct mcryptd_hash_request_ctx, waiter);
			
 
				-		/* get the hash context and then flush time */
			
 
				-		next_flush = rctx->tag.expire;
			
 
				-		mcryptd_arm_flusher(cstate, get_delay(next_flush));
			
 
				-	}
			
 
				-	return next_flush;
			
 
				-}
			
 
				-
			
 
				-static int __init sha512_mb_mod_init(void)
			
 
				-{
			
 
				-
			
 
				-	int cpu;
			
 
				-	int err;
			
 
				-	struct mcryptd_alg_cstate *cpu_state;
			
 
				-
			
 
				-	/* check for dependent cpu features */
			
 
				-	if (!boot_cpu_has(X86_FEATURE_AVX2) ||
			
 
				-	    !boot_cpu_has(X86_FEATURE_BMI2))
			
 
				-		return -ENODEV;
			
 
				-
			
 
				-	/* initialize multibuffer structures */
			
 
				-	sha512_mb_alg_state.alg_cstate =
			
 
				-				alloc_percpu(struct mcryptd_alg_cstate);
			
 
				-
			
 
				-	sha512_job_mgr_init = sha512_mb_mgr_init_avx2;
			
 
				-	sha512_job_mgr_submit = sha512_mb_mgr_submit_avx2;
			
 
				-	sha512_job_mgr_flush = sha512_mb_mgr_flush_avx2;
			
 
				-	sha512_job_mgr_get_comp_job = sha512_mb_mgr_get_comp_job_avx2;
			
 
				-
			
 
				-	if (!sha512_mb_alg_state.alg_cstate)
			
 
				-		return -ENOMEM;
			
 
				-	for_each_possible_cpu(cpu) {
			
 
				-		cpu_state = per_cpu_ptr(sha512_mb_alg_state.alg_cstate, cpu);
			
 
				-		cpu_state->next_flush = 0;
			
 
				-		cpu_state->next_seq_num = 0;
			
 
				-		cpu_state->flusher_engaged = false;
			
 
				-		INIT_DELAYED_WORK(&cpu_state->flush, mcryptd_flusher);
			
 
				-		cpu_state->cpu = cpu;
			
 
				-		cpu_state->alg_state = &sha512_mb_alg_state;
			
 
				-		cpu_state->mgr = kzalloc(sizeof(struct sha512_ctx_mgr),
			
 
				-								GFP_KERNEL);
			
 
				-		if (!cpu_state->mgr)
			
 
				-			goto err2;
			
 
				-		sha512_ctx_mgr_init(cpu_state->mgr);
			
 
				-		INIT_LIST_HEAD(&cpu_state->work_list);
			
 
				-		spin_lock_init(&cpu_state->work_lock);
			
 
				-	}
			
 
				-	sha512_mb_alg_state.flusher = &sha512_mb_flusher;
			
 
				-
			
 
				-	err = crypto_register_ahash(&sha512_mb_areq_alg);
			
 
				-	if (err)
			
 
				-		goto err2;
			
 
				-	err = crypto_register_ahash(&sha512_mb_async_alg);
			
 
				-	if (err)
			
 
				-		goto err1;
			
 
				-
			
 
				-
			
 
				-	return 0;
			
 
				-err1:
			
 
				-	crypto_unregister_ahash(&sha512_mb_areq_alg);
			
 
				-err2:
			
 
				-	for_each_possible_cpu(cpu) {
			
 
				-		cpu_state = per_cpu_ptr(sha512_mb_alg_state.alg_cstate, cpu);
			
 
				-		kfree(cpu_state->mgr);
			
 
				-	}
			
 
				-	free_percpu(sha512_mb_alg_state.alg_cstate);
			
 
				-	return -ENODEV;
			
 
				-}
			
 
				-
			
 
				-static void __exit sha512_mb_mod_fini(void)
			
 
				-{
			
 
				-	int cpu;
			
 
				-	struct mcryptd_alg_cstate *cpu_state;
			
 
				-
			
 
				-	crypto_unregister_ahash(&sha512_mb_async_alg);
			
 
				-	crypto_unregister_ahash(&sha512_mb_areq_alg);
			
 
				-	for_each_possible_cpu(cpu) {
			
 
				-		cpu_state = per_cpu_ptr(sha512_mb_alg_state.alg_cstate, cpu);
			
 
				-		kfree(cpu_state->mgr);
			
 
				-	}
			
 
				-	free_percpu(sha512_mb_alg_state.alg_cstate);
			
 
				-}
			
 
				-
			
 
				-module_init(sha512_mb_mod_init);
			
 
				-module_exit(sha512_mb_mod_fini);
			
 
				-
			
 
				-MODULE_LICENSE("GPL");
			
 
				-MODULE_DESCRIPTION("SHA512 Secure Hash Algorithm, multi buffer accelerated");
			
 
				-
			
 
				-MODULE_ALIAS("sha512");
			
--- a/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h
+++ b/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h
@@ -1,128 +0,0 @@
 
				-/*
			
 
				- * Header file for multi buffer SHA512 context
			
 
				- *
			
 
				- * This file is provided under a dual BSD/GPLv2 license.  When using or
			
 
				- * redistributing this file, you may do so under either license.
			
 
				- *
			
 
				- * GPL LICENSE SUMMARY
			
 
				- *
			
 
				- *  Copyright(c) 2016 Intel Corporation.
			
 
				- *
			
 
				- *  This program is free software; you can redistribute it and/or modify
			
 
				- *  it under the terms of version 2 of the GNU General Public License as
			
 
				- *  published by the Free Software Foundation.
			
 
				- *
			
 
				- *  This program is distributed in the hope that it will be useful, but
			
 
				- *  WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
			
 
				- *  General Public License for more details.
			
 
				- *
			
 
				- *  Contact Information:
			
 
				- *      Megha Dey <megha.dey@linux.intel.com>
			
 
				- *
			
 
				- *  BSD LICENSE
			
 
				- *
			
 
				- *  Copyright(c) 2016 Intel Corporation.
			
 
				- *
			
 
				- *  Redistribution and use in source and binary forms, with or without
			
 
				- *  modification, are permitted provided that the following conditions
			
 
				- *  are met:
			
 
				- *
			
 
				- *    * Redistributions of source code must retain the above copyright
			
 
				- *      notice, this list of conditions and the following disclaimer.
			
 
				- *    * Redistributions in binary form must reproduce the above copyright
			
 
				- *      notice, this list of conditions and the following disclaimer in
			
 
				- *      the documentation and/or other materials provided with the
			
 
				- *      distribution.
			
 
				- *    * Neither the name of Intel Corporation nor the names of its
			
 
				- *      contributors may be used to endorse or promote products derived
			
 
				- *      from this software without specific prior written permission.
			
 
				- *
			
 
				- *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
			
 
				- *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
			
 
				- *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
			
 
				- *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
			
 
				- *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
			
 
				- *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
			
 
				- *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
			
 
				- *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
			
 
				- *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
			
 
				- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
			
 
				- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
			
 
				- */
			
 
				-
			
 
				-#ifndef _SHA_MB_CTX_INTERNAL_H
			
 
				-#define _SHA_MB_CTX_INTERNAL_H
			
 
				-
			
 
				-#include "sha512_mb_mgr.h"
			
 
				-
			
 
				-#define HASH_UPDATE          0x00
			
 
				-#define HASH_LAST            0x01
			
 
				-#define HASH_DONE            0x02
			
 
				-#define HASH_FINAL           0x04
			
 
				-
			
 
				-#define HASH_CTX_STS_IDLE       0x00
			
 
				-#define HASH_CTX_STS_PROCESSING 0x01
			
 
				-#define HASH_CTX_STS_LAST       0x02
			
 
				-#define HASH_CTX_STS_COMPLETE   0x04
			
 
				-
			
 
				-enum hash_ctx_error {
			
 
				-	HASH_CTX_ERROR_NONE               =  0,
			
 
				-	HASH_CTX_ERROR_INVALID_FLAGS      = -1,
			
 
				-	HASH_CTX_ERROR_ALREADY_PROCESSING = -2,
			
 
				-	HASH_CTX_ERROR_ALREADY_COMPLETED  = -3,
			
 
				-};
			
 
				-
			
 
				-#define hash_ctx_user_data(ctx)  ((ctx)->user_data)
			
 
				-#define hash_ctx_digest(ctx)     ((ctx)->job.result_digest)
			
 
				-#define hash_ctx_processing(ctx) ((ctx)->status & HASH_CTX_STS_PROCESSING)
			
 
				-#define hash_ctx_complete(ctx)   ((ctx)->status == HASH_CTX_STS_COMPLETE)
			
 
				-#define hash_ctx_status(ctx)     ((ctx)->status)
			
 
				-#define hash_ctx_error(ctx)      ((ctx)->error)
			
 
				-#define hash_ctx_init(ctx) \
			
 
				-	do { \
			
 
				-		(ctx)->error = HASH_CTX_ERROR_NONE; \
			
 
				-		(ctx)->status = HASH_CTX_STS_COMPLETE; \
			
 
				-	} while (0)
			
 
				-
			
 
				-/* Hash Constants and Typedefs */
			
 
				-#define SHA512_DIGEST_LENGTH          8
			
 
				-#define SHA512_LOG2_BLOCK_SIZE        7
			
 
				-
			
 
				-#define SHA512_PADLENGTHFIELD_SIZE    16
			
 
				-
			
 
				-#ifdef SHA_MB_DEBUG
			
 
				-#define assert(expr) \
			
 
				-do { \
			
 
				-	if (unlikely(!(expr))) { \
			
 
				-		printk(KERN_ERR "Assertion failed! %s,%s,%s,line=%d\n", \
			
 
				-		#expr, __FILE__, __func__, __LINE__); \
			
 
				-	} \
			
 
				-} while (0)
			
 
				-#else
			
 
				-#define assert(expr) do {} while (0)
			
 
				-#endif
			
 
				-
			
 
				-struct sha512_ctx_mgr {
			
 
				-	struct sha512_mb_mgr mgr;
			
 
				-};
			
 
				-
			
 
				-/* typedef struct sha512_ctx_mgr sha512_ctx_mgr; */
			
 
				-
			
 
				-struct sha512_hash_ctx {
			
 
				-	/* Must be at struct offset 0 */
			
 
				-	struct job_sha512       job;
			
 
				-	/* status flag */
			
 
				-	int status;
			
 
				-	/* error flag */
			
 
				-	int error;
			
 
				-
			
 
				-	uint64_t        total_length;
			
 
				-	const void      *incoming_buffer;
			
 
				-	uint32_t        incoming_buffer_length;
			
 
				-	uint8_t         partial_block_buffer[SHA512_BLOCK_SIZE * 2];
			
 
				-	uint32_t        partial_block_buffer_length;
			
 
				-	void            *user_data;
			
 
				-};
			
 
				-
			
 
				-#endif
			
--- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr.h
+++ b/arch/x86/crypto/sha512-mb/sha512_mb_mgr.h
@@ -1,104 +0,0 @@
 
				-/*
			
 
				- * Header file for multi buffer SHA512 algorithm manager
			
 
				- *
			
 
				- * This file is provided under a dual BSD/GPLv2 license.  When using or
			
 
				- * redistributing this file, you may do so under either license.
			
 
				- *
			
 
				- * GPL LICENSE SUMMARY
			
 
				- *
			
 
				- *  Copyright(c) 2016 Intel Corporation.
			
 
				- *
			
 
				- *  This program is free software; you can redistribute it and/or modify
			
 
				- *  it under the terms of version 2 of the GNU General Public License as
			
 
				- *  published by the Free Software Foundation.
			
 
				- *
			
 
				- *  This program is distributed in the hope that it will be useful, but
			
 
				- *  WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
			
 
				- *  General Public License for more details.
			
 
				- *
			
 
				- *  Contact Information:
			
 
				- *      Megha Dey <megha.dey@linux.intel.com>
			
 
				- *
			
 
				- *  BSD LICENSE
			
 
				- *
			
 
				- *  Copyright(c) 2016 Intel Corporation.
			
 
				- *
			
 
				- *  Redistribution and use in source and binary forms, with or without
			
 
				- *  modification, are permitted provided that the following conditions
			
 
				- *  are met:
			
 
				- *
			
 
				- *    * Redistributions of source code must retain the above copyright
			
 
				- *      notice, this list of conditions and the following disclaimer.
			
 
				- *    * Redistributions in binary form must reproduce the above copyright
			
 
				- *      notice, this list of conditions and the following disclaimer in
			
 
				- *      the documentation and/or other materials provided with the
			
 
				- *      distribution.
			
 
				- *    * Neither the name of Intel Corporation nor the names of its
			
 
				- *      contributors may be used to endorse or promote products derived
			
 
				- *      from this software without specific prior written permission.
			
 
				- *
			
 
				- *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
			
 
				- *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
			
 
				- *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
			
 
				- *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
			
 
				- *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
			
 
				- *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
			
 
				- *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
			
 
				- *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
			
 
				- *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
			
 
				- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
			
 
				- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
			
 
				- */
			
 
				-
			
 
				-#ifndef __SHA_MB_MGR_H
			
 
				-#define __SHA_MB_MGR_H
			
 
				-
			
 
				-#include <linux/types.h>
			
 
				-
			
 
				-#define NUM_SHA512_DIGEST_WORDS 8
			
 
				-
			
 
				-enum job_sts {STS_UNKNOWN = 0,
			
 
				-	STS_BEING_PROCESSED = 1,
			
 
				-	STS_COMPLETED =       2,
			
 
				-	STS_INTERNAL_ERROR = 3,
			
 
				-	STS_ERROR = 4
			
 
				-};
			
 
				-
			
 
				-struct job_sha512 {
			
 
				-	u8  *buffer;
			
 
				-	u64  len;
			
 
				-	u64  result_digest[NUM_SHA512_DIGEST_WORDS] __aligned(32);
			
 
				-	enum job_sts status;
			
 
				-	void   *user_data;
			
 
				-};
			
 
				-
			
 
				-struct sha512_args_x4 {
			
 
				-	uint64_t        digest[8][4];
			
 
				-	uint8_t         *data_ptr[4];
			
 
				-};
			
 
				-
			
 
				-struct sha512_lane_data {
			
 
				-	struct job_sha512 *job_in_lane;
			
 
				-};
			
 
				-
			
 
				-struct sha512_mb_mgr {
			
 
				-	struct sha512_args_x4 args;
			
 
				-
			
 
				-	uint64_t lens[4];
			
 
				-
			
 
				-	/* each byte is index (0...7) of unused lanes */
			
 
				-	uint64_t unused_lanes;
			
 
				-	/* byte 4 is set to FF as a flag */
			
 
				-	struct sha512_lane_data ldata[4];
			
 
				-};
			
 
				-
			
 
				-#define SHA512_MB_MGR_NUM_LANES_AVX2 4
			
 
				-
			
 
				-void sha512_mb_mgr_init_avx2(struct sha512_mb_mgr *state);
			
 
				-struct job_sha512 *sha512_mb_mgr_submit_avx2(struct sha512_mb_mgr *state,
			
 
				-						struct job_sha512 *job);
			
 
				-struct job_sha512 *sha512_mb_mgr_flush_avx2(struct sha512_mb_mgr *state);
			
 
				-struct job_sha512 *sha512_mb_mgr_get_comp_job_avx2(struct sha512_mb_mgr *state);
			
 
				-
			
 
				-#endif
			
--- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_datastruct.S
+++ b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_datastruct.S
@@ -1,281 +0,0 @@
 
				-/*
			
 
				- * Header file for multi buffer SHA256 algorithm data structure
			
 
				- *
			
 
				- * This file is provided under a dual BSD/GPLv2 license.  When using or
			
 
				- * redistributing this file, you may do so under either license.
			
 
				- *
			
 
				- * GPL LICENSE SUMMARY
			
 
				- *
			
 
				- *  Copyright(c) 2016 Intel Corporation.
			
 
				- *
			
 
				- *  This program is free software; you can redistribute it and/or modify
			
 
				- *  it under the terms of version 2 of the GNU General Public License as
			
 
				- *  published by the Free Software Foundation.
			
 
				- *
			
 
				- *  This program is distributed in the hope that it will be useful, but
			
 
				- *  WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
			
 
				- *  General Public License for more details.
			
 
				- *
			
 
				- *  Contact Information:
			
 
				- *      Megha Dey <megha.dey@linux.intel.com>
			
 
				- *
			
 
				- *  BSD LICENSE
			
 
				- *
			
 
				- *  Copyright(c) 2016 Intel Corporation.
			
 
				- *
			
 
				- *  Redistribution and use in source and binary forms, with or without
			
 
				- *  modification, are permitted provided that the following conditions
			
 
				- *  are met:
			
 
				- *
			
 
				- *    * Redistributions of source code must retain the above copyright
			
 
				- *      notice, this list of conditions and the following disclaimer.
			
 
				- *    * Redistributions in binary form must reproduce the above copyright
			
 
				- *      notice, this list of conditions and the following disclaimer in
			
 
				- *      the documentation and/or other materials provided with the
			
 
				- *      distribution.
			
 
				- *    * Neither the name of Intel Corporation nor the names of its
			
 
				- *      contributors may be used to endorse or promote products derived
			
 
				- *      from this software without specific prior written permission.
			
 
				- *
			
 
				- *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
			
 
				- *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
			
 
				- *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
			
 
				- *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
			
 
				- *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
			
 
				- *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
			
 
				- *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
			
 
				- *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
			
 
				- *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
			
 
				- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
			
 
				- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
			
 
				- */
			
 
				-
			
 
				-# Macros for defining data structures
			
 
				-
			
 
				-# Usage example
			
 
				-
			
 
				-#START_FIELDS   # JOB_AES
			
 
				-###     name            size    align
			
 
				-#FIELD  _plaintext,     8,      8       # pointer to plaintext
			
 
				-#FIELD  _ciphertext,    8,      8       # pointer to ciphertext
			
 
				-#FIELD  _IV,            16,     8       # IV
			
 
				-#FIELD  _keys,          8,      8       # pointer to keys
			
 
				-#FIELD  _len,           4,      4       # length in bytes
			
 
				-#FIELD  _status,        4,      4       # status enumeration
			
 
				-#FIELD  _user_data,     8,      8       # pointer to user data
			
 
				-#UNION  _union,         size1,  align1, \
			
 
				-#                       size2,  align2, \
			
 
				-#                       size3,  align3, \
			
 
				-#                       ...
			
 
				-#END_FIELDS
			
 
				-#%assign _JOB_AES_size  _FIELD_OFFSET
			
 
				-#%assign _JOB_AES_align _STRUCT_ALIGN
			
 
				-
			
 
				-#########################################################################
			
 
				-
			
 
				-# Alternate "struc-like" syntax:
			
 
				-#       STRUCT job_aes2
			
 
				-#       RES_Q   .plaintext,     1
			
 
				-#       RES_Q   .ciphertext,    1
			
 
				-#       RES_DQ  .IV,            1
			
 
				-#       RES_B   .nested,        _JOB_AES_SIZE, _JOB_AES_ALIGN
			
 
				-#       RES_U   .union,         size1, align1, \
			
 
				-#                               size2, align2, \
			
 
				-#                               ...
			
 
				-#       ENDSTRUCT
			
 
				-#       # Following only needed if nesting
			
 
				-#       %assign job_aes2_size   _FIELD_OFFSET
			
 
				-#       %assign job_aes2_align  _STRUCT_ALIGN
			
 
				-#
			
 
				-# RES_* macros take a name, a count and an optional alignment.
			
 
				-# The count in in terms of the base size of the macro, and the
			
 
				-# default alignment is the base size.
			
 
				-# The macros are:
			
 
				-# Macro    Base size
			
 
				-# RES_B     1
			
 
				-# RES_W     2
			
 
				-# RES_D     4
			
 
				-# RES_Q     8
			
 
				-# RES_DQ   16
			
 
				-# RES_Y    32
			
 
				-# RES_Z    64
			
 
				-#
			
 
				-# RES_U defines a union. It's arguments are a name and two or more
			
 
				-# pairs of "size, alignment"
			
 
				-#
			
 
				-# The two assigns are only needed if this structure is being nested
			
 
				-# within another. Even if the assigns are not done, one can still use
			
 
				-# STRUCT_NAME_size as the size of the structure.
			
 
				-#
			
 
				-# Note that for nesting, you still need to assign to STRUCT_NAME_size.
			
 
				-#
			
 
				-# The differences between this and using "struc" directly are that each
			
 
				-# type is implicitly aligned to its natural length (although this can be
			
 
				-# over-ridden with an explicit third parameter), and that the structure
			
 
				-# is padded at the end to its overall alignment.
			
 
				-#
			
 
				-
			
 
				-#########################################################################
			
 
				-
			
 
				-#ifndef _DATASTRUCT_ASM_
			
 
				-#define _DATASTRUCT_ASM_
			
 
				-
			
 
				-#define PTR_SZ                  8
			
 
				-#define SHA512_DIGEST_WORD_SIZE 8
			
 
				-#define SHA512_MB_MGR_NUM_LANES_AVX2 4
			
 
				-#define NUM_SHA512_DIGEST_WORDS 8
			
 
				-#define SZ4                     4*SHA512_DIGEST_WORD_SIZE
			
 
				-#define ROUNDS                  80*SZ4
			
 
				-#define SHA512_DIGEST_ROW_SIZE  (SHA512_MB_MGR_NUM_LANES_AVX2 * 8)
			
 
				-
			
 
				-# START_FIELDS
			
 
				-.macro START_FIELDS
			
 
				- _FIELD_OFFSET = 0
			
 
				- _STRUCT_ALIGN = 0
			
 
				-.endm
			
 
				-
			
 
				-# FIELD name size align
			
 
				-.macro FIELD name size align
			
 
				- _FIELD_OFFSET = (_FIELD_OFFSET + (\align) - 1) & (~ ((\align)-1))
			
 
				- \name  = _FIELD_OFFSET
			
 
				- _FIELD_OFFSET = _FIELD_OFFSET + (\size)
			
 
				-.if (\align > _STRUCT_ALIGN)
			
 
				- _STRUCT_ALIGN = \align
			
 
				-.endif
			
 
				-.endm
			
 
				-
			
 
				-# END_FIELDS
			
 
				-.macro END_FIELDS
			
 
				- _FIELD_OFFSET = (_FIELD_OFFSET + _STRUCT_ALIGN-1) & (~ (_STRUCT_ALIGN-1))
			
 
				-.endm
			
 
				-
			
 
				-.macro STRUCT p1
			
 
				-START_FIELDS
			
 
				-.struc \p1
			
 
				-.endm
			
 
				-
			
 
				-.macro ENDSTRUCT
			
 
				- tmp = _FIELD_OFFSET
			
 
				- END_FIELDS
			
 
				- tmp = (_FIELD_OFFSET - ##tmp)
			
 
				-.if (tmp > 0)
			
 
				-        .lcomm  tmp
			
 
				-.endm
			
 
				-
			
 
				-## RES_int name size align
			
 
				-.macro RES_int p1 p2 p3
			
 
				- name = \p1
			
 
				- size = \p2
			
 
				- align = .\p3
			
 
				-
			
 
				- _FIELD_OFFSET = (_FIELD_OFFSET + (align) - 1) & (~ ((align)-1))
			
 
				-.align align
			
 
				-.lcomm name size
			
 
				- _FIELD_OFFSET = _FIELD_OFFSET + (size)
			
 
				-.if (align > _STRUCT_ALIGN)
			
 
				- _STRUCT_ALIGN = align
			
 
				-.endif
			
 
				-.endm
			
 
				-
			
 
				-# macro RES_B name, size [, align]
			
 
				-.macro RES_B _name, _size, _align=1
			
 
				-RES_int _name _size _align
			
 
				-.endm
			
 
				-
			
 
				-# macro RES_W name, size [, align]
			
 
				-.macro RES_W _name, _size, _align=2
			
 
				-RES_int _name 2*(_size) _align
			
 
				-.endm
			
 
				-
			
 
				-# macro RES_D name, size [, align]
			
 
				-.macro RES_D _name, _size, _align=4
			
 
				-RES_int _name 4*(_size) _align
			
 
				-.endm
			
 
				-
			
 
				-# macro RES_Q name, size [, align]
			
 
				-.macro RES_Q _name, _size, _align=8
			
 
				-RES_int _name 8*(_size) _align
			
 
				-.endm
			
 
				-
			
 
				-# macro RES_DQ name, size [, align]
			
 
				-.macro RES_DQ _name, _size, _align=16
			
 
				-RES_int _name 16*(_size) _align
			
 
				-.endm
			
 
				-
			
 
				-# macro RES_Y name, size [, align]
			
 
				-.macro RES_Y _name, _size, _align=32
			
 
				-RES_int _name 32*(_size) _align
			
 
				-.endm
			
 
				-
			
 
				-# macro RES_Z name, size [, align]
			
 
				-.macro RES_Z _name, _size, _align=64
			
 
				-RES_int _name 64*(_size) _align
			
 
				-.endm
			
 
				-
			
 
				-#endif
			
 
				-
			
 
				-###################################################################
			
 
				-### Define SHA512 Out Of Order Data Structures
			
 
				-###################################################################
			
 
				-
			
 
				-START_FIELDS    # LANE_DATA
			
 
				-###     name            size    align
			
 
				-FIELD   _job_in_lane,   8,      8       # pointer to job object
			
 
				-END_FIELDS
			
 
				-
			
 
				- _LANE_DATA_size = _FIELD_OFFSET
			
 
				- _LANE_DATA_align = _STRUCT_ALIGN
			
 
				-
			
 
				-####################################################################
			
 
				-
			
 
				-START_FIELDS    # SHA512_ARGS_X4
			
 
				-###     name            size    align
			
 
				-FIELD   _digest,        8*8*4,  4      # transposed digest
			
 
				-FIELD   _data_ptr,      8*4,    8       # array of pointers to data
			
 
				-END_FIELDS
			
 
				-
			
 
				- _SHA512_ARGS_X4_size  =  _FIELD_OFFSET
			
 
				- _SHA512_ARGS_X4_align =  _STRUCT_ALIGN
			
 
				-
			
 
				-#####################################################################
			
 
				-
			
 
				-START_FIELDS    # MB_MGR
			
 
				-###     name            size    align
			
 
				-FIELD   _args,          _SHA512_ARGS_X4_size, _SHA512_ARGS_X4_align
			
 
				-FIELD   _lens,          8*4,    8
			
 
				-FIELD   _unused_lanes,  8,      8
			
 
				-FIELD   _ldata,         _LANE_DATA_size*4, _LANE_DATA_align
			
 
				-END_FIELDS
			
 
				-
			
 
				- _MB_MGR_size  =  _FIELD_OFFSET
			
 
				- _MB_MGR_align =  _STRUCT_ALIGN
			
 
				-
			
 
				-_args_digest = _args + _digest
			
 
				-_args_data_ptr = _args + _data_ptr
			
 
				-
			
 
				-#######################################################################
			
 
				-
			
 
				-#######################################################################
			
 
				-#### Define constants
			
 
				-#######################################################################
			
 
				-
			
 
				-#define STS_UNKNOWN             0
			
 
				-#define STS_BEING_PROCESSED     1
			
 
				-#define STS_COMPLETED           2
			
 
				-
			
 
				-#######################################################################
			
 
				-#### Define JOB_SHA512 structure
			
 
				-#######################################################################
			
 
				-
			
 
				-START_FIELDS    # JOB_SHA512
			
 
				-###     name                            size    align
			
 
				-FIELD   _buffer,                        8,      8       # pointer to buffer
			
 
				-FIELD   _len,                           8,      8       # length in bytes
			
 
				-FIELD   _result_digest,                 8*8,    32      # Digest (output)
			
 
				-FIELD   _status,                        4,      4
			
 
				-FIELD   _user_data,                     8,      8
			
 
				-END_FIELDS
			
 
				-
			
 
				- _JOB_SHA512_size = _FIELD_OFFSET
			
 
				- _JOB_SHA512_align = _STRUCT_ALIGN
			
--- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S
+++ b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S
@@ -1,297 +0,0 @@
 
				-/*
			
 
				- * Flush routine for SHA512 multibuffer
			
 
				- *
			
 
				- * This file is provided under a dual BSD/GPLv2 license.  When using or
			
 
				- * redistributing this file, you may do so under either license.
			
 
				- *
			
 
				- * GPL LICENSE SUMMARY
			
 
				- *
			
 
				- * Copyright(c) 2016 Intel Corporation.
			
 
				- *
			
 
				- * This program is free software; you can redistribute it and/or modify
			
 
				- * it under the terms of version 2 of the GNU General Public License as
			
 
				- * published by the Free Software Foundation.
			
 
				- *
			
 
				- * This program is distributed in the hope that it will be useful, but
			
 
				- * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
			
 
				- * General Public License for more details.
			
 
				- *
			
 
				- * Contact Information:
			
 
				- *     Megha Dey <megha.dey@linux.intel.com>
			
 
				- *
			
 
				- * BSD LICENSE
			
 
				- *
			
 
				- * Copyright(c) 2016 Intel Corporation.
			
 
				- *
			
 
				- * Redistribution and use in source and binary forms, with or without
			
 
				- * modification, are permitted provided that the following conditions
			
 
				- * are met:
			
 
				- *
			
 
				- *   * Redistributions of source code must retain the above copyright
			
 
				- *     notice, this list of conditions and the following disclaimer.
			
 
				- *   * Redistributions in binary form must reproduce the above copyright
			
 
				- *     notice, this list of conditions and the following disclaimer in
			
 
				- *     the documentation and/or other materials provided with the
			
 
				- *     distribution.
			
 
				- *   * Neither the name of Intel Corporation nor the names of its
			
 
				- *     contributors may be used to endorse or promote products derived
			
 
				- *     from this software without specific prior written permission.
			
 
				- *
			
 
				- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
			
 
				- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
			
 
				- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
			
 
				- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
			
 
				- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
			
 
				- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
			
 
				- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
			
 
				- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
			
 
				- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
			
 
				- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
			
 
				- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
			
 
				- */
			
 
				-
			
 
				-#include <linux/linkage.h>
			
 
				-#include <asm/frame.h>
			
 
				-#include "sha512_mb_mgr_datastruct.S"
			
 
				-
			
 
				-.extern sha512_x4_avx2
			
 
				-
			
 
				-# LINUX register definitions
			
 
				-#define arg1    %rdi
			
 
				-#define arg2    %rsi
			
 
				-
			
 
				-# idx needs to be other than arg1, arg2, rbx, r12
			
 
				-#define idx     %rdx
			
 
				-
			
 
				-# Common definitions
			
 
				-#define state   arg1
			
 
				-#define job     arg2
			
 
				-#define len2    arg2
			
 
				-
			
 
				-#define unused_lanes    %rbx
			
 
				-#define lane_data       %rbx
			
 
				-#define tmp2            %rbx
			
 
				-
			
 
				-#define job_rax         %rax
			
 
				-#define tmp1            %rax
			
 
				-#define size_offset     %rax
			
 
				-#define tmp             %rax
			
 
				-#define start_offset    %rax
			
 
				-
			
 
				-#define tmp3            arg1
			
 
				-
			
 
				-#define extra_blocks    arg2
			
 
				-#define p               arg2
			
 
				-
			
 
				-#define tmp4            %r8
			
 
				-#define lens0           %r8
			
 
				-
			
 
				-#define lens1           %r9
			
 
				-#define lens2           %r10
			
 
				-#define lens3           %r11
			
 
				-
			
 
				-.macro LABEL prefix n
			
 
				-\prefix\n\():
			
 
				-.endm
			
 
				-
			
 
				-.macro JNE_SKIP i
			
 
				-jne     skip_\i
			
 
				-.endm
			
 
				-
			
 
				-.altmacro
			
 
				-.macro SET_OFFSET _offset
			
 
				-offset = \_offset
			
 
				-.endm
			
 
				-.noaltmacro
			
 
				-
			
 
				-# JOB* sha512_mb_mgr_flush_avx2(MB_MGR *state)
			
 
				-# arg 1 : rcx : state
			
 
				-ENTRY(sha512_mb_mgr_flush_avx2)
			
 
				-	FRAME_BEGIN
			
 
				-	push	%rbx
			
 
				-
			
 
				-	# If bit (32+3) is set, then all lanes are empty
			
 
				-	mov     _unused_lanes(state), unused_lanes
			
 
				-        bt      $32+7, unused_lanes
			
 
				-        jc      return_null
			
 
				-
			
 
				-        # find a lane with a non-null job
			
 
				-	xor     idx, idx
			
 
				-        offset = (_ldata + 1*_LANE_DATA_size + _job_in_lane)
			
 
				-        cmpq    $0, offset(state)
			
 
				-        cmovne  one(%rip), idx
			
 
				-        offset = (_ldata + 2*_LANE_DATA_size + _job_in_lane)
			
 
				-        cmpq    $0, offset(state)
			
 
				-        cmovne  two(%rip), idx
			
 
				-        offset = (_ldata + 3*_LANE_DATA_size + _job_in_lane)
			
 
				-        cmpq    $0, offset(state)
			
 
				-        cmovne  three(%rip), idx
			
 
				-
			
 
				-        # copy idx to empty lanes
			
 
				-copy_lane_data:
			
 
				-	offset =  (_args + _data_ptr)
			
 
				-        mov     offset(state,idx,8), tmp
			
 
				-
			
 
				-        I = 0
			
 
				-.rep 4
			
 
				-	offset =  (_ldata + I * _LANE_DATA_size + _job_in_lane)
			
 
				-        cmpq    $0, offset(state)
			
 
				-.altmacro
			
 
				-        JNE_SKIP %I
			
 
				-        offset =  (_args + _data_ptr + 8*I)
			
 
				-        mov     tmp, offset(state)
			
 
				-        offset =  (_lens + 8*I +4)
			
 
				-        movl    $0xFFFFFFFF, offset(state)
			
 
				-LABEL skip_ %I
			
 
				-        I = (I+1)
			
 
				-.noaltmacro
			
 
				-.endr
			
 
				-
			
 
				-        # Find min length
			
 
				-        mov     _lens + 0*8(state),lens0
			
 
				-        mov     lens0,idx
			
 
				-        mov     _lens + 1*8(state),lens1
			
 
				-        cmp     idx,lens1
			
 
				-        cmovb   lens1,idx
			
 
				-        mov     _lens + 2*8(state),lens2
			
 
				-        cmp     idx,lens2
			
 
				-        cmovb   lens2,idx
			
 
				-        mov     _lens + 3*8(state),lens3
			
 
				-        cmp     idx,lens3
			
 
				-        cmovb   lens3,idx
			
 
				-        mov     idx,len2
			
 
				-        and     $0xF,idx
			
 
				-        and     $~0xFF,len2
			
 
				-	jz      len_is_0
			
 
				-
			
 
				-        sub     len2, lens0
			
 
				-        sub     len2, lens1
			
 
				-        sub     len2, lens2
			
 
				-        sub     len2, lens3
			
 
				-        shr     $32,len2
			
 
				-        mov     lens0, _lens + 0*8(state)
			
 
				-        mov     lens1, _lens + 1*8(state)
			
 
				-        mov     lens2, _lens + 2*8(state)
			
 
				-        mov     lens3, _lens + 3*8(state)
			
 
				-
			
 
				-        # "state" and "args" are the same address, arg1
			
 
				-        # len is arg2
			
 
				-        call    sha512_x4_avx2
			
 
				-        # state and idx are intact
			
 
				-
			
 
				-len_is_0:
			
 
				-        # process completed job "idx"
			
 
				-	imul    $_LANE_DATA_size, idx, lane_data
			
 
				-        lea     _ldata(state, lane_data), lane_data
			
 
				-
			
 
				-        mov     _job_in_lane(lane_data), job_rax
			
 
				-        movq    $0,  _job_in_lane(lane_data)
			
 
				-        movl    $STS_COMPLETED, _status(job_rax)
			
 
				-        mov     _unused_lanes(state), unused_lanes
			
 
				-        shl     $8, unused_lanes
			
 
				-        or      idx, unused_lanes
			
 
				-        mov     unused_lanes, _unused_lanes(state)
			
 
				-
			
 
				-	movl    $0xFFFFFFFF, _lens+4(state,  idx, 8)
			
 
				-
			
 
				-	vmovq _args_digest+0*32(state, idx, 8), %xmm0
			
 
				-        vpinsrq $1, _args_digest+1*32(state, idx, 8), %xmm0, %xmm0
			
 
				-	vmovq _args_digest+2*32(state, idx, 8), %xmm1
			
 
				-        vpinsrq $1, _args_digest+3*32(state, idx, 8), %xmm1, %xmm1
			
 
				-	vmovq _args_digest+4*32(state, idx, 8), %xmm2
			
 
				-        vpinsrq $1, _args_digest+5*32(state, idx, 8), %xmm2, %xmm2
			
 
				-	vmovq _args_digest+6*32(state, idx, 8), %xmm3
			
 
				-	vpinsrq $1, _args_digest+7*32(state, idx, 8), %xmm3, %xmm3
			
 
				-
			
 
				-	vmovdqu %xmm0, _result_digest(job_rax)
			
 
				-	vmovdqu %xmm1, _result_digest+1*16(job_rax)
			
 
				-	vmovdqu %xmm2, _result_digest+2*16(job_rax)
			
 
				-	vmovdqu %xmm3, _result_digest+3*16(job_rax)
			
 
				-
			
 
				-return:
			
 
				-	pop	%rbx
			
 
				-	FRAME_END
			
 
				-        ret
			
 
				-
			
 
				-return_null:
			
 
				-        xor     job_rax, job_rax
			
 
				-        jmp     return
			
 
				-ENDPROC(sha512_mb_mgr_flush_avx2)
			
 
				-.align 16
			
 
				-
			
 
				-ENTRY(sha512_mb_mgr_get_comp_job_avx2)
			
 
				-        push    %rbx
			
 
				-
			
 
				-	mov     _unused_lanes(state), unused_lanes
			
 
				-        bt      $(32+7), unused_lanes
			
 
				-        jc      .return_null
			
 
				-
			
 
				-        # Find min length
			
 
				-        mov     _lens(state),lens0
			
 
				-        mov     lens0,idx
			
 
				-        mov     _lens+1*8(state),lens1
			
 
				-        cmp     idx,lens1
			
 
				-        cmovb   lens1,idx
			
 
				-        mov     _lens+2*8(state),lens2
			
 
				-        cmp     idx,lens2
			
 
				-        cmovb   lens2,idx
			
 
				-        mov     _lens+3*8(state),lens3
			
 
				-        cmp     idx,lens3
			
 
				-        cmovb   lens3,idx
			
 
				-        test    $~0xF,idx
			
 
				-        jnz     .return_null
			
 
				-        and     $0xF,idx
			
 
				-
			
 
				-        #process completed job "idx"
			
 
				-	imul    $_LANE_DATA_size, idx, lane_data
			
 
				-        lea     _ldata(state, lane_data), lane_data
			
 
				-
			
 
				-        mov     _job_in_lane(lane_data), job_rax
			
 
				-        movq    $0,  _job_in_lane(lane_data)
			
 
				-        movl    $STS_COMPLETED, _status(job_rax)
			
 
				-        mov     _unused_lanes(state), unused_lanes
			
 
				-        shl     $8, unused_lanes
			
 
				-        or      idx, unused_lanes
			
 
				-        mov     unused_lanes, _unused_lanes(state)
			
 
				-
			
 
				-        movl    $0xFFFFFFFF, _lens+4(state,  idx, 8)
			
 
				-
			
 
				-	vmovq   _args_digest(state, idx, 8), %xmm0
			
 
				-        vpinsrq $1, _args_digest+1*32(state, idx, 8), %xmm0, %xmm0
			
 
				-	vmovq    _args_digest+2*32(state, idx, 8), %xmm1
			
 
				-        vpinsrq $1, _args_digest+3*32(state, idx, 8), %xmm1, %xmm1
			
 
				-	vmovq    _args_digest+4*32(state, idx, 8), %xmm2
			
 
				-        vpinsrq $1, _args_digest+5*32(state, idx, 8), %xmm2, %xmm2
			
 
				-        vmovq    _args_digest+6*32(state, idx, 8), %xmm3
			
 
				-        vpinsrq $1, _args_digest+7*32(state, idx, 8), %xmm3, %xmm3
			
 
				-
			
 
				-	vmovdqu %xmm0, _result_digest+0*16(job_rax)
			
 
				-	vmovdqu %xmm1, _result_digest+1*16(job_rax)
			
 
				-	vmovdqu %xmm2, _result_digest+2*16(job_rax)
			
 
				-	vmovdqu %xmm3, _result_digest+3*16(job_rax)
			
 
				-
			
 
				-	pop     %rbx
			
 
				-
			
 
				-        ret
			
 
				-
			
 
				-.return_null:
			
 
				-        xor     job_rax, job_rax
			
 
				-	pop     %rbx
			
 
				-        ret
			
 
				-ENDPROC(sha512_mb_mgr_get_comp_job_avx2)
			
 
				-
			
 
				-.section	.rodata.cst8.one, "aM", @progbits, 8
			
 
				-.align 8
			
 
				-one:
			
 
				-.quad  1
			
 
				-
			
 
				-.section	.rodata.cst8.two, "aM", @progbits, 8
			
 
				-.align 8
			
 
				-two:
			
 
				-.quad  2
			
 
				-
			
 
				-.section	.rodata.cst8.three, "aM", @progbits, 8
			
 
				-.align 8
			
 
				-three:
			
 
				-.quad  3
			
--- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c
+++ b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c
@@ -1,69 +0,0 @@
 
				-/*
			
 
				- * Initialization code for multi buffer SHA256 algorithm for AVX2
			
 
				- *
			
 
				- * This file is provided under a dual BSD/GPLv2 license.  When using or
			
 
				- * redistributing this file, you may do so under either license.
			
 
				- *
			
 
				- * GPL LICENSE SUMMARY
			
 
				- *
			
 
				- * Copyright(c) 2016 Intel Corporation.
			
 
				- *
			
 
				- * This program is free software; you can redistribute it and/or modify
			
 
				- * it under the terms of version 2 of the GNU General Public License as
			
 
				- * published by the Free Software Foundation.
			
 
				- *
			
 
				- * This program is distributed in the hope that it will be useful, but
			
 
				- * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
			
 
				- * General Public License for more details.
			
 
				- *
			
 
				- * Contact Information:
			
 
				- *     Megha Dey <megha.dey@linux.intel.com>
			
 
				- *
			
 
				- * BSD LICENSE
			
 
				- *
			
 
				- * Copyright(c) 2016 Intel Corporation.
			
 
				- *
			
 
				- * Redistribution and use in source and binary forms, with or without
			
 
				- * modification, are permitted provided that the following conditions
			
 
				- * are met:
			
 
				- *
			
 
				- *   * Redistributions of source code must retain the above copyright
			
 
				- *     notice, this list of conditions and the following disclaimer.
			
 
				- *   * Redistributions in binary form must reproduce the above copyright
			
 
				- *     notice, this list of conditions and the following disclaimer in
			
 
				- *     the documentation and/or other materials provided with the
			
 
				- *     distribution.
			
 
				- *   * Neither the name of Intel Corporation nor the names of its
			
 
				- *     contributors may be used to endorse or promote products derived
			
 
				- *     from this software without specific prior written permission.
			
 
				- *
			
 
				- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
			
 
				- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
			
 
				- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
			
 
				- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
			
 
				- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
			
 
				- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
			
 
				- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
			
 
				- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
			
 
				- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
			
 
				- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
			
 
				- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
			
 
				- */
			
 
				-
			
 
				-#include "sha512_mb_mgr.h"
			
 
				-
			
 
				-void sha512_mb_mgr_init_avx2(struct sha512_mb_mgr *state)
			
 
				-{
			
 
				-	unsigned int j;
			
 
				-
			
 
				-	/* initially all lanes are unused */
			
 
				-	state->lens[0] = 0xFFFFFFFF00000000;
			
 
				-	state->lens[1] = 0xFFFFFFFF00000001;
			
 
				-	state->lens[2] = 0xFFFFFFFF00000002;
			
 
				-	state->lens[3] = 0xFFFFFFFF00000003;
			
 
				-
			
 
				-	state->unused_lanes = 0xFF03020100;
			
 
				-	for (j = 0; j < 4; j++)
			
 
				-		state->ldata[j].job_in_lane = NULL;
			
 
				-}
			
--- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S
+++ b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S
@@ -1,224 +0,0 @@
 
				-/*
			
 
				- * Buffer submit code for multi buffer SHA512 algorithm
			
 
				- *
			
 
				- * This file is provided under a dual BSD/GPLv2 license.  When using or
			
 
				- * redistributing this file, you may do so under either license.
			
 
				- *
			
 
				- * GPL LICENSE SUMMARY
			
 
				- *
			
 
				- * Copyright(c) 2016 Intel Corporation.
			
 
				- *
			
 
				- * This program is free software; you can redistribute it and/or modify
			
 
				- * it under the terms of version 2 of the GNU General Public License as
			
 
				- * published by the Free Software Foundation.
			
 
				- *
			
 
				- * This program is distributed in the hope that it will be useful, but
			
 
				- * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
			
 
				- * General Public License for more details.
			
 
				- *
			
 
				- * Contact Information:
			
 
				- *     Megha Dey <megha.dey@linux.intel.com>
			
 
				- *
			
 
				- * BSD LICENSE
			
 
				- *
			
 
				- * Copyright(c) 2016 Intel Corporation.
			
 
				- *
			
 
				- * Redistribution and use in source and binary forms, with or without
			
 
				- * modification, are permitted provided that the following conditions
			
 
				- * are met:
			
 
				- *
			
 
				- *   * Redistributions of source code must retain the above copyright
			
 
				- *     notice, this list of conditions and the following disclaimer.
			
 
				- *   * Redistributions in binary form must reproduce the above copyright
			
 
				- *     notice, this list of conditions and the following disclaimer in
			
 
				- *     the documentation and/or other materials provided with the
			
 
				- *     distribution.
			
 
				- *   * Neither the name of Intel Corporation nor the names of its
			
 
				- *     contributors may be used to endorse or promote products derived
			
 
				- *     from this software without specific prior written permission.
			
 
				- *
			
 
				- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
			
 
				- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
			
 
				- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
			
 
				- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
			
 
				- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
			
 
				- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
			
 
				- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
			
 
				- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
			
 
				- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
			
 
				- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
			
 
				- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
			
 
				- */
			
 
				-
			
 
				-#include <linux/linkage.h>
			
 
				-#include <asm/frame.h>
			
 
				-#include "sha512_mb_mgr_datastruct.S"
			
 
				-
			
 
				-.extern sha512_x4_avx2
			
 
				-
			
 
				-#define arg1    %rdi
			
 
				-#define arg2    %rsi
			
 
				-
			
 
				-#define idx             %rdx
			
 
				-#define last_len        %rdx
			
 
				-
			
 
				-#define size_offset     %rcx
			
 
				-#define tmp2            %rcx
			
 
				-
			
 
				-# Common definitions
			
 
				-#define state   arg1
			
 
				-#define job     arg2
			
 
				-#define len2    arg2
			
 
				-#define p2      arg2
			
 
				-
			
 
				-#define p               %r11
			
 
				-#define start_offset    %r11
			
 
				-
			
 
				-#define unused_lanes    %rbx
			
 
				-
			
 
				-#define job_rax         %rax
			
 
				-#define len             %rax
			
 
				-
			
 
				-#define lane            %r12
			
 
				-#define tmp3            %r12
			
 
				-#define lens3           %r12
			
 
				-
			
 
				-#define extra_blocks    %r8
			
 
				-#define lens0           %r8
			
 
				-
			
 
				-#define tmp             %r9
			
 
				-#define lens1           %r9
			
 
				-
			
 
				-#define lane_data       %r10
			
 
				-#define lens2           %r10
			
 
				-
			
 
				-#define DWORD_len %eax
			
 
				-
			
 
				-# JOB* sha512_mb_mgr_submit_avx2(MB_MGR *state, JOB *job)
			
 
				-# arg 1 : rcx : state
			
 
				-# arg 2 : rdx : job
			
 
				-ENTRY(sha512_mb_mgr_submit_avx2)
			
 
				-	FRAME_BEGIN
			
 
				-	push	%rbx
			
 
				-	push	%r12
			
 
				-
			
 
				-        mov     _unused_lanes(state), unused_lanes
			
 
				-        movzb     %bl,lane
			
 
				-        shr     $8, unused_lanes
			
 
				-        imul    $_LANE_DATA_size, lane,lane_data
			
 
				-        movl    $STS_BEING_PROCESSED, _status(job)
			
 
				-	lea     _ldata(state, lane_data), lane_data
			
 
				-        mov     unused_lanes, _unused_lanes(state)
			
 
				-        movl    _len(job),  DWORD_len
			
 
				-
			
 
				-	mov     job, _job_in_lane(lane_data)
			
 
				-        movl    DWORD_len,_lens+4(state , lane, 8)
			
 
				-
			
 
				-	# Load digest words from result_digest
			
 
				-	vmovdqu	_result_digest+0*16(job), %xmm0
			
 
				-	vmovdqu _result_digest+1*16(job), %xmm1
			
 
				-	vmovdqu	_result_digest+2*16(job), %xmm2
			
 
				-        vmovdqu	_result_digest+3*16(job), %xmm3
			
 
				-
			
 
				-	vmovq    %xmm0, _args_digest(state, lane, 8)
			
 
				-	vpextrq  $1, %xmm0, _args_digest+1*32(state , lane, 8)
			
 
				-	vmovq    %xmm1, _args_digest+2*32(state , lane, 8)
			
 
				-	vpextrq  $1, %xmm1, _args_digest+3*32(state , lane, 8)
			
 
				-	vmovq    %xmm2, _args_digest+4*32(state , lane, 8)
			
 
				-	vpextrq  $1, %xmm2, _args_digest+5*32(state , lane, 8)
			
 
				-	vmovq    %xmm3, _args_digest+6*32(state , lane, 8)
			
 
				-	vpextrq  $1, %xmm3, _args_digest+7*32(state , lane, 8)
			
 
				-
			
 
				-	mov     _buffer(job), p
			
 
				-	mov     p, _args_data_ptr(state, lane, 8)
			
 
				-
			
 
				-	cmp     $0xFF, unused_lanes
			
 
				-	jne     return_null
			
 
				-
			
 
				-start_loop:
			
 
				-
			
 
				-	# Find min length
			
 
				-	mov     _lens+0*8(state),lens0
			
 
				-	mov     lens0,idx
			
 
				-	mov     _lens+1*8(state),lens1
			
 
				-	cmp     idx,lens1
			
 
				-	cmovb   lens1, idx
			
 
				-	mov     _lens+2*8(state),lens2
			
 
				-	cmp     idx,lens2
			
 
				-	cmovb   lens2,idx
			
 
				-	mov     _lens+3*8(state),lens3
			
 
				-	cmp     idx,lens3
			
 
				-	cmovb   lens3,idx
			
 
				-	mov     idx,len2
			
 
				-	and     $0xF,idx
			
 
				-	and     $~0xFF,len2
			
 
				-	jz      len_is_0
			
 
				-
			
 
				-	sub     len2,lens0
			
 
				-	sub     len2,lens1
			
 
				-	sub     len2,lens2
			
 
				-	sub     len2,lens3
			
 
				-	shr     $32,len2
			
 
				-	mov     lens0, _lens + 0*8(state)
			
 
				-	mov     lens1, _lens + 1*8(state)
			
 
				-	mov     lens2, _lens + 2*8(state)
			
 
				-	mov     lens3, _lens + 3*8(state)
			
 
				-
			
 
				-	# "state" and "args" are the same address, arg1
			
 
				-	# len is arg2
			
 
				-	call    sha512_x4_avx2
			
 
				-	# state and idx are intact
			
 
				-
			
 
				-len_is_0:
			
 
				-
			
 
				-	# process completed job "idx"
			
 
				-	imul    $_LANE_DATA_size, idx, lane_data
			
 
				-	lea     _ldata(state, lane_data), lane_data
			
 
				-
			
 
				-	mov     _job_in_lane(lane_data), job_rax
			
 
				-	mov     _unused_lanes(state), unused_lanes
			
 
				-	movq    $0, _job_in_lane(lane_data)
			
 
				-	movl    $STS_COMPLETED, _status(job_rax)
			
 
				-	shl     $8, unused_lanes
			
 
				-	or      idx, unused_lanes
			
 
				-	mov     unused_lanes, _unused_lanes(state)
			
 
				-
			
 
				-	movl	$0xFFFFFFFF,_lens+4(state,idx,8)
			
 
				-	vmovq    _args_digest+0*32(state , idx, 8), %xmm0
			
 
				-	vpinsrq  $1, _args_digest+1*32(state , idx, 8), %xmm0, %xmm0
			
 
				-	vmovq    _args_digest+2*32(state , idx, 8), %xmm1
			
 
				-	vpinsrq  $1, _args_digest+3*32(state , idx, 8), %xmm1, %xmm1
			
 
				-	vmovq    _args_digest+4*32(state , idx, 8), %xmm2
			
 
				-	vpinsrq  $1, _args_digest+5*32(state , idx, 8), %xmm2, %xmm2
			
 
				-	vmovq    _args_digest+6*32(state , idx, 8), %xmm3
			
 
				-	vpinsrq  $1, _args_digest+7*32(state , idx, 8), %xmm3, %xmm3
			
 
				-
			
 
				-	vmovdqu  %xmm0, _result_digest + 0*16(job_rax)
			
 
				-	vmovdqu  %xmm1, _result_digest + 1*16(job_rax)
			
 
				-	vmovdqu  %xmm2, _result_digest + 2*16(job_rax)
			
 
				-	vmovdqu  %xmm3, _result_digest + 3*16(job_rax)
			
 
				-
			
 
				-return:
			
 
				-	pop	%r12
			
 
				-	pop	%rbx
			
 
				-	FRAME_END
			
 
				-	ret
			
 
				-
			
 
				-return_null:
			
 
				-	xor     job_rax, job_rax
			
 
				-	jmp     return
			
 
				-ENDPROC(sha512_mb_mgr_submit_avx2)
			
 
				-
			
 
				-/* UNUSED?
			
 
				-.section	.rodata.cst16, "aM", @progbits, 16
			
 
				-.align 16
			
 
				-H0:     .int  0x6a09e667
			
 
				-H1:     .int  0xbb67ae85
			
 
				-H2:     .int  0x3c6ef372
			
 
				-H3:     .int  0xa54ff53a
			
 
				-H4:     .int  0x510e527f
			
 
				-H5:     .int  0x9b05688c
			
 
				-H6:     .int  0x1f83d9ab
			
 
				-H7:     .int  0x5be0cd19
			
 
				-*/
			
--- a/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S
+++ b/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S
@@ -1,531 +0,0 @@
 
				-/*
			
 
				- * Multi-buffer SHA512 algorithm hash compute routine
			
 
				- *
			
 
				- * This file is provided under a dual BSD/GPLv2 license.  When using or
			
 
				- * redistributing this file, you may do so under either license.
			
 
				- *
			
 
				- * GPL LICENSE SUMMARY
			
 
				- *
			
 
				- * Copyright(c) 2016 Intel Corporation.
			
 
				- *
			
 
				- * This program is free software; you can redistribute it and/or modify
			
 
				- * it under the terms of version 2 of the GNU General Public License as
			
 
				- * published by the Free Software Foundation.
			
 
				- *
			
 
				- * This program is distributed in the hope that it will be useful, but
			
 
				- * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
			
 
				- * General Public License for more details.
			
 
				- *
			
 
				- * Contact Information:
			
 
				- *     Megha Dey <megha.dey@linux.intel.com>
			
 
				- *
			
 
				- * BSD LICENSE
			
 
				- *
			
 
				- * Copyright(c) 2016 Intel Corporation.
			
 
				- *
			
 
				- * Redistribution and use in source and binary forms, with or without
			
 
				- * modification, are permitted provided that the following conditions
			
 
				- * are met:
			
 
				- *
			
 
				- *   * Redistributions of source code must retain the above copyright
			
 
				- *     notice, this list of conditions and the following disclaimer.
			
 
				- *   * Redistributions in binary form must reproduce the above copyright
			
 
				- *     notice, this list of conditions and the following disclaimer in
			
 
				- *     the documentation and/or other materials provided with the
			
 
				- *     distribution.
			
 
				- *   * Neither the name of Intel Corporation nor the names of its
			
 
				- *     contributors may be used to endorse or promote products derived
			
 
				- *     from this software without specific prior written permission.
			
 
				- *
			
 
				- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
			
 
				- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
			
 
				- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
			
 
				- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
			
 
				- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
			
 
				- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
			
 
				- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
			
 
				- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
			
 
				- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
			
 
				- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
			
 
				- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
			
 
				- */
			
 
				-
			
 
				-# code to compute quad SHA512 using AVX2
			
 
				-# use YMMs to tackle the larger digest size
			
 
				-# outer calling routine takes care of save and restore of XMM registers
			
 
				-# Logic designed/laid out by JDG
			
 
				-
			
 
				-# Function clobbers: rax, rcx, rdx, rbx, rsi, rdi, r9-r15; ymm0-15
			
 
				-# Stack must be aligned to 32 bytes before call
			
 
				-# Linux clobbers: rax rbx rcx rsi r8 r9 r10 r11 r12
			
 
				-# Linux preserves: rcx rdx rdi rbp r13 r14 r15
			
 
				-# clobbers ymm0-15
			
 
				-
			
 
				-#include <linux/linkage.h>
			
 
				-#include "sha512_mb_mgr_datastruct.S"
			
 
				-
			
 
				-arg1 = %rdi
			
 
				-arg2 = %rsi
			
 
				-
			
 
				-# Common definitions
			
 
				-STATE = arg1
			
 
				-INP_SIZE = arg2
			
 
				-
			
 
				-IDX = %rax
			
 
				-ROUND = %rbx
			
 
				-TBL = %r8
			
 
				-
			
 
				-inp0 = %r9
			
 
				-inp1 = %r10
			
 
				-inp2 = %r11
			
 
				-inp3 = %r12
			
 
				-
			
 
				-a = %ymm0
			
 
				-b = %ymm1
			
 
				-c = %ymm2
			
 
				-d = %ymm3
			
 
				-e = %ymm4
			
 
				-f = %ymm5
			
 
				-g = %ymm6
			
 
				-h = %ymm7
			
 
				-
			
 
				-a0 = %ymm8
			
 
				-a1 = %ymm9
			
 
				-a2 = %ymm10
			
 
				-
			
 
				-TT0 = %ymm14
			
 
				-TT1 = %ymm13
			
 
				-TT2 = %ymm12
			
 
				-TT3 = %ymm11
			
 
				-TT4 = %ymm10
			
 
				-TT5 = %ymm9
			
 
				-
			
 
				-T1 = %ymm14
			
 
				-TMP = %ymm15
			
 
				-
			
 
				-# Define stack usage
			
 
				-STACK_SPACE1 = SZ4*16 + NUM_SHA512_DIGEST_WORDS*SZ4 + 24
			
 
				-
			
 
				-#define VMOVPD	vmovupd
			
 
				-_digest = SZ4*16
			
 
				-
			
 
				-# transpose r0, r1, r2, r3, t0, t1
			
 
				-# "transpose" data in {r0..r3} using temps {t0..t3}
			
 
				-# Input looks like: {r0 r1 r2 r3}
			
 
				-# r0 = {a7 a6 a5 a4 a3 a2 a1 a0}
			
 
				-# r1 = {b7 b6 b5 b4 b3 b2 b1 b0}
			
 
				-# r2 = {c7 c6 c5 c4 c3 c2 c1 c0}
			
 
				-# r3 = {d7 d6 d5 d4 d3 d2 d1 d0}
			
 
				-#
			
 
				-# output looks like: {t0 r1 r0 r3}
			
 
				-# t0 = {d1 d0 c1 c0 b1 b0 a1 a0}
			
 
				-# r1 = {d3 d2 c3 c2 b3 b2 a3 a2}
			
 
				-# r0 = {d5 d4 c5 c4 b5 b4 a5 a4}
			
 
				-# r3 = {d7 d6 c7 c6 b7 b6 a7 a6}
			
 
				-
			
 
				-.macro TRANSPOSE r0 r1 r2 r3 t0 t1
			
 
				-	vshufps  $0x44, \r1, \r0, \t0 # t0 = {b5 b4 a5 a4   b1 b0 a1 a0}
			
 
				-        vshufps  $0xEE, \r1, \r0, \r0 # r0 = {b7 b6 a7 a6   b3 b2 a3 a2}
			
 
				-        vshufps  $0x44, \r3, \r2, \t1 # t1 = {d5 d4 c5 c4   d1 d0 c1 c0}
			
 
				-        vshufps  $0xEE, \r3, \r2, \r2 # r2 = {d7 d6 c7 c6   d3 d2 c3 c2}
			
 
				-
			
 
				-	vperm2f128      $0x20, \r2, \r0, \r1  # h6...a6
			
 
				-        vperm2f128      $0x31, \r2, \r0, \r3  # h2...a2
			
 
				-        vperm2f128      $0x31, \t1, \t0, \r0  # h5...a5
			
 
				-        vperm2f128      $0x20, \t1, \t0, \t0  # h1...a1
			
 
				-.endm
			
 
				-
			
 
				-.macro ROTATE_ARGS
			
 
				-TMP_ = h
			
 
				-h = g
			
 
				-g = f
			
 
				-f = e
			
 
				-e = d
			
 
				-d = c
			
 
				-c = b
			
 
				-b = a
			
 
				-a = TMP_
			
 
				-.endm
			
 
				-
			
 
				-# PRORQ reg, imm, tmp
			
 
				-# packed-rotate-right-double
			
 
				-# does a rotate by doing two shifts and an or
			
 
				-.macro _PRORQ reg imm tmp
			
 
				-	vpsllq	$(64-\imm),\reg,\tmp
			
 
				-	vpsrlq	$\imm,\reg, \reg
			
 
				-	vpor	\tmp,\reg, \reg
			
 
				-.endm
			
 
				-
			
 
				-# non-destructive
			
 
				-# PRORQ_nd reg, imm, tmp, src
			
 
				-.macro _PRORQ_nd reg imm tmp src
			
 
				-	vpsllq	$(64-\imm), \src, \tmp
			
 
				-	vpsrlq	$\imm, \src, \reg
			
 
				-	vpor	\tmp, \reg, \reg
			
 
				-.endm
			
 
				-
			
 
				-# PRORQ dst/src, amt
			
 
				-.macro PRORQ reg imm
			
 
				-	_PRORQ	\reg, \imm, TMP
			
 
				-.endm
			
 
				-
			
 
				-# PRORQ_nd dst, src, amt
			
 
				-.macro PRORQ_nd reg tmp imm
			
 
				-	_PRORQ_nd	\reg, \imm, TMP, \tmp
			
 
				-.endm
			
 
				-
			
 
				-#; arguments passed implicitly in preprocessor symbols i, a...h
			
 
				-.macro ROUND_00_15 _T1 i
			
 
				-	PRORQ_nd a0, e, (18-14)	# sig1: a0 = (e >> 4)
			
 
				-
			
 
				-	vpxor   g, f, a2        # ch: a2 = f^g
			
 
				-        vpand   e,a2, a2                # ch: a2 = (f^g)&e
			
 
				-        vpxor   g, a2, a2               # a2 = ch
			
 
				-
			
 
				-        PRORQ_nd        a1,e,41         # sig1: a1 = (e >> 25)
			
 
				-
			
 
				-        offset = SZ4*(\i & 0xf)
			
 
				-        vmovdqu \_T1,offset(%rsp)
			
 
				-        vpaddq  (TBL,ROUND,1), \_T1, \_T1       # T1 = W + K
			
 
				-        vpxor   e,a0, a0        # sig1: a0 = e ^ (e >> 5)
			
 
				-        PRORQ   a0, 14           # sig1: a0 = (e >> 6) ^ (e >> 11)
			
 
				-        vpaddq  a2, h, h        # h = h + ch
			
 
				-        PRORQ_nd        a2,a,6  # sig0: a2 = (a >> 11)
			
 
				-        vpaddq  \_T1,h, h       # h = h + ch + W + K
			
 
				-        vpxor   a1, a0, a0      # a0 = sigma1
			
 
				-	vmovdqu a,\_T1
			
 
				-        PRORQ_nd        a1,a,39 # sig0: a1 = (a >> 22)
			
 
				-        vpxor   c, \_T1, \_T1      # maj: T1 = a^c
			
 
				-        add     $SZ4, ROUND     # ROUND++
			
 
				-        vpand   b, \_T1, \_T1   # maj: T1 = (a^c)&b
			
 
				-        vpaddq  a0, h, h
			
 
				-        vpaddq  h, d, d
			
 
				-        vpxor   a, a2, a2       # sig0: a2 = a ^ (a >> 11)
			
 
				-        PRORQ   a2,28            # sig0: a2 = (a >> 2) ^ (a >> 13)
			
 
				-        vpxor   a1, a2, a2      # a2 = sig0
			
 
				-        vpand   c, a, a1        # maj: a1 = a&c
			
 
				-        vpor    \_T1, a1, a1    # a1 = maj
			
 
				-        vpaddq  a1, h, h        # h = h + ch + W + K + maj
			
 
				-        vpaddq  a2, h, h        # h = h + ch + W + K + maj + sigma0
			
 
				-        ROTATE_ARGS
			
 
				-.endm
			
 
				-
			
 
				-
			
 
				-#; arguments passed implicitly in preprocessor symbols i, a...h
			
 
				-.macro ROUND_16_XX _T1 i
			
 
				-	vmovdqu SZ4*((\i-15)&0xf)(%rsp), \_T1
			
 
				-        vmovdqu SZ4*((\i-2)&0xf)(%rsp), a1
			
 
				-        vmovdqu \_T1, a0
			
 
				-        PRORQ   \_T1,7
			
 
				-        vmovdqu a1, a2
			
 
				-        PRORQ   a1,42
			
 
				-        vpxor   a0, \_T1, \_T1
			
 
				-        PRORQ   \_T1, 1
			
 
				-        vpxor   a2, a1, a1
			
 
				-        PRORQ   a1, 19
			
 
				-        vpsrlq  $7, a0, a0
			
 
				-        vpxor   a0, \_T1, \_T1
			
 
				-        vpsrlq  $6, a2, a2
			
 
				-        vpxor   a2, a1, a1
			
 
				-        vpaddq  SZ4*((\i-16)&0xf)(%rsp), \_T1, \_T1
			
 
				-        vpaddq  SZ4*((\i-7)&0xf)(%rsp), a1, a1
			
 
				-        vpaddq  a1, \_T1, \_T1
			
 
				-
			
 
				-        ROUND_00_15 \_T1,\i
			
 
				-.endm
			
 
				-
			
 
				-
			
 
				-# void sha512_x4_avx2(void *STATE, const int INP_SIZE)
			
 
				-# arg 1 : STATE    : pointer to input data
			
 
				-# arg 2 : INP_SIZE : size of data in blocks (assumed >= 1)
			
 
				-ENTRY(sha512_x4_avx2)
			
 
				-	# general registers preserved in outer calling routine
			
 
				-	# outer calling routine saves all the XMM registers
			
 
				-	# save callee-saved clobbered registers to comply with C function ABI
			
 
				-	push    %r12
			
 
				-	push    %r13
			
 
				-	push    %r14
			
 
				-	push    %r15
			
 
				-
			
 
				-	sub     $STACK_SPACE1, %rsp
			
 
				-
			
 
				-        # Load the pre-transposed incoming digest.
			
 
				-        vmovdqu 0*SHA512_DIGEST_ROW_SIZE(STATE),a
			
 
				-        vmovdqu 1*SHA512_DIGEST_ROW_SIZE(STATE),b
			
 
				-        vmovdqu 2*SHA512_DIGEST_ROW_SIZE(STATE),c
			
 
				-        vmovdqu 3*SHA512_DIGEST_ROW_SIZE(STATE),d
			
 
				-        vmovdqu 4*SHA512_DIGEST_ROW_SIZE(STATE),e
			
 
				-        vmovdqu 5*SHA512_DIGEST_ROW_SIZE(STATE),f
			
 
				-        vmovdqu 6*SHA512_DIGEST_ROW_SIZE(STATE),g
			
 
				-        vmovdqu 7*SHA512_DIGEST_ROW_SIZE(STATE),h
			
 
				-
			
 
				-        lea     K512_4(%rip),TBL
			
 
				-
			
 
				-        # load the address of each of the 4 message lanes
			
 
				-        # getting ready to transpose input onto stack
			
 
				-        mov     _data_ptr+0*PTR_SZ(STATE),inp0
			
 
				-        mov     _data_ptr+1*PTR_SZ(STATE),inp1
			
 
				-        mov     _data_ptr+2*PTR_SZ(STATE),inp2
			
 
				-        mov     _data_ptr+3*PTR_SZ(STATE),inp3
			
 
				-
			
 
				-        xor     IDX, IDX
			
 
				-lloop:
			
 
				-        xor     ROUND, ROUND
			
 
				-
			
 
				-	# save old digest
			
 
				-        vmovdqu a, _digest(%rsp)
			
 
				-        vmovdqu b, _digest+1*SZ4(%rsp)
			
 
				-        vmovdqu c, _digest+2*SZ4(%rsp)
			
 
				-        vmovdqu d, _digest+3*SZ4(%rsp)
			
 
				-        vmovdqu e, _digest+4*SZ4(%rsp)
			
 
				-        vmovdqu f, _digest+5*SZ4(%rsp)
			
 
				-        vmovdqu g, _digest+6*SZ4(%rsp)
			
 
				-        vmovdqu h, _digest+7*SZ4(%rsp)
			
 
				-        i = 0
			
 
				-.rep 4
			
 
				-	vmovdqu PSHUFFLE_BYTE_FLIP_MASK(%rip), TMP
			
 
				-        VMOVPD  i*32(inp0, IDX), TT2
			
 
				-        VMOVPD  i*32(inp1, IDX), TT1
			
 
				-        VMOVPD  i*32(inp2, IDX), TT4
			
 
				-        VMOVPD  i*32(inp3, IDX), TT3
			
 
				-	TRANSPOSE	TT2, TT1, TT4, TT3, TT0, TT5
			
 
				-	vpshufb	TMP, TT0, TT0
			
 
				-	vpshufb	TMP, TT1, TT1
			
 
				-	vpshufb	TMP, TT2, TT2
			
 
				-	vpshufb	TMP, TT3, TT3
			
 
				-	ROUND_00_15	TT0,(i*4+0)
			
 
				-	ROUND_00_15	TT1,(i*4+1)
			
 
				-	ROUND_00_15	TT2,(i*4+2)
			
 
				-	ROUND_00_15	TT3,(i*4+3)
			
 
				-	i = (i+1)
			
 
				-.endr
			
 
				-        add     $128, IDX
			
 
				-
			
 
				-        i = (i*4)
			
 
				-
			
 
				-        jmp     Lrounds_16_xx
			
 
				-.align 16
			
 
				-Lrounds_16_xx:
			
 
				-.rep 16
			
 
				-        ROUND_16_XX     T1, i
			
 
				-        i = (i+1)
			
 
				-.endr
			
 
				-        cmp     $0xa00,ROUND
			
 
				-        jb      Lrounds_16_xx
			
 
				-
			
 
				-	# add old digest
			
 
				-        vpaddq  _digest(%rsp), a, a
			
 
				-        vpaddq  _digest+1*SZ4(%rsp), b, b
			
 
				-        vpaddq  _digest+2*SZ4(%rsp), c, c
			
 
				-        vpaddq  _digest+3*SZ4(%rsp), d, d
			
 
				-        vpaddq  _digest+4*SZ4(%rsp), e, e
			
 
				-        vpaddq  _digest+5*SZ4(%rsp), f, f
			
 
				-        vpaddq  _digest+6*SZ4(%rsp), g, g
			
 
				-        vpaddq  _digest+7*SZ4(%rsp), h, h
			
 
				-
			
 
				-        sub     $1, INP_SIZE  # unit is blocks
			
 
				-        jne     lloop
			
 
				-
			
 
				-        # write back to memory (state object) the transposed digest
			
 
				-        vmovdqu a, 0*SHA512_DIGEST_ROW_SIZE(STATE)
			
 
				-        vmovdqu b, 1*SHA512_DIGEST_ROW_SIZE(STATE)
			
 
				-        vmovdqu c, 2*SHA512_DIGEST_ROW_SIZE(STATE)
			
 
				-        vmovdqu d, 3*SHA512_DIGEST_ROW_SIZE(STATE)
			
 
				-        vmovdqu e, 4*SHA512_DIGEST_ROW_SIZE(STATE)
			
 
				-        vmovdqu f, 5*SHA512_DIGEST_ROW_SIZE(STATE)
			
 
				-        vmovdqu g, 6*SHA512_DIGEST_ROW_SIZE(STATE)
			
 
				-        vmovdqu h, 7*SHA512_DIGEST_ROW_SIZE(STATE)
			
 
				-
			
 
				-	# update input data pointers
			
 
				-	add     IDX, inp0
			
 
				-        mov     inp0, _data_ptr+0*PTR_SZ(STATE)
			
 
				-        add     IDX, inp1
			
 
				-        mov     inp1, _data_ptr+1*PTR_SZ(STATE)
			
 
				-        add     IDX, inp2
			
 
				-        mov     inp2, _data_ptr+2*PTR_SZ(STATE)
			
 
				-        add     IDX, inp3
			
 
				-        mov     inp3, _data_ptr+3*PTR_SZ(STATE)
			
 
				-
			
 
				-	#;;;;;;;;;;;;;;;
			
 
				-	#; Postamble
			
 
				-	add $STACK_SPACE1, %rsp
			
 
				-	# restore callee-saved clobbered registers
			
 
				-
			
 
				-	pop     %r15
			
 
				-	pop     %r14
			
 
				-	pop     %r13
			
 
				-	pop     %r12
			
 
				-
			
 
				-	# outer calling routine restores XMM and other GP registers
			
 
				-	ret
			
 
				-ENDPROC(sha512_x4_avx2)
			
 
				-
			
 
				-.section	.rodata.K512_4, "a", @progbits
			
 
				-.align 64
			
 
				-K512_4:
			
 
				-	.octa 0x428a2f98d728ae22428a2f98d728ae22,\
			
 
				-		0x428a2f98d728ae22428a2f98d728ae22
			
 
				-	.octa 0x7137449123ef65cd7137449123ef65cd,\
			
 
				-		0x7137449123ef65cd7137449123ef65cd
			
 
				-	.octa 0xb5c0fbcfec4d3b2fb5c0fbcfec4d3b2f,\
			
 
				-		0xb5c0fbcfec4d3b2fb5c0fbcfec4d3b2f
			
 
				-	.octa 0xe9b5dba58189dbbce9b5dba58189dbbc,\
			
 
				-		0xe9b5dba58189dbbce9b5dba58189dbbc
			
 
				-	.octa 0x3956c25bf348b5383956c25bf348b538,\
			
 
				-		0x3956c25bf348b5383956c25bf348b538
			
 
				-	.octa 0x59f111f1b605d01959f111f1b605d019,\
			
 
				-		0x59f111f1b605d01959f111f1b605d019
			
 
				-	.octa 0x923f82a4af194f9b923f82a4af194f9b,\
			
 
				-		0x923f82a4af194f9b923f82a4af194f9b
			
 
				-	.octa 0xab1c5ed5da6d8118ab1c5ed5da6d8118,\
			
 
				-		0xab1c5ed5da6d8118ab1c5ed5da6d8118
			
 
				-	.octa 0xd807aa98a3030242d807aa98a3030242,\
			
 
				-		0xd807aa98a3030242d807aa98a3030242
			
 
				-	.octa 0x12835b0145706fbe12835b0145706fbe,\
			
 
				-		0x12835b0145706fbe12835b0145706fbe
			
 
				-	.octa 0x243185be4ee4b28c243185be4ee4b28c,\
			
 
				-		0x243185be4ee4b28c243185be4ee4b28c
			
 
				-	.octa 0x550c7dc3d5ffb4e2550c7dc3d5ffb4e2,\
			
 
				-		0x550c7dc3d5ffb4e2550c7dc3d5ffb4e2
			
 
				-	.octa 0x72be5d74f27b896f72be5d74f27b896f,\
			
 
				-		0x72be5d74f27b896f72be5d74f27b896f
			
 
				-	.octa 0x80deb1fe3b1696b180deb1fe3b1696b1,\
			
 
				-		0x80deb1fe3b1696b180deb1fe3b1696b1
			
 
				-	.octa 0x9bdc06a725c712359bdc06a725c71235,\
			
 
				-		0x9bdc06a725c712359bdc06a725c71235
			
 
				-	.octa 0xc19bf174cf692694c19bf174cf692694,\
			
 
				-		0xc19bf174cf692694c19bf174cf692694
			
 
				-	.octa 0xe49b69c19ef14ad2e49b69c19ef14ad2,\
			
 
				-		0xe49b69c19ef14ad2e49b69c19ef14ad2
			
 
				-	.octa 0xefbe4786384f25e3efbe4786384f25e3,\
			
 
				-		0xefbe4786384f25e3efbe4786384f25e3
			
 
				-	.octa 0x0fc19dc68b8cd5b50fc19dc68b8cd5b5,\
			
 
				-		0x0fc19dc68b8cd5b50fc19dc68b8cd5b5
			
 
				-	.octa 0x240ca1cc77ac9c65240ca1cc77ac9c65,\
			
 
				-		0x240ca1cc77ac9c65240ca1cc77ac9c65
			
 
				-	.octa 0x2de92c6f592b02752de92c6f592b0275,\
			
 
				-		0x2de92c6f592b02752de92c6f592b0275
			
 
				-	.octa 0x4a7484aa6ea6e4834a7484aa6ea6e483,\
			
 
				-		0x4a7484aa6ea6e4834a7484aa6ea6e483
			
 
				-	.octa 0x5cb0a9dcbd41fbd45cb0a9dcbd41fbd4,\
			
 
				-		0x5cb0a9dcbd41fbd45cb0a9dcbd41fbd4
			
 
				-	.octa 0x76f988da831153b576f988da831153b5,\
			
 
				-		0x76f988da831153b576f988da831153b5
			
 
				-	.octa 0x983e5152ee66dfab983e5152ee66dfab,\
			
 
				-		0x983e5152ee66dfab983e5152ee66dfab
			
 
				-	.octa 0xa831c66d2db43210a831c66d2db43210,\
			
 
				-		0xa831c66d2db43210a831c66d2db43210
			
 
				-	.octa 0xb00327c898fb213fb00327c898fb213f,\
			
 
				-		0xb00327c898fb213fb00327c898fb213f
			
 
				-	.octa 0xbf597fc7beef0ee4bf597fc7beef0ee4,\
			
 
				-		0xbf597fc7beef0ee4bf597fc7beef0ee4
			
 
				-	.octa 0xc6e00bf33da88fc2c6e00bf33da88fc2,\
			
 
				-		0xc6e00bf33da88fc2c6e00bf33da88fc2
			
 
				-	.octa 0xd5a79147930aa725d5a79147930aa725,\
			
 
				-		0xd5a79147930aa725d5a79147930aa725
			
 
				-	.octa 0x06ca6351e003826f06ca6351e003826f,\
			
 
				-		0x06ca6351e003826f06ca6351e003826f
			
 
				-	.octa 0x142929670a0e6e70142929670a0e6e70,\
			
 
				-		0x142929670a0e6e70142929670a0e6e70
			
 
				-	.octa 0x27b70a8546d22ffc27b70a8546d22ffc,\
			
 
				-		0x27b70a8546d22ffc27b70a8546d22ffc
			
 
				-	.octa 0x2e1b21385c26c9262e1b21385c26c926,\
			
 
				-		0x2e1b21385c26c9262e1b21385c26c926
			
 
				-	.octa 0x4d2c6dfc5ac42aed4d2c6dfc5ac42aed,\
			
 
				-		0x4d2c6dfc5ac42aed4d2c6dfc5ac42aed
			
 
				-	.octa 0x53380d139d95b3df53380d139d95b3df,\
			
 
				-		0x53380d139d95b3df53380d139d95b3df
			
 
				-	.octa 0x650a73548baf63de650a73548baf63de,\
			
 
				-		0x650a73548baf63de650a73548baf63de
			
 
				-	.octa 0x766a0abb3c77b2a8766a0abb3c77b2a8,\
			
 
				-		0x766a0abb3c77b2a8766a0abb3c77b2a8
			
 
				-	.octa 0x81c2c92e47edaee681c2c92e47edaee6,\
			
 
				-		0x81c2c92e47edaee681c2c92e47edaee6
			
 
				-	.octa 0x92722c851482353b92722c851482353b,\
			
 
				-		0x92722c851482353b92722c851482353b
			
 
				-	.octa 0xa2bfe8a14cf10364a2bfe8a14cf10364,\
			
 
				-		0xa2bfe8a14cf10364a2bfe8a14cf10364
			
 
				-	.octa 0xa81a664bbc423001a81a664bbc423001,\
			
 
				-		0xa81a664bbc423001a81a664bbc423001
			
 
				-	.octa 0xc24b8b70d0f89791c24b8b70d0f89791,\
			
 
				-		0xc24b8b70d0f89791c24b8b70d0f89791
			
 
				-	.octa 0xc76c51a30654be30c76c51a30654be30,\
			
 
				-		0xc76c51a30654be30c76c51a30654be30
			
 
				-	.octa 0xd192e819d6ef5218d192e819d6ef5218,\
			
 
				-		0xd192e819d6ef5218d192e819d6ef5218
			
 
				-	.octa 0xd69906245565a910d69906245565a910,\
			
 
				-		0xd69906245565a910d69906245565a910
			
 
				-	.octa 0xf40e35855771202af40e35855771202a,\
			
 
				-		0xf40e35855771202af40e35855771202a
			
 
				-	.octa 0x106aa07032bbd1b8106aa07032bbd1b8,\
			
 
				-		0x106aa07032bbd1b8106aa07032bbd1b8
			
 
				-	.octa 0x19a4c116b8d2d0c819a4c116b8d2d0c8,\
			
 
				-		0x19a4c116b8d2d0c819a4c116b8d2d0c8
			
 
				-	.octa 0x1e376c085141ab531e376c085141ab53,\
			
 
				-		0x1e376c085141ab531e376c085141ab53
			
 
				-	.octa 0x2748774cdf8eeb992748774cdf8eeb99,\
			
 
				-		0x2748774cdf8eeb992748774cdf8eeb99
			
 
				-	.octa 0x34b0bcb5e19b48a834b0bcb5e19b48a8,\
			
 
				-		0x34b0bcb5e19b48a834b0bcb5e19b48a8
			
 
				-	.octa 0x391c0cb3c5c95a63391c0cb3c5c95a63,\
			
 
				-		0x391c0cb3c5c95a63391c0cb3c5c95a63
			
 
				-	.octa 0x4ed8aa4ae3418acb4ed8aa4ae3418acb,\
			
 
				-		0x4ed8aa4ae3418acb4ed8aa4ae3418acb
			
 
				-	.octa 0x5b9cca4f7763e3735b9cca4f7763e373,\
			
 
				-		0x5b9cca4f7763e3735b9cca4f7763e373
			
 
				-	.octa 0x682e6ff3d6b2b8a3682e6ff3d6b2b8a3,\
			
 
				-		0x682e6ff3d6b2b8a3682e6ff3d6b2b8a3
			
 
				-	.octa 0x748f82ee5defb2fc748f82ee5defb2fc,\
			
 
				-		0x748f82ee5defb2fc748f82ee5defb2fc
			
 
				-	.octa 0x78a5636f43172f6078a5636f43172f60,\
			
 
				-		0x78a5636f43172f6078a5636f43172f60
			
 
				-	.octa 0x84c87814a1f0ab7284c87814a1f0ab72,\
			
 
				-		0x84c87814a1f0ab7284c87814a1f0ab72
			
 
				-	.octa 0x8cc702081a6439ec8cc702081a6439ec,\
			
 
				-		0x8cc702081a6439ec8cc702081a6439ec
			
 
				-	.octa 0x90befffa23631e2890befffa23631e28,\
			
 
				-		0x90befffa23631e2890befffa23631e28
			
 
				-	.octa 0xa4506cebde82bde9a4506cebde82bde9,\
			
 
				-		0xa4506cebde82bde9a4506cebde82bde9
			
 
				-	.octa 0xbef9a3f7b2c67915bef9a3f7b2c67915,\
			
 
				-		0xbef9a3f7b2c67915bef9a3f7b2c67915
			
 
				-	.octa 0xc67178f2e372532bc67178f2e372532b,\
			
 
				-		0xc67178f2e372532bc67178f2e372532b
			
 
				-	.octa 0xca273eceea26619cca273eceea26619c,\
			
 
				-		0xca273eceea26619cca273eceea26619c
			
 
				-	.octa 0xd186b8c721c0c207d186b8c721c0c207,\
			
 
				-		0xd186b8c721c0c207d186b8c721c0c207
			
 
				-	.octa 0xeada7dd6cde0eb1eeada7dd6cde0eb1e,\
			
 
				-		0xeada7dd6cde0eb1eeada7dd6cde0eb1e
			
 
				-	.octa 0xf57d4f7fee6ed178f57d4f7fee6ed178,\
			
 
				-		0xf57d4f7fee6ed178f57d4f7fee6ed178
			
 
				-	.octa 0x06f067aa72176fba06f067aa72176fba,\
			
 
				-		0x06f067aa72176fba06f067aa72176fba
			
 
				-	.octa 0x0a637dc5a2c898a60a637dc5a2c898a6,\
			
 
				-		0x0a637dc5a2c898a60a637dc5a2c898a6
			
 
				-	.octa 0x113f9804bef90dae113f9804bef90dae,\
			
 
				-		0x113f9804bef90dae113f9804bef90dae
			
 
				-	.octa 0x1b710b35131c471b1b710b35131c471b,\
			
 
				-		0x1b710b35131c471b1b710b35131c471b
			
 
				-	.octa 0x28db77f523047d8428db77f523047d84,\
			
 
				-		0x28db77f523047d8428db77f523047d84
			
 
				-	.octa 0x32caab7b40c7249332caab7b40c72493,\
			
 
				-		0x32caab7b40c7249332caab7b40c72493
			
 
				-	.octa 0x3c9ebe0a15c9bebc3c9ebe0a15c9bebc,\
			
 
				-		0x3c9ebe0a15c9bebc3c9ebe0a15c9bebc
			
 
				-	.octa 0x431d67c49c100d4c431d67c49c100d4c,\
			
 
				-		0x431d67c49c100d4c431d67c49c100d4c
			
 
				-	.octa 0x4cc5d4becb3e42b64cc5d4becb3e42b6,\
			
 
				-		0x4cc5d4becb3e42b64cc5d4becb3e42b6
			
 
				-	.octa 0x597f299cfc657e2a597f299cfc657e2a,\
			
 
				-		0x597f299cfc657e2a597f299cfc657e2a
			
 
				-	.octa 0x5fcb6fab3ad6faec5fcb6fab3ad6faec,\
			
 
				-		0x5fcb6fab3ad6faec5fcb6fab3ad6faec
			
 
				-	.octa 0x6c44198c4a4758176c44198c4a475817,\
			
 
				-		0x6c44198c4a4758176c44198c4a475817
			
 
				-
			
 
				-.section	.rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32
			
 
				-.align 32
			
 
				-PSHUFFLE_BYTE_FLIP_MASK: .octa 0x08090a0b0c0d0e0f0001020304050607
			
 
				-                         .octa 0x18191a1b1c1d1e1f1011121314151617
			
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -213,20 +213,6 @@ config CRYPTO_CRYPTD
 
				 	  converts an arbitrary synchronous software crypto algorithm
			
 
				 	  into an asynchronous algorithm that executes in a kernel thread.
			
 
				 
			
 
				-config CRYPTO_MCRYPTD
			
 
				-	tristate "Software async multi-buffer crypto daemon"
			
 
				-	select CRYPTO_BLKCIPHER
			
 
				-	select CRYPTO_HASH
			
 
				-	select CRYPTO_MANAGER
			
 
				-	select CRYPTO_WORKQUEUE
			
 
				-	help
			
 
				-	  This is a generic software asynchronous crypto daemon that
			
 
				-	  provides the kernel thread to assist multi-buffer crypto
			
 
				-	  algorithms for submitting jobs and flushing jobs in multi-buffer
			
 
				-	  crypto algorithms.  Multi-buffer crypto algorithms are executed
			
 
				-	  in the context of this kernel thread and drivers can post
			
 
				-	  their crypto request asynchronously to be processed by this daemon.
			
 
				-
			
 
				 config CRYPTO_AUTHENC
			
 
				 	tristate "Authenc support"
			
 
				 	select CRYPTO_AEAD
			
@@ -470,6 +456,18 @@ config CRYPTO_LRW
 
				 	  The first 128, 192 or 256 bits in the key are used for AES and the
			
 
				 	  rest is used to tie each cipher block to its logical position.
			
 
				 
			
 
				+config CRYPTO_OFB
			
 
				+	tristate "OFB support"
			
 
				+	select CRYPTO_BLKCIPHER
			
 
				+	select CRYPTO_MANAGER
			
 
				+	help
			
 
				+	  OFB: the Output Feedback mode makes a block cipher into a synchronous
			
 
				+	  stream cipher. It generates keystream blocks, which are then XORed
			
 
				+	  with the plaintext blocks to get the ciphertext. Flipping a bit in the
			
 
				+	  ciphertext produces a flipped bit in the plaintext at the same
			
 
				+	  location. This property allows many error correcting codes to function
			
 
				+	  normally even when applied before encryption.
			
 
				+
			
 
				 config CRYPTO_PCBC
			
 
				 	tristate "PCBC support"
			
 
				 	select CRYPTO_BLKCIPHER
			
@@ -848,54 +846,6 @@ config CRYPTO_SHA1_PPC_SPE
 
				 	  SHA-1 secure hash standard (DFIPS 180-4) implemented
			
 
				 	  using powerpc SPE SIMD instruction set.
			
 
				 
			
 
				-config CRYPTO_SHA1_MB
			
 
				-	tristate "SHA1 digest algorithm (x86_64 Multi-Buffer, Experimental)"
			
 
				-	depends on X86 && 64BIT
			
 
				-	select CRYPTO_SHA1
			
 
				-	select CRYPTO_HASH
			
 
				-	select CRYPTO_MCRYPTD
			
 
				-	help
			
 
				-	  SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
			
 
				-	  using multi-buffer technique.  This algorithm computes on
			
 
				-	  multiple data lanes concurrently with SIMD instructions for
			
 
				-	  better throughput.  It should not be enabled by default but
			
 
				-	  used when there is significant amount of work to keep the keep
			
 
				-	  the data lanes filled to get performance benefit.  If the data
			
 
				-	  lanes remain unfilled, a flush operation will be initiated to
			
 
				-	  process the crypto jobs, adding a slight latency.
			
 
				-
			
 
				-config CRYPTO_SHA256_MB
			
 
				-	tristate "SHA256 digest algorithm (x86_64 Multi-Buffer, Experimental)"
			
 
				-	depends on X86 && 64BIT
			
 
				-	select CRYPTO_SHA256
			
 
				-	select CRYPTO_HASH
			
 
				-	select CRYPTO_MCRYPTD
			
 
				-	help
			
 
				-	  SHA-256 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
			
 
				-	  using multi-buffer technique.  This algorithm computes on
			
 
				-	  multiple data lanes concurrently with SIMD instructions for
			
 
				-	  better throughput.  It should not be enabled by default but
			
 
				-	  used when there is significant amount of work to keep the keep
			
 
				-	  the data lanes filled to get performance benefit.  If the data
			
 
				-	  lanes remain unfilled, a flush operation will be initiated to
			
 
				-	  process the crypto jobs, adding a slight latency.
			
 
				-
			
 
				-config CRYPTO_SHA512_MB
			
 
				-        tristate "SHA512 digest algorithm (x86_64 Multi-Buffer, Experimental)"
			
 
				-        depends on X86 && 64BIT
			
 
				-        select CRYPTO_SHA512
			
 
				-        select CRYPTO_HASH
			
 
				-        select CRYPTO_MCRYPTD
			
 
				-        help
			
 
				-          SHA-512 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
			
 
				-          using multi-buffer technique.  This algorithm computes on
			
 
				-          multiple data lanes concurrently with SIMD instructions for
			
 
				-          better throughput.  It should not be enabled by default but
			
 
				-          used when there is significant amount of work to keep the keep
			
 
				-          the data lanes filled to get performance benefit.  If the data
			
 
				-          lanes remain unfilled, a flush operation will be initiated to
			
 
				-          process the crypto jobs, adding a slight latency.
			
 
				-
			
 
				 config CRYPTO_SHA256
			
 
				 	tristate "SHA224 and SHA256 digest algorithm"
			
 
				 	select CRYPTO_HASH
			
@@ -1133,7 +1083,7 @@ config CRYPTO_AES_NI_INTEL
 
				 
			
 
				 	  In addition to AES cipher algorithm support, the acceleration
			
 
				 	  for some popular block cipher mode is supported too, including
			
 
				-	  ECB, CBC, LRW, PCBC, XTS. The 64 bit version has additional
			
 
				+	  ECB, CBC, LRW, XTS. The 64 bit version has additional
			
 
				 	  acceleration for CTR.
			
 
				 
			
 
				 config CRYPTO_AES_SPARC64
			
@@ -1590,20 +1540,6 @@ config CRYPTO_SM4
 
				 
			
 
				 	  If unsure, say N.
			
 
				 
			
 
				-config CRYPTO_SPECK
			
 
				-	tristate "Speck cipher algorithm"
			
 
				-	select CRYPTO_ALGAPI
			
 
				-	help
			
 
				-	  Speck is a lightweight block cipher that is tuned for optimal
			
 
				-	  performance in software (rather than hardware).
			
 
				-
			
 
				-	  Speck may not be as secure as AES, and should only be used on systems
			
 
				-	  where AES is not fast enough.
			
 
				-
			
 
				-	  See also: <https://eprint.iacr.org/2013/404.pdf>
			
 
				-
			
 
				-	  If unsure, say N.
			
 
				-
			
 
				 config CRYPTO_TEA
			
 
				 	tristate "TEA, XTEA and XETA cipher algorithms"
			
 
				 	select CRYPTO_ALGAPI
			
@@ -1875,6 +1811,17 @@ config CRYPTO_USER_API_AEAD
 
				 	  This option enables the user-spaces interface for AEAD
			
 
				 	  cipher algorithms.
			
 
				 
			
 
				+config CRYPTO_STATS
			
 
				+	bool "Crypto usage statistics for User-space"
			
 
				+	help
			
 
				+	  This option enables the gathering of crypto stats.
			
 
				+	  This will collect:
			
 
				+	  - encrypt/decrypt size and numbers of symmetric operations
			
 
				+	  - compress/decompress size and numbers of compress operations
			
 
				+	  - size and numbers of hash operations
			
 
				+	  - encrypt/decrypt/sign/verify numbers for asymmetric operations
			
 
				+	  - generate/seed numbers for rng operations
			
 
				+
			
 
				 config CRYPTO_HASH_INFO
			
 
				 	bool
			
 
				 
			
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -54,6 +54,7 @@ cryptomgr-y := algboss.o testmgr.o
 
				 
			
 
				 obj-$(CONFIG_CRYPTO_MANAGER2) += cryptomgr.o
			
 
				 obj-$(CONFIG_CRYPTO_USER) += crypto_user.o
			
 
				+crypto_user-y := crypto_user_base.o crypto_user_stat.o
			
 
				 obj-$(CONFIG_CRYPTO_CMAC) += cmac.o
			
 
				 obj-$(CONFIG_CRYPTO_HMAC) += hmac.o
			
 
				 obj-$(CONFIG_CRYPTO_VMAC) += vmac.o
			
@@ -93,7 +94,6 @@ obj-$(CONFIG_CRYPTO_MORUS640) += morus640.o
 
				 obj-$(CONFIG_CRYPTO_MORUS1280) += morus1280.o
			
 
				 obj-$(CONFIG_CRYPTO_PCRYPT) += pcrypt.o
			
 
				 obj-$(CONFIG_CRYPTO_CRYPTD) += cryptd.o
			
 
				-obj-$(CONFIG_CRYPTO_MCRYPTD) += mcryptd.o
			
 
				 obj-$(CONFIG_CRYPTO_DES) += des_generic.o
			
 
				 obj-$(CONFIG_CRYPTO_FCRYPT) += fcrypt.o
			
 
				 obj-$(CONFIG_CRYPTO_BLOWFISH) += blowfish_generic.o
			
@@ -115,7 +115,6 @@ obj-$(CONFIG_CRYPTO_TEA) += tea.o
 
				 obj-$(CONFIG_CRYPTO_KHAZAD) += khazad.o
			
 
				 obj-$(CONFIG_CRYPTO_ANUBIS) += anubis.o
			
 
				 obj-$(CONFIG_CRYPTO_SEED) += seed.o
			
 
				-obj-$(CONFIG_CRYPTO_SPECK) += speck.o
			
 
				 obj-$(CONFIG_CRYPTO_SALSA20) += salsa20_generic.o
			
 
				 obj-$(CONFIG_CRYPTO_CHACHA20) += chacha20_generic.o
			
 
				 obj-$(CONFIG_CRYPTO_POLY1305) += poly1305_generic.o
			
@@ -143,6 +142,7 @@ obj-$(CONFIG_CRYPTO_USER_API_SKCIPHER) += algif_skcipher.o
 
				 obj-$(CONFIG_CRYPTO_USER_API_RNG) += algif_rng.o
			
 
				 obj-$(CONFIG_CRYPTO_USER_API_AEAD) += algif_aead.o
			
 
				 obj-$(CONFIG_CRYPTO_ZSTD) += zstd.o
			
 
				+obj-$(CONFIG_CRYPTO_OFB) += ofb.o
			
 
				 
			
 
				 ecdh_generic-y := ecc.o
			
 
				 ecdh_generic-y += ecdh.o
			
--- a/crypto/aegis.h
+++ b/crypto/aegis.h
@@ -21,7 +21,7 @@
 
				 
			
 
				 union aegis_block {
			
 
				 	__le64 words64[AEGIS_BLOCK_SIZE / sizeof(__le64)];
			
 
				-	u32 words32[AEGIS_BLOCK_SIZE / sizeof(u32)];
			
 
				+	__le32 words32[AEGIS_BLOCK_SIZE / sizeof(__le32)];
			
 
				 	u8 bytes[AEGIS_BLOCK_SIZE];
			
 
				 };
			
 
				 
			
@@ -57,24 +57,22 @@ static void crypto_aegis_aesenc(union aegis_block *dst,
 
				 				const union aegis_block *src,
			
 
				 				const union aegis_block *key)
			
 
				 {
			
 
				-	u32 *d = dst->words32;
			
 
				 	const u8  *s  = src->bytes;
			
 
				-	const u32 *k  = key->words32;
			
 
				 	const u32 *t0 = crypto_ft_tab[0];
			
 
				 	const u32 *t1 = crypto_ft_tab[1];
			
 
				 	const u32 *t2 = crypto_ft_tab[2];
			
 
				 	const u32 *t3 = crypto_ft_tab[3];
			
 
				 	u32 d0, d1, d2, d3;
			
 
				 
			
 
				-	d0 = t0[s[ 0]] ^ t1[s[ 5]] ^ t2[s[10]] ^ t3[s[15]] ^ k[0];
			
 
				-	d1 = t0[s[ 4]] ^ t1[s[ 9]] ^ t2[s[14]] ^ t3[s[ 3]] ^ k[1];
			
 
				-	d2 = t0[s[ 8]] ^ t1[s[13]] ^ t2[s[ 2]] ^ t3[s[ 7]] ^ k[2];
			
 
				-	d3 = t0[s[12]] ^ t1[s[ 1]] ^ t2[s[ 6]] ^ t3[s[11]] ^ k[3];
			
 
				+	d0 = t0[s[ 0]] ^ t1[s[ 5]] ^ t2[s[10]] ^ t3[s[15]];
			
 
				+	d1 = t0[s[ 4]] ^ t1[s[ 9]] ^ t2[s[14]] ^ t3[s[ 3]];
			
 
				+	d2 = t0[s[ 8]] ^ t1[s[13]] ^ t2[s[ 2]] ^ t3[s[ 7]];
			
 
				+	d3 = t0[s[12]] ^ t1[s[ 1]] ^ t2[s[ 6]] ^ t3[s[11]];
			
 
				 
			
 
				-	d[0] = d0;
			
 
				-	d[1] = d1;
			
 
				-	d[2] = d2;
			
 
				-	d[3] = d3;
			
 
				+	dst->words32[0] = cpu_to_le32(d0) ^ key->words32[0];
			
 
				+	dst->words32[1] = cpu_to_le32(d1) ^ key->words32[1];
			
 
				+	dst->words32[2] = cpu_to_le32(d2) ^ key->words32[2];
			
 
				+	dst->words32[3] = cpu_to_le32(d3) ^ key->words32[3];
			
 
				 }
			
 
				 
			
 
				 #endif /* _CRYPTO_AEGIS_H */
			
--- a/crypto/ahash.c
+++ b/crypto/ahash.c
@@ -364,24 +364,35 @@ static int crypto_ahash_op(struct ahash_request *req,
 
				 
			
 
				 int crypto_ahash_final(struct ahash_request *req)
			
 
				 {
			
 
				-	return crypto_ahash_op(req, crypto_ahash_reqtfm(req)->final);
			
 
				+	int ret;
			
 
				+
			
 
				+	ret = crypto_ahash_op(req, crypto_ahash_reqtfm(req)->final);
			
 
				+	crypto_stat_ahash_final(req, ret);
			
 
				+	return ret;
			
 
				 }
			
 
				 EXPORT_SYMBOL_GPL(crypto_ahash_final);
			
 
				 
			
 
				 int crypto_ahash_finup(struct ahash_request *req)
			
 
				 {
			
 
				-	return crypto_ahash_op(req, crypto_ahash_reqtfm(req)->finup);
			
 
				+	int ret;
			
 
				+
			
 
				+	ret = crypto_ahash_op(req, crypto_ahash_reqtfm(req)->finup);
			
 
				+	crypto_stat_ahash_final(req, ret);
			
 
				+	return ret;
			
 
				 }
			
 
				 EXPORT_SYMBOL_GPL(crypto_ahash_finup);
			
 
				 
			
 
				 int crypto_ahash_digest(struct ahash_request *req)
			
 
				 {
			
 
				 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
			
 
				+	int ret;
			
 
				 
			
 
				 	if (crypto_ahash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY)
			
 
				-		return -ENOKEY;
			
 
				-
			
 
				-	return crypto_ahash_op(req, tfm->digest);
			
 
				+		ret = -ENOKEY;
			
 
				+	else
			
 
				+		ret = crypto_ahash_op(req, tfm->digest);
			
 
				+	crypto_stat_ahash_final(req, ret);
			
 
				+	return ret;
			
 
				 }
			
 
				 EXPORT_SYMBOL_GPL(crypto_ahash_digest);
			
 
				 
			
@@ -550,8 +561,8 @@ static int ahash_prepare_alg(struct ahash_alg *alg)
 
				 {
			
 
				 	struct crypto_alg *base = &alg->halg.base;
			
 
				 
			
 
				-	if (alg->halg.digestsize > PAGE_SIZE / 8 ||
			
 
				-	    alg->halg.statesize > PAGE_SIZE / 8 ||
			
 
				+	if (alg->halg.digestsize > HASH_MAX_DIGESTSIZE ||
			
 
				+	    alg->halg.statesize > HASH_MAX_STATESIZE ||
			
 
				 	    alg->halg.statesize == 0)
			
 
				 		return -EINVAL;
			
 
				 
			
--- a/crypto/algapi.c
+++ b/crypto/algapi.c
@@ -57,9 +57,14 @@ static int crypto_check_alg(struct crypto_alg *alg)
 
				 	if (alg->cra_alignmask & (alg->cra_alignmask + 1))
			
 
				 		return -EINVAL;
			
 
				 
			
 
				-	if (alg->cra_blocksize > PAGE_SIZE / 8)
			
 
				+	/* General maximums for all algs. */
			
 
				+	if (alg->cra_alignmask > MAX_ALGAPI_ALIGNMASK)
			
 
				 		return -EINVAL;
			
 
				 
			
 
				+	if (alg->cra_blocksize > MAX_ALGAPI_BLOCKSIZE)
			
 
				+		return -EINVAL;
			
 
				+
			
 
				+	/* Lower maximums for specific alg types. */
			
 
				 	if (!alg->cra_type && (alg->cra_flags & CRYPTO_ALG_TYPE_MASK) ==
			
 
				 			       CRYPTO_ALG_TYPE_CIPHER) {
			
 
				 		if (alg->cra_alignmask > MAX_CIPHER_ALIGNMASK)
			
@@ -253,6 +258,14 @@ static struct crypto_larval *__crypto_register_alg(struct crypto_alg *alg)
 
				 	list_add(&alg->cra_list, &crypto_alg_list);
			
 
				 	list_add(&larval->alg.cra_list, &crypto_alg_list);
			
 
				 
			
 
				+	atomic_set(&alg->encrypt_cnt, 0);
			
 
				+	atomic_set(&alg->decrypt_cnt, 0);
			
 
				+	atomic64_set(&alg->encrypt_tlen, 0);
			
 
				+	atomic64_set(&alg->decrypt_tlen, 0);
			
 
				+	atomic_set(&alg->verify_cnt, 0);
			
 
				+	atomic_set(&alg->cipher_err_cnt, 0);
			
 
				+	atomic_set(&alg->sign_cnt, 0);
			
 
				+
			
 
				 out:
			
 
				 	return larval;
			
 
				 
			
@@ -367,6 +380,8 @@ static void crypto_wait_for_test(struct crypto_larval *larval)
 
				 
			
 
				 	err = wait_for_completion_killable(&larval->completion);
			
 
				 	WARN_ON(err);
			
 
				+	if (!err)
			
 
				+		crypto_probing_notify(CRYPTO_MSG_ALG_LOADED, larval);
			
 
				 
			
 
				 out:
			
 
				 	crypto_larval_kill(&larval->alg);
			
--- a/crypto/algboss.c
+++ b/crypto/algboss.c
@@ -274,6 +274,8 @@ static int cryptomgr_notify(struct notifier_block *this, unsigned long msg,
 
				 		return cryptomgr_schedule_probe(data);
			
 
				 	case CRYPTO_MSG_ALG_REGISTER:
			
 
				 		return cryptomgr_schedule_test(data);
			
 
				+	case CRYPTO_MSG_ALG_LOADED:
			
 
				+		break;
			
 
				 	}
			
 
				 
			
 
				 	return NOTIFY_DONE;
			
--- a/crypto/algif_aead.c
+++ b/crypto/algif_aead.c
@@ -42,7 +42,7 @@
 
				 
			
 
				 struct aead_tfm {
			
 
				 	struct crypto_aead *aead;
			
 
				-	struct crypto_skcipher *null_tfm;
			
 
				+	struct crypto_sync_skcipher *null_tfm;
			
 
				 };
			
 
				 
			
 
				 static inline bool aead_sufficient_data(struct sock *sk)
			
@@ -75,13 +75,13 @@ static int aead_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
 
				 	return af_alg_sendmsg(sock, msg, size, ivsize);
			
 
				 }
			
 
				 
			
 
				-static int crypto_aead_copy_sgl(struct crypto_skcipher *null_tfm,
			
 
				+static int crypto_aead_copy_sgl(struct crypto_sync_skcipher *null_tfm,
			
 
				 				struct scatterlist *src,
			
 
				 				struct scatterlist *dst, unsigned int len)
			
 
				 {
			
 
				-	SKCIPHER_REQUEST_ON_STACK(skreq, null_tfm);
			
 
				+	SYNC_SKCIPHER_REQUEST_ON_STACK(skreq, null_tfm);
			
 
				 
			
 
				-	skcipher_request_set_tfm(skreq, null_tfm);
			
 
				+	skcipher_request_set_sync_tfm(skreq, null_tfm);
			
 
				 	skcipher_request_set_callback(skreq, CRYPTO_TFM_REQ_MAY_BACKLOG,
			
 
				 				      NULL, NULL);
			
 
				 	skcipher_request_set_crypt(skreq, src, dst, len, NULL);
			
@@ -99,7 +99,7 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
 
				 	struct af_alg_ctx *ctx = ask->private;
			
 
				 	struct aead_tfm *aeadc = pask->private;
			
 
				 	struct crypto_aead *tfm = aeadc->aead;
			
 
				-	struct crypto_skcipher *null_tfm = aeadc->null_tfm;
			
 
				+	struct crypto_sync_skcipher *null_tfm = aeadc->null_tfm;
			
 
				 	unsigned int i, as = crypto_aead_authsize(tfm);
			
 
				 	struct af_alg_async_req *areq;
			
 
				 	struct af_alg_tsgl *tsgl, *tmp;
			
@@ -478,7 +478,7 @@ static void *aead_bind(const char *name, u32 type, u32 mask)
 
				 {
			
 
				 	struct aead_tfm *tfm;
			
 
				 	struct crypto_aead *aead;
			
 
				-	struct crypto_skcipher *null_tfm;
			
 
				+	struct crypto_sync_skcipher *null_tfm;
			
 
				 
			
 
				 	tfm = kzalloc(sizeof(*tfm), GFP_KERNEL);
			
 
				 	if (!tfm)
			
--- a/crypto/algif_hash.c
+++ b/crypto/algif_hash.c
@@ -239,7 +239,7 @@ static int hash_accept(struct socket *sock, struct socket *newsock, int flags,
 
				 	struct alg_sock *ask = alg_sk(sk);
			
 
				 	struct hash_ctx *ctx = ask->private;
			
 
				 	struct ahash_request *req = &ctx->req;
			
 
				-	char state[crypto_ahash_statesize(crypto_ahash_reqtfm(req)) ? : 1];
			
 
				+	char state[HASH_MAX_STATESIZE];
			
 
				 	struct sock *sk2;
			
 
				 	struct alg_sock *ask2;
			
 
				 	struct hash_ctx *ctx2;
			
--- a/crypto/authenc.c
+++ b/crypto/authenc.c
@@ -33,7 +33,7 @@ struct authenc_instance_ctx {
 
				 struct crypto_authenc_ctx {
			
 
				 	struct crypto_ahash *auth;
			
 
				 	struct crypto_skcipher *enc;
			
 
				-	struct crypto_skcipher *null;
			
 
				+	struct crypto_sync_skcipher *null;
			
 
				 };
			
 
				 
			
 
				 struct authenc_request_ctx {
			
@@ -185,9 +185,9 @@ static int crypto_authenc_copy_assoc(struct aead_request *req)
 
				 {
			
 
				 	struct crypto_aead *authenc = crypto_aead_reqtfm(req);
			
 
				 	struct crypto_authenc_ctx *ctx = crypto_aead_ctx(authenc);
			
 
				-	SKCIPHER_REQUEST_ON_STACK(skreq, ctx->null);
			
 
				+	SYNC_SKCIPHER_REQUEST_ON_STACK(skreq, ctx->null);
			
 
				 
			
 
				-	skcipher_request_set_tfm(skreq, ctx->null);
			
 
				+	skcipher_request_set_sync_tfm(skreq, ctx->null);
			
 
				 	skcipher_request_set_callback(skreq, aead_request_flags(req),
			
 
				 				      NULL, NULL);
			
 
				 	skcipher_request_set_crypt(skreq, req->src, req->dst, req->assoclen,
			
@@ -318,7 +318,7 @@ static int crypto_authenc_init_tfm(struct crypto_aead *tfm)
 
				 	struct crypto_authenc_ctx *ctx = crypto_aead_ctx(tfm);
			
 
				 	struct crypto_ahash *auth;
			
 
				 	struct crypto_skcipher *enc;
			
 
				-	struct crypto_skcipher *null;
			
 
				+	struct crypto_sync_skcipher *null;
			
 
				 	int err;
			
 
				 
			
 
				 	auth = crypto_spawn_ahash(&ictx->auth);
			
--- a/crypto/authencesn.c
+++ b/crypto/authencesn.c
@@ -36,7 +36,7 @@ struct crypto_authenc_esn_ctx {
 
				 	unsigned int reqoff;
			
 
				 	struct crypto_ahash *auth;
			
 
				 	struct crypto_skcipher *enc;
			
 
				-	struct crypto_skcipher *null;
			
 
				+	struct crypto_sync_skcipher *null;
			
 
				 };
			
 
				 
			
 
				 struct authenc_esn_request_ctx {
			
@@ -183,9 +183,9 @@ static int crypto_authenc_esn_copy(struct aead_request *req, unsigned int len)
 
				 {
			
 
				 	struct crypto_aead *authenc_esn = crypto_aead_reqtfm(req);
			
 
				 	struct crypto_authenc_esn_ctx *ctx = crypto_aead_ctx(authenc_esn);
			
 
				-	SKCIPHER_REQUEST_ON_STACK(skreq, ctx->null);
			
 
				+	SYNC_SKCIPHER_REQUEST_ON_STACK(skreq, ctx->null);
			
 
				 
			
 
				-	skcipher_request_set_tfm(skreq, ctx->null);
			
 
				+	skcipher_request_set_sync_tfm(skreq, ctx->null);
			
 
				 	skcipher_request_set_callback(skreq, aead_request_flags(req),
			
 
				 				      NULL, NULL);
			
 
				 	skcipher_request_set_crypt(skreq, req->src, req->dst, len, NULL);
			
@@ -341,7 +341,7 @@ static int crypto_authenc_esn_init_tfm(struct crypto_aead *tfm)
 
				 	struct crypto_authenc_esn_ctx *ctx = crypto_aead_ctx(tfm);
			
 
				 	struct crypto_ahash *auth;
			
 
				 	struct crypto_skcipher *enc;
			
 
				-	struct crypto_skcipher *null;
			
 
				+	struct crypto_sync_skcipher *null;
			
 
				 	int err;
			
 
				 
			
 
				 	auth = crypto_spawn_ahash(&ictx->auth);
			
--- a/crypto/ccm.c
+++ b/crypto/ccm.c
@@ -50,7 +50,10 @@ struct crypto_ccm_req_priv_ctx {
 
				 	u32 flags;
			
 
				 	struct scatterlist src[3];
			
 
				 	struct scatterlist dst[3];
			
 
				-	struct skcipher_request skreq;
			
 
				+	union {
			
 
				+		struct ahash_request ahreq;
			
 
				+		struct skcipher_request skreq;
			
 
				+	};
			
 
				 };
			
 
				 
			
 
				 struct cbcmac_tfm_ctx {
			
@@ -181,7 +184,7 @@ static int crypto_ccm_auth(struct aead_request *req, struct scatterlist *plain,
 
				 	struct crypto_ccm_req_priv_ctx *pctx = crypto_ccm_reqctx(req);
			
 
				 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
			
 
				 	struct crypto_ccm_ctx *ctx = crypto_aead_ctx(aead);
			
 
				-	AHASH_REQUEST_ON_STACK(ahreq, ctx->mac);
			
 
				+	struct ahash_request *ahreq = &pctx->ahreq;
			
 
				 	unsigned int assoclen = req->assoclen;
			
 
				 	struct scatterlist sg[3];
			
 
				 	u8 *odata = pctx->odata;
			
@@ -427,7 +430,7 @@ static int crypto_ccm_init_tfm(struct crypto_aead *tfm)
 
				 	crypto_aead_set_reqsize(
			
 
				 		tfm,
			
 
				 		align + sizeof(struct crypto_ccm_req_priv_ctx) +
			
 
				-		crypto_skcipher_reqsize(ctr));
			
 
				+		max(crypto_ahash_reqsize(mac), crypto_skcipher_reqsize(ctr)));
			
 
				 
			
 
				 	return 0;
			
 
				 
			
--- a/crypto/chacha20_generic.c
+++ b/crypto/chacha20_generic.c
@@ -18,20 +18,21 @@
 
				 static void chacha20_docrypt(u32 *state, u8 *dst, const u8 *src,
			
 
				 			     unsigned int bytes)
			
 
				 {
			
 
				-	u32 stream[CHACHA20_BLOCK_WORDS];
			
 
				+	/* aligned to potentially speed up crypto_xor() */
			
 
				+	u8 stream[CHACHA20_BLOCK_SIZE] __aligned(sizeof(long));
			
 
				 
			
 
				 	if (dst != src)
			
 
				 		memcpy(dst, src, bytes);
			
 
				 
			
 
				 	while (bytes >= CHACHA20_BLOCK_SIZE) {
			
 
				 		chacha20_block(state, stream);
			
 
				-		crypto_xor(dst, (const u8 *)stream, CHACHA20_BLOCK_SIZE);
			
 
				+		crypto_xor(dst, stream, CHACHA20_BLOCK_SIZE);
			
 
				 		bytes -= CHACHA20_BLOCK_SIZE;
			
 
				 		dst += CHACHA20_BLOCK_SIZE;
			
 
				 	}
			
 
				 	if (bytes) {
			
 
				 		chacha20_block(state, stream);
			
 
				-		crypto_xor(dst, (const u8 *)stream, bytes);
			
 
				+		crypto_xor(dst, stream, bytes);
			
 
				 	}
			
 
				 }
			
 
				 
			
--- a/crypto/cryptd.c
+++ b/crypto/cryptd.c
@@ -76,7 +76,7 @@ struct cryptd_blkcipher_request_ctx {
 
				 
			
 
				 struct cryptd_skcipher_ctx {
			
 
				 	atomic_t refcnt;
			
 
				-	struct crypto_skcipher *child;
			
 
				+	struct crypto_sync_skcipher *child;
			
 
				 };
			
 
				 
			
 
				 struct cryptd_skcipher_request_ctx {
			
@@ -449,14 +449,16 @@ static int cryptd_skcipher_setkey(struct crypto_skcipher *parent,
 
				 				  const u8 *key, unsigned int keylen)
			
 
				 {
			
 
				 	struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(parent);
			
 
				-	struct crypto_skcipher *child = ctx->child;
			
 
				+	struct crypto_sync_skcipher *child = ctx->child;
			
 
				 	int err;
			
 
				 
			
 
				-	crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
			
 
				-	crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(parent) &
			
 
				+	crypto_sync_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
			
 
				+	crypto_sync_skcipher_set_flags(child,
			
 
				+				       crypto_skcipher_get_flags(parent) &
			
 
				 					 CRYPTO_TFM_REQ_MASK);
			
 
				-	err = crypto_skcipher_setkey(child, key, keylen);
			
 
				-	crypto_skcipher_set_flags(parent, crypto_skcipher_get_flags(child) &
			
 
				+	err = crypto_sync_skcipher_setkey(child, key, keylen);
			
 
				+	crypto_skcipher_set_flags(parent,
			
 
				+				  crypto_sync_skcipher_get_flags(child) &
			
 
				 					  CRYPTO_TFM_RES_MASK);
			
 
				 	return err;
			
 
				 }
			
@@ -483,13 +485,13 @@ static void cryptd_skcipher_encrypt(struct crypto_async_request *base,
 
				 	struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req);
			
 
				 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
			
 
				 	struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
			
 
				-	struct crypto_skcipher *child = ctx->child;
			
 
				-	SKCIPHER_REQUEST_ON_STACK(subreq, child);
			
 
				+	struct crypto_sync_skcipher *child = ctx->child;
			
 
				+	SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, child);
			
 
				 
			
 
				 	if (unlikely(err == -EINPROGRESS))
			
 
				 		goto out;
			
 
				 
			
 
				-	skcipher_request_set_tfm(subreq, child);
			
 
				+	skcipher_request_set_sync_tfm(subreq, child);
			
 
				 	skcipher_request_set_callback(subreq, CRYPTO_TFM_REQ_MAY_SLEEP,
			
 
				 				      NULL, NULL);
			
 
				 	skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen,
			
@@ -511,13 +513,13 @@ static void cryptd_skcipher_decrypt(struct crypto_async_request *base,
 
				 	struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req);
			
 
				 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
			
 
				 	struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
			
 
				-	struct crypto_skcipher *child = ctx->child;
			
 
				-	SKCIPHER_REQUEST_ON_STACK(subreq, child);
			
 
				+	struct crypto_sync_skcipher *child = ctx->child;
			
 
				+	SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, child);
			
 
				 
			
 
				 	if (unlikely(err == -EINPROGRESS))
			
 
				 		goto out;
			
 
				 
			
 
				-	skcipher_request_set_tfm(subreq, child);
			
 
				+	skcipher_request_set_sync_tfm(subreq, child);
			
 
				 	skcipher_request_set_callback(subreq, CRYPTO_TFM_REQ_MAY_SLEEP,
			
 
				 				      NULL, NULL);
			
 
				 	skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen,
			
@@ -568,7 +570,7 @@ static int cryptd_skcipher_init_tfm(struct crypto_skcipher *tfm)
 
				 	if (IS_ERR(cipher))
			
 
				 		return PTR_ERR(cipher);
			
 
				 
			
 
				-	ctx->child = cipher;
			
 
				+	ctx->child = (struct crypto_sync_skcipher *)cipher;
			
 
				 	crypto_skcipher_set_reqsize(
			
 
				 		tfm, sizeof(struct cryptd_skcipher_request_ctx));
			
 
				 	return 0;
			
@@ -578,7 +580,7 @@ static void cryptd_skcipher_exit_tfm(struct crypto_skcipher *tfm)
 
				 {
			
 
				 	struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
			
 
				 
			
 
				-	crypto_free_skcipher(ctx->child);
			
 
				+	crypto_free_sync_skcipher(ctx->child);
			
 
				 }
			
 
				 
			
 
				 static void cryptd_skcipher_free(struct skcipher_instance *inst)
			
@@ -1243,7 +1245,7 @@ struct crypto_skcipher *cryptd_skcipher_child(struct cryptd_skcipher *tfm)
 
				 {
			
 
				 	struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(&tfm->base);
			
 
				 
			
 
				-	return ctx->child;
			
 
				+	return &ctx->child->base;
			
 
				 }
			
 
				 EXPORT_SYMBOL_GPL(cryptd_skcipher_child);
			
 
				 
			
--- a/crypto/crypto_null.c
+++ b/crypto/crypto_null.c
@@ -26,7 +26,7 @@
 
				 #include <linux/string.h>
			
 
				 
			
 
				 static DEFINE_MUTEX(crypto_default_null_skcipher_lock);
			
 
				-static struct crypto_skcipher *crypto_default_null_skcipher;
			
 
				+static struct crypto_sync_skcipher *crypto_default_null_skcipher;
			
 
				 static int crypto_default_null_skcipher_refcnt;
			
 
				 
			
 
				 static int null_compress(struct crypto_tfm *tfm, const u8 *src,
			
@@ -152,16 +152,15 @@ MODULE_ALIAS_CRYPTO("compress_null");
 
				 MODULE_ALIAS_CRYPTO("digest_null");
			
 
				 MODULE_ALIAS_CRYPTO("cipher_null");
			
 
				 
			
 
				-struct crypto_skcipher *crypto_get_default_null_skcipher(void)
			
 
				+struct crypto_sync_skcipher *crypto_get_default_null_skcipher(void)
			
 
				 {
			
 
				-	struct crypto_skcipher *tfm;
			
 
				+	struct crypto_sync_skcipher *tfm;
			
 
				 
			
 
				 	mutex_lock(&crypto_default_null_skcipher_lock);
			
 
				 	tfm = crypto_default_null_skcipher;
			
 
				 
			
 
				 	if (!tfm) {
			
 
				-		tfm = crypto_alloc_skcipher("ecb(cipher_null)",
			
 
				-					    0, CRYPTO_ALG_ASYNC);
			
 
				+		tfm = crypto_alloc_sync_skcipher("ecb(cipher_null)", 0, 0);
			
 
				 		if (IS_ERR(tfm))
			
 
				 			goto unlock;
			
 
				 
			
@@ -181,7 +180,7 @@ void crypto_put_default_null_skcipher(void)
 
				 {
			
 
				 	mutex_lock(&crypto_default_null_skcipher_lock);
			
 
				 	if (!--crypto_default_null_skcipher_refcnt) {
			
 
				-		crypto_free_skcipher(crypto_default_null_skcipher);
			
 
				+		crypto_free_sync_skcipher(crypto_default_null_skcipher);
			
 
				 		crypto_default_null_skcipher = NULL;
			
 
				 	}
			
 
				 	mutex_unlock(&crypto_default_null_skcipher_lock);
			
--- a/crypto/crypto_user_base.c
+++ b/crypto/crypto_user_base.c
@@ -29,6 +29,7 @@
 
				 #include <crypto/internal/rng.h>
			
 
				 #include <crypto/akcipher.h>
			
 
				 #include <crypto/kpp.h>
			
 
				+#include <crypto/internal/cryptouser.h>
			
 
				 
			
 
				 #include "internal.h"
			
 
				 
			
@@ -37,7 +38,7 @@
 
				 static DEFINE_MUTEX(crypto_cfg_mutex);
			
 
				 
			
 
				 /* The crypto netlink socket */
			
 
				-static struct sock *crypto_nlsk;
			
 
				+struct sock *crypto_nlsk;
			
 
				 
			
 
				 struct crypto_dump_info {
			
 
				 	struct sk_buff *in_skb;
			
@@ -46,7 +47,7 @@ struct crypto_dump_info {
 
				 	u16 nlmsg_flags;
			
 
				 };
			
 
				 
			
 
				-static struct crypto_alg *crypto_alg_match(struct crypto_user_alg *p, int exact)
			
 
				+struct crypto_alg *crypto_alg_match(struct crypto_user_alg *p, int exact)
			
 
				 {
			
 
				 	struct crypto_alg *q, *alg = NULL;
			
 
				 
			
@@ -461,6 +462,7 @@ static const int crypto_msg_min[CRYPTO_NR_MSGTYPES] = {
 
				 	[CRYPTO_MSG_UPDATEALG	- CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg),
			
 
				 	[CRYPTO_MSG_GETALG	- CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg),
			
 
				 	[CRYPTO_MSG_DELRNG	- CRYPTO_MSG_BASE] = 0,
			
 
				+	[CRYPTO_MSG_GETSTAT	- CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg),
			
 
				 };
			
 
				 
			
 
				 static const struct nla_policy crypto_policy[CRYPTOCFGA_MAX+1] = {
			
@@ -481,6 +483,9 @@ static const struct crypto_link {
 
				 						       .dump = crypto_dump_report,
			
 
				 						       .done = crypto_dump_report_done},
			
 
				 	[CRYPTO_MSG_DELRNG	- CRYPTO_MSG_BASE] = { .doit = crypto_del_rng },
			
 
				+	[CRYPTO_MSG_GETSTAT	- CRYPTO_MSG_BASE] = { .doit = crypto_reportstat,
			
 
				+						       .dump = crypto_dump_reportstat,
			
 
				+						       .done = crypto_dump_reportstat_done},
			
 
				 };
			
 
				 
			
 
				 static int crypto_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
			
--- a/crypto/crypto_user_stat.c
+++ b/crypto/crypto_user_stat.c
@@ -0,0 +1,463 @@
 
				+// SPDX-License-Identifier: GPL-2.0
			
 
				+/*
			
 
				+ * Crypto user configuration API.
			
 
				+ *
			
 
				+ * Copyright (C) 2017-2018 Corentin Labbe <clabbe@baylibre.com>
			
 
				+ *
			
 
				+ */
			
 
				+
			
 
				+#include <linux/crypto.h>
			
 
				+#include <linux/cryptouser.h>
			
 
				+#include <linux/sched.h>
			
 
				+#include <net/netlink.h>
			
 
				+#include <crypto/internal/skcipher.h>
			
 
				+#include <crypto/internal/rng.h>
			
 
				+#include <crypto/akcipher.h>
			
 
				+#include <crypto/kpp.h>
			
 
				+#include <crypto/internal/cryptouser.h>
			
 
				+
			
 
				+#include "internal.h"
			
 
				+
			
 
				+#define null_terminated(x)	(strnlen(x, sizeof(x)) < sizeof(x))
			
 
				+
			
 
				+static DEFINE_MUTEX(crypto_cfg_mutex);
			
 
				+
			
 
				+extern struct sock *crypto_nlsk;
			
 
				+
			
 
				+/*
				+ * Bundle of values handed to crypto_reportstat_alg() when building a reply:
				+ *   in_skb      - request skb; its NETLINK_CB() portid is put in the reply
				+ *   out_skb     - skb the reply message is written into
				+ *   nlmsg_seq   - sequence number passed to nlmsg_put()
				+ *   nlmsg_flags - flags passed to nlmsg_put() (0 or NLM_F_MULTI here)
				+ */
				+struct crypto_dump_info {
			
 
				+	struct sk_buff *in_skb;
			
 
				+	struct sk_buff *out_skb;
			
 
				+	u32 nlmsg_seq;
			
 
				+	u16 nlmsg_flags;
			
 
				+};
			
 
				+
			
 
				+/*
				+ * Append the AEAD statistics of @alg to @skb as a CRYPTOCFGA_STAT_AEAD
				+ * attribute.
				+ *
				+ * Returns 0 on success or -EMSGSIZE if nla_put() fails.
				+ */
				+static int crypto_report_aead(struct sk_buff *skb, struct crypto_alg *alg)
				+{
				+	struct crypto_stat raead;
				+
				+	/*
				+	 * The whole struct is copied into the message below but only the
				+	 * AEAD-related fields are assigned here; zero it first so that no
				+	 * uninitialized stack bytes are sent out.
				+	 */
				+	memset(&raead, 0, sizeof(raead));
				+	strlcpy(raead.type, "aead", sizeof(raead.type));
				+
				+	raead.stat_encrypt_cnt = atomic_read(&alg->encrypt_cnt);
				+	raead.stat_encrypt_tlen = atomic64_read(&alg->encrypt_tlen);
				+	raead.stat_decrypt_cnt = atomic_read(&alg->decrypt_cnt);
				+	raead.stat_decrypt_tlen = atomic64_read(&alg->decrypt_tlen);
				+	raead.stat_aead_err_cnt = atomic_read(&alg->aead_err_cnt);
				+
				+	if (nla_put(skb, CRYPTOCFGA_STAT_AEAD, sizeof(raead), &raead))
				+		return -EMSGSIZE;
				+
				+	return 0;
				+}
			
 
				+
			
 
				+/*
				+ * Append the cipher statistics of @alg to @skb as a CRYPTOCFGA_STAT_CIPHER
				+ * attribute.
				+ *
				+ * Returns 0 on success or -EMSGSIZE if nla_put() fails.
				+ */
				+static int crypto_report_cipher(struct sk_buff *skb, struct crypto_alg *alg)
				+{
				+	struct crypto_stat rcipher;
				+
				+	/*
				+	 * The whole struct is copied into the message below but only the
				+	 * cipher-related fields are assigned here; zero it first so that no
				+	 * uninitialized stack bytes are sent out.
				+	 */
				+	memset(&rcipher, 0, sizeof(rcipher));
				+	strlcpy(rcipher.type, "cipher", sizeof(rcipher.type));
				+
				+	rcipher.stat_encrypt_cnt = atomic_read(&alg->encrypt_cnt);
				+	rcipher.stat_encrypt_tlen = atomic64_read(&alg->encrypt_tlen);
				+	rcipher.stat_decrypt_cnt = atomic_read(&alg->decrypt_cnt);
				+	rcipher.stat_decrypt_tlen = atomic64_read(&alg->decrypt_tlen);
				+	rcipher.stat_cipher_err_cnt = atomic_read(&alg->cipher_err_cnt);
				+
				+	if (nla_put(skb, CRYPTOCFGA_STAT_CIPHER, sizeof(rcipher), &rcipher))
				+		return -EMSGSIZE;
				+
				+	return 0;
				+}
			
 
				+
			
 
				+/*
				+ * Append the compression statistics of @alg to @skb as a
				+ * CRYPTOCFGA_STAT_COMPRESS attribute.
				+ *
				+ * Returns 0 on success or -EMSGSIZE if nla_put() fails.
				+ */
				+static int crypto_report_comp(struct sk_buff *skb, struct crypto_alg *alg)
				+{
				+	struct crypto_stat rcomp;
				+
				+	/*
				+	 * The whole struct is copied into the message below but only the
				+	 * compression-related fields are assigned here; zero it first so
				+	 * that no uninitialized stack bytes are sent out.
				+	 */
				+	memset(&rcomp, 0, sizeof(rcomp));
				+	strlcpy(rcomp.type, "compression", sizeof(rcomp.type));
				+
				+	rcomp.stat_compress_cnt = atomic_read(&alg->compress_cnt);
				+	rcomp.stat_compress_tlen = atomic64_read(&alg->compress_tlen);
				+	rcomp.stat_decompress_cnt = atomic_read(&alg->decompress_cnt);
				+	rcomp.stat_decompress_tlen = atomic64_read(&alg->decompress_tlen);
				+	/*
				+	 * NOTE(review): the error count is read from cipher_err_cnt --
				+	 * confirm this is the intended counter for compression.
				+	 */
				+	rcomp.stat_compress_err_cnt = atomic_read(&alg->cipher_err_cnt);
				+
				+	if (nla_put(skb, CRYPTOCFGA_STAT_COMPRESS, sizeof(rcomp), &rcomp))
				+		return -EMSGSIZE;
				+
				+	return 0;
				+}
			
 
				+
			
 
				+/*
				+ * Append the acomp statistics of @alg to @skb as a CRYPTOCFGA_STAT_ACOMP
				+ * attribute.
				+ *
				+ * Returns 0 on success or -EMSGSIZE if nla_put() fails.
				+ */
				+static int crypto_report_acomp(struct sk_buff *skb, struct crypto_alg *alg)
				+{
				+	struct crypto_stat racomp;
				+
				+	/*
				+	 * The whole struct is copied into the message below but only the
				+	 * compression-related fields are assigned here; zero it first so
				+	 * that no uninitialized stack bytes are sent out.
				+	 */
				+	memset(&racomp, 0, sizeof(racomp));
				+	strlcpy(racomp.type, "acomp", sizeof(racomp.type));
				+
				+	racomp.stat_compress_cnt = atomic_read(&alg->compress_cnt);
				+	racomp.stat_compress_tlen = atomic64_read(&alg->compress_tlen);
				+	racomp.stat_decompress_cnt = atomic_read(&alg->decompress_cnt);
				+	racomp.stat_decompress_tlen = atomic64_read(&alg->decompress_tlen);
				+	/*
				+	 * NOTE(review): the error count is read from cipher_err_cnt --
				+	 * confirm this is the intended counter for compression.
				+	 */
				+	racomp.stat_compress_err_cnt = atomic_read(&alg->cipher_err_cnt);
				+
				+	if (nla_put(skb, CRYPTOCFGA_STAT_ACOMP, sizeof(racomp), &racomp))
				+		return -EMSGSIZE;
				+
				+	return 0;
				+}
			
 
				+
			
 
				+/*
				+ * Append the akcipher statistics of @alg to @skb as a
				+ * CRYPTOCFGA_STAT_AKCIPHER attribute.
				+ *
				+ * Returns 0 on success or -EMSGSIZE if nla_put() fails.
				+ */
				+static int crypto_report_akcipher(struct sk_buff *skb, struct crypto_alg *alg)
				+{
				+	struct crypto_stat rakcipher;
				+
				+	/*
				+	 * The whole struct is copied into the message below but only the
				+	 * akcipher-related fields are assigned here; zero it first so that
				+	 * no uninitialized stack bytes are sent out.
				+	 */
				+	memset(&rakcipher, 0, sizeof(rakcipher));
				+	strlcpy(rakcipher.type, "akcipher", sizeof(rakcipher.type));
				+
				+	rakcipher.stat_encrypt_cnt = atomic_read(&alg->encrypt_cnt);
				+	rakcipher.stat_encrypt_tlen = atomic64_read(&alg->encrypt_tlen);
				+	rakcipher.stat_decrypt_cnt = atomic_read(&alg->decrypt_cnt);
				+	rakcipher.stat_decrypt_tlen = atomic64_read(&alg->decrypt_tlen);
				+	rakcipher.stat_sign_cnt = atomic_read(&alg->sign_cnt);
				+	rakcipher.stat_verify_cnt = atomic_read(&alg->verify_cnt);
				+	rakcipher.stat_akcipher_err_cnt = atomic_read(&alg->akcipher_err_cnt);
				+
				+	if (nla_put(skb, CRYPTOCFGA_STAT_AKCIPHER,
				+		    sizeof(rakcipher), &rakcipher))
				+		return -EMSGSIZE;
				+
				+	return 0;
				+}
			
 
				+
			
 
				+/*
				+ * Append the kpp statistics of @alg to @skb as a CRYPTOCFGA_STAT_KPP
				+ * attribute.
				+ *
				+ * Returns 0 on success or -EMSGSIZE if nla_put() fails.
				+ */
				+static int crypto_report_kpp(struct sk_buff *skb, struct crypto_alg *alg)
				+{
				+	struct crypto_stat rkpp;
				+
				+	/*
				+	 * The whole struct is copied into the message below but only the
				+	 * kpp-related fields are assigned here; zero it first so that no
				+	 * uninitialized stack bytes are sent out.
				+	 */
				+	memset(&rkpp, 0, sizeof(rkpp));
				+	strlcpy(rkpp.type, "kpp", sizeof(rkpp.type));
				+
				+	rkpp.stat_setsecret_cnt = atomic_read(&alg->setsecret_cnt);
				+	rkpp.stat_generate_public_key_cnt =
				+		atomic_read(&alg->generate_public_key_cnt);
				+	rkpp.stat_compute_shared_secret_cnt =
				+		atomic_read(&alg->compute_shared_secret_cnt);
				+	rkpp.stat_kpp_err_cnt = atomic_read(&alg->kpp_err_cnt);
				+
				+	if (nla_put(skb, CRYPTOCFGA_STAT_KPP, sizeof(rkpp), &rkpp))
				+		return -EMSGSIZE;
				+
				+	return 0;
				+}
			
 
				+
			
 
				+/*
				+ * Append the ahash statistics of @alg to @skb as a CRYPTOCFGA_STAT_HASH
				+ * attribute.
				+ *
				+ * Returns 0 on success or -EMSGSIZE if nla_put() fails.
				+ */
				+static int crypto_report_ahash(struct sk_buff *skb, struct crypto_alg *alg)
				+{
				+	struct crypto_stat rhash;
				+
				+	/*
				+	 * The whole struct is copied into the message below but only the
				+	 * hash-related fields are assigned here; zero it first so that no
				+	 * uninitialized stack bytes are sent out.
				+	 */
				+	memset(&rhash, 0, sizeof(rhash));
				+	strlcpy(rhash.type, "ahash", sizeof(rhash.type));
				+
				+	rhash.stat_hash_cnt = atomic_read(&alg->hash_cnt);
				+	rhash.stat_hash_tlen = atomic64_read(&alg->hash_tlen);
				+	rhash.stat_hash_err_cnt = atomic_read(&alg->hash_err_cnt);
				+
				+	if (nla_put(skb, CRYPTOCFGA_STAT_HASH, sizeof(rhash), &rhash))
				+		return -EMSGSIZE;
				+
				+	return 0;
				+}
			
 
				+
			
 
				+/*
				+ * Append the shash statistics of @alg to @skb as a CRYPTOCFGA_STAT_HASH
				+ * attribute.
				+ *
				+ * Returns 0 on success or -EMSGSIZE if nla_put() fails.
				+ */
				+static int crypto_report_shash(struct sk_buff *skb, struct crypto_alg *alg)
				+{
				+	struct crypto_stat rhash;
				+
				+	/*
				+	 * The whole struct is copied into the message below but only the
				+	 * hash-related fields are assigned here; zero it first so that no
				+	 * uninitialized stack bytes are sent out.
				+	 */
				+	memset(&rhash, 0, sizeof(rhash));
				+	strlcpy(rhash.type, "shash", sizeof(rhash.type));
				+
				+	rhash.stat_hash_cnt = atomic_read(&alg->hash_cnt);
				+	rhash.stat_hash_tlen = atomic64_read(&alg->hash_tlen);
				+	rhash.stat_hash_err_cnt = atomic_read(&alg->hash_err_cnt);
				+
				+	if (nla_put(skb, CRYPTOCFGA_STAT_HASH, sizeof(rhash), &rhash))
				+		return -EMSGSIZE;
				+
				+	return 0;
				+}
			
 
				+
			
 
				+/*
				+ * Append the rng statistics of @alg to @skb as a CRYPTOCFGA_STAT_RNG
				+ * attribute.
				+ *
				+ * Returns 0 on success or -EMSGSIZE if nla_put() fails.
				+ */
				+static int crypto_report_rng(struct sk_buff *skb, struct crypto_alg *alg)
				+{
				+	struct crypto_stat rrng;
				+
				+	/*
				+	 * The whole struct is copied into the message below but only the
				+	 * rng-related fields are assigned here; zero it first so that no
				+	 * uninitialized stack bytes are sent out.
				+	 */
				+	memset(&rrng, 0, sizeof(rrng));
				+	strlcpy(rrng.type, "rng", sizeof(rrng.type));
				+
				+	rrng.stat_generate_cnt = atomic_read(&alg->generate_cnt);
				+	rrng.stat_generate_tlen = atomic64_read(&alg->generate_tlen);
				+	rrng.stat_seed_cnt = atomic_read(&alg->seed_cnt);
				+	/*
				+	 * NOTE(review): the error count is read from hash_err_cnt --
				+	 * confirm this is the intended counter for rng.
				+	 */
				+	rrng.stat_rng_err_cnt = atomic_read(&alg->hash_err_cnt);
				+
				+	if (nla_put(skb, CRYPTOCFGA_STAT_RNG, sizeof(rrng), &rrng))
				+		return -EMSGSIZE;
				+
				+	return 0;
				+}
			
 
				+
			
 
				+/*
				+ * Fill @ualg with the identity of @alg (names, module, flags, refcount) and
				+ * append to @skb the priority attribute plus one type-specific statistics
				+ * attribute chosen from the algorithm's type flags.
				+ *
				+ * A larval algorithm only gets a CRYPTOCFGA_STAT_LARVAL attribute. An
				+ * unrecognised type is logged with pr_err() and still reported as success.
				+ *
				+ * Returns 0 on success or -EMSGSIZE if any attribute could not be added.
				+ */
				+static int crypto_reportstat_one(struct crypto_alg *alg,
				+				 struct crypto_user_alg *ualg,
				+				 struct sk_buff *skb)
				+{
				+	/*
				+	 * Zero the header first: strlcpy() does not pad the remainder of
				+	 * the name buffers, so any bytes not written below would otherwise
				+	 * keep whatever the caller's buffer held.
				+	 */
				+	memset(ualg, 0, sizeof(*ualg));
				+
				+	strlcpy(ualg->cru_name, alg->cra_name, sizeof(ualg->cru_name));
				+	strlcpy(ualg->cru_driver_name, alg->cra_driver_name,
				+		sizeof(ualg->cru_driver_name));
				+	strlcpy(ualg->cru_module_name, module_name(alg->cra_module),
				+		sizeof(ualg->cru_module_name));
				+
				+	ualg->cru_type = 0;
				+	ualg->cru_mask = 0;
				+	ualg->cru_flags = alg->cra_flags;
				+	ualg->cru_refcnt = refcount_read(&alg->cra_refcnt);
				+
				+	if (nla_put_u32(skb, CRYPTOCFGA_PRIORITY_VAL, alg->cra_priority))
				+		goto nla_put_failure;
				+
				+	if (alg->cra_flags & CRYPTO_ALG_LARVAL) {
				+		struct crypto_stat rl;
				+
				+		/* only .type is set, but the whole struct is sent */
				+		memset(&rl, 0, sizeof(rl));
				+		strlcpy(rl.type, "larval", sizeof(rl.type));
				+		if (nla_put(skb, CRYPTOCFGA_STAT_LARVAL, sizeof(rl), &rl))
				+			goto nla_put_failure;
				+		goto out;
				+	}
				+
				+	switch (alg->cra_flags & (CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_LARVAL)) {
				+	case CRYPTO_ALG_TYPE_AEAD:
				+		if (crypto_report_aead(skb, alg))
				+			goto nla_put_failure;
				+		break;
				+	case CRYPTO_ALG_TYPE_SKCIPHER:
				+	case CRYPTO_ALG_TYPE_BLKCIPHER:
				+	case CRYPTO_ALG_TYPE_CIPHER:
				+		if (crypto_report_cipher(skb, alg))
				+			goto nla_put_failure;
				+		break;
				+	case CRYPTO_ALG_TYPE_COMPRESS:
				+		if (crypto_report_comp(skb, alg))
				+			goto nla_put_failure;
				+		break;
				+	case CRYPTO_ALG_TYPE_ACOMPRESS:
				+	case CRYPTO_ALG_TYPE_SCOMPRESS:
				+		if (crypto_report_acomp(skb, alg))
				+			goto nla_put_failure;
				+		break;
				+	case CRYPTO_ALG_TYPE_AKCIPHER:
				+		if (crypto_report_akcipher(skb, alg))
				+			goto nla_put_failure;
				+		break;
				+	case CRYPTO_ALG_TYPE_KPP:
				+		if (crypto_report_kpp(skb, alg))
				+			goto nla_put_failure;
				+		break;
				+	case CRYPTO_ALG_TYPE_AHASH:
				+		if (crypto_report_ahash(skb, alg))
				+			goto nla_put_failure;
				+		break;
				+	case CRYPTO_ALG_TYPE_HASH:
				+		if (crypto_report_shash(skb, alg))
				+			goto nla_put_failure;
				+		break;
				+	case CRYPTO_ALG_TYPE_RNG:
				+		if (crypto_report_rng(skb, alg))
				+			goto nla_put_failure;
				+		break;
				+	default:
				+		pr_err("ERROR: Unhandled alg %d in %s\n",
				+		       alg->cra_flags & (CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_LARVAL),
				+		       __func__);
				+	}
				+
				+out:
				+	return 0;
				+
				+nla_put_failure:
				+	return -EMSGSIZE;
				+}
			
 
				+
			
 
				+/*
				+ * Emit one CRYPTO_MSG_GETSTAT message describing @alg into info->out_skb.
				+ *
				+ * The message header is addressed with the portid of info->in_skb and the
				+ * sequence number and flags carried in @info. If the payload cannot be
				+ * filled in, the partially built message is cancelled.
				+ *
				+ * Returns 0 on success, -EMSGSIZE if the header does not fit, or the error
				+ * from crypto_reportstat_one().
				+ */
				+static int crypto_reportstat_alg(struct crypto_alg *alg,
				+				 struct crypto_dump_info *info)
				+{
				+	struct sk_buff *request = info->in_skb;
				+	struct sk_buff *reply = info->out_skb;
				+	struct crypto_user_alg *payload;
				+	struct nlmsghdr *hdr;
				+	int ret;
				+
				+	hdr = nlmsg_put(reply, NETLINK_CB(request).portid, info->nlmsg_seq,
				+			CRYPTO_MSG_GETSTAT, sizeof(*payload),
				+			info->nlmsg_flags);
				+	if (!hdr)
				+		return -EMSGSIZE;
				+
				+	payload = nlmsg_data(hdr);
				+
				+	ret = crypto_reportstat_one(alg, payload, reply);
				+	if (ret) {
				+		/* drop the half-written message again */
				+		nlmsg_cancel(reply, hdr);
				+		return ret;
				+	}
				+
				+	nlmsg_end(reply, hdr);
				+	return 0;
				+}
			
 
				+
			
 
				+/*
				+ * Handle a single-algorithm CRYPTO_MSG_GETSTAT request.
				+ *
				+ * Looks the algorithm up with crypto_alg_match(p, 0), builds one reply
				+ * message in a freshly allocated skb and unicasts it to the portid of the
				+ * requesting skb.
				+ *
				+ * Returns -EINVAL if either name in the request is not NUL-terminated,
				+ * -ENOENT if no algorithm matches, -ENOMEM if the reply skb cannot be
				+ * allocated, the error from crypto_reportstat_alg() if the reply cannot be
				+ * built, otherwise the result of nlmsg_unicast().
				+ */
				+int crypto_reportstat(struct sk_buff *in_skb, struct nlmsghdr *in_nlh,
				+		      struct nlattr **attrs)
				+{
				+	struct crypto_user_alg *p = nlmsg_data(in_nlh);
				+	struct crypto_alg *alg;
				+	struct sk_buff *skb;
				+	struct crypto_dump_info info;
				+	int err;
				+
				+	if (!null_terminated(p->cru_name) || !null_terminated(p->cru_driver_name))
				+		return -EINVAL;
				+
				+	alg = crypto_alg_match(p, 0);
				+	if (!alg)
				+		return -ENOENT;
				+
				+	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
				+	if (!skb) {
				+		crypto_mod_put(alg);
				+		return -ENOMEM;
				+	}
				+
				+	info.in_skb = in_skb;
				+	info.out_skb = skb;
				+	info.nlmsg_seq = in_nlh->nlmsg_seq;
				+	info.nlmsg_flags = 0;
				+
				+	err = crypto_reportstat_alg(alg, &info);
				+
				+	crypto_mod_put(alg);
				+
				+	if (err) {
				+		/* the reply was never handed off, so free it here */
				+		kfree_skb(skb);
				+		return err;
				+	}
				+
				+	return nlmsg_unicast(crypto_nlsk, skb, NETLINK_CB(in_skb).portid);
				+}
			
 
				+
			
 
				+/*
				+ * Dump callback for CRYPTO_MSG_GETSTAT: write one NLM_F_MULTI message per
				+ * entry of crypto_alg_list into @skb.
				+ *
				+ * cb->args[0] serves as a "done" flag: it is set on the first call, and any
				+ * later call returns without emitting anything.
				+ *
				+ * Returns skb->len, or the error from crypto_reportstat_alg() if a message
				+ * could not be added.
				+ */
				+int crypto_dump_reportstat(struct sk_buff *skb, struct netlink_callback *cb)
				+{
				+	struct crypto_dump_info info;
				+	struct crypto_alg *cur;
				+	int ret;
				+
				+	if (cb->args[0])
				+		return skb->len;
				+
				+	cb->args[0] = 1;
				+
				+	info.in_skb = cb->skb;
				+	info.out_skb = skb;
				+	info.nlmsg_seq = cb->nlh->nlmsg_seq;
				+	info.nlmsg_flags = NLM_F_MULTI;
				+
				+	list_for_each_entry(cur, &crypto_alg_list, cra_list) {
				+		ret = crypto_reportstat_alg(cur, &info);
				+		if (ret)
				+			return ret;
				+	}
				+
				+	return skb->len;
				+}
			
 
				+
			
 
				+/*
				+ * Completion hook paired with crypto_dump_reportstat(); it does no work and
				+ * always returns 0.
				+ */
				+int crypto_dump_reportstat_done(struct netlink_callback *cb)
			
 
				+{
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+MODULE_LICENSE("GPL");
			
--- a/crypto/echainiv.c
+++ b/crypto/echainiv.c
@@ -47,9 +47,9 @@ static int echainiv_encrypt(struct aead_request *req)
 
				 	info = req->iv;
			
 
				 
			
 
				 	if (req->src != req->dst) {
			
 
				-		SKCIPHER_REQUEST_ON_STACK(nreq, ctx->sknull);
			
 
				+		SYNC_SKCIPHER_REQUEST_ON_STACK(nreq, ctx->sknull);
			
 
				 
			
 
				-		skcipher_request_set_tfm(nreq, ctx->sknull);
			
 
				+		skcipher_request_set_sync_tfm(nreq, ctx->sknull);
			
 
				 		skcipher_request_set_callback(nreq, req->base.flags,
			
 
				 					      NULL, NULL);
			
 
				 		skcipher_request_set_crypt(nreq, req->src, req->dst,
			
--- a/crypto/gcm.c
+++ b/crypto/gcm.c
@@ -50,7 +50,7 @@ struct crypto_rfc4543_instance_ctx {
 
				 
			
 
				 struct crypto_rfc4543_ctx {
			
 
				 	struct crypto_aead *child;
			
 
				-	struct crypto_skcipher *null;
			
 
				+	struct crypto_sync_skcipher *null;
			
 
				 	u8 nonce[4];
			
 
				 };
			
 
				 
			
@@ -1067,9 +1067,9 @@ static int crypto_rfc4543_copy_src_to_dst(struct aead_request *req, bool enc)
 
				 	unsigned int authsize = crypto_aead_authsize(aead);
			
 
				 	unsigned int nbytes = req->assoclen + req->cryptlen -
			
 
				 			      (enc ? 0 : authsize);
			
 
				-	SKCIPHER_REQUEST_ON_STACK(nreq, ctx->null);
			
 
				+	SYNC_SKCIPHER_REQUEST_ON_STACK(nreq, ctx->null);
			
 
				 
			
 
				-	skcipher_request_set_tfm(nreq, ctx->null);
			
 
				+	skcipher_request_set_sync_tfm(nreq, ctx->null);
			
 
				 	skcipher_request_set_callback(nreq, req->base.flags, NULL, NULL);
			
 
				 	skcipher_request_set_crypt(nreq, req->src, req->dst, nbytes, NULL);
			
 
				 
			
@@ -1093,7 +1093,7 @@ static int crypto_rfc4543_init_tfm(struct crypto_aead *tfm)
 
				 	struct crypto_aead_spawn *spawn = &ictx->aead;
			
 
				 	struct crypto_rfc4543_ctx *ctx = crypto_aead_ctx(tfm);
			
 
				 	struct crypto_aead *aead;
			
 
				-	struct crypto_skcipher *null;
			
 
				+	struct crypto_sync_skcipher *null;
			
 
				 	unsigned long align;
			
 
				 	int err = 0;
			
 
				 
			
--- a/crypto/internal.h
+++ b/crypto/internal.h
@@ -26,12 +26,6 @@
 
				 #include <linux/rwsem.h>
			
 
				 #include <linux/slab.h>
			
 
				 
			
 
				-/* Crypto notification events. */
			
 
				-enum {
			
 
				-	CRYPTO_MSG_ALG_REQUEST,
			
 
				-	CRYPTO_MSG_ALG_REGISTER,
			
 
				-};
			
 
				-
			
 
				 struct crypto_instance;
			
 
				 struct crypto_template;
			
 
				 
			
@@ -90,8 +84,6 @@ struct crypto_alg *crypto_find_alg(const char *alg_name,
 
				 void *crypto_alloc_tfm(const char *alg_name,
			
 
				 		       const struct crypto_type *frontend, u32 type, u32 mask);
			
 
				 
			
 
				-int crypto_register_notifier(struct notifier_block *nb);
			
 
				-int crypto_unregister_notifier(struct notifier_block *nb);
			
 
				 int crypto_probing_notify(unsigned long val, void *v);
			
 
				 
			
 
				 unsigned int crypto_alg_extsize(struct crypto_alg *alg);
			
--- a/crypto/lrw.c
+++ b/crypto/lrw.c
@@ -29,8 +29,6 @@
 
				 #include <crypto/b128ops.h>
			
 
				 #include <crypto/gf128mul.h>
			
 
				 
			
 
				-#define LRW_BUFFER_SIZE 128u
			
 
				-
			
 
				 #define LRW_BLOCK_SIZE 16
			
 
				 
			
 
				 struct priv {
			
@@ -56,19 +54,7 @@ struct priv {
 
				 };
			
 
				 
			
 
				 struct rctx {
			
 
				-	be128 buf[LRW_BUFFER_SIZE / sizeof(be128)];
			
 
				-
			
 
				 	be128 t;
			
 
				-
			
 
				-	be128 *ext;
			
 
				-
			
 
				-	struct scatterlist srcbuf[2];
			
 
				-	struct scatterlist dstbuf[2];
			
 
				-	struct scatterlist *src;
			
 
				-	struct scatterlist *dst;
			
 
				-
			
 
				-	unsigned int left;
			
 
				-
			
 
				 	struct skcipher_request subreq;
			
 
				 };
			
 
				 
			
@@ -120,112 +106,68 @@ static int setkey(struct crypto_skcipher *parent, const u8 *key,
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-static inline void inc(be128 *iv)
			
 
				-{
			
 
				-	be64_add_cpu(&iv->b, 1);
			
 
				-	if (!iv->b)
			
 
				-		be64_add_cpu(&iv->a, 1);
			
 
				-}
			
 
				-
			
 
				-/* this returns the number of consequative 1 bits starting
			
 
				- * from the right, get_index128(00 00 00 00 00 00 ... 00 00 10 FB) = 2 */
			
 
				-static inline int get_index128(be128 *block)
			
 
				+/*
			
 
				+ * Returns the number of trailing '1' bits in the words of the counter, which is
			
 
				+ * represented by 4 32-bit words, arranged from least to most significant.
			
 
				+ * At the same time, increments the counter by one.
			
 
				+ *
			
 
				+ * For example:
			
 
				+ *
			
 
				+ * u32 counter[4] = { 0xFFFFFFFF, 0x1, 0x0, 0x0 };
			
 
				+ * int i = next_index(counter);
			
 
				+ * // i == 33, counter == { 0x0, 0x2, 0x0, 0x0 }
			
 
				+ */
			
 
				+static int next_index(u32 *counter)
			
 
				 {
			
 
				-	int x;
			
 
				-	__be32 *p = (__be32 *) block;
			
 
				+	int i, res = 0;
			
 
				 
			
 
				-	for (p += 3, x = 0; x < 128; p--, x += 32) {
			
 
				-		u32 val = be32_to_cpup(p);
			
 
				+	for (i = 0; i < 4; i++) {
			
 
				+		if (counter[i] + 1 != 0)
			
 
				+			return res + ffz(counter[i]++);
			
 
				 
			
 
				-		if (!~val)
			
 
				-			continue;
			
 
				-
			
 
				-		return x + ffz(val);
			
 
				+		counter[i] = 0;
			
 
				+		res += 32;
			
 
				 	}
			
 
				 
			
 
				-	return x;
			
 
				+	/*
			
 
				+	 * If we get here, then res == 128 and we are incrementing the counter
			
 
				+	 * from all ones to all zeros. This means we must return index 127, i.e.
			
 
				+	 * the one corresponding to key2*{ 1,...,1 }.
			
 
				+	 */
			
 
				+	return 127;
			
 
				 }
			
 
				 
			
 
				-static int post_crypt(struct skcipher_request *req)
			
 
				+/*
			
 
				+ * We compute the tweak masks twice (both before and after the ECB encryption or
			
 
				+ * decryption) to avoid having to allocate a temporary buffer and/or make
			
 
				+ * multiple calls to the 'ecb(..)' instance, which usually would be slower than
			
 
				+ * just doing the next_index() calls again.
			
 
				+ */
			
 
				+static int xor_tweak(struct skcipher_request *req, bool second_pass)
			
 
				 {
			
 
				-	struct rctx *rctx = skcipher_request_ctx(req);
			
 
				-	be128 *buf = rctx->ext ?: rctx->buf;
			
 
				-	struct skcipher_request *subreq;
			
 
				 	const int bs = LRW_BLOCK_SIZE;
			
 
				-	struct skcipher_walk w;
			
 
				-	struct scatterlist *sg;
			
 
				-	unsigned offset;
			
 
				-	int err;
			
 
				-
			
 
				-	subreq = &rctx->subreq;
			
 
				-	err = skcipher_walk_virt(&w, subreq, false);
			
 
				-
			
 
				-	while (w.nbytes) {
			
 
				-		unsigned int avail = w.nbytes;
			
 
				-		be128 *wdst;
			
 
				-
			
 
				-		wdst = w.dst.virt.addr;
			
 
				-
			
 
				-		do {
			
 
				-			be128_xor(wdst, buf++, wdst);
			
 
				-			wdst++;
			
 
				-		} while ((avail -= bs) >= bs);
			
 
				-
			
 
				-		err = skcipher_walk_done(&w, avail);
			
 
				-	}
			
 
				-
			
 
				-	rctx->left -= subreq->cryptlen;
			
 
				-
			
 
				-	if (err || !rctx->left)
			
 
				-		goto out;
			
 
				-
			
 
				-	rctx->dst = rctx->dstbuf;
			
 
				-
			
 
				-	scatterwalk_done(&w.out, 0, 1);
			
 
				-	sg = w.out.sg;
			
 
				-	offset = w.out.offset;
			
 
				-
			
 
				-	if (rctx->dst != sg) {
			
 
				-		rctx->dst[0] = *sg;
			
 
				-		sg_unmark_end(rctx->dst);
			
 
				-		scatterwalk_crypto_chain(rctx->dst, sg_next(sg), 2);
			
 
				-	}
			
 
				-	rctx->dst[0].length -= offset - sg->offset;
			
 
				-	rctx->dst[0].offset = offset;
			
 
				-
			
 
				-out:
			
 
				-	return err;
			
 
				-}
			
 
				-
			
 
				-static int pre_crypt(struct skcipher_request *req)
			
 
				-{
			
 
				 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
			
 
				-	struct rctx *rctx = skcipher_request_ctx(req);
			
 
				 	struct priv *ctx = crypto_skcipher_ctx(tfm);
			
 
				-	be128 *buf = rctx->ext ?: rctx->buf;
			
 
				-	struct skcipher_request *subreq;
			
 
				-	const int bs = LRW_BLOCK_SIZE;
			
 
				+	struct rctx *rctx = skcipher_request_ctx(req);
			
 
				+	be128 t = rctx->t;
			
 
				 	struct skcipher_walk w;
			
 
				-	struct scatterlist *sg;
			
 
				-	unsigned cryptlen;
			
 
				-	unsigned offset;
			
 
				-	be128 *iv;
			
 
				-	bool more;
			
 
				+	__be32 *iv;
			
 
				+	u32 counter[4];
			
 
				 	int err;
			
 
				 
			
 
				-	subreq = &rctx->subreq;
			
 
				-	skcipher_request_set_tfm(subreq, tfm);
			
 
				-
			
 
				-	cryptlen = subreq->cryptlen;
			
 
				-	more = rctx->left > cryptlen;
			
 
				-	if (!more)
			
 
				-		cryptlen = rctx->left;
			
 
				+	if (second_pass) {
			
 
				+		req = &rctx->subreq;
			
 
				+		/* set to our TFM to enforce correct alignment: */
			
 
				+		skcipher_request_set_tfm(req, tfm);
			
 
				+	}
			
 
				 
			
 
				-	skcipher_request_set_crypt(subreq, rctx->src, rctx->dst,
			
 
				-				   cryptlen, req->iv);
			
 
				+	err = skcipher_walk_virt(&w, req, false);
			
 
				+	iv = (__be32 *)w.iv;
			
 
				 
			
 
				-	err = skcipher_walk_virt(&w, subreq, false);
			
 
				-	iv = w.iv;
			
 
				+	counter[0] = be32_to_cpu(iv[3]);
			
 
				+	counter[1] = be32_to_cpu(iv[2]);
			
 
				+	counter[2] = be32_to_cpu(iv[1]);
			
 
				+	counter[3] = be32_to_cpu(iv[0]);
			
 
				 
			
 
				 	while (w.nbytes) {
			
 
				 		unsigned int avail = w.nbytes;
			
@@ -236,188 +178,85 @@ static int pre_crypt(struct skcipher_request *req)
 
				 		wdst = w.dst.virt.addr;
			
 
				 
			
 
				 		do {
			
 
				-			*buf++ = rctx->t;
			
 
				-			be128_xor(wdst++, &rctx->t, wsrc++);
			
 
				+			be128_xor(wdst++, &t, wsrc++);
			
 
				 
			
 
				 			/* T <- I*Key2, using the optimization
			
 
				 			 * discussed in the specification */
			
 
				-			be128_xor(&rctx->t, &rctx->t,
			
 
				-				  &ctx->mulinc[get_index128(iv)]);
			
 
				-			inc(iv);
			
 
				+			be128_xor(&t, &t, &ctx->mulinc[next_index(counter)]);
			
 
				 		} while ((avail -= bs) >= bs);
			
 
				 
			
 
				-		err = skcipher_walk_done(&w, avail);
			
 
				-	}
			
 
				-
			
 
				-	skcipher_request_set_tfm(subreq, ctx->child);
			
 
				-	skcipher_request_set_crypt(subreq, rctx->dst, rctx->dst,
			
 
				-				   cryptlen, NULL);
			
 
				-
			
 
				-	if (err || !more)
			
 
				-		goto out;
			
 
				-
			
 
				-	rctx->src = rctx->srcbuf;
			
 
				-
			
 
				-	scatterwalk_done(&w.in, 0, 1);
			
 
				-	sg = w.in.sg;
			
 
				-	offset = w.in.offset;
			
 
				+		if (second_pass && w.nbytes == w.total) {
			
 
				+			iv[0] = cpu_to_be32(counter[3]);
			
 
				+			iv[1] = cpu_to_be32(counter[2]);
			
 
				+			iv[2] = cpu_to_be32(counter[1]);
			
 
				+			iv[3] = cpu_to_be32(counter[0]);
			
 
				+		}
			
 
				 
			
 
				-	if (rctx->src != sg) {
			
 
				-		rctx->src[0] = *sg;
			
 
				-		sg_unmark_end(rctx->src);
			
 
				-		scatterwalk_crypto_chain(rctx->src, sg_next(sg), 2);
			
 
				+		err = skcipher_walk_done(&w, avail);
			
 
				 	}
			
 
				-	rctx->src[0].length -= offset - sg->offset;
			
 
				-	rctx->src[0].offset = offset;
			
 
				 
			
 
				-out:
			
 
				 	return err;
			
 
				 }
			
 
				 
			
 
				-static int init_crypt(struct skcipher_request *req, crypto_completion_t done)
			
 
				+static int xor_tweak_pre(struct skcipher_request *req)
			
 
				 {
			
 
				-	struct priv *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
			
 
				-	struct rctx *rctx = skcipher_request_ctx(req);
			
 
				-	struct skcipher_request *subreq;
			
 
				-	gfp_t gfp;
			
 
				-
			
 
				-	subreq = &rctx->subreq;
			
 
				-	skcipher_request_set_callback(subreq, req->base.flags, done, req);
			
 
				-
			
 
				-	gfp = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL :
			
 
				-							   GFP_ATOMIC;
			
 
				-	rctx->ext = NULL;
			
 
				-
			
 
				-	subreq->cryptlen = LRW_BUFFER_SIZE;
			
 
				-	if (req->cryptlen > LRW_BUFFER_SIZE) {
			
 
				-		unsigned int n = min(req->cryptlen, (unsigned int)PAGE_SIZE);
			
 
				-
			
 
				-		rctx->ext = kmalloc(n, gfp);
			
 
				-		if (rctx->ext)
			
 
				-			subreq->cryptlen = n;
			
 
				-	}
			
 
				-
			
 
				-	rctx->src = req->src;
			
 
				-	rctx->dst = req->dst;
			
 
				-	rctx->left = req->cryptlen;
			
 
				-
			
 
				-	/* calculate first value of T */
			
 
				-	memcpy(&rctx->t, req->iv, sizeof(rctx->t));
			
 
				-
			
 
				-	/* T <- I*Key2 */
			
 
				-	gf128mul_64k_bbe(&rctx->t, ctx->table);
			
 
				-
			
 
				-	return 0;
			
 
				+	return xor_tweak(req, false);
			
 
				 }
			
 
				 
			
 
				-static void exit_crypt(struct skcipher_request *req)
			
 
				+static int xor_tweak_post(struct skcipher_request *req)
			
 
				 {
			
 
				-	struct rctx *rctx = skcipher_request_ctx(req);
			
 
				-
			
 
				-	rctx->left = 0;
			
 
				-
			
 
				-	if (rctx->ext)
			
 
				-		kzfree(rctx->ext);
			
 
				+	return xor_tweak(req, true);
			
 
				 }
			
 
				 
			
 
				-static int do_encrypt(struct skcipher_request *req, int err)
			
 
				-{
			
 
				-	struct rctx *rctx = skcipher_request_ctx(req);
			
 
				-	struct skcipher_request *subreq;
			
 
				-
			
 
				-	subreq = &rctx->subreq;
			
 
				-
			
 
				-	while (!err && rctx->left) {
			
 
				-		err = pre_crypt(req) ?:
			
 
				-		      crypto_skcipher_encrypt(subreq) ?:
			
 
				-		      post_crypt(req);
			
 
				-
			
 
				-		if (err == -EINPROGRESS || err == -EBUSY)
			
 
				-			return err;
			
 
				-	}
			
 
				-
			
 
				-	exit_crypt(req);
			
 
				-	return err;
			
 
				-}
			
 
				-
			
 
				-static void encrypt_done(struct crypto_async_request *areq, int err)
			
 
				+static void crypt_done(struct crypto_async_request *areq, int err)
			
 
				 {
			
 
				 	struct skcipher_request *req = areq->data;
			
 
				-	struct skcipher_request *subreq;
			
 
				-	struct rctx *rctx;
			
 
				 
			
 
				-	rctx = skcipher_request_ctx(req);
			
 
				+	if (!err)
			
 
				+		err = xor_tweak_post(req);
			
 
				 
			
 
				-	if (err == -EINPROGRESS) {
			
 
				-		if (rctx->left != req->cryptlen)
			
 
				-			return;
			
 
				-		goto out;
			
 
				-	}
			
 
				-
			
 
				-	subreq = &rctx->subreq;
			
 
				-	subreq->base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG;
			
 
				-
			
 
				-	err = do_encrypt(req, err ?: post_crypt(req));
			
 
				-	if (rctx->left)
			
 
				-		return;
			
 
				-
			
 
				-out:
			
 
				 	skcipher_request_complete(req, err);
			
 
				 }
			
 
				 
			
 
				-static int encrypt(struct skcipher_request *req)
			
 
				-{
			
 
				-	return do_encrypt(req, init_crypt(req, encrypt_done));
			
 
				-}
			
 
				-
			
 
				-static int do_decrypt(struct skcipher_request *req, int err)
			
 
				+static void init_crypt(struct skcipher_request *req)
			
 
				 {
			
 
				+	struct priv *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
			
 
				 	struct rctx *rctx = skcipher_request_ctx(req);
			
 
				-	struct skcipher_request *subreq;
			
 
				-
			
 
				-	subreq = &rctx->subreq;
			
 
				+	struct skcipher_request *subreq = &rctx->subreq;
			
 
				 
			
 
				-	while (!err && rctx->left) {
			
 
				-		err = pre_crypt(req) ?:
			
 
				-		      crypto_skcipher_decrypt(subreq) ?:
			
 
				-		      post_crypt(req);
			
 
				+	skcipher_request_set_tfm(subreq, ctx->child);
			
 
				+	skcipher_request_set_callback(subreq, req->base.flags, crypt_done, req);
			
 
				+	/* pass req->iv as IV (will be used by xor_tweak, ECB will ignore it) */
			
 
				+	skcipher_request_set_crypt(subreq, req->dst, req->dst,
			
 
				+				   req->cryptlen, req->iv);
			
 
				 
			
 
				-		if (err == -EINPROGRESS || err == -EBUSY)
			
 
				-			return err;
			
 
				-	}
			
 
				+	/* calculate first value of T */
			
 
				+	memcpy(&rctx->t, req->iv, sizeof(rctx->t));
			
 
				 
			
 
				-	exit_crypt(req);
			
 
				-	return err;
			
 
				+	/* T <- I*Key2 */
			
 
				+	gf128mul_64k_bbe(&rctx->t, ctx->table);
			
 
				 }
			
 
				 
			
 
				-static void decrypt_done(struct crypto_async_request *areq, int err)
			
 
				+static int encrypt(struct skcipher_request *req)
			
 
				 {
			
 
				-	struct skcipher_request *req = areq->data;
			
 
				-	struct skcipher_request *subreq;
			
 
				-	struct rctx *rctx;
			
 
				-
			
 
				-	rctx = skcipher_request_ctx(req);
			
 
				-
			
 
				-	if (err == -EINPROGRESS) {
			
 
				-		if (rctx->left != req->cryptlen)
			
 
				-			return;
			
 
				-		goto out;
			
 
				-	}
			
 
				-
			
 
				-	subreq = &rctx->subreq;
			
 
				-	subreq->base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG;
			
 
				-
			
 
				-	err = do_decrypt(req, err ?: post_crypt(req));
			
 
				-	if (rctx->left)
			
 
				-		return;
			
 
				+	struct rctx *rctx = skcipher_request_ctx(req);
			
 
				+	struct skcipher_request *subreq = &rctx->subreq;
			
 
				 
			
 
				-out:
			
 
				-	skcipher_request_complete(req, err);
			
 
				+	init_crypt(req);
			
 
				+	return xor_tweak_pre(req) ?:
			
 
				+		crypto_skcipher_encrypt(subreq) ?:
			
 
				+		xor_tweak_post(req);
			
 
				 }
			
 
				 
			
 
				 static int decrypt(struct skcipher_request *req)
			
 
				 {
			
 
				-	return do_decrypt(req, init_crypt(req, decrypt_done));
			
 
				+	struct rctx *rctx = skcipher_request_ctx(req);
			
 
				+	struct skcipher_request *subreq = &rctx->subreq;
			
 
				+
			
 
				+	init_crypt(req);
			
 
				+	return xor_tweak_pre(req) ?:
			
 
				+		crypto_skcipher_decrypt(subreq) ?:
			
 
				+		xor_tweak_post(req);
			
 
				 }
			
 
				 
			
 
				 static int init_tfm(struct crypto_skcipher *tfm)
			
@@ -543,7 +382,7 @@ static int create(struct crypto_template *tmpl, struct rtattr **tb)
 
				 	inst->alg.base.cra_priority = alg->base.cra_priority;
			
 
				 	inst->alg.base.cra_blocksize = LRW_BLOCK_SIZE;
			
 
				 	inst->alg.base.cra_alignmask = alg->base.cra_alignmask |
			
 
				-				       (__alignof__(u64) - 1);
			
 
				+				       (__alignof__(__be32) - 1);
			
 
				 
			
 
				 	inst->alg.ivsize = LRW_BLOCK_SIZE;
			
 
				 	inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(alg) +
			
--- a/crypto/mcryptd.c
+++ b/crypto/mcryptd.c
@@ -1,675 +0,0 @@
 
				-/*
			
 
				- * Software multibuffer async crypto daemon.
			
 
				- *
			
 
				- * Copyright (c) 2014 Tim Chen <tim.c.chen@linux.intel.com>
			
 
				- *
			
 
				- * Adapted from crypto daemon.
			
 
				- *
			
 
				- * This program is free software; you can redistribute it and/or modify it
			
 
				- * under the terms of the GNU General Public License as published by the Free
			
 
				- * Software Foundation; either version 2 of the License, or (at your option)
			
 
				- * any later version.
			
 
				- *
			
 
				- */
			
 
				-
			
 
				-#include <crypto/algapi.h>
			
 
				-#include <crypto/internal/hash.h>
			
 
				-#include <crypto/internal/aead.h>
			
 
				-#include <crypto/mcryptd.h>
			
 
				-#include <crypto/crypto_wq.h>
			
 
				-#include <linux/err.h>
			
 
				-#include <linux/init.h>
			
 
				-#include <linux/kernel.h>
			
 
				-#include <linux/list.h>
			
 
				-#include <linux/module.h>
			
 
				-#include <linux/scatterlist.h>
			
 
				-#include <linux/sched.h>
			
 
				-#include <linux/sched/stat.h>
			
 
				-#include <linux/slab.h>
			
 
				-
			
 
				-#define MCRYPTD_MAX_CPU_QLEN 100
			
 
				-#define MCRYPTD_BATCH 9
			
 
				-
			
 
				-static void *mcryptd_alloc_instance(struct crypto_alg *alg, unsigned int head,
			
 
				-				   unsigned int tail);
			
 
				-
			
 
				-struct mcryptd_flush_list {
			
 
				-	struct list_head list;
			
 
				-	struct mutex lock;
			
 
				-};
			
 
				-
			
 
				-static struct mcryptd_flush_list __percpu *mcryptd_flist;
			
 
				-
			
 
				-struct hashd_instance_ctx {
			
 
				-	struct crypto_ahash_spawn spawn;
			
 
				-	struct mcryptd_queue *queue;
			
 
				-};
			
 
				-
			
 
				-static void mcryptd_queue_worker(struct work_struct *work);
			
 
				-
			
 
				-void mcryptd_arm_flusher(struct mcryptd_alg_cstate *cstate, unsigned long delay)
			
 
				-{
			
 
				-	struct mcryptd_flush_list *flist;
			
 
				-
			
 
				-	if (!cstate->flusher_engaged) {
			
 
				-		/* put the flusher on the flush list */
			
 
				-		flist = per_cpu_ptr(mcryptd_flist, smp_processor_id());
			
 
				-		mutex_lock(&flist->lock);
			
 
				-		list_add_tail(&cstate->flush_list, &flist->list);
			
 
				-		cstate->flusher_engaged = true;
			
 
				-		cstate->next_flush = jiffies + delay;
			
 
				-		queue_delayed_work_on(smp_processor_id(), kcrypto_wq,
			
 
				-			&cstate->flush, delay);
			
 
				-		mutex_unlock(&flist->lock);
			
 
				-	}
			
 
				-}
			
 
				-EXPORT_SYMBOL(mcryptd_arm_flusher);
			
 
				-
			
 
				-static int mcryptd_init_queue(struct mcryptd_queue *queue,
			
 
				-			     unsigned int max_cpu_qlen)
			
 
				-{
			
 
				-	int cpu;
			
 
				-	struct mcryptd_cpu_queue *cpu_queue;
			
 
				-
			
 
				-	queue->cpu_queue = alloc_percpu(struct mcryptd_cpu_queue);
			
 
				-	pr_debug("mqueue:%p mcryptd_cpu_queue %p\n", queue, queue->cpu_queue);
			
 
				-	if (!queue->cpu_queue)
			
 
				-		return -ENOMEM;
			
 
				-	for_each_possible_cpu(cpu) {
			
 
				-		cpu_queue = per_cpu_ptr(queue->cpu_queue, cpu);
			
 
				-		pr_debug("cpu_queue #%d %p\n", cpu, queue->cpu_queue);
			
 
				-		crypto_init_queue(&cpu_queue->queue, max_cpu_qlen);
			
 
				-		INIT_WORK(&cpu_queue->work, mcryptd_queue_worker);
			
 
				-		spin_lock_init(&cpu_queue->q_lock);
			
 
				-	}
			
 
				-	return 0;
			
 
				-}
			
 
				-
			
 
				-static void mcryptd_fini_queue(struct mcryptd_queue *queue)
			
 
				-{
			
 
				-	int cpu;
			
 
				-	struct mcryptd_cpu_queue *cpu_queue;
			
 
				-
			
 
				-	for_each_possible_cpu(cpu) {
			
 
				-		cpu_queue = per_cpu_ptr(queue->cpu_queue, cpu);
			
 
				-		BUG_ON(cpu_queue->queue.qlen);
			
 
				-	}
			
 
				-	free_percpu(queue->cpu_queue);
			
 
				-}
			
 
				-
			
 
				-static int mcryptd_enqueue_request(struct mcryptd_queue *queue,
			
 
				-				  struct crypto_async_request *request,
			
 
				-				  struct mcryptd_hash_request_ctx *rctx)
			
 
				-{
			
 
				-	int cpu, err;
			
 
				-	struct mcryptd_cpu_queue *cpu_queue;
			
 
				-
			
 
				-	cpu_queue = raw_cpu_ptr(queue->cpu_queue);
			
 
				-	spin_lock(&cpu_queue->q_lock);
			
 
				-	cpu = smp_processor_id();
			
 
				-	rctx->tag.cpu = smp_processor_id();
			
 
				-
			
 
				-	err = crypto_enqueue_request(&cpu_queue->queue, request);
			
 
				-	pr_debug("enqueue request: cpu %d cpu_queue %p request %p\n",
			
 
				-		 cpu, cpu_queue, request);
			
 
				-	spin_unlock(&cpu_queue->q_lock);
			
 
				-	queue_work_on(cpu, kcrypto_wq, &cpu_queue->work);
			
 
				-
			
 
				-	return err;
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Try to opportunisticlly flush the partially completed jobs if
			
 
				- * crypto daemon is the only task running.
			
 
				- */
			
 
				-static void mcryptd_opportunistic_flush(void)
			
 
				-{
			
 
				-	struct mcryptd_flush_list *flist;
			
 
				-	struct mcryptd_alg_cstate *cstate;
			
 
				-
			
 
				-	flist = per_cpu_ptr(mcryptd_flist, smp_processor_id());
			
 
				-	while (single_task_running()) {
			
 
				-		mutex_lock(&flist->lock);
			
 
				-		cstate = list_first_entry_or_null(&flist->list,
			
 
				-				struct mcryptd_alg_cstate, flush_list);
			
 
				-		if (!cstate || !cstate->flusher_engaged) {
			
 
				-			mutex_unlock(&flist->lock);
			
 
				-			return;
			
 
				-		}
			
 
				-		list_del(&cstate->flush_list);
			
 
				-		cstate->flusher_engaged = false;
			
 
				-		mutex_unlock(&flist->lock);
			
 
				-		cstate->alg_state->flusher(cstate);
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Called in workqueue context, do one real cryption work (via
			
 
				- * req->complete) and reschedule itself if there are more work to
			
 
				- * do.
			
 
				- */
			
 
				-static void mcryptd_queue_worker(struct work_struct *work)
			
 
				-{
			
 
				-	struct mcryptd_cpu_queue *cpu_queue;
			
 
				-	struct crypto_async_request *req, *backlog;
			
 
				-	int i;
			
 
				-
			
 
				-	/*
			
 
				-	 * Need to loop through more than once for multi-buffer to
			
 
				-	 * be effective.
			
 
				-	 */
			
 
				-
			
 
				-	cpu_queue = container_of(work, struct mcryptd_cpu_queue, work);
			
 
				-	i = 0;
			
 
				-	while (i < MCRYPTD_BATCH || single_task_running()) {
			
 
				-
			
 
				-		spin_lock_bh(&cpu_queue->q_lock);
			
 
				-		backlog = crypto_get_backlog(&cpu_queue->queue);
			
 
				-		req = crypto_dequeue_request(&cpu_queue->queue);
			
 
				-		spin_unlock_bh(&cpu_queue->q_lock);
			
 
				-
			
 
				-		if (!req) {
			
 
				-			mcryptd_opportunistic_flush();
			
 
				-			return;
			
 
				-		}
			
 
				-
			
 
				-		if (backlog)
			
 
				-			backlog->complete(backlog, -EINPROGRESS);
			
 
				-		req->complete(req, 0);
			
 
				-		if (!cpu_queue->queue.qlen)
			
 
				-			return;
			
 
				-		++i;
			
 
				-	}
			
 
				-	if (cpu_queue->queue.qlen)
			
 
				-		queue_work_on(smp_processor_id(), kcrypto_wq, &cpu_queue->work);
			
 
				-}
			
 
				-
			
 
				-void mcryptd_flusher(struct work_struct *__work)
			
 
				-{
			
 
				-	struct	mcryptd_alg_cstate	*alg_cpu_state;
			
 
				-	struct	mcryptd_alg_state	*alg_state;
			
 
				-	struct	mcryptd_flush_list	*flist;
			
 
				-	int	cpu;
			
 
				-
			
 
				-	cpu = smp_processor_id();
			
 
				-	alg_cpu_state = container_of(to_delayed_work(__work),
			
 
				-				     struct mcryptd_alg_cstate, flush);
			
 
				-	alg_state = alg_cpu_state->alg_state;
			
 
				-	if (alg_cpu_state->cpu != cpu)
			
 
				-		pr_debug("mcryptd error: work on cpu %d, should be cpu %d\n",
			
 
				-				cpu, alg_cpu_state->cpu);
			
 
				-
			
 
				-	if (alg_cpu_state->flusher_engaged) {
			
 
				-		flist = per_cpu_ptr(mcryptd_flist, cpu);
			
 
				-		mutex_lock(&flist->lock);
			
 
				-		list_del(&alg_cpu_state->flush_list);
			
 
				-		alg_cpu_state->flusher_engaged = false;
			
 
				-		mutex_unlock(&flist->lock);
			
 
				-		alg_state->flusher(alg_cpu_state);
			
 
				-	}
			
 
				-}
			
 
				-EXPORT_SYMBOL_GPL(mcryptd_flusher);
			
 
				-
			
 
				-static inline struct mcryptd_queue *mcryptd_get_queue(struct crypto_tfm *tfm)
			
 
				-{
			
 
				-	struct crypto_instance *inst = crypto_tfm_alg_instance(tfm);
			
 
				-	struct mcryptd_instance_ctx *ictx = crypto_instance_ctx(inst);
			
 
				-
			
 
				-	return ictx->queue;
			
 
				-}
			
 
				-
			
 
				-static void *mcryptd_alloc_instance(struct crypto_alg *alg, unsigned int head,
			
 
				-				   unsigned int tail)
			
 
				-{
			
 
				-	char *p;
			
 
				-	struct crypto_instance *inst;
			
 
				-	int err;
			
 
				-
			
 
				-	p = kzalloc(head + sizeof(*inst) + tail, GFP_KERNEL);
			
 
				-	if (!p)
			
 
				-		return ERR_PTR(-ENOMEM);
			
 
				-
			
 
				-	inst = (void *)(p + head);
			
 
				-
			
 
				-	err = -ENAMETOOLONG;
			
 
				-	if (snprintf(inst->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME,
			
 
				-		    "mcryptd(%s)", alg->cra_driver_name) >= CRYPTO_MAX_ALG_NAME)
			
 
				-		goto out_free_inst;
			
 
				-
			
 
				-	memcpy(inst->alg.cra_name, alg->cra_name, CRYPTO_MAX_ALG_NAME);
			
 
				-
			
 
				-	inst->alg.cra_priority = alg->cra_priority + 50;
			
 
				-	inst->alg.cra_blocksize = alg->cra_blocksize;
			
 
				-	inst->alg.cra_alignmask = alg->cra_alignmask;
			
 
				-
			
 
				-out:
			
 
				-	return p;
			
 
				-
			
 
				-out_free_inst:
			
 
				-	kfree(p);
			
 
				-	p = ERR_PTR(err);
			
 
				-	goto out;
			
 
				-}
			
 
				-
			
 
				-static inline bool mcryptd_check_internal(struct rtattr **tb, u32 *type,
			
 
				-					  u32 *mask)
			
 
				-{
			
 
				-	struct crypto_attr_type *algt;
			
 
				-
			
 
				-	algt = crypto_get_attr_type(tb);
			
 
				-	if (IS_ERR(algt))
			
 
				-		return false;
			
 
				-
			
 
				-	*type |= algt->type & CRYPTO_ALG_INTERNAL;
			
 
				-	*mask |= algt->mask & CRYPTO_ALG_INTERNAL;
			
 
				-
			
 
				-	if (*type & *mask & CRYPTO_ALG_INTERNAL)
			
 
				-		return true;
			
 
				-	else
			
 
				-		return false;
			
 
				-}
			
 
				-
			
 
				-static int mcryptd_hash_init_tfm(struct crypto_tfm *tfm)
			
 
				-{
			
 
				-	struct crypto_instance *inst = crypto_tfm_alg_instance(tfm);
			
 
				-	struct hashd_instance_ctx *ictx = crypto_instance_ctx(inst);
			
 
				-	struct crypto_ahash_spawn *spawn = &ictx->spawn;
			
 
				-	struct mcryptd_hash_ctx *ctx = crypto_tfm_ctx(tfm);
			
 
				-	struct crypto_ahash *hash;
			
 
				-
			
 
				-	hash = crypto_spawn_ahash(spawn);
			
 
				-	if (IS_ERR(hash))
			
 
				-		return PTR_ERR(hash);
			
 
				-
			
 
				-	ctx->child = hash;
			
 
				-	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
			
 
				-				 sizeof(struct mcryptd_hash_request_ctx) +
			
 
				-				 crypto_ahash_reqsize(hash));
			
 
				-	return 0;
			
 
				-}
			
 
				-
			
 
				-static void mcryptd_hash_exit_tfm(struct crypto_tfm *tfm)
			
 
				-{
			
 
				-	struct mcryptd_hash_ctx *ctx = crypto_tfm_ctx(tfm);
			
 
				-
			
 
				-	crypto_free_ahash(ctx->child);
			
 
				-}
			
 
				-
			
 
				-static int mcryptd_hash_setkey(struct crypto_ahash *parent,
			
 
				-				   const u8 *key, unsigned int keylen)
			
 
				-{
			
 
				-	struct mcryptd_hash_ctx *ctx   = crypto_ahash_ctx(parent);
			
 
				-	struct crypto_ahash *child = ctx->child;
			
 
				-	int err;
			
 
				-
			
 
				-	crypto_ahash_clear_flags(child, CRYPTO_TFM_REQ_MASK);
			
 
				-	crypto_ahash_set_flags(child, crypto_ahash_get_flags(parent) &
			
 
				-				      CRYPTO_TFM_REQ_MASK);
			
 
				-	err = crypto_ahash_setkey(child, key, keylen);
			
 
				-	crypto_ahash_set_flags(parent, crypto_ahash_get_flags(child) &
			
 
				-				       CRYPTO_TFM_RES_MASK);
			
 
				-	return err;
			
 
				-}
			
 
				-
			
 
				-static int mcryptd_hash_enqueue(struct ahash_request *req,
			
 
				-				crypto_completion_t complete)
			
 
				-{
			
 
				-	int ret;
			
 
				-
			
 
				-	struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
			
 
				-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
			
 
				-	struct mcryptd_queue *queue =
			
 
				-		mcryptd_get_queue(crypto_ahash_tfm(tfm));
			
 
				-
			
 
				-	rctx->complete = req->base.complete;
			
 
				-	req->base.complete = complete;
			
 
				-
			
 
				-	ret = mcryptd_enqueue_request(queue, &req->base, rctx);
			
 
				-
			
 
				-	return ret;
			
 
				-}
			
 
				-
			
 
				-static void mcryptd_hash_init(struct crypto_async_request *req_async, int err)
			
 
				-{
			
 
				-	struct mcryptd_hash_ctx *ctx = crypto_tfm_ctx(req_async->tfm);
			
 
				-	struct crypto_ahash *child = ctx->child;
			
 
				-	struct ahash_request *req = ahash_request_cast(req_async);
			
 
				-	struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
			
 
				-	struct ahash_request *desc = &rctx->areq;
			
 
				-
			
 
				-	if (unlikely(err == -EINPROGRESS))
			
 
				-		goto out;
			
 
				-
			
 
				-	ahash_request_set_tfm(desc, child);
			
 
				-	ahash_request_set_callback(desc, CRYPTO_TFM_REQ_MAY_SLEEP,
			
 
				-						rctx->complete, req_async);
			
 
				-
			
 
				-	rctx->out = req->result;
			
 
				-	err = crypto_ahash_init(desc);
			
 
				-
			
 
				-out:
			
 
				-	local_bh_disable();
			
 
				-	rctx->complete(&req->base, err);
			
 
				-	local_bh_enable();
			
 
				-}
			
 
				-
			
 
				-static int mcryptd_hash_init_enqueue(struct ahash_request *req)
			
 
				-{
			
 
				-	return mcryptd_hash_enqueue(req, mcryptd_hash_init);
			
 
				-}
			
 
				-
			
 
				-static void mcryptd_hash_update(struct crypto_async_request *req_async, int err)
			
 
				-{
			
 
				-	struct ahash_request *req = ahash_request_cast(req_async);
			
 
				-	struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
			
 
				-
			
 
				-	if (unlikely(err == -EINPROGRESS))
			
 
				-		goto out;
			
 
				-
			
 
				-	rctx->out = req->result;
			
 
				-	err = crypto_ahash_update(&rctx->areq);
			
 
				-	if (err) {
			
 
				-		req->base.complete = rctx->complete;
			
 
				-		goto out;
			
 
				-	}
			
 
				-
			
 
				-	return;
			
 
				-out:
			
 
				-	local_bh_disable();
			
 
				-	rctx->complete(&req->base, err);
			
 
				-	local_bh_enable();
			
 
				-}
			
 
				-
			
 
				-static int mcryptd_hash_update_enqueue(struct ahash_request *req)
			
 
				-{
			
 
				-	return mcryptd_hash_enqueue(req, mcryptd_hash_update);
			
 
				-}
			
 
				-
			
 
				-static void mcryptd_hash_final(struct crypto_async_request *req_async, int err)
			
 
				-{
			
 
				-	struct ahash_request *req = ahash_request_cast(req_async);
			
 
				-	struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
			
 
				-
			
 
				-	if (unlikely(err == -EINPROGRESS))
			
 
				-		goto out;
			
 
				-
			
 
				-	rctx->out = req->result;
			
 
				-	err = crypto_ahash_final(&rctx->areq);
			
 
				-	if (err) {
			
 
				-		req->base.complete = rctx->complete;
			
 
				-		goto out;
			
 
				-	}
			
 
				-
			
 
				-	return;
			
 
				-out:
			
 
				-	local_bh_disable();
			
 
				-	rctx->complete(&req->base, err);
			
 
				-	local_bh_enable();
			
 
				-}
			
 
				-
			
 
				-static int mcryptd_hash_final_enqueue(struct ahash_request *req)
			
 
				-{
			
 
				-	return mcryptd_hash_enqueue(req, mcryptd_hash_final);
			
 
				-}
			
 
				-
			
 
				-static void mcryptd_hash_finup(struct crypto_async_request *req_async, int err)
			
 
				-{
			
 
				-	struct ahash_request *req = ahash_request_cast(req_async);
			
 
				-	struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
			
 
				-
			
 
				-	if (unlikely(err == -EINPROGRESS))
			
 
				-		goto out;
			
 
				-	rctx->out = req->result;
			
 
				-	err = crypto_ahash_finup(&rctx->areq);
			
 
				-
			
 
				-	if (err) {
			
 
				-		req->base.complete = rctx->complete;
			
 
				-		goto out;
			
 
				-	}
			
 
				-
			
 
				-	return;
			
 
				-out:
			
 
				-	local_bh_disable();
			
 
				-	rctx->complete(&req->base, err);
			
 
				-	local_bh_enable();
			
 
				-}
			
 
				-
			
 
				-static int mcryptd_hash_finup_enqueue(struct ahash_request *req)
			
 
				-{
			
 
				-	return mcryptd_hash_enqueue(req, mcryptd_hash_finup);
			
 
				-}
			
 
				-
			
 
				-static void mcryptd_hash_digest(struct crypto_async_request *req_async, int err)
			
 
				-{
			
 
				-	struct mcryptd_hash_ctx *ctx = crypto_tfm_ctx(req_async->tfm);
			
 
				-	struct crypto_ahash *child = ctx->child;
			
 
				-	struct ahash_request *req = ahash_request_cast(req_async);
			
 
				-	struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
			
 
				-	struct ahash_request *desc = &rctx->areq;
			
 
				-
			
 
				-	if (unlikely(err == -EINPROGRESS))
			
 
				-		goto out;
			
 
				-
			
 
				-	ahash_request_set_tfm(desc, child);
			
 
				-	ahash_request_set_callback(desc, CRYPTO_TFM_REQ_MAY_SLEEP,
			
 
				-						rctx->complete, req_async);
			
 
				-
			
 
				-	rctx->out = req->result;
			
 
				-	err = crypto_ahash_init(desc) ?: crypto_ahash_finup(desc);
			
 
				-
			
 
				-out:
			
 
				-	local_bh_disable();
			
 
				-	rctx->complete(&req->base, err);
			
 
				-	local_bh_enable();
			
 
				-}
			
 
				-
			
 
				-static int mcryptd_hash_digest_enqueue(struct ahash_request *req)
			
 
				-{
			
 
				-	return mcryptd_hash_enqueue(req, mcryptd_hash_digest);
			
 
				-}
			
 
				-
			
 
				-static int mcryptd_hash_export(struct ahash_request *req, void *out)
			
 
				-{
			
 
				-	struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
			
 
				-
			
 
				-	return crypto_ahash_export(&rctx->areq, out);
			
 
				-}
			
 
				-
			
 
				-static int mcryptd_hash_import(struct ahash_request *req, const void *in)
			
 
				-{
			
 
				-	struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
			
 
				-
			
 
				-	return crypto_ahash_import(&rctx->areq, in);
			
 
				-}
			
 
				-
			
 
				-static int mcryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb,
			
 
				-			      struct mcryptd_queue *queue)
			
 
				-{
			
 
				-	struct hashd_instance_ctx *ctx;
			
 
				-	struct ahash_instance *inst;
			
 
				-	struct hash_alg_common *halg;
			
 
				-	struct crypto_alg *alg;
			
 
				-	u32 type = 0;
			
 
				-	u32 mask = 0;
			
 
				-	int err;
			
 
				-
			
 
				-	if (!mcryptd_check_internal(tb, &type, &mask))
			
 
				-		return -EINVAL;
			
 
				-
			
 
				-	halg = ahash_attr_alg(tb[1], type, mask);
			
 
				-	if (IS_ERR(halg))
			
 
				-		return PTR_ERR(halg);
			
 
				-
			
 
				-	alg = &halg->base;
			
 
				-	pr_debug("crypto: mcryptd hash alg: %s\n", alg->cra_name);
			
 
				-	inst = mcryptd_alloc_instance(alg, ahash_instance_headroom(),
			
 
				-					sizeof(*ctx));
			
 
				-	err = PTR_ERR(inst);
			
 
				-	if (IS_ERR(inst))
			
 
				-		goto out_put_alg;
			
 
				-
			
 
				-	ctx = ahash_instance_ctx(inst);
			
 
				-	ctx->queue = queue;
			
 
				-
			
 
				-	err = crypto_init_ahash_spawn(&ctx->spawn, halg,
			
 
				-				      ahash_crypto_instance(inst));
			
 
				-	if (err)
			
 
				-		goto out_free_inst;
			
 
				-
			
 
				-	inst->alg.halg.base.cra_flags = CRYPTO_ALG_ASYNC |
			
 
				-		(alg->cra_flags & (CRYPTO_ALG_INTERNAL |
			
 
				-				   CRYPTO_ALG_OPTIONAL_KEY));
			
 
				-
			
 
				-	inst->alg.halg.digestsize = halg->digestsize;
			
 
				-	inst->alg.halg.statesize = halg->statesize;
			
 
				-	inst->alg.halg.base.cra_ctxsize = sizeof(struct mcryptd_hash_ctx);
			
 
				-
			
 
				-	inst->alg.halg.base.cra_init = mcryptd_hash_init_tfm;
			
 
				-	inst->alg.halg.base.cra_exit = mcryptd_hash_exit_tfm;
			
 
				-
			
 
				-	inst->alg.init   = mcryptd_hash_init_enqueue;
			
 
				-	inst->alg.update = mcryptd_hash_update_enqueue;
			
 
				-	inst->alg.final  = mcryptd_hash_final_enqueue;
			
 
				-	inst->alg.finup  = mcryptd_hash_finup_enqueue;
			
 
				-	inst->alg.export = mcryptd_hash_export;
			
 
				-	inst->alg.import = mcryptd_hash_import;
			
 
				-	if (crypto_hash_alg_has_setkey(halg))
			
 
				-		inst->alg.setkey = mcryptd_hash_setkey;
			
 
				-	inst->alg.digest = mcryptd_hash_digest_enqueue;
			
 
				-
			
 
				-	err = ahash_register_instance(tmpl, inst);
			
 
				-	if (err) {
			
 
				-		crypto_drop_ahash(&ctx->spawn);
			
 
				-out_free_inst:
			
 
				-		kfree(inst);
			
 
				-	}
			
 
				-
			
 
				-out_put_alg:
			
 
				-	crypto_mod_put(alg);
			
 
				-	return err;
			
 
				-}
			
 
				-
			
 
				-static struct mcryptd_queue mqueue;
			
 
				-
			
 
				-static int mcryptd_create(struct crypto_template *tmpl, struct rtattr **tb)
			
 
				-{
			
 
				-	struct crypto_attr_type *algt;
			
 
				-
			
 
				-	algt = crypto_get_attr_type(tb);
			
 
				-	if (IS_ERR(algt))
			
 
				-		return PTR_ERR(algt);
			
 
				-
			
 
				-	switch (algt->type & algt->mask & CRYPTO_ALG_TYPE_MASK) {
			
 
				-	case CRYPTO_ALG_TYPE_DIGEST:
			
 
				-		return mcryptd_create_hash(tmpl, tb, &mqueue);
			
 
				-	break;
			
 
				-	}
			
 
				-
			
 
				-	return -EINVAL;
			
 
				-}
			
 
				-
			
 
				-static void mcryptd_free(struct crypto_instance *inst)
			
 
				-{
			
 
				-	struct mcryptd_instance_ctx *ctx = crypto_instance_ctx(inst);
			
 
				-	struct hashd_instance_ctx *hctx = crypto_instance_ctx(inst);
			
 
				-
			
 
				-	switch (inst->alg.cra_flags & CRYPTO_ALG_TYPE_MASK) {
			
 
				-	case CRYPTO_ALG_TYPE_AHASH:
			
 
				-		crypto_drop_ahash(&hctx->spawn);
			
 
				-		kfree(ahash_instance(inst));
			
 
				-		return;
			
 
				-	default:
			
 
				-		crypto_drop_spawn(&ctx->spawn);
			
 
				-		kfree(inst);
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-static struct crypto_template mcryptd_tmpl = {
			
 
				-	.name = "mcryptd",
			
 
				-	.create = mcryptd_create,
			
 
				-	.free = mcryptd_free,
			
 
				-	.module = THIS_MODULE,
			
 
				-};
			
 
				-
			
 
				-struct mcryptd_ahash *mcryptd_alloc_ahash(const char *alg_name,
			
 
				-					u32 type, u32 mask)
			
 
				-{
			
 
				-	char mcryptd_alg_name[CRYPTO_MAX_ALG_NAME];
			
 
				-	struct crypto_ahash *tfm;
			
 
				-
			
 
				-	if (snprintf(mcryptd_alg_name, CRYPTO_MAX_ALG_NAME,
			
 
				-		     "mcryptd(%s)", alg_name) >= CRYPTO_MAX_ALG_NAME)
			
 
				-		return ERR_PTR(-EINVAL);
			
 
				-	tfm = crypto_alloc_ahash(mcryptd_alg_name, type, mask);
			
 
				-	if (IS_ERR(tfm))
			
 
				-		return ERR_CAST(tfm);
			
 
				-	if (tfm->base.__crt_alg->cra_module != THIS_MODULE) {
			
 
				-		crypto_free_ahash(tfm);
			
 
				-		return ERR_PTR(-EINVAL);
			
 
				-	}
			
 
				-
			
 
				-	return __mcryptd_ahash_cast(tfm);
			
 
				-}
			
 
				-EXPORT_SYMBOL_GPL(mcryptd_alloc_ahash);
			
 
				-
			
 
				-struct crypto_ahash *mcryptd_ahash_child(struct mcryptd_ahash *tfm)
			
 
				-{
			
 
				-	struct mcryptd_hash_ctx *ctx = crypto_ahash_ctx(&tfm->base);
			
 
				-
			
 
				-	return ctx->child;
			
 
				-}
			
 
				-EXPORT_SYMBOL_GPL(mcryptd_ahash_child);
			
 
				-
			
 
				-struct ahash_request *mcryptd_ahash_desc(struct ahash_request *req)
			
 
				-{
			
 
				-	struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
			
 
				-	return &rctx->areq;
			
 
				-}
			
 
				-EXPORT_SYMBOL_GPL(mcryptd_ahash_desc);
			
 
				-
			
 
				-void mcryptd_free_ahash(struct mcryptd_ahash *tfm)
			
 
				-{
			
 
				-	crypto_free_ahash(&tfm->base);
			
 
				-}
			
 
				-EXPORT_SYMBOL_GPL(mcryptd_free_ahash);
			
 
				-
			
 
				-static int __init mcryptd_init(void)
			
 
				-{
			
 
				-	int err, cpu;
			
 
				-	struct mcryptd_flush_list *flist;
			
 
				-
			
 
				-	mcryptd_flist = alloc_percpu(struct mcryptd_flush_list);
			
 
				-	for_each_possible_cpu(cpu) {
			
 
				-		flist = per_cpu_ptr(mcryptd_flist, cpu);
			
 
				-		INIT_LIST_HEAD(&flist->list);
			
 
				-		mutex_init(&flist->lock);
			
 
				-	}
			
 
				-
			
 
				-	err = mcryptd_init_queue(&mqueue, MCRYPTD_MAX_CPU_QLEN);
			
 
				-	if (err) {
			
 
				-		free_percpu(mcryptd_flist);
			
 
				-		return err;
			
 
				-	}
			
 
				-
			
 
				-	err = crypto_register_template(&mcryptd_tmpl);
			
 
				-	if (err) {
			
 
				-		mcryptd_fini_queue(&mqueue);
			
 
				-		free_percpu(mcryptd_flist);
			
 
				-	}
			
 
				-
			
 
				-	return err;
			
 
				-}
			
 
				-
			
 
				-static void __exit mcryptd_exit(void)
			
 
				-{
			
 
				-	mcryptd_fini_queue(&mqueue);
			
 
				-	crypto_unregister_template(&mcryptd_tmpl);
			
 
				-	free_percpu(mcryptd_flist);
			
 
				-}
			
 
				-
			
 
				-subsys_initcall(mcryptd_init);
			
 
				-module_exit(mcryptd_exit);
			
 
				-
			
 
				-MODULE_LICENSE("GPL");
			
 
				-MODULE_DESCRIPTION("Software async multibuffer crypto daemon");
			
 
				-MODULE_ALIAS_CRYPTO("mcryptd");
			
--- a/crypto/morus1280.c
+++ b/crypto/morus1280.c
@@ -385,14 +385,11 @@ static void crypto_morus1280_final(struct morus1280_state *state,
 
				 				   struct morus1280_block *tag_xor,
			
 
				 				   u64 assoclen, u64 cryptlen)
			
 
				 {
			
 
				-	u64 assocbits = assoclen * 8;
			
 
				-	u64 cryptbits = cryptlen * 8;
			
 
				-
			
 
				 	struct morus1280_block tmp;
			
 
				 	unsigned int i;
			
 
				 
			
 
				-	tmp.words[0] = cpu_to_le64(assocbits);
			
 
				-	tmp.words[1] = cpu_to_le64(cryptbits);
			
 
				+	tmp.words[0] = assoclen * 8;
			
 
				+	tmp.words[1] = cryptlen * 8;
			
 
				 	tmp.words[2] = 0;
			
 
				 	tmp.words[3] = 0;
			
 
				 
			
--- a/crypto/morus640.c
+++ b/crypto/morus640.c
@@ -384,21 +384,13 @@ static void crypto_morus640_final(struct morus640_state *state,
 
				 				  struct morus640_block *tag_xor,
			
 
				 				  u64 assoclen, u64 cryptlen)
			
 
				 {
			
 
				-	u64 assocbits = assoclen * 8;
			
 
				-	u64 cryptbits = cryptlen * 8;
			
 
				-
			
 
				-	u32 assocbits_lo = (u32)assocbits;
			
 
				-	u32 assocbits_hi = (u32)(assocbits >> 32);
			
 
				-	u32 cryptbits_lo = (u32)cryptbits;
			
 
				-	u32 cryptbits_hi = (u32)(cryptbits >> 32);
			
 
				-
			
 
				 	struct morus640_block tmp;
			
 
				 	unsigned int i;
			
 
				 
			
 
				-	tmp.words[0] = cpu_to_le32(assocbits_lo);
			
 
				-	tmp.words[1] = cpu_to_le32(assocbits_hi);
			
 
				-	tmp.words[2] = cpu_to_le32(cryptbits_lo);
			
 
				-	tmp.words[3] = cpu_to_le32(cryptbits_hi);
			
 
				+	tmp.words[0] = lower_32_bits(assoclen * 8);
			
 
				+	tmp.words[1] = upper_32_bits(assoclen * 8);
			
 
				+	tmp.words[2] = lower_32_bits(cryptlen * 8);
			
 
				+	tmp.words[3] = upper_32_bits(cryptlen * 8);
			
 
				 
			
 
				 	for (i = 0; i < MORUS_BLOCK_WORDS; i++)
			
 
				 		state->s[4].words[i] ^= state->s[0].words[i];
			
--- a/crypto/ofb.c
+++ b/crypto/ofb.c
@@ -0,0 +1,225 @@
 
				+// SPDX-License-Identifier: GPL-2.0
			
 
				+
			
 
				+/*
			
 
				+ * OFB: Output FeedBack mode
			
 
				+ *
			
 
				+ * Copyright (C) 2018 ARM Limited or its affiliates.
			
 
				+ * All rights reserved.
			
 
				+ *
			
 
				+ * Based loosely on public domain code gleaned from libtomcrypt
			
 
				+ * (https://github.com/libtom/libtomcrypt).
			
 
				+ */
			
 
				+
			
 
				+#include <crypto/algapi.h>
			
 
				+#include <crypto/internal/skcipher.h>
			
 
				+#include <linux/err.h>
			
 
				+#include <linux/init.h>
			
 
				+#include <linux/kernel.h>
			
 
				+#include <linux/module.h>
			
 
				+#include <linux/scatterlist.h>
			
 
				+#include <linux/slab.h>
			
 
				+
			
 
				+struct crypto_ofb_ctx {
			
 
				+	struct crypto_cipher *child;
			
 
				+	int cnt;
			
 
				+};
			
 
				+
			
 
				+
			
 
				+static int crypto_ofb_setkey(struct crypto_skcipher *parent, const u8 *key,
			
 
				+			     unsigned int keylen)
			
 
				+{
			
 
				+	struct crypto_ofb_ctx *ctx = crypto_skcipher_ctx(parent);
			
 
				+	struct crypto_cipher *child = ctx->child;
			
 
				+	int err;
			
 
				+
			
 
				+	crypto_cipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
			
 
				+	crypto_cipher_set_flags(child, crypto_skcipher_get_flags(parent) &
			
 
				+				       CRYPTO_TFM_REQ_MASK);
			
 
				+	err = crypto_cipher_setkey(child, key, keylen);
			
 
				+	crypto_skcipher_set_flags(parent, crypto_cipher_get_flags(child) &
			
 
				+				  CRYPTO_TFM_RES_MASK);
			
 
				+	return err;
			
 
				+}
			
 
				+
			
 
				+static int crypto_ofb_encrypt_segment(struct crypto_ofb_ctx *ctx,
			
 
				+				      struct skcipher_walk *walk,
			
 
				+				      struct crypto_cipher *tfm)
			
 
				+{
			
 
				+	int bsize = crypto_cipher_blocksize(tfm);
			
 
				+	int nbytes = walk->nbytes;
			
 
				+
			
 
				+	u8 *src = walk->src.virt.addr;
			
 
				+	u8 *dst = walk->dst.virt.addr;
			
 
				+	u8 *iv = walk->iv;
			
 
				+
			
 
				+	do {
			
 
				+		if (ctx->cnt == bsize) {
			
 
				+			if (nbytes < bsize)
			
 
				+				break;
			
 
				+			crypto_cipher_encrypt_one(tfm, iv, iv);
			
 
				+			ctx->cnt = 0;
			
 
				+		}
			
 
				+		*dst = *src ^ iv[ctx->cnt];
			
 
				+		src++;
			
 
				+		dst++;
			
 
				+		ctx->cnt++;
			
 
				+	} while (--nbytes);
			
 
				+	return nbytes;
			
 
				+}
			
 
				+
			
 
				+static int crypto_ofb_encrypt(struct skcipher_request *req)
			
 
				+{
			
 
				+	struct skcipher_walk walk;
			
 
				+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
			
 
				+	unsigned int bsize;
			
 
				+	struct crypto_ofb_ctx *ctx = crypto_skcipher_ctx(tfm);
			
 
				+	struct crypto_cipher *child = ctx->child;
			
 
				+	int ret = 0;
			
 
				+
			
 
				+	bsize =  crypto_cipher_blocksize(child);
			
 
				+	ctx->cnt = bsize;
			
 
				+
			
 
				+	ret = skcipher_walk_virt(&walk, req, false);
			
 
				+
			
 
				+	while (walk.nbytes) {
			
 
				+		ret = crypto_ofb_encrypt_segment(ctx, &walk, child);
			
 
				+		ret = skcipher_walk_done(&walk, ret);
			
 
				+	}
			
 
				+
			
 
				+	return ret;
			
 
				+}
			
 
				+
			
 
				+/* OFB encrypt and decrypt are identical */
			
 
				+static int crypto_ofb_decrypt(struct skcipher_request *req)
			
 
				+{
			
 
				+	return crypto_ofb_encrypt(req);
			
 
				+}
			
 
				+
			
 
				+static int crypto_ofb_init_tfm(struct crypto_skcipher *tfm)
			
 
				+{
			
 
				+	struct skcipher_instance *inst = skcipher_alg_instance(tfm);
			
 
				+	struct crypto_spawn *spawn = skcipher_instance_ctx(inst);
			
 
				+	struct crypto_ofb_ctx *ctx = crypto_skcipher_ctx(tfm);
			
 
				+	struct crypto_cipher *cipher;
			
 
				+
			
 
				+	cipher = crypto_spawn_cipher(spawn);
			
 
				+	if (IS_ERR(cipher))
			
 
				+		return PTR_ERR(cipher);
			
 
				+
			
 
				+	ctx->child = cipher;
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+static void crypto_ofb_exit_tfm(struct crypto_skcipher *tfm)
			
 
				+{
			
 
				+	struct crypto_ofb_ctx *ctx = crypto_skcipher_ctx(tfm);
			
 
				+
			
 
				+	crypto_free_cipher(ctx->child);
			
 
				+}
			
 
				+
			
 
				+static void crypto_ofb_free(struct skcipher_instance *inst)
			
 
				+{
			
 
				+	crypto_drop_skcipher(skcipher_instance_ctx(inst));
			
 
				+	kfree(inst);
			
 
				+}
			
 
				+
			
 
				+static int crypto_ofb_create(struct crypto_template *tmpl, struct rtattr **tb)
			
 
				+{
			
 
				+	struct skcipher_instance *inst;
			
 
				+	struct crypto_attr_type *algt;
			
 
				+	struct crypto_spawn *spawn;
			
 
				+	struct crypto_alg *alg;
			
 
				+	u32 mask;
			
 
				+	int err;
			
 
				+
			
 
				+	err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_SKCIPHER);
			
 
				+	if (err)
			
 
				+		return err;
			
 
				+
			
 
				+	inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
			
 
				+	if (!inst)
			
 
				+		return -ENOMEM;
			
 
				+
			
 
				+	algt = crypto_get_attr_type(tb);
			
 
				+	err = PTR_ERR(algt);
			
 
				+	if (IS_ERR(algt))
			
 
				+		goto err_free_inst;
			
 
				+
			
 
				+	mask = CRYPTO_ALG_TYPE_MASK |
			
 
				+		crypto_requires_off(algt->type, algt->mask,
			
 
				+				    CRYPTO_ALG_NEED_FALLBACK);
			
 
				+
			
 
				+	alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_CIPHER, mask);
			
 
				+	err = PTR_ERR(alg);
			
 
				+	if (IS_ERR(alg))
			
 
				+		goto err_free_inst;
			
 
				+
			
 
				+	spawn = skcipher_instance_ctx(inst);
			
 
				+	err = crypto_init_spawn(spawn, alg, skcipher_crypto_instance(inst),
			
 
				+				CRYPTO_ALG_TYPE_MASK);
			
 
				+	crypto_mod_put(alg);
			
 
				+	if (err)
			
 
				+		goto err_free_inst;
			
 
				+
			
 
				+	err = crypto_inst_setname(skcipher_crypto_instance(inst), "ofb", alg);
			
 
				+	if (err)
			
 
				+		goto err_drop_spawn;
			
 
				+
			
 
				+	inst->alg.base.cra_priority = alg->cra_priority;
			
 
				+	inst->alg.base.cra_blocksize = alg->cra_blocksize;
			
 
				+	inst->alg.base.cra_alignmask = alg->cra_alignmask;
			
 
				+
			
 
				+	/* We access the data as u32s when xoring. */
			
 
				+	inst->alg.base.cra_alignmask |= __alignof__(u32) - 1;
			
 
				+
			
 
				+	inst->alg.ivsize = alg->cra_blocksize;
			
 
				+	inst->alg.min_keysize = alg->cra_cipher.cia_min_keysize;
			
 
				+	inst->alg.max_keysize = alg->cra_cipher.cia_max_keysize;
			
 
				+
			
 
				+	inst->alg.base.cra_ctxsize = sizeof(struct crypto_ofb_ctx);
			
 
				+
			
 
				+	inst->alg.init = crypto_ofb_init_tfm;
			
 
				+	inst->alg.exit = crypto_ofb_exit_tfm;
			
 
				+
			
 
				+	inst->alg.setkey = crypto_ofb_setkey;
			
 
				+	inst->alg.encrypt = crypto_ofb_encrypt;
			
 
				+	inst->alg.decrypt = crypto_ofb_decrypt;
			
 
				+
			
 
				+	inst->free = crypto_ofb_free;
			
 
				+
			
 
				+	err = skcipher_register_instance(tmpl, inst);
			
 
				+	if (err)
			
 
				+		goto err_drop_spawn;
			
 
				+
			
 
				+out:
			
 
				+	return err;
			
 
				+
			
 
				+err_drop_spawn:
			
 
				+	crypto_drop_spawn(spawn);
			
 
				+err_free_inst:
			
 
				+	kfree(inst);
			
 
				+	goto out;
			
 
				+}
			
 
				+
			
 
				+static struct crypto_template crypto_ofb_tmpl = {
			
 
				+	.name = "ofb",
			
 
				+	.create = crypto_ofb_create,
			
 
				+	.module = THIS_MODULE,
			
 
				+};
			
 
				+
			
 
				+static int __init crypto_ofb_module_init(void)
			
 
				+{
			
 
				+	return crypto_register_template(&crypto_ofb_tmpl);
			
 
				+}
			
 
				+
			
 
				+static void __exit crypto_ofb_module_exit(void)
			
 
				+{
			
 
				+	crypto_unregister_template(&crypto_ofb_tmpl);
			
 
				+}
			
 
				+
			
 
				+module_init(crypto_ofb_module_init);
			
 
				+module_exit(crypto_ofb_module_exit);
			
 
				+
			
 
				+MODULE_LICENSE("GPL");
			
 
				+MODULE_DESCRIPTION("OFB block cipher algorithm");
			
 
				+MODULE_ALIAS_CRYPTO("ofb");
			
--- a/crypto/rng.c
+++ b/crypto/rng.c
@@ -50,6 +50,7 @@ int crypto_rng_reset(struct crypto_rng *tfm, const u8 *seed, unsigned int slen)
 
				 	}
			
 
				 
			
 
				 	err = crypto_rng_alg(tfm)->seed(tfm, seed, slen);
			
 
				+	crypto_stat_rng_seed(tfm, err);
			
 
				 out:
			
 
				 	kzfree(buf);
			
 
				 	return err;
			
--- a/crypto/rsa-pkcs1pad.c
+++ b/crypto/rsa-pkcs1pad.c
@@ -261,15 +261,6 @@ static int pkcs1pad_encrypt(struct akcipher_request *req)
 
				 	pkcs1pad_sg_set_buf(req_ctx->in_sg, req_ctx->in_buf,
			
 
				 			ctx->key_size - 1 - req->src_len, req->src);
			
 
				 
			
 
				-	req_ctx->out_buf = kmalloc(ctx->key_size, GFP_KERNEL);
			
 
				-	if (!req_ctx->out_buf) {
			
 
				-		kfree(req_ctx->in_buf);
			
 
				-		return -ENOMEM;
			
 
				-	}
			
 
				-
			
 
				-	pkcs1pad_sg_set_buf(req_ctx->out_sg, req_ctx->out_buf,
			
 
				-			ctx->key_size, NULL);
			
 
				-
			
 
				 	akcipher_request_set_tfm(&req_ctx->child_req, ctx->child);
			
 
				 	akcipher_request_set_callback(&req_ctx->child_req, req->base.flags,
			
 
				 			pkcs1pad_encrypt_sign_complete_cb, req);
			
--- a/crypto/seqiv.c
+++ b/crypto/seqiv.c
@@ -73,9 +73,9 @@ static int seqiv_aead_encrypt(struct aead_request *req)
 
				 	info = req->iv;
			
 
				 
			
 
				 	if (req->src != req->dst) {
			
 
				-		SKCIPHER_REQUEST_ON_STACK(nreq, ctx->sknull);
			
 
				+		SYNC_SKCIPHER_REQUEST_ON_STACK(nreq, ctx->sknull);
			
 
				 
			
 
				-		skcipher_request_set_tfm(nreq, ctx->sknull);
			
 
				+		skcipher_request_set_sync_tfm(nreq, ctx->sknull);
			
 
				 		skcipher_request_set_callback(nreq, req->base.flags,
			
 
				 					      NULL, NULL);
			
 
				 		skcipher_request_set_crypt(nreq, req->src, req->dst,
			
--- a/crypto/shash.c
+++ b/crypto/shash.c
@@ -73,13 +73,6 @@ int crypto_shash_setkey(struct crypto_shash *tfm, const u8 *key,
 
				 }
			
 
				 EXPORT_SYMBOL_GPL(crypto_shash_setkey);
			
 
				 
			
 
				-static inline unsigned int shash_align_buffer_size(unsigned len,
			
 
				-						   unsigned long mask)
			
 
				-{
			
 
				-	typedef u8 __aligned_largest u8_aligned;
			
 
				-	return len + (mask & ~(__alignof__(u8_aligned) - 1));
			
 
				-}
			
 
				-
			
 
				 static int shash_update_unaligned(struct shash_desc *desc, const u8 *data,
			
 
				 				  unsigned int len)
			
 
				 {
			
@@ -88,11 +81,17 @@ static int shash_update_unaligned(struct shash_desc *desc, const u8 *data,
 
				 	unsigned long alignmask = crypto_shash_alignmask(tfm);
			
 
				 	unsigned int unaligned_len = alignmask + 1 -
			
 
				 				     ((unsigned long)data & alignmask);
			
 
				-	u8 ubuf[shash_align_buffer_size(unaligned_len, alignmask)]
			
 
				-		__aligned_largest;
			
 
				+	/*
			
 
				+	 * We cannot count on __aligned() working for large values:
			
 
				+	 * https://patchwork.kernel.org/patch/9507697/
			
 
				+	 */
			
 
				+	u8 ubuf[MAX_ALGAPI_ALIGNMASK * 2];
			
 
				 	u8 *buf = PTR_ALIGN(&ubuf[0], alignmask + 1);
			
 
				 	int err;
			
 
				 
			
 
				+	if (WARN_ON(buf + unaligned_len > ubuf + sizeof(ubuf)))
			
 
				+		return -EINVAL;
			
 
				+
			
 
				 	if (unaligned_len > len)
			
 
				 		unaligned_len = len;
			
 
				 
			
@@ -124,11 +123,17 @@ static int shash_final_unaligned(struct shash_desc *desc, u8 *out)
 
				 	unsigned long alignmask = crypto_shash_alignmask(tfm);
			
 
				 	struct shash_alg *shash = crypto_shash_alg(tfm);
			
 
				 	unsigned int ds = crypto_shash_digestsize(tfm);
			
 
				-	u8 ubuf[shash_align_buffer_size(ds, alignmask)]
			
 
				-		__aligned_largest;
			
 
				+	/*
			
 
				+	 * We cannot count on __aligned() working for large values:
			
 
				+	 * https://patchwork.kernel.org/patch/9507697/
			
 
				+	 */
			
 
				+	u8 ubuf[MAX_ALGAPI_ALIGNMASK + HASH_MAX_DIGESTSIZE];
			
 
				 	u8 *buf = PTR_ALIGN(&ubuf[0], alignmask + 1);
			
 
				 	int err;
			
 
				 
			
 
				+	if (WARN_ON(buf + ds > ubuf + sizeof(ubuf)))
			
 
				+		return -EINVAL;
			
 
				+
			
 
				 	err = shash->final(desc, buf);
			
 
				 	if (err)
			
 
				 		goto out;
			
@@ -458,9 +463,9 @@ static int shash_prepare_alg(struct shash_alg *alg)
 
				 {
			
 
				 	struct crypto_alg *base = &alg->base;
			
 
				 
			
 
				-	if (alg->digestsize > PAGE_SIZE / 8 ||
			
 
				-	    alg->descsize > PAGE_SIZE / 8 ||
			
 
				-	    alg->statesize > PAGE_SIZE / 8)
			
 
				+	if (alg->digestsize > HASH_MAX_DIGESTSIZE ||
			
 
				+	    alg->descsize > HASH_MAX_DESCSIZE ||
			
 
				+	    alg->statesize > HASH_MAX_STATESIZE)
			
 
				 		return -EINVAL;
			
 
				 
			
 
				 	base->cra_type = &crypto_shash_type;
			
--- a/crypto/skcipher.c
+++ b/crypto/skcipher.c
@@ -949,6 +949,30 @@ struct crypto_skcipher *crypto_alloc_skcipher(const char *alg_name,
 
				 }
			
 
				 EXPORT_SYMBOL_GPL(crypto_alloc_skcipher);
			
 
				 
			
 
				+struct crypto_sync_skcipher *crypto_alloc_sync_skcipher(
			
 
				+				const char *alg_name, u32 type, u32 mask)
			
 
				+{
			
 
				+	struct crypto_skcipher *tfm;
			
 
				+
			
 
				+	/* Only sync algorithms allowed. */
			
 
				+	mask |= CRYPTO_ALG_ASYNC;
			
 
				+
			
 
				+	tfm = crypto_alloc_tfm(alg_name, &crypto_skcipher_type2, type, mask);
			
 
				+
			
 
				+	/*
			
 
				+	 * Make sure we do not allocate something that might get used with
			
 
				+	 * an on-stack request: check the request size.
			
 
				+	 */
			
 
				+	if (!IS_ERR(tfm) && WARN_ON(crypto_skcipher_reqsize(tfm) >
			
 
				+				    MAX_SYNC_SKCIPHER_REQSIZE)) {
			
 
				+		crypto_free_skcipher(tfm);
			
 
				+		return ERR_PTR(-EINVAL);
			
 
				+	}
			
 
				+
			
 
				+	return (struct crypto_sync_skcipher *)tfm;
			
 
				+}
			
 
				+EXPORT_SYMBOL_GPL(crypto_alloc_sync_skcipher);
			
 
				+
			
 
				 int crypto_has_skcipher2(const char *alg_name, u32 type, u32 mask)
			
 
				 {
			
 
				 	return crypto_type_has_alg(alg_name, &crypto_skcipher_type2,
			
--- a/crypto/speck.c
+++ b/crypto/speck.c
@@ -1,307 +0,0 @@
 
				-// SPDX-License-Identifier: GPL-2.0
			
 
				-/*
			
 
				- * Speck: a lightweight block cipher
			
 
				- *
			
 
				- * Copyright (c) 2018 Google, Inc
			
 
				- *
			
 
				- * Speck has 10 variants, including 5 block sizes.  For now we only implement
			
 
				- * the variants Speck128/128, Speck128/192, Speck128/256, Speck64/96, and
			
 
				- * Speck64/128.   Speck${B}/${K} denotes the variant with a block size of B bits
			
 
				- * and a key size of K bits.  The Speck128 variants are believed to be the most
			
 
				- * secure variants, and they use the same block size and key sizes as AES.  The
			
 
				- * Speck64 variants are less secure, but on 32-bit processors are usually
			
 
				- * faster.  The remaining variants (Speck32, Speck48, and Speck96) are even less
			
 
				- * secure and/or not as well suited for implementation on either 32-bit or
			
 
				- * 64-bit processors, so are omitted.
			
 
				- *
			
 
				- * Reference: "The Simon and Speck Families of Lightweight Block Ciphers"
			
 
				- * https://eprint.iacr.org/2013/404.pdf
			
 
				- *
			
 
				- * In a correspondence, the Speck designers have also clarified that the words
			
 
				- * should be interpreted in little-endian format, and the words should be
			
 
				- * ordered such that the first word of each block is 'y' rather than 'x', and
			
 
				- * the first key word (rather than the last) becomes the first round key.
			
 
				- */
			
 
				-
			
 
				-#include <asm/unaligned.h>
			
 
				-#include <crypto/speck.h>
			
 
				-#include <linux/bitops.h>
			
 
				-#include <linux/crypto.h>
			
 
				-#include <linux/init.h>
			
 
				-#include <linux/module.h>
			
 
				-
			
 
				-/* Speck128 */
			
 
				-
			
 
				-static __always_inline void speck128_round(u64 *x, u64 *y, u64 k)
			
 
				-{
			
 
				-	*x = ror64(*x, 8);
			
 
				-	*x += *y;
			
 
				-	*x ^= k;
			
 
				-	*y = rol64(*y, 3);
			
 
				-	*y ^= *x;
			
 
				-}
			
 
				-
			
 
				-static __always_inline void speck128_unround(u64 *x, u64 *y, u64 k)
			
 
				-{
			
 
				-	*y ^= *x;
			
 
				-	*y = ror64(*y, 3);
			
 
				-	*x ^= k;
			
 
				-	*x -= *y;
			
 
				-	*x = rol64(*x, 8);
			
 
				-}
			
 
				-
			
 
				-void crypto_speck128_encrypt(const struct speck128_tfm_ctx *ctx,
			
 
				-			     u8 *out, const u8 *in)
			
 
				-{
			
 
				-	u64 y = get_unaligned_le64(in);
			
 
				-	u64 x = get_unaligned_le64(in + 8);
			
 
				-	int i;
			
 
				-
			
 
				-	for (i = 0; i < ctx->nrounds; i++)
			
 
				-		speck128_round(&x, &y, ctx->round_keys[i]);
			
 
				-
			
 
				-	put_unaligned_le64(y, out);
			
 
				-	put_unaligned_le64(x, out + 8);
			
 
				-}
			
 
				-EXPORT_SYMBOL_GPL(crypto_speck128_encrypt);
			
 
				-
			
 
				-static void speck128_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
			
 
				-{
			
 
				-	crypto_speck128_encrypt(crypto_tfm_ctx(tfm), out, in);
			
 
				-}
			
 
				-
			
 
				-void crypto_speck128_decrypt(const struct speck128_tfm_ctx *ctx,
			
 
				-			     u8 *out, const u8 *in)
			
 
				-{
			
 
				-	u64 y = get_unaligned_le64(in);
			
 
				-	u64 x = get_unaligned_le64(in + 8);
			
 
				-	int i;
			
 
				-
			
 
				-	for (i = ctx->nrounds - 1; i >= 0; i--)
			
 
				-		speck128_unround(&x, &y, ctx->round_keys[i]);
			
 
				-
			
 
				-	put_unaligned_le64(y, out);
			
 
				-	put_unaligned_le64(x, out + 8);
			
 
				-}
			
 
				-EXPORT_SYMBOL_GPL(crypto_speck128_decrypt);
			
 
				-
			
 
				-static void speck128_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
			
 
				-{
			
 
				-	crypto_speck128_decrypt(crypto_tfm_ctx(tfm), out, in);
			
 
				-}
			
 
				-
			
 
				-int crypto_speck128_setkey(struct speck128_tfm_ctx *ctx, const u8 *key,
			
 
				-			   unsigned int keylen)
			
 
				-{
			
 
				-	u64 l[3];
			
 
				-	u64 k;
			
 
				-	int i;
			
 
				-
			
 
				-	switch (keylen) {
			
 
				-	case SPECK128_128_KEY_SIZE:
			
 
				-		k = get_unaligned_le64(key);
			
 
				-		l[0] = get_unaligned_le64(key + 8);
			
 
				-		ctx->nrounds = SPECK128_128_NROUNDS;
			
 
				-		for (i = 0; i < ctx->nrounds; i++) {
			
 
				-			ctx->round_keys[i] = k;
			
 
				-			speck128_round(&l[0], &k, i);
			
 
				-		}
			
 
				-		break;
			
 
				-	case SPECK128_192_KEY_SIZE:
			
 
				-		k = get_unaligned_le64(key);
			
 
				-		l[0] = get_unaligned_le64(key + 8);
			
 
				-		l[1] = get_unaligned_le64(key + 16);
			
 
				-		ctx->nrounds = SPECK128_192_NROUNDS;
			
 
				-		for (i = 0; i < ctx->nrounds; i++) {
			
 
				-			ctx->round_keys[i] = k;
			
 
				-			speck128_round(&l[i % 2], &k, i);
			
 
				-		}
			
 
				-		break;
			
 
				-	case SPECK128_256_KEY_SIZE:
			
 
				-		k = get_unaligned_le64(key);
			
 
				-		l[0] = get_unaligned_le64(key + 8);
			
 
				-		l[1] = get_unaligned_le64(key + 16);
			
 
				-		l[2] = get_unaligned_le64(key + 24);
			
 
				-		ctx->nrounds = SPECK128_256_NROUNDS;
			
 
				-		for (i = 0; i < ctx->nrounds; i++) {
			
 
				-			ctx->round_keys[i] = k;
			
 
				-			speck128_round(&l[i % 3], &k, i);
			
 
				-		}
			
 
				-		break;
			
 
				-	default:
			
 
				-		return -EINVAL;
			
 
				-	}
			
 
				-
			
 
				-	return 0;
			
 
				-}
			
 
				-EXPORT_SYMBOL_GPL(crypto_speck128_setkey);
			
 
				-
			
 
				-static int speck128_setkey(struct crypto_tfm *tfm, const u8 *key,
			
 
				-			   unsigned int keylen)
			
 
				-{
			
 
				-	return crypto_speck128_setkey(crypto_tfm_ctx(tfm), key, keylen);
			
 
				-}
			
 
				-
			
 
				-/* Speck64 */
			
 
				-
			
 
				-static __always_inline void speck64_round(u32 *x, u32 *y, u32 k)
			
 
				-{
			
 
				-	*x = ror32(*x, 8);
			
 
				-	*x += *y;
			
 
				-	*x ^= k;
			
 
				-	*y = rol32(*y, 3);
			
 
				-	*y ^= *x;
			
 
				-}
			
 
				-
			
 
				-static __always_inline void speck64_unround(u32 *x, u32 *y, u32 k)
			
 
				-{
			
 
				-	*y ^= *x;
			
 
				-	*y = ror32(*y, 3);
			
 
				-	*x ^= k;
			
 
				-	*x -= *y;
			
 
				-	*x = rol32(*x, 8);
			
 
				-}
			
 
				-
			
 
				-void crypto_speck64_encrypt(const struct speck64_tfm_ctx *ctx,
			
 
				-			    u8 *out, const u8 *in)
			
 
				-{
			
 
				-	u32 y = get_unaligned_le32(in);
			
 
				-	u32 x = get_unaligned_le32(in + 4);
			
 
				-	int i;
			
 
				-
			
 
				-	for (i = 0; i < ctx->nrounds; i++)
			
 
				-		speck64_round(&x, &y, ctx->round_keys[i]);
			
 
				-
			
 
				-	put_unaligned_le32(y, out);
			
 
				-	put_unaligned_le32(x, out + 4);
			
 
				-}
			
 
				-EXPORT_SYMBOL_GPL(crypto_speck64_encrypt);
			
 
				-
			
 
				-static void speck64_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
			
 
				-{
			
 
				-	crypto_speck64_encrypt(crypto_tfm_ctx(tfm), out, in);
			
 
				-}
			
 
				-
			
 
				-void crypto_speck64_decrypt(const struct speck64_tfm_ctx *ctx,
			
 
				-			    u8 *out, const u8 *in)
			
 
				-{
			
 
				-	u32 y = get_unaligned_le32(in);
			
 
				-	u32 x = get_unaligned_le32(in + 4);
			
 
				-	int i;
			
 
				-
			
 
				-	for (i = ctx->nrounds - 1; i >= 0; i--)
			
 
				-		speck64_unround(&x, &y, ctx->round_keys[i]);
			
 
				-
			
 
				-	put_unaligned_le32(y, out);
			
 
				-	put_unaligned_le32(x, out + 4);
			
 
				-}
			
 
				-EXPORT_SYMBOL_GPL(crypto_speck64_decrypt);
			
 
				-
			
 
				-static void speck64_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
			
 
				-{
			
 
				-	crypto_speck64_decrypt(crypto_tfm_ctx(tfm), out, in);
			
 
				-}
			
 
				-
			
 
				-int crypto_speck64_setkey(struct speck64_tfm_ctx *ctx, const u8 *key,
			
 
				-			  unsigned int keylen)
			
 
				-{
			
 
				-	u32 l[3];
			
 
				-	u32 k;
			
 
				-	int i;
			
 
				-
			
 
				-	switch (keylen) {
			
 
				-	case SPECK64_96_KEY_SIZE:
			
 
				-		k = get_unaligned_le32(key);
			
 
				-		l[0] = get_unaligned_le32(key + 4);
			
 
				-		l[1] = get_unaligned_le32(key + 8);
			
 
				-		ctx->nrounds = SPECK64_96_NROUNDS;
			
 
				-		for (i = 0; i < ctx->nrounds; i++) {
			
 
				-			ctx->round_keys[i] = k;
			
 
				-			speck64_round(&l[i % 2], &k, i);
			
 
				-		}
			
 
				-		break;
			
 
				-	case SPECK64_128_KEY_SIZE:
			
 
				-		k = get_unaligned_le32(key);
			
 
				-		l[0] = get_unaligned_le32(key + 4);
			
 
				-		l[1] = get_unaligned_le32(key + 8);
			
 
				-		l[2] = get_unaligned_le32(key + 12);
			
 
				-		ctx->nrounds = SPECK64_128_NROUNDS;
			
 
				-		for (i = 0; i < ctx->nrounds; i++) {
			
 
				-			ctx->round_keys[i] = k;
			
 
				-			speck64_round(&l[i % 3], &k, i);
			
 
				-		}
			
 
				-		break;
			
 
				-	default:
			
 
				-		return -EINVAL;
			
 
				-	}
			
 
				-
			
 
				-	return 0;
			
 
				-}
			
 
				-EXPORT_SYMBOL_GPL(crypto_speck64_setkey);
			
 
				-
			
 
				-static int speck64_setkey(struct crypto_tfm *tfm, const u8 *key,
			
 
				-			  unsigned int keylen)
			
 
				-{
			
 
				-	return crypto_speck64_setkey(crypto_tfm_ctx(tfm), key, keylen);
			
 
				-}
			
 
				-
			
 
				-/* Algorithm definitions */
			
 
				-
			
 
				-static struct crypto_alg speck_algs[] = {
			
 
				-	{
			
 
				-		.cra_name		= "speck128",
			
 
				-		.cra_driver_name	= "speck128-generic",
			
 
				-		.cra_priority		= 100,
			
 
				-		.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
			
 
				-		.cra_blocksize		= SPECK128_BLOCK_SIZE,
			
 
				-		.cra_ctxsize		= sizeof(struct speck128_tfm_ctx),
			
 
				-		.cra_module		= THIS_MODULE,
			
 
				-		.cra_u			= {
			
 
				-			.cipher = {
			
 
				-				.cia_min_keysize	= SPECK128_128_KEY_SIZE,
			
 
				-				.cia_max_keysize	= SPECK128_256_KEY_SIZE,
			
 
				-				.cia_setkey		= speck128_setkey,
			
 
				-				.cia_encrypt		= speck128_encrypt,
			
 
				-				.cia_decrypt		= speck128_decrypt
			
 
				-			}
			
 
				-		}
			
 
				-	}, {
			
 
				-		.cra_name		= "speck64",
			
 
				-		.cra_driver_name	= "speck64-generic",
			
 
				-		.cra_priority		= 100,
			
 
				-		.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
			
 
				-		.cra_blocksize		= SPECK64_BLOCK_SIZE,
			
 
				-		.cra_ctxsize		= sizeof(struct speck64_tfm_ctx),
			
 
				-		.cra_module		= THIS_MODULE,
			
 
				-		.cra_u			= {
			
 
				-			.cipher = {
			
 
				-				.cia_min_keysize	= SPECK64_96_KEY_SIZE,
			
 
				-				.cia_max_keysize	= SPECK64_128_KEY_SIZE,
			
 
				-				.cia_setkey		= speck64_setkey,
			
 
				-				.cia_encrypt		= speck64_encrypt,
			
 
				-				.cia_decrypt		= speck64_decrypt
			
 
				-			}
			
 
				-		}
			
 
				-	}
			
 
				-};
			
 
				-
			
 
				-static int __init speck_module_init(void)
			
 
				-{
			
 
				-	return crypto_register_algs(speck_algs, ARRAY_SIZE(speck_algs));
			
 
				-}
			
 
				-
			
 
				-static void __exit speck_module_exit(void)
			
 
				-{
			
 
				-	crypto_unregister_algs(speck_algs, ARRAY_SIZE(speck_algs));
			
 
				-}
			
 
				-
			
 
				-module_init(speck_module_init);
			
 
				-module_exit(speck_module_exit);
			
 
				-
			
 
				-MODULE_DESCRIPTION("Speck block cipher (generic)");
			
 
				-MODULE_LICENSE("GPL");
			
 
				-MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>");
			
 
				-MODULE_ALIAS_CRYPTO("speck128");
			
 
				-MODULE_ALIAS_CRYPTO("speck128-generic");
			
 
				-MODULE_ALIAS_CRYPTO("speck64");
			
 
				-MODULE_ALIAS_CRYPTO("speck64-generic");
			
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -76,8 +76,7 @@ static char *check[] = {
 
				 	"cast6", "arc4", "michael_mic", "deflate", "crc32c", "tea", "xtea",
			
 
				 	"khazad", "wp512", "wp384", "wp256", "tnepres", "xeta",  "fcrypt",
			
 
				 	"camellia", "seed", "salsa20", "rmd128", "rmd160", "rmd256", "rmd320",
			
 
				-	"lzo", "cts", "zlib", "sha3-224", "sha3-256", "sha3-384", "sha3-512",
			
 
				-	NULL
			
 
				+	"lzo", "cts", "sha3-224", "sha3-256", "sha3-384", "sha3-512", NULL
			
 
				 };
			
 
				 
			
 
				 static u32 block_sizes[] = { 16, 64, 256, 1024, 8192, 0 };
			
@@ -1103,6 +1102,9 @@ static void test_ahash_speed_common(const char *algo, unsigned int secs,
 
				 			break;
			
 
				 		}
			
 
				 
			
 
				+		if (speed[i].klen)
			
 
				+			crypto_ahash_setkey(tfm, tvmem[0], speed[i].klen);
			
 
				+
			
 
				 		pr_info("test%3u "
			
 
				 			"(%5u byte blocks,%5u bytes per update,%4u updates): ",
			
 
				 			i, speed[i].blen, speed[i].plen, speed[i].blen / speed[i].plen);
			
@@ -1733,6 +1735,7 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
 
				 		ret += tcrypt_test("xts(aes)");
			
 
				 		ret += tcrypt_test("ctr(aes)");
			
 
				 		ret += tcrypt_test("rfc3686(ctr(aes))");
			
 
				+		ret += tcrypt_test("ofb(aes)");
			
 
				 		break;
			
 
				 
			
 
				 	case 11:
			
@@ -1878,10 +1881,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
 
				 		ret += tcrypt_test("ecb(seed)");
			
 
				 		break;
			
 
				 
			
 
				-	case 44:
			
 
				-		ret += tcrypt_test("zlib");
			
 
				-		break;
			
 
				-
			
 
				 	case 45:
			
 
				 		ret += tcrypt_test("rfc4309(ccm(aes))");
			
 
				 		break;
			
@@ -2033,6 +2032,8 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
 
				 		break;
			
 
				 	case 191:
			
 
				 		ret += tcrypt_test("ecb(sm4)");
			
 
				+		ret += tcrypt_test("cbc(sm4)");
			
 
				+		ret += tcrypt_test("ctr(sm4)");
			
 
				 		break;
			
 
				 	case 200:
			
 
				 		test_cipher_speed("ecb(aes)", ENCRYPT, sec, NULL, 0,
			
@@ -2282,6 +2283,20 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
 
				 				   num_mb);
			
 
				 		break;
			
 
				 
			
 
				+	case 218:
			
 
				+		test_cipher_speed("ecb(sm4)", ENCRYPT, sec, NULL, 0,
			
 
				+				speed_template_16);
			
 
				+		test_cipher_speed("ecb(sm4)", DECRYPT, sec, NULL, 0,
			
 
				+				speed_template_16);
			
 
				+		test_cipher_speed("cbc(sm4)", ENCRYPT, sec, NULL, 0,
			
 
				+				speed_template_16);
			
 
				+		test_cipher_speed("cbc(sm4)", DECRYPT, sec, NULL, 0,
			
 
				+				speed_template_16);
			
 
				+		test_cipher_speed("ctr(sm4)", ENCRYPT, sec, NULL, 0,
			
 
				+				speed_template_16);
			
 
				+		test_cipher_speed("ctr(sm4)", DECRYPT, sec, NULL, 0,
			
 
				+				speed_template_16);
			
 
				+		break;
			
 
				 	case 300:
			
 
				 		if (alg) {
			
 
				 			test_hash_speed(alg, sec, generic_hash_speed_template);