
Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6

Pull crypto updates from Herbert Xu:
 "Here is the crypto update for 4.10:

  API:
   - add skcipher walk interface
   - add asynchronous compression (acomp) interface
   - fix algif_aead AIO handling of zero buffer

  Algorithms:
   - fix unaligned access in poly1305
   - fix DRBG output to large buffers

  Drivers:
   - add support for iMX6UL to caam
   - fix givenc descriptors (used by IPsec) in caam
   - accelerated SHA256/SHA512 for ARM64 from OpenSSL
   - add SSE CRCT10DIF and CRC32 to ARM/ARM64
   - add AEAD support to Chelsio chcr
   - add Armada 8K support to omap-rng"
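
For context on the new acomp interface listed above, here is a minimal caller-side sketch. It uses the names from include/crypto/acompress.h as merged in this series; error paths are trimmed, and a synchronous software backend ("deflate") is assumed, as is the case for the scomp-backed algorithms:

	#include <crypto/acompress.h>
	#include <linux/err.h>
	#include <linux/scatterlist.h>

	/* Compress slen bytes from src_buf into dst_buf (capacity dlen).
	 * Sketch only: assumes the backend completes synchronously. */
	static int acomp_compress_example(const void *src_buf, unsigned int slen,
					  void *dst_buf, unsigned int dlen)
	{
		struct scatterlist src, dst;
		struct crypto_acomp *tfm;
		struct acomp_req *req;
		int err;

		tfm = crypto_alloc_acomp("deflate", 0, 0);
		if (IS_ERR(tfm))
			return PTR_ERR(tfm);

		req = acomp_request_alloc(tfm);
		if (!req) {
			crypto_free_acomp(tfm);
			return -ENOMEM;
		}

		sg_init_one(&src, src_buf, slen);
		sg_init_one(&dst, dst_buf, dlen);
		acomp_request_set_params(req, &src, &dst, slen, dlen);

		err = crypto_acomp_compress(req);  /* 0, -EINPROGRESS or error */

		acomp_request_free(req);
		crypto_free_acomp(tfm);
		return err;
	}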

* 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (148 commits)
  crypto: testmgr - fix overlap in chunked tests again
  crypto: arm/crc32 - accelerated support based on x86 SSE implementation
  crypto: arm64/crc32 - accelerated support based on x86 SSE implementation
  crypto: arm/crct10dif - port x86 SSE implementation to ARM
  crypto: arm64/crct10dif - port x86 SSE implementation to arm64
  crypto: testmgr - add/enhance test cases for CRC-T10DIF
  crypto: testmgr - avoid overlap in chunked tests
  crypto: chcr - checking for IS_ERR() instead of NULL
  crypto: caam - check caam_emi_slow instead of re-lookup platform
  crypto: algif_aead - fix AIO handling of zero buffer
  crypto: aes-ce - Make aes_simd_algs static
  crypto: algif_skcipher - set error code when kcalloc fails
  crypto: caam - make aamalg_desc a proper module
  crypto: caam - pass key buffers with typesafe pointers
  crypto: arm64/aes-ce-ccm - Fix AEAD decryption length
  MAINTAINERS: add crypto headers to crypto entry
  crypt: doc - remove misleading mention of async API
  crypto: doc - fix header file name
  crypto: api - fix comment typo
  crypto: skcipher - Add separate walker for AEAD decryption
  ...
Linus Torvalds
commit 0f1d6dfe03
100 changed files with 11024 additions and 2398 deletions
  1. Documentation/crypto/api-intro.txt (+1 -4)
  2. Documentation/devicetree/bindings/crypto/fsl-sec4.txt (+20 -0)
  3. Documentation/devicetree/bindings/rng/omap_rng.txt (+12 -2)
  4. Documentation/devicetree/bindings/vendor-prefixes.txt (+1 -0)
  5. MAINTAINERS (+9 -0)
  6. arch/arm/crypto/Kconfig (+14 -4)
  7. arch/arm/crypto/Makefile (+4 -0)
  8. arch/arm/crypto/aes-ce-glue.c (+162 -233)
  9. arch/arm/crypto/aesbs-glue.c (+152 -228)
  10. arch/arm/crypto/crc32-ce-core.S (+306 -0)
  11. arch/arm/crypto/crc32-ce-glue.c (+242 -0)
  12. arch/arm/crypto/crct10dif-ce-core.S (+427 -0)
  13. arch/arm/crypto/crct10dif-ce-glue.c (+101 -0)
  14. arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi (+8 -0)
  15. arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi (+8 -0)
  16. arch/arm64/crypto/.gitignore (+2 -0)
  17. arch/arm64/crypto/Kconfig (+21 -2)
  18. arch/arm64/crypto/Makefile (+23 -0)
  19. arch/arm64/crypto/aes-ce-ccm-core.S (+27 -26)
  20. arch/arm64/crypto/aes-ce-ccm-glue.c (+13 -37)
  21. arch/arm64/crypto/aes-ce-cipher.c (+15 -10)
  22. arch/arm64/crypto/aes-ce.S (+1 -0)
  23. arch/arm64/crypto/aes-glue.c (+158 -223)
  24. arch/arm64/crypto/aes-modes.S (+2 -1)
  25. arch/arm64/crypto/aes-neon.S (+15 -10)
  26. arch/arm64/crypto/crc32-ce-core.S (+266 -0)
  27. arch/arm64/crypto/crc32-ce-glue.c (+212 -0)
  28. arch/arm64/crypto/crct10dif-ce-core.S (+392 -0)
  29. arch/arm64/crypto/crct10dif-ce-glue.c (+95 -0)
  30. arch/arm64/crypto/ghash-ce-core.S (+3 -3)
  31. arch/arm64/crypto/sha1-ce-core.S (+2 -2)
  32. arch/arm64/crypto/sha2-ce-core.S (+2 -2)
  33. arch/arm64/crypto/sha256-core.S_shipped (+2061 -0)
  34. arch/arm64/crypto/sha256-glue.c (+185 -0)
  35. arch/arm64/crypto/sha512-armv8.pl (+778 -0)
  36. arch/arm64/crypto/sha512-core.S_shipped (+1085 -0)
  37. arch/arm64/crypto/sha512-glue.c (+94 -0)
  38. arch/powerpc/crypto/Makefile (+1 -1)
  39. arch/x86/crypto/aesni-intel_glue.c (+218 -487)
  40. arch/x86/crypto/fpu.c (+125 -82)
  41. arch/x86/crypto/glue_helper.c (+73 -1)
  42. arch/x86/crypto/sha1-mb/sha1_mb.c (+1 -1)
  43. arch/x86/crypto/sha1-mb/sha1_mb_ctx.h (+1 -1)
  44. arch/x86/crypto/sha256-mb/sha256_mb.c (+1 -1)
  45. arch/x86/crypto/sha256-mb/sha256_mb_ctx.h (+1 -1)
  46. arch/x86/crypto/sha512-mb/sha512_mb.c (+1 -1)
  47. arch/x86/crypto/sha512-mb/sha512_mb_ctx.h (+1 -1)
  48. arch/x86/include/asm/crypto/glue_helper.h (+38 -1)
  49. crypto/842.c (+79 -2)
  50. crypto/Kconfig (+25 -7)
  51. crypto/Makefile (+6 -0)
  52. crypto/acompress.c (+169 -0)
  53. crypto/algboss.c (+2 -6)
  54. crypto/algif_aead.c (+3 -2)
  55. crypto/algif_skcipher.c (+3 -1)
  56. crypto/api.c (+3 -19)
  57. crypto/authenc.c (+4 -4)
  58. crypto/authencesn.c (+4 -4)
  59. crypto/cbc.c (+89 -180)
  60. crypto/ccm.c (+4 -4)
  61. crypto/chacha20poly1305.c (+4 -4)
  62. crypto/cipher.c (+0 -4)
  63. crypto/cmac.c (+10 -4)
  64. crypto/compress.c (+0 -4)
  65. crypto/cryptd.c (+282 -4)
  66. crypto/crypto_engine.c (+11 -15)
  67. crypto/crypto_user.c (+19 -0)
  68. crypto/ctr.c (+4 -4)
  69. crypto/cts.c (+4 -4)
  70. crypto/deflate.c (+101 -10)
  71. crypto/dh.c (+1 -1)
  72. crypto/drbg.c (+1 -0)
  73. crypto/gcm.c (+5 -5)
  74. crypto/gf128mul.c (+2 -57)
  75. crypto/internal.h (+0 -3)
  76. crypto/jitterentropy-kcapi.c (+0 -1)
  77. crypto/lrw.c (+380 -127)
  78. crypto/lz4.c (+81 -10)
  79. crypto/lz4hc.c (+82 -10)
  80. crypto/lzo.c (+82 -15)
  81. crypto/pcbc.c (+109 -92)
  82. crypto/poly1305_generic.c (+15 -19)
  83. crypto/scompress.c (+356 -0)
  84. crypto/simd.c (+226 -0)
  85. crypto/skcipher.c (+540 -0)
  86. crypto/testmgr.c (+160 -158)
  87. crypto/testmgr.h (+42 -28)
  88. crypto/xts.c (+392 -155)
  89. drivers/char/hw_random/Kconfig (+1 -1)
  90. drivers/char/hw_random/atmel-rng.c (+22 -4)
  91. drivers/char/hw_random/core.c (+3 -0)
  92. drivers/char/hw_random/meson-rng.c (+1 -1)
  93. drivers/char/hw_random/msm-rng.c (+0 -4)
  94. drivers/char/hw_random/omap-rng.c (+122 -40)
  95. drivers/char/hw_random/pic32-rng.c (+0 -3)
  96. drivers/char/hw_random/pseries-rng.c (+2 -3)
  97. drivers/crypto/amcc/crypto4xx_core.c (+1 -2)
  98. drivers/crypto/atmel-aes-regs.h (+4 -0)
  99. drivers/crypto/atmel-aes.c (+181 -8)
  100. drivers/crypto/caam/Kconfig (+7 -4)

+ 1 - 4
Documentation/crypto/api-intro.txt

@@ -44,12 +44,9 @@ one block while the former can operate on an arbitrary amount of data,
 subject to block size requirements (i.e., non-stream ciphers can only
 process multiples of blocks).
 
-Support for hardware crypto devices via an asynchronous interface is
-under development.
-
 Here's an example of how to use the API:
 
-	#include <crypto/ahash.h>
+	#include <crypto/hash.h>
 	#include <linux/err.h>
 	#include <linux/scatterlist.h>
 	
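
For reference, a complete one-shot digest with the shash API behind <crypto/hash.h> looks roughly like the sketch below. It is not part of the patch; it assumes the tfm was obtained beforehand with crypto_alloc_shash("sha256", 0, 0):

	#include <crypto/hash.h>

	/* Sketch: one-shot digest with an already-allocated shash tfm. */
	static int sha256_digest_example(struct crypto_shash *tfm,
					 const u8 *data, unsigned int len,
					 u8 *out)
	{
		SHASH_DESC_ON_STACK(desc, tfm);

		desc->tfm = tfm;
		desc->flags = 0;
		return crypto_shash_digest(desc, data, len, out);
	}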

+ 20 - 0
Documentation/devicetree/bindings/crypto/fsl-sec4.txt

@@ -123,6 +123,9 @@ PROPERTIES
 
 
 EXAMPLE
+
+iMX6QDL/SX requires four clocks
+
 	crypto@300000 {
 		compatible = "fsl,sec-v4.0";
 		fsl,sec-era = <2>;
@@ -139,6 +142,23 @@ EXAMPLE
 		clock-names = "mem", "aclk", "ipg", "emi_slow";
 	};
 
+
+iMX6UL does only require three clocks
+
+	crypto: caam@2140000 {
+		compatible = "fsl,sec-v4.0";
+		#address-cells = <1>;
+		#size-cells = <1>;
+		reg = <0x2140000 0x3c000>;
+		ranges = <0 0x2140000 0x3c000>;
+		interrupts = <GIC_SPI 48 IRQ_TYPE_LEVEL_HIGH>;
+
+		clocks = <&clks IMX6UL_CLK_CAAM_MEM>,
+			 <&clks IMX6UL_CLK_CAAM_ACLK>,
+			 <&clks IMX6UL_CLK_CAAM_IPG>;
+		clock-names = "mem", "aclk", "ipg";
+	};
+
 =====================================================================
 Job Ring (JR) Node
 

+ 12 - 2
Documentation/devicetree/bindings/rng/omap_rng.txt

@@ -1,4 +1,4 @@
-OMAP SoC HWRNG Module
+OMAP SoC and Inside-Secure HWRNG Module
 
 Required properties:
 
@@ -6,11 +6,13 @@ Required properties:
   RNG versions:
   - "ti,omap2-rng" for OMAP2.
   - "ti,omap4-rng" for OMAP4, OMAP5 and AM33XX.
+  - "inside-secure,safexcel-eip76" for SoCs with EIP76 IP block
   Note that these two versions are incompatible.
 - ti,hwmods: Name of the hwmod associated with the RNG module
 - reg : Offset and length of the register set for the module
 - interrupts : the interrupt number for the RNG module.
-		Only used for "ti,omap4-rng".
+		Used for "ti,omap4-rng" and "inside-secure,safexcel-eip76"
+- clocks: the trng clock source
 
 Example:
 /* AM335x */
@@ -20,3 +22,11 @@ rng: rng@48310000 {
 	reg = <0x48310000 0x2000>;
 	interrupts = <111>;
 };
+
+/* SafeXcel IP-76 */
+trng: rng@f2760000 {
+	compatible = "inside-secure,safexcel-eip76";
+	reg = <0xf2760000 0x7d>;
+	interrupts = <GIC_SPI 59 IRQ_TYPE_LEVEL_HIGH>;
+	clocks = <&cpm_syscon0 1 25>;
+};
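
On the driver side, a required "clocks" property like the one above is typically consumed with the generic clk API. The fragment below is a sketch with a hypothetical function name, not code taken from the omap-rng patch itself:

	#include <linux/clk.h>
	#include <linux/err.h>
	#include <linux/platform_device.h>

	/* Hypothetical probe fragment: get and enable the TRNG clock.
	 * The clock is required only for the EIP76 variant, so a missing
	 * clock is not treated as fatal here. */
	static int trng_enable_clk(struct platform_device *pdev,
				   struct clk **clk)
	{
		*clk = devm_clk_get(&pdev->dev, NULL);
		if (IS_ERR(*clk)) {
			if (PTR_ERR(*clk) == -EPROBE_DEFER)
				return -EPROBE_DEFER;
			*clk = NULL;	/* optional on the OMAP variants */
			return 0;
		}

		return clk_prepare_enable(*clk);
	}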

+ 1 - 0
Documentation/devicetree/bindings/vendor-prefixes.txt

@@ -137,6 +137,7 @@ infineon Infineon Technologies
 inforce	Inforce Computing
 ingenic	Ingenic Semiconductor
 innolux	Innolux Corporation
+inside-secure	INSIDE Secure
 intel	Intel Corporation
 intercontrol	Inter Control Group
 invensense	InvenSense Inc.

+ 9 - 0
MAINTAINERS

@@ -3470,6 +3470,7 @@ F:	arch/*/crypto/
 F:	crypto/
 F:	drivers/crypto/
 F:	include/crypto/
+F:	include/linux/crypto*
 
 CRYPTOGRAPHIC RANDOM NUMBER GENERATOR
 M:	Neil Horman <nhorman@tuxdriver.com>
@@ -5086,6 +5087,14 @@ F:	include/linux/fb.h
 F:	include/uapi/video/
 F:	include/uapi/linux/fb.h
 
+FREESCALE CAAM (Cryptographic Acceleration and Assurance Module) DRIVER
+M:	Horia Geantă <horia.geanta@nxp.com>
+M:	Dan Douglass <dan.douglass@nxp.com>
+L:	linux-crypto@vger.kernel.org
+S:	Maintained
+F:	drivers/crypto/caam/
+F:	Documentation/devicetree/bindings/crypto/fsl-sec4.txt
+
 FREESCALE DIU FRAMEBUFFER DRIVER
 M:	Timur Tabi <timur@tabi.org>
 L:	linux-fbdev@vger.kernel.org

+ 14 - 4
arch/arm/crypto/Kconfig

@@ -88,9 +88,9 @@ config CRYPTO_AES_ARM
 config CRYPTO_AES_ARM_BS
 	tristate "Bit sliced AES using NEON instructions"
 	depends on KERNEL_MODE_NEON
-	select CRYPTO_ALGAPI
 	select CRYPTO_AES_ARM
-	select CRYPTO_ABLK_HELPER
+	select CRYPTO_BLKCIPHER
+	select CRYPTO_SIMD
 	help
 	  Use a faster and more secure NEON based implementation of AES in CBC,
 	  CTR and XTS modes
@@ -104,8 +104,8 @@ config CRYPTO_AES_ARM_BS
 config CRYPTO_AES_ARM_CE
 	tristate "Accelerated AES using ARMv8 Crypto Extensions"
 	depends on KERNEL_MODE_NEON
-	select CRYPTO_ALGAPI
-	select CRYPTO_ABLK_HELPER
+	select CRYPTO_BLKCIPHER
+	select CRYPTO_SIMD
 	help
 	  Use an implementation of AES in CBC, CTR and XTS modes that uses
 	  ARMv8 Crypto Extensions
@@ -120,4 +120,14 @@ config CRYPTO_GHASH_ARM_CE
 	  that uses the 64x64 to 128 bit polynomial multiplication (vmull.p64)
 	  that is part of the ARMv8 Crypto Extensions
 
+config CRYPTO_CRCT10DIF_ARM_CE
+	tristate "CRCT10DIF digest algorithm using PMULL instructions"
+	depends on KERNEL_MODE_NEON && CRC_T10DIF
+	select CRYPTO_HASH
+
+config CRYPTO_CRC32_ARM_CE
+	tristate "CRC32(C) digest algorithm using CRC and/or PMULL instructions"
+	depends on KERNEL_MODE_NEON && CRC32
+	select CRYPTO_HASH
+
 endif

+ 4 - 0
arch/arm/crypto/Makefile

@@ -13,6 +13,8 @@ ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o
 ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o
 ce-obj-$(CONFIG_CRYPTO_SHA2_ARM_CE) += sha2-arm-ce.o
 ce-obj-$(CONFIG_CRYPTO_GHASH_ARM_CE) += ghash-arm-ce.o
+ce-obj-$(CONFIG_CRYPTO_CRCT10DIF_ARM_CE) += crct10dif-arm-ce.o
+ce-obj-$(CONFIG_CRYPTO_CRC32_ARM_CE) += crc32-arm-ce.o
 
 ifneq ($(ce-obj-y)$(ce-obj-m),)
 ifeq ($(call as-instr,.fpu crypto-neon-fp-armv8,y,n),y)
@@ -36,6 +38,8 @@ sha1-arm-ce-y	:= sha1-ce-core.o sha1-ce-glue.o
 sha2-arm-ce-y	:= sha2-ce-core.o sha2-ce-glue.o
 aes-arm-ce-y	:= aes-ce-core.o aes-ce-glue.o
 ghash-arm-ce-y	:= ghash-ce-core.o ghash-ce-glue.o
+crct10dif-arm-ce-y	:= crct10dif-ce-core.o crct10dif-ce-glue.o
+crc32-arm-ce-y:= crc32-ce-core.o crc32-ce-glue.o
 
 quiet_cmd_perl = PERL    $@
       cmd_perl = $(PERL) $(<) > $(@)

+ 162 - 233
arch/arm/crypto/aes-ce-glue.c

@@ -12,8 +12,8 @@
 #include <asm/neon.h>
 #include <asm/hwcap.h>
 #include <crypto/aes.h>
-#include <crypto/ablk_helper.h>
-#include <crypto/algapi.h>
+#include <crypto/internal/simd.h>
+#include <crypto/internal/skcipher.h>
 #include <linux/module.h>
 #include <crypto/xts.h>
 
@@ -88,8 +88,13 @@ static int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
 		u32 *rki = ctx->key_enc + (i * kwords);
 		u32 *rko = rki + kwords;
 
+#ifndef CONFIG_CPU_BIG_ENDIAN
 		rko[0] = ror32(ce_aes_sub(rki[kwords - 1]), 8);
 		rko[0] = rko[0] ^ rki[0] ^ rcon[i];
+#else
+		rko[0] = rol32(ce_aes_sub(rki[kwords - 1]), 8);
+		rko[0] = rko[0] ^ rki[0] ^ (rcon[i] << 24);
+#endif
 		rko[1] = rko[0] ^ rki[1];
 		rko[2] = rko[1] ^ rki[2];
 		rko[3] = rko[2] ^ rki[3];
@@ -128,17 +133,17 @@ static int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
 	return 0;
 }
 
-static int ce_aes_setkey(struct crypto_tfm *tfm, const u8 *in_key,
+static int ce_aes_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
 			 unsigned int key_len)
 {
-	struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
 	int ret;
 
 	ret = ce_aes_expandkey(ctx, in_key, key_len);
 	if (!ret)
 		return 0;
 
-	tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+	crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
 	return -EINVAL;
 }
 
@@ -147,13 +152,13 @@ struct crypto_aes_xts_ctx {
 	struct crypto_aes_ctx __aligned(8) key2;
 };
 
-static int xts_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+static int xts_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
 		       unsigned int key_len)
 {
-	struct crypto_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 	int ret;
 
-	ret = xts_check_key(tfm, in_key, key_len);
+	ret = xts_verify_key(tfm, in_key, key_len);
 	if (ret)
 		return ret;
 
@@ -164,130 +169,113 @@ static int xts_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 	if (!ret)
 		return 0;
 
-	tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+	crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
 	return -EINVAL;
 }
 
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
 {
-	struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
 	unsigned int blocks;
 	int err;
 
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	err = skcipher_walk_virt(&walk, req, true);
 
 	kernel_neon_begin();
 	while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
 		ce_aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
 				   (u8 *)ctx->key_enc, num_rounds(ctx), blocks);
-		err = blkcipher_walk_done(desc, &walk,
-					  walk.nbytes % AES_BLOCK_SIZE);
+		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
 	kernel_neon_end();
 	return err;
 }
 
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
 {
-	struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
 	unsigned int blocks;
 	int err;
 
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	err = skcipher_walk_virt(&walk, req, true);
 
 	kernel_neon_begin();
 	while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
 		ce_aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
 				   (u8 *)ctx->key_dec, num_rounds(ctx), blocks);
-		err = blkcipher_walk_done(desc, &walk,
-					  walk.nbytes % AES_BLOCK_SIZE);
+		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
 	kernel_neon_end();
 	return err;
 }
 
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int cbc_encrypt(struct skcipher_request *req)
 {
-	struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
 	unsigned int blocks;
 	int err;
 
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	err = skcipher_walk_virt(&walk, req, true);
 
 	kernel_neon_begin();
 	while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
 		ce_aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
 				   (u8 *)ctx->key_enc, num_rounds(ctx), blocks,
 				   walk.iv);
-		err = blkcipher_walk_done(desc, &walk,
-					  walk.nbytes % AES_BLOCK_SIZE);
+		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
 	kernel_neon_end();
 	return err;
 }
 
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int cbc_decrypt(struct skcipher_request *req)
 {
-	struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
 	unsigned int blocks;
 	int err;
 
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	err = skcipher_walk_virt(&walk, req, true);
 
 	kernel_neon_begin();
 	while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
 		ce_aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
 				   (u8 *)ctx->key_dec, num_rounds(ctx), blocks,
 				   walk.iv);
-		err = blkcipher_walk_done(desc, &walk,
-					  walk.nbytes % AES_BLOCK_SIZE);
+		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
 	kernel_neon_end();
 	return err;
 }
 
-static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ctr_encrypt(struct skcipher_request *req)
 {
-	struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
 	int err, blocks;
 
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
+	err = skcipher_walk_virt(&walk, req, true);
 
 	kernel_neon_begin();
 	while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
 		ce_aes_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
 				   (u8 *)ctx->key_enc, num_rounds(ctx), blocks,
 				   walk.iv);
-		nbytes -= blocks * AES_BLOCK_SIZE;
-		if (nbytes && nbytes == walk.nbytes % AES_BLOCK_SIZE)
-			break;
-		err = blkcipher_walk_done(desc, &walk,
-					  walk.nbytes % AES_BLOCK_SIZE);
+		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
-	if (walk.nbytes % AES_BLOCK_SIZE) {
-		u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
-		u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
+	if (walk.nbytes) {
 		u8 __aligned(8) tail[AES_BLOCK_SIZE];
+		unsigned int nbytes = walk.nbytes;
+		u8 *tdst = walk.dst.virt.addr;
+		u8 *tsrc = walk.src.virt.addr;
 
 		/*
 		 * Minimum alignment is 8 bytes, so if nbytes is <= 8, we need
@@ -298,231 +286,172 @@ static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
 		ce_aes_ctr_encrypt(tail, tsrc, (u8 *)ctx->key_enc,
 				   num_rounds(ctx), blocks, walk.iv);
 		memcpy(tdst, tail, nbytes);
-		err = blkcipher_walk_done(desc, &walk, 0);
+		err = skcipher_walk_done(&walk, 0);
 	}
 	kernel_neon_end();
 
 	return err;
 }
 
-static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int xts_encrypt(struct skcipher_request *req)
 {
-	struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 	int err, first, rounds = num_rounds(&ctx->key1);
-	struct blkcipher_walk walk;
+	struct skcipher_walk walk;
 	unsigned int blocks;
 
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	err = skcipher_walk_virt(&walk, req, true);
 
 	kernel_neon_begin();
 	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
 		ce_aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
 				   (u8 *)ctx->key1.key_enc, rounds, blocks,
 				   walk.iv, (u8 *)ctx->key2.key_enc, first);
-		err = blkcipher_walk_done(desc, &walk,
-					  walk.nbytes % AES_BLOCK_SIZE);
+		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
 	kernel_neon_end();
 
 	return err;
 }
 
-static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int xts_decrypt(struct skcipher_request *req)
 {
-	struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 	int err, first, rounds = num_rounds(&ctx->key1);
-	struct blkcipher_walk walk;
+	struct skcipher_walk walk;
 	unsigned int blocks;
 
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	err = skcipher_walk_virt(&walk, req, true);
 
 	kernel_neon_begin();
 	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
 		ce_aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
 				   (u8 *)ctx->key1.key_dec, rounds, blocks,
 				   walk.iv, (u8 *)ctx->key2.key_enc, first);
-		err = blkcipher_walk_done(desc, &walk,
-					  walk.nbytes % AES_BLOCK_SIZE);
+		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
 	kernel_neon_end();
 
 	return err;
 }
 
-static struct crypto_alg aes_algs[] = { {
-	.cra_name		= "__ecb-aes-ce",
-	.cra_driver_name	= "__driver-ecb-aes-ce",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
-	.cra_alignmask		= 7,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_blkcipher = {
-		.min_keysize	= AES_MIN_KEY_SIZE,
-		.max_keysize	= AES_MAX_KEY_SIZE,
-		.ivsize		= 0,
-		.setkey		= ce_aes_setkey,
-		.encrypt	= ecb_encrypt,
-		.decrypt	= ecb_decrypt,
+static struct skcipher_alg aes_algs[] = { {
+	.base = {
+		.cra_name		= "__ecb(aes)",
+		.cra_driver_name	= "__ecb-aes-ce",
+		.cra_priority		= 300,
+		.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.cra_blocksize		= AES_BLOCK_SIZE,
+		.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
+		.cra_alignmask		= 7,
+		.cra_module		= THIS_MODULE,
 	},
+	.min_keysize	= AES_MIN_KEY_SIZE,
+	.max_keysize	= AES_MAX_KEY_SIZE,
+	.setkey		= ce_aes_setkey,
+	.encrypt	= ecb_encrypt,
+	.decrypt	= ecb_decrypt,
 }, {
-	.cra_name		= "__cbc-aes-ce",
-	.cra_driver_name	= "__driver-cbc-aes-ce",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
-	.cra_alignmask		= 7,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_blkcipher = {
-		.min_keysize	= AES_MIN_KEY_SIZE,
-		.max_keysize	= AES_MAX_KEY_SIZE,
-		.ivsize		= AES_BLOCK_SIZE,
-		.setkey		= ce_aes_setkey,
-		.encrypt	= cbc_encrypt,
-		.decrypt	= cbc_decrypt,
+	.base = {
+		.cra_name		= "__cbc(aes)",
+		.cra_driver_name	= "__cbc-aes-ce",
+		.cra_priority		= 300,
+		.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.cra_blocksize		= AES_BLOCK_SIZE,
+		.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
+		.cra_alignmask		= 7,
+		.cra_module		= THIS_MODULE,
 	},
+	.min_keysize	= AES_MIN_KEY_SIZE,
+	.max_keysize	= AES_MAX_KEY_SIZE,
+	.ivsize		= AES_BLOCK_SIZE,
+	.setkey		= ce_aes_setkey,
+	.encrypt	= cbc_encrypt,
+	.decrypt	= cbc_decrypt,
 }, {
-	.cra_name		= "__ctr-aes-ce",
-	.cra_driver_name	= "__driver-ctr-aes-ce",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
-	.cra_alignmask		= 7,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_blkcipher = {
-		.min_keysize	= AES_MIN_KEY_SIZE,
-		.max_keysize	= AES_MAX_KEY_SIZE,
-		.ivsize		= AES_BLOCK_SIZE,
-		.setkey		= ce_aes_setkey,
-		.encrypt	= ctr_encrypt,
-		.decrypt	= ctr_encrypt,
+	.base = {
+		.cra_name		= "__ctr(aes)",
+		.cra_driver_name	= "__ctr-aes-ce",
+		.cra_priority		= 300,
+		.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.cra_blocksize		= 1,
+		.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
+		.cra_alignmask		= 7,
+		.cra_module		= THIS_MODULE,
 	},
+	.min_keysize	= AES_MIN_KEY_SIZE,
+	.max_keysize	= AES_MAX_KEY_SIZE,
+	.ivsize		= AES_BLOCK_SIZE,
+	.chunksize	= AES_BLOCK_SIZE,
+	.setkey		= ce_aes_setkey,
+	.encrypt	= ctr_encrypt,
+	.decrypt	= ctr_encrypt,
 }, {
-	.cra_name		= "__xts-aes-ce",
-	.cra_driver_name	= "__driver-xts-aes-ce",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct crypto_aes_xts_ctx),
-	.cra_alignmask		= 7,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_blkcipher = {
-		.min_keysize	= 2 * AES_MIN_KEY_SIZE,
-		.max_keysize	= 2 * AES_MAX_KEY_SIZE,
-		.ivsize		= AES_BLOCK_SIZE,
-		.setkey		= xts_set_key,
-		.encrypt	= xts_encrypt,
-		.decrypt	= xts_decrypt,
+	.base = {
+		.cra_name		= "__xts(aes)",
+		.cra_driver_name	= "__xts-aes-ce",
+		.cra_priority		= 300,
+		.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.cra_blocksize		= AES_BLOCK_SIZE,
+		.cra_ctxsize		= sizeof(struct crypto_aes_xts_ctx),
+		.cra_alignmask		= 7,
+		.cra_module		= THIS_MODULE,
 	},
-}, {
-	.cra_name		= "ecb(aes)",
-	.cra_driver_name	= "ecb-aes-ce",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 7,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_ablkcipher = {
-		.min_keysize	= AES_MIN_KEY_SIZE,
-		.max_keysize	= AES_MAX_KEY_SIZE,
-		.ivsize		= 0,
-		.setkey		= ablk_set_key,
-		.encrypt	= ablk_encrypt,
-		.decrypt	= ablk_decrypt,
-	}
-}, {
-	.cra_name		= "cbc(aes)",
-	.cra_driver_name	= "cbc-aes-ce",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 7,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_ablkcipher = {
-		.min_keysize	= AES_MIN_KEY_SIZE,
-		.max_keysize	= AES_MAX_KEY_SIZE,
-		.ivsize		= AES_BLOCK_SIZE,
-		.setkey		= ablk_set_key,
-		.encrypt	= ablk_encrypt,
-		.decrypt	= ablk_decrypt,
-	}
-}, {
-	.cra_name		= "ctr(aes)",
-	.cra_driver_name	= "ctr-aes-ce",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 7,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_ablkcipher = {
-		.min_keysize	= AES_MIN_KEY_SIZE,
-		.max_keysize	= AES_MAX_KEY_SIZE,
-		.ivsize		= AES_BLOCK_SIZE,
-		.setkey		= ablk_set_key,
-		.encrypt	= ablk_encrypt,
-		.decrypt	= ablk_decrypt,
-	}
-}, {
-	.cra_name		= "xts(aes)",
-	.cra_driver_name	= "xts-aes-ce",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 7,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_ablkcipher = {
-		.min_keysize	= 2 * AES_MIN_KEY_SIZE,
-		.max_keysize	= 2 * AES_MAX_KEY_SIZE,
-		.ivsize		= AES_BLOCK_SIZE,
-		.setkey		= ablk_set_key,
-		.encrypt	= ablk_encrypt,
-		.decrypt	= ablk_decrypt,
-	}
+	.min_keysize	= 2 * AES_MIN_KEY_SIZE,
+	.max_keysize	= 2 * AES_MAX_KEY_SIZE,
+	.ivsize		= AES_BLOCK_SIZE,
+	.setkey		= xts_set_key,
+	.encrypt	= xts_encrypt,
+	.decrypt	= xts_decrypt,
 } };
 
+static struct simd_skcipher_alg *aes_simd_algs[ARRAY_SIZE(aes_algs)];
+
+static void aes_exit(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(aes_simd_algs) && aes_simd_algs[i]; i++)
+		simd_skcipher_free(aes_simd_algs[i]);
+
+	crypto_unregister_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
+}
+
 static int __init aes_init(void)
 {
+	struct simd_skcipher_alg *simd;
+	const char *basename;
+	const char *algname;
+	const char *drvname;
+	int err;
+	int i;
+
 	if (!(elf_hwcap2 & HWCAP2_AES))
 		return -ENODEV;
-	return crypto_register_algs(aes_algs, ARRAY_SIZE(aes_algs));
-}
 
-static void __exit aes_exit(void)
-{
-	crypto_unregister_algs(aes_algs, ARRAY_SIZE(aes_algs));
+	err = crypto_register_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
+	if (err)
+		return err;
+
+	for (i = 0; i < ARRAY_SIZE(aes_algs); i++) {
+		algname = aes_algs[i].base.cra_name + 2;
+		drvname = aes_algs[i].base.cra_driver_name + 2;
+		basename = aes_algs[i].base.cra_driver_name;
+		simd = simd_skcipher_create_compat(algname, drvname, basename);
+		err = PTR_ERR(simd);
+		if (IS_ERR(simd))
+			goto unregister_simds;
+
+		aes_simd_algs[i] = simd;
+	}
+
+	return 0;
+
+unregister_simds:
+	aes_exit();
+	return err;
 }
 
 module_init(aes_init);
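
The conversion above is representative of the whole series: blkcipher/ablkcipher entry points become skcipher ones, the internal "__"-prefixed algorithms are registered directly, and asynchronous wrappers are created by stripping that prefix (cra_name + 2) via the new simd_skcipher_create_compat(). From a caller's point of view, the resulting algorithms are driven as sketched below; synchronous completion is assumed for brevity, and real users must handle -EINPROGRESS from async tfms:

	#include <crypto/skcipher.h>
	#include <linux/err.h>
	#include <linux/scatterlist.h>

	/* Sketch: in-place CBC-AES encryption of len bytes in buf. */
	static int cbc_aes_encrypt_example(const u8 *key, unsigned int keylen,
					   u8 *iv, void *buf, unsigned int len)
	{
		struct crypto_skcipher *tfm;
		struct skcipher_request *req;
		struct scatterlist sg;
		int err;

		tfm = crypto_alloc_skcipher("cbc(aes)", 0, 0);
		if (IS_ERR(tfm))
			return PTR_ERR(tfm);

		err = crypto_skcipher_setkey(tfm, key, keylen);
		if (err)
			goto out_free_tfm;

		req = skcipher_request_alloc(tfm, GFP_KERNEL);
		if (!req) {
			err = -ENOMEM;
			goto out_free_tfm;
		}

		sg_init_one(&sg, buf, len);
		skcipher_request_set_callback(req, 0, NULL, NULL);
		skcipher_request_set_crypt(req, &sg, &sg, len, iv);

		err = crypto_skcipher_encrypt(req);	/* may be -EINPROGRESS */

		skcipher_request_free(req);
	out_free_tfm:
		crypto_free_skcipher(tfm);
		return err;
	}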

+ 152 - 228
arch/arm/crypto/aesbs-glue.c

@@ -10,8 +10,9 @@
 
 #include <asm/neon.h>
 #include <crypto/aes.h>
-#include <crypto/ablk_helper.h>
-#include <crypto/algapi.h>
+#include <crypto/cbc.h>
+#include <crypto/internal/simd.h>
+#include <crypto/internal/skcipher.h>
 #include <linux/module.h>
 #include <crypto/xts.h>
 
@@ -55,14 +56,14 @@ struct aesbs_xts_ctx {
 	struct AES_KEY	twkey;
 };
 
-static int aesbs_cbc_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+static int aesbs_cbc_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
 			     unsigned int key_len)
 {
-	struct aesbs_cbc_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
 	int bits = key_len * 8;
 
 	if (private_AES_set_encrypt_key(in_key, bits, &ctx->enc)) {
-		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+		crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
 		return -EINVAL;
 	}
 	ctx->dec.rk = ctx->enc;
@@ -71,33 +72,33 @@ static int aesbs_cbc_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 	return 0;
 }
 
-static int aesbs_ctr_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+static int aesbs_ctr_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
 			     unsigned int key_len)
 {
-	struct aesbs_ctr_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
 	int bits = key_len * 8;
 
 	if (private_AES_set_encrypt_key(in_key, bits, &ctx->enc.rk)) {
-		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+		crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
 		return -EINVAL;
 	}
 	ctx->enc.converted = 0;
 	return 0;
 }
 
-static int aesbs_xts_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+static int aesbs_xts_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
 			     unsigned int key_len)
 {
-	struct aesbs_xts_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 	int bits = key_len * 4;
 	int err;
 
-	err = xts_check_key(tfm, in_key, key_len);
+	err = xts_verify_key(tfm, in_key, key_len);
 	if (err)
 		return err;
 
 	if (private_AES_set_encrypt_key(in_key, bits, &ctx->enc.rk)) {
-		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+		crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
 		return -EINVAL;
 	}
 	ctx->dec.rk = ctx->enc.rk;
@@ -107,88 +108,52 @@ static int aesbs_xts_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 	return 0;
 }
 
-static int aesbs_cbc_encrypt(struct blkcipher_desc *desc,
-			     struct scatterlist *dst,
-			     struct scatterlist *src, unsigned int nbytes)
+static inline void aesbs_encrypt_one(struct crypto_skcipher *tfm,
+				     const u8 *src, u8 *dst)
 {
-	struct aesbs_cbc_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
-	int err;
+	struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	AES_encrypt(src, dst, &ctx->enc);
+}
 
-	while (walk.nbytes) {
-		u32 blocks = walk.nbytes / AES_BLOCK_SIZE;
-		u8 *src = walk.src.virt.addr;
+static int aesbs_cbc_encrypt(struct skcipher_request *req)
+{
+	return crypto_cbc_encrypt_walk(req, aesbs_encrypt_one);
+}
 
-		if (walk.dst.virt.addr == walk.src.virt.addr) {
-			u8 *iv = walk.iv;
-
-			do {
-				crypto_xor(src, iv, AES_BLOCK_SIZE);
-				AES_encrypt(src, src, &ctx->enc);
-				iv = src;
-				src += AES_BLOCK_SIZE;
-			} while (--blocks);
-			memcpy(walk.iv, iv, AES_BLOCK_SIZE);
-		} else {
-			u8 *dst = walk.dst.virt.addr;
-
-			do {
-				crypto_xor(walk.iv, src, AES_BLOCK_SIZE);
-				AES_encrypt(walk.iv, dst, &ctx->enc);
-				memcpy(walk.iv, dst, AES_BLOCK_SIZE);
-				src += AES_BLOCK_SIZE;
-				dst += AES_BLOCK_SIZE;
-			} while (--blocks);
-		}
-		err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
-	}
-	return err;
+static inline void aesbs_decrypt_one(struct crypto_skcipher *tfm,
+				     const u8 *src, u8 *dst)
+{
+	struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	AES_decrypt(src, dst, &ctx->dec.rk);
 }
 
-static int aesbs_cbc_decrypt(struct blkcipher_desc *desc,
-			     struct scatterlist *dst,
-			     struct scatterlist *src, unsigned int nbytes)
+static int aesbs_cbc_decrypt(struct skcipher_request *req)
 {
-	struct aesbs_cbc_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	unsigned int nbytes;
 	int err;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt_block(desc, &walk, 8 * AES_BLOCK_SIZE);
-
-	while ((walk.nbytes / AES_BLOCK_SIZE) >= 8) {
-		kernel_neon_begin();
-		bsaes_cbc_encrypt(walk.src.virt.addr, walk.dst.virt.addr,
-				  walk.nbytes, &ctx->dec, walk.iv);
-		kernel_neon_end();
-		err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
-	}
-	while (walk.nbytes) {
-		u32 blocks = walk.nbytes / AES_BLOCK_SIZE;
+	for (err = skcipher_walk_virt(&walk, req, false);
+	     (nbytes = walk.nbytes); err = skcipher_walk_done(&walk, nbytes)) {
+		u32 blocks = nbytes / AES_BLOCK_SIZE;
 		u8 *dst = walk.dst.virt.addr;
 		u8 *src = walk.src.virt.addr;
-		u8 bk[2][AES_BLOCK_SIZE];
 		u8 *iv = walk.iv;
 
-		do {
-			if (walk.dst.virt.addr == walk.src.virt.addr)
-				memcpy(bk[blocks & 1], src, AES_BLOCK_SIZE);
-
-			AES_decrypt(src, dst, &ctx->dec.rk);
-			crypto_xor(dst, iv, AES_BLOCK_SIZE);
-
-			if (walk.dst.virt.addr == walk.src.virt.addr)
-				iv = bk[blocks & 1];
-			else
-				iv = src;
+		if (blocks >= 8) {
+			kernel_neon_begin();
+			bsaes_cbc_encrypt(src, dst, nbytes, &ctx->dec, iv);
+			kernel_neon_end();
+			nbytes %= AES_BLOCK_SIZE;
+			continue;
+		}
 
-			dst += AES_BLOCK_SIZE;
-			src += AES_BLOCK_SIZE;
-		} while (--blocks);
-		err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
+		nbytes = crypto_cbc_decrypt_blocks(&walk, tfm,
+						   aesbs_decrypt_one);
 	}
 	return err;
 }
@@ -206,17 +171,15 @@ static void inc_be128_ctr(__be32 ctr[], u32 addend)
 	}
 }
 
-static int aesbs_ctr_encrypt(struct blkcipher_desc *desc,
-			     struct scatterlist *dst, struct scatterlist *src,
-			     unsigned int nbytes)
+static int aesbs_ctr_encrypt(struct skcipher_request *req)
 {
-	struct aesbs_ctr_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
 	u32 blocks;
 	int err;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt_block(desc, &walk, 8 * AES_BLOCK_SIZE);
+	err = skcipher_walk_virt(&walk, req, false);
 
 	while ((blocks = walk.nbytes / AES_BLOCK_SIZE)) {
 		u32 tail = walk.nbytes % AES_BLOCK_SIZE;
@@ -235,11 +198,7 @@ static int aesbs_ctr_encrypt(struct blkcipher_desc *desc,
 		kernel_neon_end();
 		inc_be128_ctr(ctr, blocks);
 
-		nbytes -= blocks * AES_BLOCK_SIZE;
-		if (nbytes && nbytes == tail && nbytes <= AES_BLOCK_SIZE)
-			break;
-
-		err = blkcipher_walk_done(desc, &walk, tail);
+		err = skcipher_walk_done(&walk, tail);
 	}
 	if (walk.nbytes) {
 		u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
@@ -248,23 +207,21 @@ static int aesbs_ctr_encrypt(struct blkcipher_desc *desc,
 
 		AES_encrypt(walk.iv, ks, &ctx->enc.rk);
 		if (tdst != tsrc)
-			memcpy(tdst, tsrc, nbytes);
-		crypto_xor(tdst, ks, nbytes);
-		err = blkcipher_walk_done(desc, &walk, 0);
+			memcpy(tdst, tsrc, walk.nbytes);
+		crypto_xor(tdst, ks, walk.nbytes);
+		err = skcipher_walk_done(&walk, 0);
 	}
 	return err;
 }
 
-static int aesbs_xts_encrypt(struct blkcipher_desc *desc,
-			     struct scatterlist *dst,
-			     struct scatterlist *src, unsigned int nbytes)
+static int aesbs_xts_encrypt(struct skcipher_request *req)
 {
-	struct aesbs_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
 	int err;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt_block(desc, &walk, 8 * AES_BLOCK_SIZE);
+	err = skcipher_walk_virt(&walk, req, false);
 
 	/* generate the initial tweak */
 	AES_encrypt(walk.iv, walk.iv, &ctx->twkey);
@@ -274,21 +231,19 @@ static int aesbs_xts_encrypt(struct blkcipher_desc *desc,
 		bsaes_xts_encrypt(walk.src.virt.addr, walk.dst.virt.addr,
 				  walk.nbytes, &ctx->enc, walk.iv);
 		kernel_neon_end();
-		err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
+		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
 	return err;
 }
 
-static int aesbs_xts_decrypt(struct blkcipher_desc *desc,
-			     struct scatterlist *dst,
-			     struct scatterlist *src, unsigned int nbytes)
+static int aesbs_xts_decrypt(struct skcipher_request *req)
 {
-	struct aesbs_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
 	int err;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt_block(desc, &walk, 8 * AES_BLOCK_SIZE);
+	err = skcipher_walk_virt(&walk, req, false);
 
 	/* generate the initial tweak */
 	AES_encrypt(walk.iv, walk.iv, &ctx->twkey);
@@ -298,141 +253,110 @@ static int aesbs_xts_decrypt(struct blkcipher_desc *desc,
 		bsaes_xts_decrypt(walk.src.virt.addr, walk.dst.virt.addr,
 				  walk.nbytes, &ctx->dec, walk.iv);
 		kernel_neon_end();
-		err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
+		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
 	return err;
 }
 
-static struct crypto_alg aesbs_algs[] = { {
-	.cra_name		= "__cbc-aes-neonbs",
-	.cra_driver_name	= "__driver-cbc-aes-neonbs",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct aesbs_cbc_ctx),
-	.cra_alignmask		= 7,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_blkcipher = {
-		.min_keysize	= AES_MIN_KEY_SIZE,
-		.max_keysize	= AES_MAX_KEY_SIZE,
-		.ivsize		= AES_BLOCK_SIZE,
-		.setkey		= aesbs_cbc_set_key,
-		.encrypt	= aesbs_cbc_encrypt,
-		.decrypt	= aesbs_cbc_decrypt,
+static struct skcipher_alg aesbs_algs[] = { {
+	.base = {
+		.cra_name		= "__cbc(aes)",
+		.cra_driver_name	= "__cbc-aes-neonbs",
+		.cra_priority		= 300,
+		.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.cra_blocksize		= AES_BLOCK_SIZE,
+		.cra_ctxsize		= sizeof(struct aesbs_cbc_ctx),
+		.cra_alignmask		= 7,
+		.cra_module		= THIS_MODULE,
 	},
+	.min_keysize	= AES_MIN_KEY_SIZE,
+	.max_keysize	= AES_MAX_KEY_SIZE,
+	.ivsize		= AES_BLOCK_SIZE,
+	.setkey		= aesbs_cbc_set_key,
+	.encrypt	= aesbs_cbc_encrypt,
+	.decrypt	= aesbs_cbc_decrypt,
 }, {
-	.cra_name		= "__ctr-aes-neonbs",
-	.cra_driver_name	= "__driver-ctr-aes-neonbs",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct aesbs_ctr_ctx),
-	.cra_alignmask		= 7,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_blkcipher = {
-		.min_keysize	= AES_MIN_KEY_SIZE,
-		.max_keysize	= AES_MAX_KEY_SIZE,
-		.ivsize		= AES_BLOCK_SIZE,
-		.setkey		= aesbs_ctr_set_key,
-		.encrypt	= aesbs_ctr_encrypt,
-		.decrypt	= aesbs_ctr_encrypt,
+	.base = {
+		.cra_name		= "__ctr(aes)",
+		.cra_driver_name	= "__ctr-aes-neonbs",
+		.cra_priority		= 300,
+		.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.cra_blocksize		= 1,
+		.cra_ctxsize		= sizeof(struct aesbs_ctr_ctx),
+		.cra_alignmask		= 7,
+		.cra_module		= THIS_MODULE,
 	},
+	.min_keysize	= AES_MIN_KEY_SIZE,
+	.max_keysize	= AES_MAX_KEY_SIZE,
+	.ivsize		= AES_BLOCK_SIZE,
+	.chunksize	= AES_BLOCK_SIZE,
+	.setkey		= aesbs_ctr_set_key,
+	.encrypt	= aesbs_ctr_encrypt,
+	.decrypt	= aesbs_ctr_encrypt,
 }, {
-	.cra_name		= "__xts-aes-neonbs",
-	.cra_driver_name	= "__driver-xts-aes-neonbs",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct aesbs_xts_ctx),
-	.cra_alignmask		= 7,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_blkcipher = {
-		.min_keysize	= 2 * AES_MIN_KEY_SIZE,
-		.max_keysize	= 2 * AES_MAX_KEY_SIZE,
-		.ivsize		= AES_BLOCK_SIZE,
-		.setkey		= aesbs_xts_set_key,
-		.encrypt	= aesbs_xts_encrypt,
-		.decrypt	= aesbs_xts_decrypt,
+	.base = {
+		.cra_name		= "__xts(aes)",
+		.cra_driver_name	= "__xts-aes-neonbs",
+		.cra_priority		= 300,
+		.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.cra_blocksize		= AES_BLOCK_SIZE,
+		.cra_ctxsize		= sizeof(struct aesbs_xts_ctx),
+		.cra_alignmask		= 7,
+		.cra_module		= THIS_MODULE,
 	},
-}, {
-	.cra_name		= "cbc(aes)",
-	.cra_driver_name	= "cbc-aes-neonbs",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 7,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_ablkcipher = {
-		.min_keysize	= AES_MIN_KEY_SIZE,
-		.max_keysize	= AES_MAX_KEY_SIZE,
-		.ivsize		= AES_BLOCK_SIZE,
-		.setkey		= ablk_set_key,
-		.encrypt	= __ablk_encrypt,
-		.decrypt	= ablk_decrypt,
-	}
-}, {
-	.cra_name		= "ctr(aes)",
-	.cra_driver_name	= "ctr-aes-neonbs",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 7,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_ablkcipher = {
-		.min_keysize	= AES_MIN_KEY_SIZE,
-		.max_keysize	= AES_MAX_KEY_SIZE,
-		.ivsize		= AES_BLOCK_SIZE,
-		.setkey		= ablk_set_key,
-		.encrypt	= ablk_encrypt,
-		.decrypt	= ablk_decrypt,
-	}
-}, {
-	.cra_name		= "xts(aes)",
-	.cra_driver_name	= "xts-aes-neonbs",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 7,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_ablkcipher = {
-		.min_keysize	= 2 * AES_MIN_KEY_SIZE,
-		.max_keysize	= 2 * AES_MAX_KEY_SIZE,
-		.ivsize		= AES_BLOCK_SIZE,
-		.setkey		= ablk_set_key,
-		.encrypt	= ablk_encrypt,
-		.decrypt	= ablk_decrypt,
-	}
+	.min_keysize	= 2 * AES_MIN_KEY_SIZE,
+	.max_keysize	= 2 * AES_MAX_KEY_SIZE,
+	.ivsize		= AES_BLOCK_SIZE,
+	.setkey		= aesbs_xts_set_key,
+	.encrypt	= aesbs_xts_encrypt,
+	.decrypt	= aesbs_xts_decrypt,
 } };
 
+struct simd_skcipher_alg *aesbs_simd_algs[ARRAY_SIZE(aesbs_algs)];
+
+static void aesbs_mod_exit(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(aesbs_simd_algs) && aesbs_simd_algs[i]; i++)
+		simd_skcipher_free(aesbs_simd_algs[i]);
+
+	crypto_unregister_skciphers(aesbs_algs, ARRAY_SIZE(aesbs_algs));
+}
+
 static int __init aesbs_mod_init(void)
 {
+	struct simd_skcipher_alg *simd;
+	const char *basename;
+	const char *algname;
+	const char *drvname;
+	int err;
+	int i;
+
 	if (!cpu_has_neon())
 		return -ENODEV;
 
-	return crypto_register_algs(aesbs_algs, ARRAY_SIZE(aesbs_algs));
-}
+	err = crypto_register_skciphers(aesbs_algs, ARRAY_SIZE(aesbs_algs));
+	if (err)
+		return err;
 
-static void __exit aesbs_mod_exit(void)
-{
-	crypto_unregister_algs(aesbs_algs, ARRAY_SIZE(aesbs_algs));
+	for (i = 0; i < ARRAY_SIZE(aesbs_algs); i++) {
+		algname = aesbs_algs[i].base.cra_name + 2;
+		drvname = aesbs_algs[i].base.cra_driver_name + 2;
+		basename = aesbs_algs[i].base.cra_driver_name;
+		simd = simd_skcipher_create_compat(algname, drvname, basename);
+		err = PTR_ERR(simd);
+		if (IS_ERR(simd))
+			goto unregister_simds;
+
+		aesbs_simd_algs[i] = simd;
+	}
+
+	return 0;
+
+unregister_simds:
+	aesbs_mod_exit();
+	return err;
 }
 
 module_init(aesbs_mod_init);

+ 306 - 0
arch/arm/crypto/crc32-ce-core.S

@@ -0,0 +1,306 @@
+/*
+ * Accelerated CRC32(C) using ARM CRC, NEON and Crypto Extensions instructions
+ *
+ * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see http://www.gnu.org/licenses
+ *
+ * Please  visit http://www.xyratex.com/contact if you need additional
+ * information or have any questions.
+ *
+ * GPL HEADER END
+ */
+
+/*
+ * Copyright 2012 Xyratex Technology Limited
+ *
+ * Using hardware provided PCLMULQDQ instruction to accelerate the CRC32
+ * calculation.
+ * CRC32 polynomial:0x04c11db7(BE)/0xEDB88320(LE)
+ * PCLMULQDQ is a new instruction in Intel SSE4.2, the reference can be found
+ * at:
+ * http://www.intel.com/products/processor/manuals/
+ * Intel(R) 64 and IA-32 Architectures Software Developer's Manual
+ * Volume 2B: Instruction Set Reference, N-Z
+ *
+ * Authors:   Gregory Prestas <Gregory_Prestas@us.xyratex.com>
+ *	      Alexander Boyko <Alexander_Boyko@xyratex.com>
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+	.text
+	.align		6
+	.arch		armv8-a
+	.arch_extension	crc
+	.fpu		crypto-neon-fp-armv8
+
+.Lcrc32_constants:
+	/*
+	 * [(x4*128+32 mod P(x) << 32)]'  << 1   = 0x154442bd4
+	 * #define CONSTANT_R1  0x154442bd4LL
+	 *
+	 * [(x4*128-32 mod P(x) << 32)]' << 1   = 0x1c6e41596
+	 * #define CONSTANT_R2  0x1c6e41596LL
+	 */
+	.quad		0x0000000154442bd4
+	.quad		0x00000001c6e41596
+
+	/*
+	 * [(x128+32 mod P(x) << 32)]'   << 1   = 0x1751997d0
+	 * #define CONSTANT_R3  0x1751997d0LL
+	 *
+	 * [(x128-32 mod P(x) << 32)]'   << 1   = 0x0ccaa009e
+	 * #define CONSTANT_R4  0x0ccaa009eLL
+	 */
+	.quad		0x00000001751997d0
+	.quad		0x00000000ccaa009e
+
+	/*
+	 * [(x64 mod P(x) << 32)]'       << 1   = 0x163cd6124
+	 * #define CONSTANT_R5  0x163cd6124LL
+	 */
+	.quad		0x0000000163cd6124
+	.quad		0x00000000FFFFFFFF
+
+	/*
+	 * #define CRCPOLY_TRUE_LE_FULL 0x1DB710641LL
+	 *
+	 * Barrett Reduction constant (u64`) = u` = (x**64 / P(x))`
+	 *                                                      = 0x1F7011641LL
+	 * #define CONSTANT_RU  0x1F7011641LL
+	 */
+	.quad		0x00000001DB710641
+	.quad		0x00000001F7011641
+
+.Lcrc32c_constants:
+	.quad		0x00000000740eef02
+	.quad		0x000000009e4addf8
+	.quad		0x00000000f20c0dfe
+	.quad		0x000000014cd00bd6
+	.quad		0x00000000dd45aab8
+	.quad		0x00000000FFFFFFFF
+	.quad		0x0000000105ec76f0
+	.quad		0x00000000dea713f1
+
+	dCONSTANTl	.req	d0
+	dCONSTANTh	.req	d1
+	qCONSTANT	.req	q0
+
+	BUF		.req	r0
+	LEN		.req	r1
+	CRC		.req	r2
+
+	qzr		.req	q9
+
+	/**
+	 * Calculate crc32
+	 * BUF - buffer
+	 * LEN - sizeof buffer (multiple of 16 bytes), LEN should be > 63
+	 * CRC - initial crc32
+	 * return %eax crc32
+	 * uint crc32_pmull_le(unsigned char const *buffer,
+	 *                     size_t len, uint crc32)
+	 */
+ENTRY(crc32_pmull_le)
+	adr		r3, .Lcrc32_constants
+	b		0f
+
+ENTRY(crc32c_pmull_le)
+	adr		r3, .Lcrc32c_constants
+
+0:	bic		LEN, LEN, #15
+	vld1.8		{q1-q2}, [BUF, :128]!
+	vld1.8		{q3-q4}, [BUF, :128]!
+	vmov.i8		qzr, #0
+	vmov.i8		qCONSTANT, #0
+	vmov		dCONSTANTl[0], CRC
+	veor.8		d2, d2, dCONSTANTl
+	sub		LEN, LEN, #0x40
+	cmp		LEN, #0x40
+	blt		less_64
+
+	vld1.64		{qCONSTANT}, [r3]
+
+loop_64:		/* 64 bytes Full cache line folding */
+	sub		LEN, LEN, #0x40
+
+	vmull.p64	q5, d3, dCONSTANTh
+	vmull.p64	q6, d5, dCONSTANTh
+	vmull.p64	q7, d7, dCONSTANTh
+	vmull.p64	q8, d9, dCONSTANTh
+
+	vmull.p64	q1, d2, dCONSTANTl
+	vmull.p64	q2, d4, dCONSTANTl
+	vmull.p64	q3, d6, dCONSTANTl
+	vmull.p64	q4, d8, dCONSTANTl
+
+	veor.8		q1, q1, q5
+	vld1.8		{q5}, [BUF, :128]!
+	veor.8		q2, q2, q6
+	vld1.8		{q6}, [BUF, :128]!
+	veor.8		q3, q3, q7
+	vld1.8		{q7}, [BUF, :128]!
+	veor.8		q4, q4, q8
+	vld1.8		{q8}, [BUF, :128]!
+
+	veor.8		q1, q1, q5
+	veor.8		q2, q2, q6
+	veor.8		q3, q3, q7
+	veor.8		q4, q4, q8
+
+	cmp		LEN, #0x40
+	bge		loop_64
+
+less_64:		/* Folding cache line into 128bit */
+	vldr		dCONSTANTl, [r3, #16]
+	vldr		dCONSTANTh, [r3, #24]
+
+	vmull.p64	q5, d3, dCONSTANTh
+	vmull.p64	q1, d2, dCONSTANTl
+	veor.8		q1, q1, q5
+	veor.8		q1, q1, q2
+
+	vmull.p64	q5, d3, dCONSTANTh
+	vmull.p64	q1, d2, dCONSTANTl
+	veor.8		q1, q1, q5
+	veor.8		q1, q1, q3
+
+	vmull.p64	q5, d3, dCONSTANTh
+	vmull.p64	q1, d2, dCONSTANTl
+	veor.8		q1, q1, q5
+	veor.8		q1, q1, q4
+
+	teq		LEN, #0
+	beq		fold_64
+
+loop_16:		/* Folding rest buffer into 128bit */
+	subs		LEN, LEN, #0x10
+
+	vld1.8		{q2}, [BUF, :128]!
+	vmull.p64	q5, d3, dCONSTANTh
+	vmull.p64	q1, d2, dCONSTANTl
+	veor.8		q1, q1, q5
+	veor.8		q1, q1, q2
+
+	bne		loop_16
+
+fold_64:
+	/* perform the last 64 bit fold, also adds 32 zeroes
+	 * to the input stream */
+	vmull.p64	q2, d2, dCONSTANTh
+	vext.8		q1, q1, qzr, #8
+	veor.8		q1, q1, q2
+
+	/* final 32-bit fold */
+	vldr		dCONSTANTl, [r3, #32]
+	vldr		d6, [r3, #40]
+	vmov.i8		d7, #0
+
+	vext.8		q2, q1, qzr, #4
+	vand.8		d2, d2, d6
+	vmull.p64	q1, d2, dCONSTANTl
+	veor.8		q1, q1, q2
+
+	/* Finish up with the bit-reversed barrett reduction 64 ==> 32 bits */
+	vldr		dCONSTANTl, [r3, #48]
+	vldr		dCONSTANTh, [r3, #56]
+
+	vand.8		q2, q1, q3
+	vext.8		q2, qzr, q2, #8
+	vmull.p64	q2, d5, dCONSTANTh
+	vand.8		q2, q2, q3
+	vmull.p64	q2, d4, dCONSTANTl
+	veor.8		q1, q1, q2
+	vmov		r0, s5
+
+	bx		lr
+ENDPROC(crc32_pmull_le)
+ENDPROC(crc32c_pmull_le)
+
+	.macro		__crc32, c
+	subs		ip, r2, #8
+	bmi		.Ltail\c
+
+	tst		r1, #3
+	bne		.Lunaligned\c
+
+	teq		ip, #0
+.Laligned8\c:
+	ldrd		r2, r3, [r1], #8
+ARM_BE8(rev		r2, r2		)
+ARM_BE8(rev		r3, r3		)
+	crc32\c\()w	r0, r0, r2
+	crc32\c\()w	r0, r0, r3
+	bxeq		lr
+	subs		ip, ip, #8
+	bpl		.Laligned8\c
+
+.Ltail\c:
+	tst		ip, #4
+	beq		2f
+	ldr		r3, [r1], #4
+ARM_BE8(rev		r3, r3		)
+	crc32\c\()w	r0, r0, r3
+
+2:	tst		ip, #2
+	beq		1f
+	ldrh		r3, [r1], #2
+ARM_BE8(rev16		r3, r3		)
+	crc32\c\()h	r0, r0, r3
+
+1:	tst		ip, #1
+	bxeq		lr
+	ldrb		r3, [r1]
+	crc32\c\()b	r0, r0, r3
+	bx		lr
+
+.Lunaligned\c:
+	tst		r1, #1
+	beq		2f
+	ldrb		r3, [r1], #1
+	subs		r2, r2, #1
+	crc32\c\()b	r0, r0, r3
+
+	tst		r1, #2
+	beq		0f
+2:	ldrh		r3, [r1], #2
+	subs		r2, r2, #2
+ARM_BE8(rev16		r3, r3		)
+	crc32\c\()h	r0, r0, r3
+
+0:	subs		ip, r2, #8
+	bpl		.Laligned8\c
+	b		.Ltail\c
+	.endm
+
+	.align		5
+ENTRY(crc32_armv8_le)
+	__crc32
+ENDPROC(crc32_armv8_le)
+
+	.align		5
+ENTRY(crc32c_armv8_le)
+	__crc32		c
+ENDPROC(crc32c_armv8_le)

+ 242 - 0
arch/arm/crypto/crc32-ce-glue.c

@@ -0,0 +1,242 @@
+/*
+ * Accelerated CRC32(C) using ARM CRC, NEON and Crypto Extensions instructions
+ *
+ * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/crc32.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+
+#include <crypto/internal/hash.h>
+
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+#include <asm/simd.h>
+#include <asm/unaligned.h>
+
+#define PMULL_MIN_LEN		64L	/* minimum size of buffer
+					 * for crc32_pmull_le_16 */
+#define SCALE_F			16L	/* size of NEON register */
+
+asmlinkage u32 crc32_pmull_le(const u8 buf[], u32 len, u32 init_crc);
+asmlinkage u32 crc32_armv8_le(u32 init_crc, const u8 buf[], u32 len);
+
+asmlinkage u32 crc32c_pmull_le(const u8 buf[], u32 len, u32 init_crc);
+asmlinkage u32 crc32c_armv8_le(u32 init_crc, const u8 buf[], u32 len);
+
+static u32 (*fallback_crc32)(u32 init_crc, const u8 buf[], u32 len);
+static u32 (*fallback_crc32c)(u32 init_crc, const u8 buf[], u32 len);
+
+static int crc32_cra_init(struct crypto_tfm *tfm)
+{
+	u32 *key = crypto_tfm_ctx(tfm);
+
+	*key = 0;
+	return 0;
+}
+
+static int crc32c_cra_init(struct crypto_tfm *tfm)
+{
+	u32 *key = crypto_tfm_ctx(tfm);
+
+	*key = ~0;
+	return 0;
+}
+
+static int crc32_setkey(struct crypto_shash *hash, const u8 *key,
+			unsigned int keylen)
+{
+	u32 *mctx = crypto_shash_ctx(hash);
+
+	if (keylen != sizeof(u32)) {
+		crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+	*mctx = le32_to_cpup((__le32 *)key);
+	return 0;
+}
+
+static int crc32_init(struct shash_desc *desc)
+{
+	u32 *mctx = crypto_shash_ctx(desc->tfm);
+	u32 *crc = shash_desc_ctx(desc);
+
+	*crc = *mctx;
+	return 0;
+}
+
+static int crc32_update(struct shash_desc *desc, const u8 *data,
+			unsigned int length)
+{
+	u32 *crc = shash_desc_ctx(desc);
+
+	*crc = crc32_armv8_le(*crc, data, length);
+	return 0;
+}
+
+static int crc32c_update(struct shash_desc *desc, const u8 *data,
+			 unsigned int length)
+{
+	u32 *crc = shash_desc_ctx(desc);
+
+	*crc = crc32c_armv8_le(*crc, data, length);
+	return 0;
+}
+
+static int crc32_final(struct shash_desc *desc, u8 *out)
+{
+	u32 *crc = shash_desc_ctx(desc);
+
+	put_unaligned_le32(*crc, out);
+	return 0;
+}
+
+static int crc32c_final(struct shash_desc *desc, u8 *out)
+{
+	u32 *crc = shash_desc_ctx(desc);
+
+	put_unaligned_le32(~*crc, out);
+	return 0;
+}
+
+static int crc32_pmull_update(struct shash_desc *desc, const u8 *data,
+			      unsigned int length)
+{
+	u32 *crc = shash_desc_ctx(desc);
+	unsigned int l;
+
+	if (may_use_simd()) {
+		if ((u32)data % SCALE_F) {
+			l = min_t(u32, length, SCALE_F - ((u32)data % SCALE_F));
+
+			*crc = fallback_crc32(*crc, data, l);
+
+			data += l;
+			length -= l;
+		}
+
+		if (length >= PMULL_MIN_LEN) {
+			l = round_down(length, SCALE_F);
+
+			kernel_neon_begin();
+			*crc = crc32_pmull_le(data, l, *crc);
+			kernel_neon_end();
+
+			data += l;
+			length -= l;
+		}
+	}
+
+	if (length > 0)
+		*crc = fallback_crc32(*crc, data, length);
+
+	return 0;
+}
+
+static int crc32c_pmull_update(struct shash_desc *desc, const u8 *data,
+			       unsigned int length)
+{
+	u32 *crc = shash_desc_ctx(desc);
+	unsigned int l;
+
+	if (may_use_simd()) {
+		if ((u32)data % SCALE_F) {
+			l = min_t(u32, length, SCALE_F - ((u32)data % SCALE_F));
+
+			*crc = fallback_crc32c(*crc, data, l);
+
+			data += l;
+			length -= l;
+		}
+
+		if (length >= PMULL_MIN_LEN) {
+			l = round_down(length, SCALE_F);
+
+			kernel_neon_begin();
+			*crc = crc32c_pmull_le(data, l, *crc);
+			kernel_neon_end();
+
+			data += l;
+			length -= l;
+		}
+	}
+
+	if (length > 0)
+		*crc = fallback_crc32c(*crc, data, length);
+
+	return 0;
+}
+
+static struct shash_alg crc32_pmull_algs[] = { {
+	.setkey			= crc32_setkey,
+	.init			= crc32_init,
+	.update			= crc32_update,
+	.final			= crc32_final,
+	.descsize		= sizeof(u32),
+	.digestsize		= sizeof(u32),
+
+	.base.cra_ctxsize	= sizeof(u32),
+	.base.cra_init		= crc32_cra_init,
+	.base.cra_name		= "crc32",
+	.base.cra_driver_name	= "crc32-arm-ce",
+	.base.cra_priority	= 200,
+	.base.cra_blocksize	= 1,
+	.base.cra_module	= THIS_MODULE,
+}, {
+	.setkey			= crc32_setkey,
+	.init			= crc32_init,
+	.update			= crc32c_update,
+	.final			= crc32c_final,
+	.descsize		= sizeof(u32),
+	.digestsize		= sizeof(u32),
+
+	.base.cra_ctxsize	= sizeof(u32),
+	.base.cra_init		= crc32c_cra_init,
+	.base.cra_name		= "crc32c",
+	.base.cra_driver_name	= "crc32c-arm-ce",
+	.base.cra_priority	= 200,
+	.base.cra_blocksize	= 1,
+	.base.cra_module	= THIS_MODULE,
+} };
+
+static int __init crc32_pmull_mod_init(void)
+{
+	if (elf_hwcap2 & HWCAP2_PMULL) {
+		crc32_pmull_algs[0].update = crc32_pmull_update;
+		crc32_pmull_algs[1].update = crc32c_pmull_update;
+
+		if (elf_hwcap2 & HWCAP2_CRC32) {
+			fallback_crc32 = crc32_armv8_le;
+			fallback_crc32c = crc32c_armv8_le;
+		} else {
+			fallback_crc32 = crc32_le;
+			fallback_crc32c = __crc32c_le;
+		}
+	} else if (!(elf_hwcap2 & HWCAP2_CRC32)) {
+		return -ENODEV;
+	}
+
+	return crypto_register_shashes(crc32_pmull_algs,
+				       ARRAY_SIZE(crc32_pmull_algs));
+}
+
+static void __exit crc32_pmull_mod_exit(void)
+{
+	crypto_unregister_shashes(crc32_pmull_algs,
+				  ARRAY_SIZE(crc32_pmull_algs));
+}
+
+module_init(crc32_pmull_mod_init);
+module_exit(crc32_pmull_mod_exit);
+
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("crc32");
+MODULE_ALIAS_CRYPTO("crc32c");

+ 427 - 0
arch/arm/crypto/crct10dif-ce-core.S

@@ -0,0 +1,427 @@
+//
+// Accelerated CRC-T10DIF using ARM NEON and Crypto Extensions instructions
+//
+// Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License version 2 as
+// published by the Free Software Foundation.
+//
+
+//
+// Implement fast CRC-T10DIF computation with SSE and PCLMULQDQ instructions
+//
+// Copyright (c) 2013, Intel Corporation
+//
+// Authors:
+//     Erdinc Ozturk <erdinc.ozturk@intel.com>
+//     Vinodh Gopal <vinodh.gopal@intel.com>
+//     James Guilford <james.guilford@intel.com>
+//     Tim Chen <tim.c.chen@linux.intel.com>
+//
+// This software is available to you under a choice of one of two
+// licenses.  You may choose to be licensed under the terms of the GNU
+// General Public License (GPL) Version 2, available from the file
+// COPYING in the main directory of this source tree, or the
+// OpenIB.org BSD license below:
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+//   notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+//   notice, this list of conditions and the following disclaimer in the
+//   documentation and/or other materials provided with the
+//   distribution.
+//
+// * Neither the name of the Intel Corporation nor the names of its
+//   contributors may be used to endorse or promote products derived from
+//   this software without specific prior written permission.
+//
+//
+// THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//       Function API:
+//       UINT16 crc_t10dif_pmull(
+//               UINT16 init_crc, //initial CRC value, 16 bits
+//               const unsigned char *buf, //buffer pointer to calculate CRC on
+//               UINT64 len //buffer length in bytes (64-bit data)
+//       );
+//
+//       Reference paper titled "Fast CRC Computation for Generic
+//	Polynomials Using PCLMULQDQ Instruction"
+//       URL: http://www.intel.com/content/dam/www/public/us/en/documents
+//  /white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
+//
+//
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+#ifdef CONFIG_CPU_ENDIAN_BE8
+#define CPU_LE(code...)
+#else
+#define CPU_LE(code...)		code
+#endif
+
+	.text
+	.fpu		crypto-neon-fp-armv8
+
+	arg1_low32	.req	r0
+	arg2		.req	r1
+	arg3		.req	r2
+
+	qzr		.req	q13
+
+	q0l		.req	d0
+	q0h		.req	d1
+	q1l		.req	d2
+	q1h		.req	d3
+	q2l		.req	d4
+	q2h		.req	d5
+	q3l		.req	d6
+	q3h		.req	d7
+	q4l		.req	d8
+	q4h		.req	d9
+	q5l		.req	d10
+	q5h		.req	d11
+	q6l		.req	d12
+	q6h		.req	d13
+	q7l		.req	d14
+	q7h		.req	d15
+
+ENTRY(crc_t10dif_pmull)
+	vmov.i8		qzr, #0			// init zero register
+
+	// adjust the 16-bit initial_crc value, scale it to 32 bits
+	lsl		arg1_low32, arg1_low32, #16
+
+	// check if smaller than 256
+	cmp		arg3, #256
+
+	// for sizes less than 256, we can't fold 128B at a time...
+	blt		_less_than_128
+
+	// load the initial crc value
+	// the crc value does not need to be byte-reflected, but it does
+	// need to be moved to the high part of the register, because the
+	// data will be byte-reflected and will then line up with the
+	// initial crc in the correct place
+	vmov		s0, arg1_low32		// initial crc
+	vext.8		q10, qzr, q0, #4
+
+	// receive the initial 128B of data, xor the initial crc value
+	vld1.64		{q0-q1}, [arg2, :128]!
+	vld1.64		{q2-q3}, [arg2, :128]!
+	vld1.64		{q4-q5}, [arg2, :128]!
+	vld1.64		{q6-q7}, [arg2, :128]!
+CPU_LE(	vrev64.8	q0, q0			)
+CPU_LE(	vrev64.8	q1, q1			)
+CPU_LE(	vrev64.8	q2, q2			)
+CPU_LE(	vrev64.8	q3, q3			)
+CPU_LE(	vrev64.8	q4, q4			)
+CPU_LE(	vrev64.8	q5, q5			)
+CPU_LE(	vrev64.8	q6, q6			)
+CPU_LE(	vrev64.8	q7, q7			)
+
+	vswp		d0, d1
+	vswp		d2, d3
+	vswp		d4, d5
+	vswp		d6, d7
+	vswp		d8, d9
+	vswp		d10, d11
+	vswp		d12, d13
+	vswp		d14, d15
+
+	// XOR the initial_crc value
+	veor.8		q0, q0, q10
+
+	adr		ip, rk3
+	vld1.64		{q10}, [ip, :128]	// q10 has rk3 and rk4
+
+	//
+	// we subtract 256 instead of 128 to save one instruction from the loop
+	//
+	sub		arg3, arg3, #256
+
+	// at this section of the code, there are 128*x+y (0 <= y < 128) bytes
+	// of buffer. The _fold_64_B_loop will fold 128B at a time
+	// until we have 128+y bytes of buffer
+
+
+	// fold 128B at a time. This section of the code folds 8 vector
+	// registers in parallel
+_fold_64_B_loop:
+
+	.macro		fold64, reg1, reg2
+	vld1.64		{q11-q12}, [arg2, :128]!
+
+	vmull.p64	q8, \reg1\()h, d21
+	vmull.p64	\reg1, \reg1\()l, d20
+	vmull.p64	q9, \reg2\()h, d21
+	vmull.p64	\reg2, \reg2\()l, d20
+
+CPU_LE(	vrev64.8	q11, q11		)
+CPU_LE(	vrev64.8	q12, q12		)
+	vswp		d22, d23
+	vswp		d24, d25
+
+	veor.8		\reg1, \reg1, q8
+	veor.8		\reg2, \reg2, q9
+	veor.8		\reg1, \reg1, q11
+	veor.8		\reg2, \reg2, q12
+	.endm
+
+	fold64		q0, q1
+	fold64		q2, q3
+	fold64		q4, q5
+	fold64		q6, q7
+
+	subs		arg3, arg3, #128
+
+	// check if there is another 128B in the buffer to be able to fold
+	bge		_fold_64_B_loop
+
+	// at this point, the buffer pointer is pointing at the last y bytes
+	// of the buffer, and the 128B of folded data is in 8 of the vector
+	// registers: q0-q7
+
+	// fold the 8 vector registers to 1 vector register with different
+	// constants
+
+	adr		ip, rk9
+	vld1.64		{q10}, [ip, :128]!
+
+	.macro		fold16, reg, rk
+	vmull.p64	q8, \reg\()l, d20
+	vmull.p64	\reg, \reg\()h, d21
+	.ifnb		\rk
+	vld1.64		{q10}, [ip, :128]!
+	.endif
+	veor.8		q7, q7, q8
+	veor.8		q7, q7, \reg
+	.endm
+
+	fold16		q0, rk11
+	fold16		q1, rk13
+	fold16		q2, rk15
+	fold16		q3, rk17
+	fold16		q4, rk19
+	fold16		q5, rk1
+	fold16		q6
+
+	// instead of 128, we add 112 (128-16) to the loop counter to save one
+	// instruction from the loop. Instead of a cmp instruction, we use
+	// the negative flag with the blt instruction
+	adds		arg3, arg3, #(128-16)
+	blt		_final_reduction_for_128
+
+	// now we have 16+y bytes left to reduce. 16 bytes are in register q7
+	// and the rest is in memory. We can fold 16 bytes at a time if y >= 16,
+	// so continue folding 16B at a time
+
+_16B_reduction_loop:
+	vmull.p64	q8, d14, d20
+	vmull.p64	q7, d15, d21
+	veor.8		q7, q7, q8
+
+	vld1.64		{q0}, [arg2, :128]!
+CPU_LE(	vrev64.8	q0, q0		)
+	vswp		d0, d1
+	veor.8		q7, q7, q0
+	subs		arg3, arg3, #16
+
+	// instead of a cmp instruction, we utilize the flags with the
+	// bge instruction, the equivalent of: cmp arg3, 16-16
+	// check if there is any more 16B in the buffer to be able to fold
+	bge		_16B_reduction_loop
+
+	// now we have 16+z bytes left to reduce, where 0 <= z < 16.
+	// first, we reduce the data in the q7 register
+
+_final_reduction_for_128:
+	// check if any more data to fold. If not, compute the CRC of
+	// the final 128 bits
+	adds		arg3, arg3, #16
+	beq		_128_done
+
+	// here we are handling fewer than 16 bytes of data.
+	// since we know that there was data before the pointer, we can
+	// offset the input pointer backwards to load exactly 16 bytes.
+	// after that, the registers need to be adjusted.
+_get_last_two_regs:
+	add		arg2, arg2, arg3
+	sub		arg2, arg2, #16
+	vld1.64		{q1}, [arg2]
+CPU_LE(	vrev64.8	q1, q1			)
+	vswp		d2, d3
+
+	// get rid of the extra data that was loaded before
+	// load the shift constant
+	adr		ip, tbl_shf_table + 16
+	sub		ip, ip, arg3
+	vld1.8		{q0}, [ip]
+
+	// shift q7 to the left by arg3 bytes, into q2
+	vtbl.8		d4, {d14-d15}, d0
+	vtbl.8		d5, {d14-d15}, d1
+
+	// shift q7 to the right by 16-arg3 bytes, into q9
+	vmov.i8		q9, #0x80
+	veor.8		q0, q0, q9
+	vtbl.8		d18, {d14-d15}, d0
+	vtbl.8		d19, {d14-d15}, d1
+
+	// blend
+	vshr.s8		q0, q0, #7		// convert to 8-bit mask
+	vbsl.8		q0, q2, q1
+
+	// fold 16 Bytes
+	vmull.p64	q8, d18, d20
+	vmull.p64	q7, d19, d21
+	veor.8		q7, q7, q8
+	veor.8		q7, q7, q0
+
+_128_done:
+	// compute crc of a 128-bit value
+	vldr		d20, rk5
+	vldr		d21, rk6		// rk5 and rk6 in q10
+
+	// 64b fold
+	vext.8		q0, qzr, q7, #8
+	vmull.p64	q7, d15, d20
+	veor.8		q7, q7, q0
+
+	// 32b fold
+	vext.8		q0, q7, qzr, #12
+	vmov		s31, s3
+	vmull.p64	q0, d0, d21
+	veor.8		q7, q0, q7
+
+	// barrett reduction
+_barrett:
+	vldr		d20, rk7
+	vldr		d21, rk8
+
+	vmull.p64	q0, d15, d20
+	vext.8		q0, qzr, q0, #12
+	vmull.p64	q0, d1, d21
+	vext.8		q0, qzr, q0, #12
+	veor.8		q7, q7, q0
+	vmov		r0, s29
+
+_cleanup:
+	// scale the result back to 16 bits
+	lsr		r0, r0, #16
+	bx		lr
+
+_less_than_128:
+	teq		arg3, #0
+	beq		_cleanup
+
+	vmov.i8		q0, #0
+	vmov		s3, arg1_low32		// get the initial crc value
+
+	vld1.64		{q7}, [arg2, :128]!
+CPU_LE(	vrev64.8	q7, q7		)
+	vswp		d14, d15
+	veor.8		q7, q7, q0
+
+	cmp		arg3, #16
+	beq		_128_done		// exactly 16 left
+	blt		_less_than_16_left
+
+	// more than 16 bytes left: load the fold constants
+	vldr		d20, rk1
+	vldr		d21, rk2		// rk1 and rk2 in q10
+
+	// check if there is enough buffer to be able to fold 16B at a time
+	subs		arg3, arg3, #32
+	addlt		arg3, arg3, #16
+	blt		_get_last_two_regs
+	b		_16B_reduction_loop
+
+_less_than_16_left:
+	// load the shift constant for the final partial block
+	adr		ip, tbl_shf_table + 16
+	sub		ip, ip, arg3
+	vld1.8		{q0}, [ip]
+	vmov.i8		q9, #0x80
+	veor.8		q0, q0, q9
+	vtbl.8		d18, {d14-d15}, d0
+	vtbl.8		d15, {d14-d15}, d1
+	vmov		d14, d18
+	b		_128_done
+ENDPROC(crc_t10dif_pmull)
+
+// precomputed constants
+// these constants are precomputed from the poly:
+// 0x8bb70000 (0x8bb7 scaled to 32 bits)
+	.align		4
+// Q = 0x18BB70000
+// rk1 = 2^(32*3) mod Q << 32
+// rk2 = 2^(32*5) mod Q << 32
+// rk3 = 2^(32*15) mod Q << 32
+// rk4 = 2^(32*17) mod Q << 32
+// rk5 = 2^(32*3) mod Q << 32
+// rk6 = 2^(32*2) mod Q << 32
+// rk7 = floor(2^64/Q)
+// rk8 = Q
+
+rk3:	.quad		0x9d9d000000000000
+rk4:	.quad		0x7cf5000000000000
+rk5:	.quad		0x2d56000000000000
+rk6:	.quad		0x1368000000000000
+rk7:	.quad		0x00000001f65a57f8
+rk8:	.quad		0x000000018bb70000
+rk9:	.quad		0xceae000000000000
+rk10:	.quad		0xbfd6000000000000
+rk11:	.quad		0x1e16000000000000
+rk12:	.quad		0x713c000000000000
+rk13:	.quad		0xf7f9000000000000
+rk14:	.quad		0x80a6000000000000
+rk15:	.quad		0x044c000000000000
+rk16:	.quad		0xe658000000000000
+rk17:	.quad		0xad18000000000000
+rk18:	.quad		0xa497000000000000
+rk19:	.quad		0x6ee3000000000000
+rk20:	.quad		0xe7b5000000000000
+rk1:	.quad		0x2d56000000000000
+rk2:	.quad		0x06df000000000000
+
+tbl_shf_table:
+// use these values for shift constants for the tbl/tbx instruction
+// different alignments result in values as shown:
+//	DDQ 0x008f8e8d8c8b8a898887868584838281 # shl 15 (16-1) / shr1
+//	DDQ 0x01008f8e8d8c8b8a8988878685848382 # shl 14 (16-2) / shr2
+//	DDQ 0x0201008f8e8d8c8b8a89888786858483 # shl 13 (16-3) / shr3
+//	DDQ 0x030201008f8e8d8c8b8a898887868584 # shl 12 (16-4) / shr4
+//	DDQ 0x04030201008f8e8d8c8b8a8988878685 # shl 11 (16-5) / shr5
+//	DDQ 0x0504030201008f8e8d8c8b8a89888786 # shl 10 (16-6) / shr6
+//	DDQ 0x060504030201008f8e8d8c8b8a898887 # shl 9  (16-7) / shr7
+//	DDQ 0x07060504030201008f8e8d8c8b8a8988 # shl 8  (16-8) / shr8
+//	DDQ 0x0807060504030201008f8e8d8c8b8a89 # shl 7  (16-9) / shr9
+//	DDQ 0x090807060504030201008f8e8d8c8b8a # shl 6  (16-10) / shr10
+//	DDQ 0x0a090807060504030201008f8e8d8c8b # shl 5  (16-11) / shr11
+//	DDQ 0x0b0a090807060504030201008f8e8d8c # shl 4  (16-12) / shr12
+//	DDQ 0x0c0b0a090807060504030201008f8e8d # shl 3  (16-13) / shr13
+//	DDQ 0x0d0c0b0a090807060504030201008f8e # shl 2  (16-14) / shr14
+//	DDQ 0x0e0d0c0b0a090807060504030201008f # shl 1  (16-15) / shr15
+
+	.byte		 0x0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87
+	.byte		0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f
+	.byte		 0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
+	.byte		 0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe,  0x0
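
The rk values above are powers of x reduced modulo Q over GF(2), then
left-aligned into the top 32 bits, as the comment block before them states. A
hedged standalone sketch of that derivation (gf2_pow_mod() is illustrative,
not from the kernel sources):

	#include <stdint.h>
	#include <stdio.h>

	/* x^n mod q over GF(2), for deg(q) == 32 */
	static uint64_t gf2_pow_mod(int n, uint64_t q)
	{
		uint64_t r = 1;			/* x^0 */

		while (n--) {
			r <<= 1;		/* multiply by x */
			if (r & (1ULL << 32))
				r ^= q;		/* reduce */
		}
		return r;
	}

	int main(void)
	{
		const uint64_t q = 0x18BB70000ULL;	/* 0x8bb7 scaled to 32 bits */

		/* should reproduce rk1 = 0x2d56000000000000 */
		printf("rk1 = 0x%016llx\n",
		       (unsigned long long)(gf2_pow_mod(32 * 3, q) << 32));
		/* should reproduce rk2 = 0x06df000000000000 */
		printf("rk2 = 0x%016llx\n",
		       (unsigned long long)(gf2_pow_mod(32 * 5, q) << 32));
		return 0;
	}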

+ 101 - 0
arch/arm/crypto/crct10dif-ce-glue.c

@@ -0,0 +1,101 @@
+/*
+ * Accelerated CRC-T10DIF using ARM NEON and Crypto Extensions instructions
+ *
+ * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/crc-t10dif.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+
+#include <crypto/internal/hash.h>
+
+#include <asm/neon.h>
+#include <asm/simd.h>
+
+#define CRC_T10DIF_PMULL_CHUNK_SIZE	16U
+
+asmlinkage u16 crc_t10dif_pmull(u16 init_crc, const u8 buf[], u32 len);
+
+static int crct10dif_init(struct shash_desc *desc)
+{
+	u16 *crc = shash_desc_ctx(desc);
+
+	*crc = 0;
+	return 0;
+}
+
+static int crct10dif_update(struct shash_desc *desc, const u8 *data,
+			    unsigned int length)
+{
+	u16 *crc = shash_desc_ctx(desc);
+	unsigned int l;
+
+	if (!may_use_simd()) {
+		*crc = crc_t10dif_generic(*crc, data, length);
+	} else {
+		if (unlikely((u32)data % CRC_T10DIF_PMULL_CHUNK_SIZE)) {
+			l = min_t(u32, length, CRC_T10DIF_PMULL_CHUNK_SIZE -
+				  ((u32)data % CRC_T10DIF_PMULL_CHUNK_SIZE));
+
+			*crc = crc_t10dif_generic(*crc, data, l);
+
+			length -= l;
+			data += l;
+		}
+		if (length > 0) {
+			kernel_neon_begin();
+			*crc = crc_t10dif_pmull(*crc, data, length);
+			kernel_neon_end();
+		}
+	}
+	return 0;
+}
+
+static int crct10dif_final(struct shash_desc *desc, u8 *out)
+{
+	u16 *crc = shash_desc_ctx(desc);
+
+	*(u16 *)out = *crc;
+	return 0;
+}
+
+static struct shash_alg crc_t10dif_alg = {
+	.digestsize		= CRC_T10DIF_DIGEST_SIZE,
+	.init			= crct10dif_init,
+	.update			= crct10dif_update,
+	.final			= crct10dif_final,
+	.descsize		= CRC_T10DIF_DIGEST_SIZE,
+
+	.base.cra_name		= "crct10dif",
+	.base.cra_driver_name	= "crct10dif-arm-ce",
+	.base.cra_priority	= 200,
+	.base.cra_blocksize	= CRC_T10DIF_BLOCK_SIZE,
+	.base.cra_module	= THIS_MODULE,
+};
+
+static int __init crc_t10dif_mod_init(void)
+{
+	if (!(elf_hwcap2 & HWCAP2_PMULL))
+		return -ENODEV;
+
+	return crypto_register_shash(&crc_t10dif_alg);
+}
+
+static void __exit crc_t10dif_mod_exit(void)
+{
+	crypto_unregister_shash(&crc_t10dif_alg);
+}
+
+module_init(crc_t10dif_mod_init);
+module_exit(crc_t10dif_mod_exit);
+
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("crct10dif");

+ 8 - 0
arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi

@@ -164,6 +164,14 @@
 				clocks = <&cpm_syscon0 1 21>;
 				status = "disabled";
 			};
+
+			cpm_trng: trng@760000 {
+				compatible = "marvell,armada-8k-rng", "inside-secure,safexcel-eip76";
+				reg = <0x760000 0x7d>;
+				interrupts = <GIC_SPI 59 IRQ_TYPE_LEVEL_HIGH>;
+				clocks = <&cpm_syscon0 1 25>;
+				status = "okay";
+			};
 		};
 
 		cpm_pcie0: pcie@f2600000 {

+ 8 - 0
arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi

@@ -164,6 +164,14 @@
 				clocks = <&cps_syscon0 1 21>;
 				status = "disabled";
 			};
+
+			cps_trng: trng@760000 {
+				compatible = "marvell,armada-8k-rng", "inside-secure,safexcel-eip76";
+				reg = <0x760000 0x7d>;
+				interrupts = <GIC_SPI 312 IRQ_TYPE_LEVEL_HIGH>;
+				clocks = <&cps_syscon0 1 25>;
+				status = "okay";
+			};
 		};
 
 		cps_pcie0: pcie@f4600000 {

+ 2 - 0
arch/arm64/crypto/.gitignore

@@ -0,0 +1,2 @@
+sha256-core.S
+sha512-core.S

+ 21 - 2
arch/arm64/crypto/Kconfig

@@ -8,6 +8,14 @@ menuconfig ARM64_CRYPTO
 
 if ARM64_CRYPTO
 
+config CRYPTO_SHA256_ARM64
+	tristate "SHA-224/SHA-256 digest algorithm for arm64"
+	select CRYPTO_HASH
+
+config CRYPTO_SHA512_ARM64
+	tristate "SHA-384/SHA-512 digest algorithm for arm64"
+	select CRYPTO_HASH
+
 config CRYPTO_SHA1_ARM64_CE
 	tristate "SHA-1 digest algorithm (ARMv8 Crypto Extensions)"
 	depends on ARM64 && KERNEL_MODE_NEON
@@ -23,6 +31,16 @@ config CRYPTO_GHASH_ARM64_CE
 	depends on ARM64 && KERNEL_MODE_NEON
 	select CRYPTO_HASH
 
+config CRYPTO_CRCT10DIF_ARM64_CE
+	tristate "CRCT10DIF digest algorithm using PMULL instructions"
+	depends on KERNEL_MODE_NEON && CRC_T10DIF
+	select CRYPTO_HASH
+
+config CRYPTO_CRC32_ARM64_CE
+	tristate "CRC32 and CRC32C digest algorithms using PMULL instructions"
+	depends on KERNEL_MODE_NEON && CRC32
+	select CRYPTO_HASH
+
 config CRYPTO_AES_ARM64_CE
 	tristate "AES core cipher using ARMv8 Crypto Extensions"
 	depends on ARM64 && KERNEL_MODE_NEON
@@ -40,17 +58,18 @@ config CRYPTO_AES_ARM64_CE_BLK
 	depends on ARM64 && KERNEL_MODE_NEON
 	select CRYPTO_BLKCIPHER
 	select CRYPTO_AES_ARM64_CE
-	select CRYPTO_ABLK_HELPER
+	select CRYPTO_SIMD
 
 config CRYPTO_AES_ARM64_NEON_BLK
 	tristate "AES in ECB/CBC/CTR/XTS modes using NEON instructions"
 	depends on ARM64 && KERNEL_MODE_NEON
 	select CRYPTO_BLKCIPHER
 	select CRYPTO_AES
-	select CRYPTO_ABLK_HELPER
+	select CRYPTO_SIMD
 
 config CRYPTO_CRC32_ARM64
 	tristate "CRC32 and CRC32C using optional ARMv8 instructions"
 	depends on ARM64
 	select CRYPTO_HASH
+
 endif

+ 23 - 0
arch/arm64/crypto/Makefile

@@ -17,6 +17,12 @@ sha2-ce-y := sha2-ce-glue.o sha2-ce-core.o
 obj-$(CONFIG_CRYPTO_GHASH_ARM64_CE) += ghash-ce.o
 ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o
 
+obj-$(CONFIG_CRYPTO_CRCT10DIF_ARM64_CE) += crct10dif-ce.o
+crct10dif-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o
+
+obj-$(CONFIG_CRYPTO_CRC32_ARM64_CE) += crc32-ce.o
+crc32-ce-y := crc32-ce-core.o crc32-ce-glue.o
+
 obj-$(CONFIG_CRYPTO_AES_ARM64_CE) += aes-ce-cipher.o
 CFLAGS_aes-ce-cipher.o += -march=armv8-a+crypto
 
@@ -29,6 +35,12 @@ aes-ce-blk-y := aes-glue-ce.o aes-ce.o
 obj-$(CONFIG_CRYPTO_AES_ARM64_NEON_BLK) += aes-neon-blk.o
 aes-neon-blk-y := aes-glue-neon.o aes-neon.o
 
+obj-$(CONFIG_CRYPTO_SHA256_ARM64) += sha256-arm64.o
+sha256-arm64-y := sha256-glue.o sha256-core.o
+
+obj-$(CONFIG_CRYPTO_SHA512_ARM64) += sha512-arm64.o
+sha512-arm64-y := sha512-glue.o sha512-core.o
+
 AFLAGS_aes-ce.o		:= -DINTERLEAVE=4
 AFLAGS_aes-neon.o	:= -DINTERLEAVE=4
 
@@ -40,3 +52,14 @@ CFLAGS_crc32-arm64.o	:= -mcpu=generic+crc
 
 $(obj)/aes-glue-%.o: $(src)/aes-glue.c FORCE
 	$(call if_changed_rule,cc_o_c)
+
+quiet_cmd_perlasm = PERLASM $@
+      cmd_perlasm = $(PERL) $(<) void $(@)
+
+$(src)/sha256-core.S_shipped: $(src)/sha512-armv8.pl
+	$(call cmd,perlasm)
+
+$(src)/sha512-core.S_shipped: $(src)/sha512-armv8.pl
+	$(call cmd,perlasm)
+
+.PRECIOUS: $(obj)/sha256-core.S $(obj)/sha512-core.S

+ 27 - 26
arch/arm64/crypto/aes-ce-ccm-core.S

@@ -9,6 +9,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/assembler.h>
 
 	.text
 	.arch	armv8-a+crypto
@@ -19,7 +20,7 @@
 	 */
 ENTRY(ce_aes_ccm_auth_data)
 	ldr	w8, [x3]			/* leftover from prev round? */
-	ld1	{v0.2d}, [x0]			/* load mac */
+	ld1	{v0.16b}, [x0]			/* load mac */
 	cbz	w8, 1f
 	sub	w8, w8, #16
 	eor	v1.16b, v1.16b, v1.16b
@@ -31,7 +32,7 @@ ENTRY(ce_aes_ccm_auth_data)
 	beq	8f				/* out of input? */
 	cbnz	w8, 0b
 	eor	v0.16b, v0.16b, v1.16b
-1:	ld1	{v3.2d}, [x4]			/* load first round key */
+1:	ld1	{v3.16b}, [x4]			/* load first round key */
 	prfm	pldl1strm, [x1]
 	cmp	w5, #12				/* which key size? */
 	add	x6, x4, #16
@@ -41,17 +42,17 @@ ENTRY(ce_aes_ccm_auth_data)
 	mov	v5.16b, v3.16b
 	b	4f
 2:	mov	v4.16b, v3.16b
-	ld1	{v5.2d}, [x6], #16		/* load 2nd round key */
+	ld1	{v5.16b}, [x6], #16		/* load 2nd round key */
 3:	aese	v0.16b, v4.16b
 	aesmc	v0.16b, v0.16b
-4:	ld1	{v3.2d}, [x6], #16		/* load next round key */
+4:	ld1	{v3.16b}, [x6], #16		/* load next round key */
 	aese	v0.16b, v5.16b
 	aesmc	v0.16b, v0.16b
-5:	ld1	{v4.2d}, [x6], #16		/* load next round key */
+5:	ld1	{v4.16b}, [x6], #16		/* load next round key */
 	subs	w7, w7, #3
 	aese	v0.16b, v3.16b
 	aesmc	v0.16b, v0.16b
-	ld1	{v5.2d}, [x6], #16		/* load next round key */
+	ld1	{v5.16b}, [x6], #16		/* load next round key */
 	bpl	3b
 	aese	v0.16b, v4.16b
 	subs	w2, w2, #16			/* last data? */
@@ -60,7 +61,7 @@ ENTRY(ce_aes_ccm_auth_data)
 	ld1	{v1.16b}, [x1], #16		/* load next input block */
 	eor	v0.16b, v0.16b, v1.16b		/* xor with mac */
 	bne	1b
-6:	st1	{v0.2d}, [x0]			/* store mac */
+6:	st1	{v0.16b}, [x0]			/* store mac */
 	beq	10f
 	adds	w2, w2, #16
 	beq	10f
@@ -79,7 +80,7 @@ ENTRY(ce_aes_ccm_auth_data)
 	adds	w7, w7, #1
 	bne	9b
 	eor	v0.16b, v0.16b, v1.16b
-	st1	{v0.2d}, [x0]
+	st1	{v0.16b}, [x0]
 10:	str	w8, [x3]
 	ret
 ENDPROC(ce_aes_ccm_auth_data)
@@ -89,27 +90,27 @@ ENDPROC(ce_aes_ccm_auth_data)
 	 * 			 u32 rounds);
 	 */
 ENTRY(ce_aes_ccm_final)
-	ld1	{v3.2d}, [x2], #16		/* load first round key */
-	ld1	{v0.2d}, [x0]			/* load mac */
+	ld1	{v3.16b}, [x2], #16		/* load first round key */
+	ld1	{v0.16b}, [x0]			/* load mac */
 	cmp	w3, #12				/* which key size? */
 	sub	w3, w3, #2			/* modified # of rounds */
-	ld1	{v1.2d}, [x1]			/* load 1st ctriv */
+	ld1	{v1.16b}, [x1]			/* load 1st ctriv */
 	bmi	0f
 	bne	3f
 	mov	v5.16b, v3.16b
 	b	2f
 0:	mov	v4.16b, v3.16b
-1:	ld1	{v5.2d}, [x2], #16		/* load next round key */
+1:	ld1	{v5.16b}, [x2], #16		/* load next round key */
 	aese	v0.16b, v4.16b
 	aesmc	v0.16b, v0.16b
 	aese	v1.16b, v4.16b
 	aesmc	v1.16b, v1.16b
-2:	ld1	{v3.2d}, [x2], #16		/* load next round key */
+2:	ld1	{v3.16b}, [x2], #16		/* load next round key */
 	aese	v0.16b, v5.16b
 	aesmc	v0.16b, v0.16b
 	aese	v1.16b, v5.16b
 	aesmc	v1.16b, v1.16b
-3:	ld1	{v4.2d}, [x2], #16		/* load next round key */
+3:	ld1	{v4.16b}, [x2], #16		/* load next round key */
 	subs	w3, w3, #3
 	aese	v0.16b, v3.16b
 	aesmc	v0.16b, v0.16b
@@ -120,47 +121,47 @@ ENTRY(ce_aes_ccm_final)
 	aese	v1.16b, v4.16b
 	/* final round key cancels out */
 	eor	v0.16b, v0.16b, v1.16b		/* en-/decrypt the mac */
-	st1	{v0.2d}, [x0]			/* store result */
+	st1	{v0.16b}, [x0]			/* store result */
 	ret
 ENDPROC(ce_aes_ccm_final)
 
 	.macro	aes_ccm_do_crypt,enc
 	ldr	x8, [x6, #8]			/* load lower ctr */
-	ld1	{v0.2d}, [x5]			/* load mac */
-	rev	x8, x8				/* keep swabbed ctr in reg */
+	ld1	{v0.16b}, [x5]			/* load mac */
+CPU_LE(	rev	x8, x8			)	/* keep swabbed ctr in reg */
 0:	/* outer loop */
-	ld1	{v1.1d}, [x6]			/* load upper ctr */
+	ld1	{v1.8b}, [x6]			/* load upper ctr */
 	prfm	pldl1strm, [x1]
 	add	x8, x8, #1
 	rev	x9, x8
 	cmp	w4, #12				/* which key size? */
 	sub	w7, w4, #2			/* get modified # of rounds */
 	ins	v1.d[1], x9			/* no carry in lower ctr */
-	ld1	{v3.2d}, [x3]			/* load first round key */
+	ld1	{v3.16b}, [x3]			/* load first round key */
 	add	x10, x3, #16
 	bmi	1f
 	bne	4f
 	mov	v5.16b, v3.16b
 	b	3f
 1:	mov	v4.16b, v3.16b
-	ld1	{v5.2d}, [x10], #16		/* load 2nd round key */
+	ld1	{v5.16b}, [x10], #16		/* load 2nd round key */
 2:	/* inner loop: 3 rounds, 2x interleaved */
 	aese	v0.16b, v4.16b
 	aesmc	v0.16b, v0.16b
 	aese	v1.16b, v4.16b
 	aesmc	v1.16b, v1.16b
-3:	ld1	{v3.2d}, [x10], #16		/* load next round key */
+3:	ld1	{v3.16b}, [x10], #16		/* load next round key */
 	aese	v0.16b, v5.16b
 	aesmc	v0.16b, v0.16b
 	aese	v1.16b, v5.16b
 	aesmc	v1.16b, v1.16b
-4:	ld1	{v4.2d}, [x10], #16		/* load next round key */
+4:	ld1	{v4.16b}, [x10], #16		/* load next round key */
 	subs	w7, w7, #3
 	aese	v0.16b, v3.16b
 	aesmc	v0.16b, v0.16b
 	aese	v1.16b, v3.16b
 	aesmc	v1.16b, v1.16b
-	ld1	{v5.2d}, [x10], #16		/* load next round key */
+	ld1	{v5.16b}, [x10], #16		/* load next round key */
 	bpl	2b
 	aese	v0.16b, v4.16b
 	aese	v1.16b, v4.16b
@@ -177,14 +178,14 @@ ENDPROC(ce_aes_ccm_final)
 	eor	v0.16b, v0.16b, v2.16b		/* xor mac with pt ^ rk[last] */
 	st1	{v1.16b}, [x0], #16		/* write output block */
 	bne	0b
-	rev	x8, x8
-	st1	{v0.2d}, [x5]			/* store mac */
+CPU_LE(	rev	x8, x8			)
+	st1	{v0.16b}, [x5]			/* store mac */
 	str	x8, [x6, #8]			/* store lsb end of ctr (BE) */
 5:	ret
 
 6:	eor	v0.16b, v0.16b, v5.16b		/* final round mac */
 	eor	v1.16b, v1.16b, v5.16b		/* final round enc */
-	st1	{v0.2d}, [x5]			/* store mac */
+	st1	{v0.16b}, [x5]			/* store mac */
 	add	w2, w2, #16			/* process partial tail block */
 7:	ldrb	w9, [x1], #1			/* get 1 byte of input */
 	umov	w6, v1.b[0]			/* get top crypted ctr byte */

+ 13 - 37
arch/arm64/crypto/aes-ce-ccm-glue.c

@@ -11,9 +11,9 @@
 #include <asm/neon.h>
 #include <asm/unaligned.h>
 #include <crypto/aes.h>
-#include <crypto/algapi.h>
 #include <crypto/scatterwalk.h>
 #include <crypto/internal/aead.h>
+#include <crypto/internal/skcipher.h>
 #include <linux/module.h>
 
 #include "aes-ce-setkey.h"
@@ -149,12 +149,7 @@ static int ccm_encrypt(struct aead_request *req)
 {
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
 	struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead);
-	struct blkcipher_desc desc = { .info = req->iv };
-	struct blkcipher_walk walk;
-	struct scatterlist srcbuf[2];
-	struct scatterlist dstbuf[2];
-	struct scatterlist *src;
-	struct scatterlist *dst;
+	struct skcipher_walk walk;
 	u8 __aligned(8) mac[AES_BLOCK_SIZE];
 	u8 buf[AES_BLOCK_SIZE];
 	u32 len = req->cryptlen;
@@ -172,27 +167,19 @@ static int ccm_encrypt(struct aead_request *req)
 	/* preserve the original iv for the final round */
 	memcpy(buf, req->iv, AES_BLOCK_SIZE);
 
-	src = scatterwalk_ffwd(srcbuf, req->src, req->assoclen);
-	dst = src;
-	if (req->src != req->dst)
-		dst = scatterwalk_ffwd(dstbuf, req->dst, req->assoclen);
-
-	blkcipher_walk_init(&walk, dst, src, len);
-	err = blkcipher_aead_walk_virt_block(&desc, &walk, aead,
-					     AES_BLOCK_SIZE);
+	err = skcipher_walk_aead_encrypt(&walk, req, true);
 
 	while (walk.nbytes) {
 		u32 tail = walk.nbytes % AES_BLOCK_SIZE;
 
-		if (walk.nbytes == len)
+		if (walk.nbytes == walk.total)
 			tail = 0;
 
 		ce_aes_ccm_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
 				   walk.nbytes - tail, ctx->key_enc,
 				   num_rounds(ctx), mac, walk.iv);
 
-		len -= walk.nbytes - tail;
-		err = blkcipher_walk_done(&desc, &walk, tail);
+		err = skcipher_walk_done(&walk, tail);
 	}
 	if (!err)
 		ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx));
@@ -203,7 +190,7 @@ static int ccm_encrypt(struct aead_request *req)
 		return err;
 
 	/* copy authtag to end of dst */
-	scatterwalk_map_and_copy(mac, dst, req->cryptlen,
+	scatterwalk_map_and_copy(mac, req->dst, req->assoclen + req->cryptlen,
 				 crypto_aead_authsize(aead), 1);
 
 	return 0;
@@ -214,12 +201,7 @@ static int ccm_decrypt(struct aead_request *req)
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
 	struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead);
 	unsigned int authsize = crypto_aead_authsize(aead);
-	struct blkcipher_desc desc = { .info = req->iv };
-	struct blkcipher_walk walk;
-	struct scatterlist srcbuf[2];
-	struct scatterlist dstbuf[2];
-	struct scatterlist *src;
-	struct scatterlist *dst;
+	struct skcipher_walk walk;
 	u8 __aligned(8) mac[AES_BLOCK_SIZE];
 	u8 buf[AES_BLOCK_SIZE];
 	u32 len = req->cryptlen - authsize;
@@ -237,27 +219,19 @@ static int ccm_decrypt(struct aead_request *req)
 	/* preserve the original iv for the final round */
 	memcpy(buf, req->iv, AES_BLOCK_SIZE);
 
-	src = scatterwalk_ffwd(srcbuf, req->src, req->assoclen);
-	dst = src;
-	if (req->src != req->dst)
-		dst = scatterwalk_ffwd(dstbuf, req->dst, req->assoclen);
-
-	blkcipher_walk_init(&walk, dst, src, len);
-	err = blkcipher_aead_walk_virt_block(&desc, &walk, aead,
-					     AES_BLOCK_SIZE);
+	err = skcipher_walk_aead_decrypt(&walk, req, true);
 
 	while (walk.nbytes) {
 		u32 tail = walk.nbytes % AES_BLOCK_SIZE;
 
-		if (walk.nbytes == len)
+		if (walk.nbytes == walk.total)
 			tail = 0;
 
 		ce_aes_ccm_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
 				   walk.nbytes - tail, ctx->key_enc,
 				   num_rounds(ctx), mac, walk.iv);
 
-		len -= walk.nbytes - tail;
-		err = blkcipher_walk_done(&desc, &walk, tail);
+		err = skcipher_walk_done(&walk, tail);
 	}
 	if (!err)
 		ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx));
@@ -268,7 +242,8 @@ static int ccm_decrypt(struct aead_request *req)
 		return err;
 
 	/* compare calculated auth tag with the stored one */
-	scatterwalk_map_and_copy(buf, src, req->cryptlen - authsize,
+	scatterwalk_map_and_copy(buf, req->src,
+				 req->assoclen + req->cryptlen - authsize,
 				 authsize, 0);
 
 	if (crypto_memneq(mac, buf, authsize))
@@ -287,6 +262,7 @@ static struct aead_alg ccm_aes_alg = {
 		.cra_module		= THIS_MODULE,
 	},
 	.ivsize		= AES_BLOCK_SIZE,
+	.chunksize	= AES_BLOCK_SIZE,
 	.maxauthsize	= AES_BLOCK_SIZE,
 	.setkey		= ccm_setkey,
 	.setauthsize	= ccm_setauthsize,
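
The switch to skcipher_walk above is invisible to consumers, which keep
driving "ccm(aes)" through the regular AEAD API. A hedged in-kernel usage
sketch (ccm_encrypt_once() is illustrative; it assumes a synchronous
implementation, so -EINPROGRESS handling and most error paths are trimmed):

	#include <crypto/aead.h>
	#include <linux/err.h>
	#include <linux/scatterlist.h>

	static int ccm_encrypt_once(u8 *buf, unsigned int assoclen,
				    unsigned int ptlen, const u8 key[16], u8 *iv)
	{
		struct crypto_aead *tfm;
		struct aead_request *req;
		struct scatterlist sg;
		int err;

		tfm = crypto_alloc_aead("ccm(aes)", 0, 0);
		if (IS_ERR(tfm))
			return PTR_ERR(tfm);

		crypto_aead_setkey(tfm, key, 16);
		crypto_aead_setauthsize(tfm, 8);

		req = aead_request_alloc(tfm, GFP_KERNEL);

		/* in place: buf = assoc || plaintext, with room for the tag */
		sg_init_one(&sg, buf, assoclen + ptlen + 8);
		aead_request_set_callback(req, 0, NULL, NULL);
		aead_request_set_ad(req, assoclen);
		aead_request_set_crypt(req, &sg, &sg, ptlen, iv);

		err = crypto_aead_encrypt(req);	/* tag lands after the text */

		aead_request_free(req);
		crypto_free_aead(tfm);
		return err;
	}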

+ 15 - 10
arch/arm64/crypto/aes-ce-cipher.c

@@ -47,24 +47,24 @@ static void aes_cipher_encrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
 	kernel_neon_begin_partial(4);
 
 	__asm__("	ld1	{v0.16b}, %[in]			;"
-		"	ld1	{v1.2d}, [%[key]], #16		;"
+		"	ld1	{v1.16b}, [%[key]], #16		;"
 		"	cmp	%w[rounds], #10			;"
 		"	bmi	0f				;"
 		"	bne	3f				;"
 		"	mov	v3.16b, v1.16b			;"
 		"	b	2f				;"
 		"0:	mov	v2.16b, v1.16b			;"
-		"	ld1	{v3.2d}, [%[key]], #16		;"
+		"	ld1	{v3.16b}, [%[key]], #16		;"
 		"1:	aese	v0.16b, v2.16b			;"
 		"	aesmc	v0.16b, v0.16b			;"
-		"2:	ld1	{v1.2d}, [%[key]], #16		;"
+		"2:	ld1	{v1.16b}, [%[key]], #16		;"
 		"	aese	v0.16b, v3.16b			;"
 		"	aesmc	v0.16b, v0.16b			;"
-		"3:	ld1	{v2.2d}, [%[key]], #16		;"
+		"3:	ld1	{v2.16b}, [%[key]], #16		;"
 		"	subs	%w[rounds], %w[rounds], #3	;"
 		"	aese	v0.16b, v1.16b			;"
 		"	aesmc	v0.16b, v0.16b			;"
-		"	ld1	{v3.2d}, [%[key]], #16		;"
+		"	ld1	{v3.16b}, [%[key]], #16		;"
 		"	bpl	1b				;"
 		"	aese	v0.16b, v2.16b			;"
 		"	eor	v0.16b, v0.16b, v3.16b		;"
@@ -92,24 +92,24 @@ static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
 	kernel_neon_begin_partial(4);
 
 	__asm__("	ld1	{v0.16b}, %[in]			;"
-		"	ld1	{v1.2d}, [%[key]], #16		;"
+		"	ld1	{v1.16b}, [%[key]], #16		;"
 		"	cmp	%w[rounds], #10			;"
 		"	bmi	0f				;"
 		"	bne	3f				;"
 		"	mov	v3.16b, v1.16b			;"
 		"	b	2f				;"
 		"0:	mov	v2.16b, v1.16b			;"
-		"	ld1	{v3.2d}, [%[key]], #16		;"
+		"	ld1	{v3.16b}, [%[key]], #16		;"
 		"1:	aesd	v0.16b, v2.16b			;"
 		"	aesimc	v0.16b, v0.16b			;"
-		"2:	ld1	{v1.2d}, [%[key]], #16		;"
+		"2:	ld1	{v1.16b}, [%[key]], #16		;"
 		"	aesd	v0.16b, v3.16b			;"
 		"	aesimc	v0.16b, v0.16b			;"
-		"3:	ld1	{v2.2d}, [%[key]], #16		;"
+		"3:	ld1	{v2.16b}, [%[key]], #16		;"
 		"	subs	%w[rounds], %w[rounds], #3	;"
 		"	aesd	v0.16b, v1.16b			;"
 		"	aesimc	v0.16b, v0.16b			;"
-		"	ld1	{v3.2d}, [%[key]], #16		;"
+		"	ld1	{v3.16b}, [%[key]], #16		;"
 		"	bpl	1b				;"
 		"	aesd	v0.16b, v2.16b			;"
 		"	eor	v0.16b, v0.16b, v3.16b		;"
@@ -173,7 +173,12 @@ int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
 		u32 *rki = ctx->key_enc + (i * kwords);
 		u32 *rko = rki + kwords;
 
+#ifndef CONFIG_CPU_BIG_ENDIAN
 		rko[0] = ror32(aes_sub(rki[kwords - 1]), 8) ^ rcon[i] ^ rki[0];
+#else
+		rko[0] = rol32(aes_sub(rki[kwords - 1]), 8) ^ (rcon[i] << 24) ^
+			 rki[0];
+#endif
 		rko[1] = rko[0] ^ rki[1];
 		rko[2] = rko[1] ^ rki[2];
 		rko[3] = rko[2] ^ rki[3];
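
The CONFIG_CPU_BIG_ENDIAN branch above compensates for ld1 {.16b} loads
presenting each 32-bit key lane byte-swapped on big-endian. A hedged scalar
illustration of why the two mixing steps agree (the helpers are redefined
locally for self-containment; the equality holds for any sub output s and any
rcon r below 0x100):

	#include <stdint.h>

	static uint32_t ror32(uint32_t v, int n) { return (v >> n) | (v << (32 - n)); }
	static uint32_t rol32(uint32_t v, int n) { return (v << n) | (v >> (32 - n)); }

	static uint32_t swab32(uint32_t v)
	{
		return (v >> 24) | ((v >> 8) & 0xff00) |
		       ((v << 8) & 0xff0000) | (v << 24);
	}

	/* LE step: ror32(s, 8) ^ r;  BE step: rol32(s, 8) ^ (r << 24) */
	static int be_step_matches(uint32_t s, uint32_t r)
	{
		return swab32(ror32(swab32(s), 8) ^ r) ==
		       (rol32(s, 8) ^ (r << 24));
	}

Since aes_sub() substitutes byte by byte, it commutes with the byte swap, so
checking the rotate/rcon step alone captures the difference between the two
branches.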

+ 1 - 0
arch/arm64/crypto/aes-ce.S

@@ -10,6 +10,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/assembler.h>
 
 #define AES_ENTRY(func)		ENTRY(ce_ ## func)
 #define AES_ENDPROC(func)	ENDPROC(ce_ ## func)

+ 158 - 223
arch/arm64/crypto/aes-glue.c

@@ -11,8 +11,8 @@
 #include <asm/neon.h>
 #include <asm/hwcap.h>
 #include <crypto/aes.h>
-#include <crypto/ablk_helper.h>
-#include <crypto/algapi.h>
+#include <crypto/internal/simd.h>
+#include <crypto/internal/skcipher.h>
 #include <linux/module.h>
 #include <linux/cpufeature.h>
 #include <crypto/xts.h>
@@ -80,13 +80,19 @@ struct crypto_aes_xts_ctx {
 	struct crypto_aes_ctx __aligned(8) key2;
 };
 
-static int xts_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+static int skcipher_aes_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
+			       unsigned int key_len)
+{
+	return aes_setkey(crypto_skcipher_tfm(tfm), in_key, key_len);
+}
+
+static int xts_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
 		       unsigned int key_len)
 {
-	struct crypto_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 	int ret;
 
-	ret = xts_check_key(tfm, in_key, key_len);
+	ret = xts_verify_key(tfm, in_key, key_len);
 	if (ret)
 		return ret;
 
@@ -97,111 +103,101 @@ static int xts_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 	if (!ret)
 		return 0;
 
-	tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+	crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
 	return -EINVAL;
 }
 
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
 {
-	struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
 	int err, first, rounds = 6 + ctx->key_length / 4;
-	struct blkcipher_walk walk;
+	struct skcipher_walk walk;
 	unsigned int blocks;
 
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	err = skcipher_walk_virt(&walk, req, true);
 
 	kernel_neon_begin();
 	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
 		aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
 				(u8 *)ctx->key_enc, rounds, blocks, first);
-		err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
+		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
 	kernel_neon_end();
 	return err;
 }
 
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
 {
-	struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
 	int err, first, rounds = 6 + ctx->key_length / 4;
-	struct blkcipher_walk walk;
+	struct skcipher_walk walk;
 	unsigned int blocks;
 
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	err = skcipher_walk_virt(&walk, req, true);
 
 	kernel_neon_begin();
 	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
 		aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
 				(u8 *)ctx->key_dec, rounds, blocks, first);
-		err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
+		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
 	kernel_neon_end();
 	return err;
 }
 
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int cbc_encrypt(struct skcipher_request *req)
 {
-	struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
 	int err, first, rounds = 6 + ctx->key_length / 4;
-	struct blkcipher_walk walk;
+	struct skcipher_walk walk;
 	unsigned int blocks;
 
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	err = skcipher_walk_virt(&walk, req, true);
 
 	kernel_neon_begin();
 	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
 		aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
 				(u8 *)ctx->key_enc, rounds, blocks, walk.iv,
 				first);
-		err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
+		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
 	kernel_neon_end();
 	return err;
 }
 
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int cbc_decrypt(struct skcipher_request *req)
 {
-	struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
 	int err, first, rounds = 6 + ctx->key_length / 4;
-	struct blkcipher_walk walk;
+	struct skcipher_walk walk;
 	unsigned int blocks;
 
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	err = skcipher_walk_virt(&walk, req, true);
 
 	kernel_neon_begin();
 	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
 		aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
 				(u8 *)ctx->key_dec, rounds, blocks, walk.iv,
 				first);
-		err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
+		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
 	kernel_neon_end();
 	return err;
 }
 
-static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ctr_encrypt(struct skcipher_request *req)
 {
-	struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
 	int err, first, rounds = 6 + ctx->key_length / 4;
-	struct blkcipher_walk walk;
+	struct skcipher_walk walk;
 	int blocks;
 
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
+	err = skcipher_walk_virt(&walk, req, true);
 
 	first = 1;
 	kernel_neon_begin();
@@ -209,17 +205,14 @@ static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
 		aes_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
 				(u8 *)ctx->key_enc, rounds, blocks, walk.iv,
 				first);
+		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 		first = 0;
-		nbytes -= blocks * AES_BLOCK_SIZE;
-		if (nbytes && nbytes == walk.nbytes % AES_BLOCK_SIZE)
-			break;
-		err = blkcipher_walk_done(desc, &walk,
-					  walk.nbytes % AES_BLOCK_SIZE);
 	}
-	if (walk.nbytes % AES_BLOCK_SIZE) {
-		u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
-		u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
+	if (walk.nbytes) {
 		u8 __aligned(8) tail[AES_BLOCK_SIZE];
+		unsigned int nbytes = walk.nbytes;
+		u8 *tdst = walk.dst.virt.addr;
+		u8 *tsrc = walk.src.virt.addr;
 
 		/*
 		 * Minimum alignment is 8 bytes, so if nbytes is <= 8, we need
@@ -230,227 +223,169 @@ static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
 		aes_ctr_encrypt(tail, tsrc, (u8 *)ctx->key_enc, rounds,
 				blocks, walk.iv, first);
 		memcpy(tdst, tail, nbytes);
-		err = blkcipher_walk_done(desc, &walk, 0);
+		err = skcipher_walk_done(&walk, 0);
 	}
 	kernel_neon_end();
 
 	return err;
 }
 
-static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int xts_encrypt(struct skcipher_request *req)
 {
-	struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 	int err, first, rounds = 6 + ctx->key1.key_length / 4;
-	struct blkcipher_walk walk;
+	struct skcipher_walk walk;
 	unsigned int blocks;
 
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	err = skcipher_walk_virt(&walk, req, true);
 
 	kernel_neon_begin();
 	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
 		aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
 				(u8 *)ctx->key1.key_enc, rounds, blocks,
 				(u8 *)ctx->key2.key_enc, walk.iv, first);
-		err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
+		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
 	kernel_neon_end();
 
 	return err;
 }
 
-static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int xts_decrypt(struct skcipher_request *req)
 {
-	struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 	int err, first, rounds = 6 + ctx->key1.key_length / 4;
-	struct blkcipher_walk walk;
+	struct skcipher_walk walk;
 	unsigned int blocks;
 
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	err = skcipher_walk_virt(&walk, req, true);
 
 	kernel_neon_begin();
 	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
 		aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
 				(u8 *)ctx->key1.key_dec, rounds, blocks,
 				(u8 *)ctx->key2.key_enc, walk.iv, first);
-		err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
+		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
 	kernel_neon_end();
 
 	return err;
 }
 
-static struct crypto_alg aes_algs[] = { {
-	.cra_name		= "__ecb-aes-" MODE,
-	.cra_driver_name	= "__driver-ecb-aes-" MODE,
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
-	.cra_alignmask		= 7,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_blkcipher = {
-		.min_keysize	= AES_MIN_KEY_SIZE,
-		.max_keysize	= AES_MAX_KEY_SIZE,
-		.ivsize		= 0,
-		.setkey		= aes_setkey,
-		.encrypt	= ecb_encrypt,
-		.decrypt	= ecb_decrypt,
+static struct skcipher_alg aes_algs[] = { {
+	.base = {
+		.cra_name		= "__ecb(aes)",
+		.cra_driver_name	= "__ecb-aes-" MODE,
+		.cra_priority		= PRIO,
+		.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.cra_blocksize		= AES_BLOCK_SIZE,
+		.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
+		.cra_alignmask		= 7,
+		.cra_module		= THIS_MODULE,
 	},
+	.min_keysize	= AES_MIN_KEY_SIZE,
+	.max_keysize	= AES_MAX_KEY_SIZE,
+	.setkey		= skcipher_aes_setkey,
+	.encrypt	= ecb_encrypt,
+	.decrypt	= ecb_decrypt,
 }, {
-	.cra_name		= "__cbc-aes-" MODE,
-	.cra_driver_name	= "__driver-cbc-aes-" MODE,
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
-	.cra_alignmask		= 7,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_blkcipher = {
-		.min_keysize	= AES_MIN_KEY_SIZE,
-		.max_keysize	= AES_MAX_KEY_SIZE,
-		.ivsize		= AES_BLOCK_SIZE,
-		.setkey		= aes_setkey,
-		.encrypt	= cbc_encrypt,
-		.decrypt	= cbc_decrypt,
+	.base = {
+		.cra_name		= "__cbc(aes)",
+		.cra_driver_name	= "__cbc-aes-" MODE,
+		.cra_priority		= PRIO,
+		.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.cra_blocksize		= AES_BLOCK_SIZE,
+		.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
+		.cra_alignmask		= 7,
+		.cra_module		= THIS_MODULE,
 	},
+	.min_keysize	= AES_MIN_KEY_SIZE,
+	.max_keysize	= AES_MAX_KEY_SIZE,
+	.ivsize		= AES_BLOCK_SIZE,
+	.setkey		= skcipher_aes_setkey,
+	.encrypt	= cbc_encrypt,
+	.decrypt	= cbc_decrypt,
 }, {
-	.cra_name		= "__ctr-aes-" MODE,
-	.cra_driver_name	= "__driver-ctr-aes-" MODE,
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
-	.cra_alignmask		= 7,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_blkcipher = {
-		.min_keysize	= AES_MIN_KEY_SIZE,
-		.max_keysize	= AES_MAX_KEY_SIZE,
-		.ivsize		= AES_BLOCK_SIZE,
-		.setkey		= aes_setkey,
-		.encrypt	= ctr_encrypt,
-		.decrypt	= ctr_encrypt,
+	.base = {
+		.cra_name		= "__ctr(aes)",
+		.cra_driver_name	= "__ctr-aes-" MODE,
+		.cra_priority		= PRIO,
+		.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.cra_blocksize		= 1,
+		.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
+		.cra_alignmask		= 7,
+		.cra_module		= THIS_MODULE,
 	},
+	.min_keysize	= AES_MIN_KEY_SIZE,
+	.max_keysize	= AES_MAX_KEY_SIZE,
+	.ivsize		= AES_BLOCK_SIZE,
+	.chunksize	= AES_BLOCK_SIZE,
+	.setkey		= skcipher_aes_setkey,
+	.encrypt	= ctr_encrypt,
+	.decrypt	= ctr_encrypt,
 }, {
-	.cra_name		= "__xts-aes-" MODE,
-	.cra_driver_name	= "__driver-xts-aes-" MODE,
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct crypto_aes_xts_ctx),
-	.cra_alignmask		= 7,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_blkcipher = {
-		.min_keysize	= 2 * AES_MIN_KEY_SIZE,
-		.max_keysize	= 2 * AES_MAX_KEY_SIZE,
-		.ivsize		= AES_BLOCK_SIZE,
-		.setkey		= xts_set_key,
-		.encrypt	= xts_encrypt,
-		.decrypt	= xts_decrypt,
+	.base = {
+		.cra_name		= "__xts(aes)",
+		.cra_driver_name	= "__xts-aes-" MODE,
+		.cra_priority		= PRIO,
+		.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.cra_blocksize		= AES_BLOCK_SIZE,
+		.cra_ctxsize		= sizeof(struct crypto_aes_xts_ctx),
+		.cra_alignmask		= 7,
+		.cra_module		= THIS_MODULE,
 	},
-}, {
-	.cra_name		= "ecb(aes)",
-	.cra_driver_name	= "ecb-aes-" MODE,
-	.cra_priority		= PRIO,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 7,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_ablkcipher = {
-		.min_keysize	= AES_MIN_KEY_SIZE,
-		.max_keysize	= AES_MAX_KEY_SIZE,
-		.ivsize		= 0,
-		.setkey		= ablk_set_key,
-		.encrypt	= ablk_encrypt,
-		.decrypt	= ablk_decrypt,
-	}
-}, {
-	.cra_name		= "cbc(aes)",
-	.cra_driver_name	= "cbc-aes-" MODE,
-	.cra_priority		= PRIO,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 7,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_ablkcipher = {
-		.min_keysize	= AES_MIN_KEY_SIZE,
-		.max_keysize	= AES_MAX_KEY_SIZE,
-		.ivsize		= AES_BLOCK_SIZE,
-		.setkey		= ablk_set_key,
-		.encrypt	= ablk_encrypt,
-		.decrypt	= ablk_decrypt,
-	}
-}, {
-	.cra_name		= "ctr(aes)",
-	.cra_driver_name	= "ctr-aes-" MODE,
-	.cra_priority		= PRIO,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 7,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_ablkcipher = {
-		.min_keysize	= AES_MIN_KEY_SIZE,
-		.max_keysize	= AES_MAX_KEY_SIZE,
-		.ivsize		= AES_BLOCK_SIZE,
-		.setkey		= ablk_set_key,
-		.encrypt	= ablk_encrypt,
-		.decrypt	= ablk_decrypt,
-	}
-}, {
-	.cra_name		= "xts(aes)",
-	.cra_driver_name	= "xts-aes-" MODE,
-	.cra_priority		= PRIO,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 7,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_ablkcipher = {
-		.min_keysize	= 2 * AES_MIN_KEY_SIZE,
-		.max_keysize	= 2 * AES_MAX_KEY_SIZE,
-		.ivsize		= AES_BLOCK_SIZE,
-		.setkey		= ablk_set_key,
-		.encrypt	= ablk_encrypt,
-		.decrypt	= ablk_decrypt,
-	}
+	.min_keysize	= 2 * AES_MIN_KEY_SIZE,
+	.max_keysize	= 2 * AES_MAX_KEY_SIZE,
+	.ivsize		= AES_BLOCK_SIZE,
+	.setkey		= xts_set_key,
+	.encrypt	= xts_encrypt,
+	.decrypt	= xts_decrypt,
 } };
 
-static int __init aes_init(void)
+static struct simd_skcipher_alg *aes_simd_algs[ARRAY_SIZE(aes_algs)];
+
+static void aes_exit(void)
 {
-	return crypto_register_algs(aes_algs, ARRAY_SIZE(aes_algs));
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(aes_simd_algs) && aes_simd_algs[i]; i++)
+		simd_skcipher_free(aes_simd_algs[i]);
+
+	crypto_unregister_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
 }
 
-static void __exit aes_exit(void)
+static int __init aes_init(void)
 {
-	crypto_unregister_algs(aes_algs, ARRAY_SIZE(aes_algs));
+	struct simd_skcipher_alg *simd;
+	const char *basename;
+	const char *algname;
+	const char *drvname;
+	int err;
+	int i;
+
+	err = crypto_register_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
+	if (err)
+		return err;
+
+	for (i = 0; i < ARRAY_SIZE(aes_algs); i++) {
+		algname = aes_algs[i].base.cra_name + 2;
+		drvname = aes_algs[i].base.cra_driver_name + 2;
+		basename = aes_algs[i].base.cra_driver_name;
+		simd = simd_skcipher_create_compat(algname, drvname, basename);
+		err = PTR_ERR(simd);
+		if (IS_ERR(simd))
+			goto unregister_simds;
+
+		aes_simd_algs[i] = simd;
+	}
+
+	return 0;
+
+unregister_simds:
+	aes_exit();
+	return err;
 }
 
 #ifdef USE_V8_CRYPTO_EXTENSIONS
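
With the rewrite above, aes_init() registers the internal "__ecb(aes)",
"__cbc(aes)", "__ctr(aes)" and "__xts(aes)" implementations and then layers a
simd skcipher wrapper over each, so the public names resolve to the async
wrappers. A hedged sketch of a kernel caller landing on one of them
(ctr_aes_once() is illustrative; -EINPROGRESS handling is omitted):

	#include <crypto/skcipher.h>
	#include <linux/err.h>
	#include <linux/scatterlist.h>

	static int ctr_aes_once(u8 *buf, unsigned int len,
				const u8 key[16], u8 iv[16])
	{
		struct crypto_skcipher *tfm;
		struct skcipher_request *req;
		struct scatterlist sg;
		int err;

		tfm = crypto_alloc_skcipher("ctr(aes)", 0, 0);
		if (IS_ERR(tfm))
			return PTR_ERR(tfm);

		err = crypto_skcipher_setkey(tfm, key, 16);
		if (err)
			goto out;

		req = skcipher_request_alloc(tfm, GFP_KERNEL);
		sg_init_one(&sg, buf, len);
		skcipher_request_set_callback(req, 0, NULL, NULL);
		skcipher_request_set_crypt(req, &sg, &sg, len, iv);

		err = crypto_skcipher_encrypt(req);	/* in-place CTR */

		skcipher_request_free(req);
	out:
		crypto_free_skcipher(tfm);
		return err;
	}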

+ 2 - 1
arch/arm64/crypto/aes-modes.S

@@ -386,7 +386,8 @@ AES_ENDPROC(aes_ctr_encrypt)
 	.endm
 
 .Lxts_mul_x:
-	.word		1, 0, 0x87, 0
+CPU_LE(	.quad		1, 0x87		)
+CPU_BE(	.quad		0x87, 1		)
 
 AES_ENTRY(aes_xts_encrypt)
 	FRAME_PUSH

+ 15 - 10
arch/arm64/crypto/aes-neon.S

@@ -9,6 +9,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/assembler.h>
 
 #define AES_ENTRY(func)		ENTRY(neon_ ## func)
 #define AES_ENDPROC(func)	ENDPROC(neon_ ## func)
@@ -83,13 +84,13 @@
 	.endm
 
 	.macro		do_block, enc, in, rounds, rk, rkp, i
-	ld1		{v15.16b}, [\rk]
+	ld1		{v15.4s}, [\rk]
 	add		\rkp, \rk, #16
 	mov		\i, \rounds
 1111:	eor		\in\().16b, \in\().16b, v15.16b		/* ^round key */
 	tbl		\in\().16b, {\in\().16b}, v13.16b	/* ShiftRows */
 	sub_bytes	\in
-	ld1		{v15.16b}, [\rkp], #16
+	ld1		{v15.4s}, [\rkp], #16
 	subs		\i, \i, #1
 	beq		2222f
 	.if		\enc == 1
@@ -229,7 +230,7 @@
 	.endm
 
 	.macro		do_block_2x, enc, in0, in1 rounds, rk, rkp, i
-	ld1		{v15.16b}, [\rk]
+	ld1		{v15.4s}, [\rk]
 	add		\rkp, \rk, #16
 	mov		\i, \rounds
 1111:	eor		\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
@@ -237,7 +238,7 @@
 	sub_bytes_2x	\in0, \in1
 	tbl		\in0\().16b, {\in0\().16b}, v13.16b	/* ShiftRows */
 	tbl		\in1\().16b, {\in1\().16b}, v13.16b	/* ShiftRows */
-	ld1		{v15.16b}, [\rkp], #16
+	ld1		{v15.4s}, [\rkp], #16
 	subs		\i, \i, #1
 	beq		2222f
 	.if		\enc == 1
@@ -254,7 +255,7 @@
 	.endm
 
 	.macro		do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i
-	ld1		{v15.16b}, [\rk]
+	ld1		{v15.4s}, [\rk]
 	add		\rkp, \rk, #16
 	mov		\i, \rounds
 1111:	eor		\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
@@ -266,7 +267,7 @@
 	tbl		\in1\().16b, {\in1\().16b}, v13.16b	/* ShiftRows */
 	tbl		\in2\().16b, {\in2\().16b}, v13.16b	/* ShiftRows */
 	tbl		\in3\().16b, {\in3\().16b}, v13.16b	/* ShiftRows */
-	ld1		{v15.16b}, [\rkp], #16
+	ld1		{v15.4s}, [\rkp], #16
 	subs		\i, \i, #1
 	beq		2222f
 	.if		\enc == 1
@@ -306,12 +307,16 @@
 	.text
 	.align		4
 .LForward_ShiftRows:
-	.byte		0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3
-	.byte		0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb
+CPU_LE(	.byte		0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3	)
+CPU_LE(	.byte		0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb	)
+CPU_BE(	.byte		0xb, 0x6, 0x1, 0xc, 0x7, 0x2, 0xd, 0x8	)
+CPU_BE(	.byte		0x3, 0xe, 0x9, 0x4, 0xf, 0xa, 0x5, 0x0	)
 
 .LReverse_ShiftRows:
-	.byte		0x0, 0xd, 0xa, 0x7, 0x4, 0x1, 0xe, 0xb
-	.byte		0x8, 0x5, 0x2, 0xf, 0xc, 0x9, 0x6, 0x3
+CPU_LE(	.byte		0x0, 0xd, 0xa, 0x7, 0x4, 0x1, 0xe, 0xb	)
+CPU_LE(	.byte		0x8, 0x5, 0x2, 0xf, 0xc, 0x9, 0x6, 0x3	)
+CPU_BE(	.byte		0x3, 0x6, 0x9, 0xc, 0xf, 0x2, 0x5, 0x8	)
+CPU_BE(	.byte		0xb, 0xe, 0x1, 0x4, 0x7, 0xa, 0xd, 0x0	)
 
 .LForward_Sbox:
 	.byte		0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5

+ 266 - 0
arch/arm64/crypto/crc32-ce-core.S

@@ -0,0 +1,266 @@
+/*
+ * Accelerated CRC32(C) using arm64 CRC, NEON and Crypto Extensions instructions
+ *
+ * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see http://www.gnu.org/licenses
+ *
+ * Please  visit http://www.xyratex.com/contact if you need additional
+ * information or have any questions.
+ *
+ * GPL HEADER END
+ */
+
+/*
+ * Copyright 2012 Xyratex Technology Limited
+ *
+ * Using hardware provided PCLMULQDQ instruction to accelerate the CRC32
+ * calculation.
+ * CRC32 polynomial:0x04c11db7(BE)/0xEDB88320(LE)
+ * PCLMULQDQ is a new instruction in Intel SSE4.2, the reference can be found
+ * at:
+ * http://www.intel.com/products/processor/manuals/
+ * Intel(R) 64 and IA-32 Architectures Software Developer's Manual
+ * Volume 2B: Instruction Set Reference, N-Z
+ *
+ * Authors:   Gregory Prestas <Gregory_Prestas@us.xyratex.com>
+ *	      Alexander Boyko <Alexander_Boyko@xyratex.com>
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+	.text
+	.align		6
+	.cpu		generic+crypto+crc
+
+.Lcrc32_constants:
+	/*
+	 * [(x4*128+32 mod P(x) << 32)]'  << 1   = 0x154442bd4
+	 * #define CONSTANT_R1  0x154442bd4LL
+	 *
+	 * [(x4*128-32 mod P(x) << 32)]' << 1   = 0x1c6e41596
+	 * #define CONSTANT_R2  0x1c6e41596LL
+	 */
+	.octa		0x00000001c6e415960000000154442bd4
+
+	/*
+	 * [(x128+32 mod P(x) << 32)]'   << 1   = 0x1751997d0
+	 * #define CONSTANT_R3  0x1751997d0LL
+	 *
+	 * [(x128-32 mod P(x) << 32)]'   << 1   = 0x0ccaa009e
+	 * #define CONSTANT_R4  0x0ccaa009eLL
+	 */
+	.octa		0x00000000ccaa009e00000001751997d0
+
+	/*
+	 * [(x64 mod P(x) << 32)]'       << 1   = 0x163cd6124
+	 * #define CONSTANT_R5  0x163cd6124LL
+	 */
+	.quad		0x0000000163cd6124
+	.quad		0x00000000FFFFFFFF
+
+	/*
+	 * #define CRCPOLY_TRUE_LE_FULL 0x1DB710641LL
+	 *
+	 * Barrett Reduction constant (u64`) = u` = (x**64 / P(x))`
+	 *                                                      = 0x1F7011641LL
+	 * #define CONSTANT_RU  0x1F7011641LL
+	 */
+	.octa		0x00000001F701164100000001DB710641
+
+.Lcrc32c_constants:
+	.octa		0x000000009e4addf800000000740eef02
+	.octa		0x000000014cd00bd600000000f20c0dfe
+	.quad		0x00000000dd45aab8
+	.quad		0x00000000FFFFFFFF
+	.octa		0x00000000dea713f10000000105ec76f0
+
+	vCONSTANT	.req	v0
+	dCONSTANT	.req	d0
+	qCONSTANT	.req	q0
+
+	BUF		.req	x0
+	LEN		.req	x1
+	CRC		.req	x2
+
+	vzr		.req	v9
+
+	/**
+	 * Calculate crc32
+	 * BUF - buffer
+	 * LEN - size of buffer (multiple of 16 bytes), LEN should be > 63
+	 * CRC - initial crc32
+	 * return crc32 in w0
+	 * uint crc32_pmull_le(unsigned char const *buffer,
+	 *                     size_t len, uint crc32)
+	 */
+ENTRY(crc32_pmull_le)
+	adr		x3, .Lcrc32_constants
+	b		0f
+
+ENTRY(crc32c_pmull_le)
+	adr		x3, .Lcrc32c_constants
+
+0:	bic		LEN, LEN, #15
+	ld1		{v1.16b-v4.16b}, [BUF], #0x40
+	movi		vzr.16b, #0
+	fmov		dCONSTANT, CRC
+	eor		v1.16b, v1.16b, vCONSTANT.16b
+	sub		LEN, LEN, #0x40
+	cmp		LEN, #0x40
+	b.lt		less_64
+
+	ldr		qCONSTANT, [x3]
+
+loop_64:		/* fold 64 bytes, a full cache line, per iteration */
+	sub		LEN, LEN, #0x40
+
+	pmull2		v5.1q, v1.2d, vCONSTANT.2d
+	pmull2		v6.1q, v2.2d, vCONSTANT.2d
+	pmull2		v7.1q, v3.2d, vCONSTANT.2d
+	pmull2		v8.1q, v4.2d, vCONSTANT.2d
+
+	pmull		v1.1q, v1.1d, vCONSTANT.1d
+	pmull		v2.1q, v2.1d, vCONSTANT.1d
+	pmull		v3.1q, v3.1d, vCONSTANT.1d
+	pmull		v4.1q, v4.1d, vCONSTANT.1d
+
+	eor		v1.16b, v1.16b, v5.16b
+	ld1		{v5.16b}, [BUF], #0x10
+	eor		v2.16b, v2.16b, v6.16b
+	ld1		{v6.16b}, [BUF], #0x10
+	eor		v3.16b, v3.16b, v7.16b
+	ld1		{v7.16b}, [BUF], #0x10
+	eor		v4.16b, v4.16b, v8.16b
+	ld1		{v8.16b}, [BUF], #0x10
+
+	eor		v1.16b, v1.16b, v5.16b
+	eor		v2.16b, v2.16b, v6.16b
+	eor		v3.16b, v3.16b, v7.16b
+	eor		v4.16b, v4.16b, v8.16b
+
+	cmp		LEN, #0x40
+	b.ge		loop_64
+
+less_64:		/* fold the cache line down into 128 bits */
+	ldr		qCONSTANT, [x3, #16]
+
+	pmull2		v5.1q, v1.2d, vCONSTANT.2d
+	pmull		v1.1q, v1.1d, vCONSTANT.1d
+	eor		v1.16b, v1.16b, v5.16b
+	eor		v1.16b, v1.16b, v2.16b
+
+	pmull2		v5.1q, v1.2d, vCONSTANT.2d
+	pmull		v1.1q, v1.1d, vCONSTANT.1d
+	eor		v1.16b, v1.16b, v5.16b
+	eor		v1.16b, v1.16b, v3.16b
+
+	pmull2		v5.1q, v1.2d, vCONSTANT.2d
+	pmull		v1.1q, v1.1d, vCONSTANT.1d
+	eor		v1.16b, v1.16b, v5.16b
+	eor		v1.16b, v1.16b, v4.16b
+
+	cbz		LEN, fold_64
+
+loop_16:		/* fold the rest of the buffer into 128 bits */
+	subs		LEN, LEN, #0x10
+
+	ld1		{v2.16b}, [BUF], #0x10
+	pmull2		v5.1q, v1.2d, vCONSTANT.2d
+	pmull		v1.1q, v1.1d, vCONSTANT.1d
+	eor		v1.16b, v1.16b, v5.16b
+	eor		v1.16b, v1.16b, v2.16b
+
+	b.ne		loop_16
+
+fold_64:
+	/* perform the last 64 bit fold, which also adds 32 zeroes
+	 * to the input stream */
+	ext		v2.16b, v1.16b, v1.16b, #8
+	pmull2		v2.1q, v2.2d, vCONSTANT.2d
+	ext		v1.16b, v1.16b, vzr.16b, #8
+	eor		v1.16b, v1.16b, v2.16b
+
+	/* final 32-bit fold */
+	ldr		dCONSTANT, [x3, #32]
+	ldr		d3, [x3, #40]
+
+	ext		v2.16b, v1.16b, vzr.16b, #4
+	and		v1.16b, v1.16b, v3.16b
+	pmull		v1.1q, v1.1d, vCONSTANT.1d
+	eor		v1.16b, v1.16b, v2.16b
+
+	/* Finish up with the bit-reversed barrett reduction 64 ==> 32 bits */
+	ldr		qCONSTANT, [x3, #48]
+
+	and		v2.16b, v1.16b, v3.16b
+	ext		v2.16b, vzr.16b, v2.16b, #8
+	pmull2		v2.1q, v2.2d, vCONSTANT.2d
+	and		v2.16b, v2.16b, v3.16b
+	pmull		v2.1q, v2.1d, vCONSTANT.1d
+	eor		v1.16b, v1.16b, v2.16b
+	mov		w0, v1.s[1]
+
+	ret
+ENDPROC(crc32_pmull_le)
+ENDPROC(crc32c_pmull_le)
+
+	.macro		__crc32, c
+0:	subs		x2, x2, #16
+	b.mi		8f
+	ldp		x3, x4, [x1], #16
+CPU_BE(	rev		x3, x3		)
+CPU_BE(	rev		x4, x4		)
+	crc32\c\()x	w0, w0, x3
+	crc32\c\()x	w0, w0, x4
+	b.ne		0b
+	ret
+
+8:	tbz		x2, #3, 4f
+	ldr		x3, [x1], #8
+CPU_BE(	rev		x3, x3		)
+	crc32\c\()x	w0, w0, x3
+4:	tbz		x2, #2, 2f
+	ldr		w3, [x1], #4
+CPU_BE(	rev		w3, w3		)
+	crc32\c\()w	w0, w0, w3
+2:	tbz		x2, #1, 1f
+	ldrh		w3, [x1], #2
+CPU_BE(	rev16		w3, w3		)
+	crc32\c\()h	w0, w0, w3
+1:	tbz		x2, #0, 0f
+	ldrb		w3, [x1]
+	crc32\c\()b	w0, w0, w3
+0:	ret
+	.endm
+
+	.align		5
+ENTRY(crc32_armv8_le)
+	__crc32
+ENDPROC(crc32_armv8_le)
+
+	.align		5
+ENTRY(crc32c_armv8_le)
+	__crc32		c
+ENDPROC(crc32c_armv8_le)

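The fold loops and the final Barrett reduction above are implementation detail; the contract, which the glue code below makes explicit by falling back to crc32_le()/__crc32c_le(), is the ordinary bit-reflected CRC32 over polynomial 0xEDB88320 (0x82F63B78 for CRC32C). A minimal bitwise reference in C for cross-checking (names are illustrative):

#include <stddef.h>
#include <stdint.h>

/*
 * Bit-reflected CRC, one bit at a time: poly = 0xEDB88320 for CRC32,
 * 0x82F63B78 for CRC32C. Note there is no final inversion here; for
 * crc32c the ~0 seed and the final ~ are applied by the glue code,
 * not by the assembly routines.
 */
static uint32_t crc_le_bitwise(uint32_t crc, const uint8_t *p, size_t len,
			       uint32_t poly)
{
	while (len--) {
		crc ^= *p++;
		for (int i = 0; i < 8; i++)
			crc = (crc >> 1) ^ ((crc & 1) ? poly : 0);
	}
	return crc;
}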
+ 212 - 0
arch/arm64/crypto/crc32-ce-glue.c

@@ -0,0 +1,212 @@
+/*
+ * Accelerated CRC32(C) using arm64 NEON and Crypto Extensions instructions
+ *
+ * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/cpufeature.h>
+#include <linux/crc32.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+
+#include <crypto/internal/hash.h>
+
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+#include <asm/unaligned.h>
+
+#define PMULL_MIN_LEN		64L	/* minimum size of buffer
+					 * for crc32_pmull_le */
+#define SCALE_F			16L	/* size of NEON register */
+
+asmlinkage u32 crc32_pmull_le(const u8 buf[], u64 len, u32 init_crc);
+asmlinkage u32 crc32_armv8_le(u32 init_crc, const u8 buf[], size_t len);
+
+asmlinkage u32 crc32c_pmull_le(const u8 buf[], u64 len, u32 init_crc);
+asmlinkage u32 crc32c_armv8_le(u32 init_crc, const u8 buf[], size_t len);
+
+static u32 (*fallback_crc32)(u32 init_crc, const u8 buf[], size_t len);
+static u32 (*fallback_crc32c)(u32 init_crc, const u8 buf[], size_t len);
+
+static int crc32_pmull_cra_init(struct crypto_tfm *tfm)
+{
+	u32 *key = crypto_tfm_ctx(tfm);
+
+	*key = 0;
+	return 0;
+}
+
+static int crc32c_pmull_cra_init(struct crypto_tfm *tfm)
+{
+	u32 *key = crypto_tfm_ctx(tfm);
+
+	*key = ~0;
+	return 0;
+}
+
+static int crc32_pmull_setkey(struct crypto_shash *hash, const u8 *key,
+			      unsigned int keylen)
+{
+	u32 *mctx = crypto_shash_ctx(hash);
+
+	if (keylen != sizeof(u32)) {
+		crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+	*mctx = le32_to_cpup((__le32 *)key);
+	return 0;
+}
+
+static int crc32_pmull_init(struct shash_desc *desc)
+{
+	u32 *mctx = crypto_shash_ctx(desc->tfm);
+	u32 *crc = shash_desc_ctx(desc);
+
+	*crc = *mctx;
+	return 0;
+}
+
+static int crc32_pmull_update(struct shash_desc *desc, const u8 *data,
+			 unsigned int length)
+{
+	u32 *crc = shash_desc_ctx(desc);
+	unsigned int l;
+
+	if ((u64)data % SCALE_F) {
+		l = min_t(u32, length, SCALE_F - ((u64)data % SCALE_F));
+
+		*crc = fallback_crc32(*crc, data, l);
+
+		data += l;
+		length -= l;
+	}
+
+	if (length >= PMULL_MIN_LEN) {
+		l = round_down(length, SCALE_F);
+
+		kernel_neon_begin_partial(10);
+		*crc = crc32_pmull_le(data, l, *crc);
+		kernel_neon_end();
+
+		data += l;
+		length -= l;
+	}
+
+	if (length > 0)
+		*crc = fallback_crc32(*crc, data, length);
+
+	return 0;
+}
+
+static int crc32c_pmull_update(struct shash_desc *desc, const u8 *data,
+			 unsigned int length)
+{
+	u32 *crc = shash_desc_ctx(desc);
+	unsigned int l;
+
+	if ((u64)data % SCALE_F) {
+		l = min_t(u32, length, SCALE_F - ((u64)data % SCALE_F));
+
+		*crc = fallback_crc32c(*crc, data, l);
+
+		data += l;
+		length -= l;
+	}
+
+	if (length >= PMULL_MIN_LEN) {
+		l = round_down(length, SCALE_F);
+
+		kernel_neon_begin_partial(10);
+		*crc = crc32c_pmull_le(data, l, *crc);
+		kernel_neon_end();
+
+		data += l;
+		length -= l;
+	}
+
+	if (length > 0)
+		*crc = fallback_crc32c(*crc, data, length);
+
+	return 0;
+}
+
+static int crc32_pmull_final(struct shash_desc *desc, u8 *out)
+{
+	u32 *crc = shash_desc_ctx(desc);
+
+	put_unaligned_le32(*crc, out);
+	return 0;
+}
+
+static int crc32c_pmull_final(struct shash_desc *desc, u8 *out)
+{
+	u32 *crc = shash_desc_ctx(desc);
+
+	put_unaligned_le32(~*crc, out);
+	return 0;
+}
+
+static struct shash_alg crc32_pmull_algs[] = { {
+	.setkey			= crc32_pmull_setkey,
+	.init			= crc32_pmull_init,
+	.update			= crc32_pmull_update,
+	.final			= crc32_pmull_final,
+	.descsize		= sizeof(u32),
+	.digestsize		= sizeof(u32),
+
+	.base.cra_ctxsize	= sizeof(u32),
+	.base.cra_init		= crc32_pmull_cra_init,
+	.base.cra_name		= "crc32",
+	.base.cra_driver_name	= "crc32-arm64-ce",
+	.base.cra_priority	= 200,
+	.base.cra_blocksize	= 1,
+	.base.cra_module	= THIS_MODULE,
+}, {
+	.setkey			= crc32_pmull_setkey,
+	.init			= crc32_pmull_init,
+	.update			= crc32c_pmull_update,
+	.final			= crc32c_pmull_final,
+	.descsize		= sizeof(u32),
+	.digestsize		= sizeof(u32),
+
+	.base.cra_ctxsize	= sizeof(u32),
+	.base.cra_init		= crc32c_pmull_cra_init,
+	.base.cra_name		= "crc32c",
+	.base.cra_driver_name	= "crc32c-arm64-ce",
+	.base.cra_priority	= 200,
+	.base.cra_blocksize	= 1,
+	.base.cra_module	= THIS_MODULE,
+} };
+
+static int __init crc32_pmull_mod_init(void)
+{
+	if (elf_hwcap & HWCAP_CRC32) {
+		fallback_crc32 = crc32_armv8_le;
+		fallback_crc32c = crc32c_armv8_le;
+	} else {
+		fallback_crc32 = crc32_le;
+		fallback_crc32c = __crc32c_le;
+	}
+
+	return crypto_register_shashes(crc32_pmull_algs,
+				       ARRAY_SIZE(crc32_pmull_algs));
+}
+
+static void __exit crc32_pmull_mod_exit(void)
+{
+	crypto_unregister_shashes(crc32_pmull_algs,
+				  ARRAY_SIZE(crc32_pmull_algs));
+}
+
+module_cpu_feature_match(PMULL, crc32_pmull_mod_init);
+module_exit(crc32_pmull_mod_exit);
+
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");

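Because this file registers ordinary shash algorithms, callers reach the accelerated code through the generic crypto API and priority resolution. A hedged in-kernel usage sketch, with error handling trimmed and buf/len as placeholders:

#include <crypto/hash.h>
#include <linux/err.h>
#include <linux/types.h>

static int crc32_digest_example(const u8 *buf, unsigned int len, u32 *out)
{
	struct crypto_shash *tfm;
	u32 seed = 0;	/* the shash key is the (little-endian) seed */
	int ret;

	/* picks the highest-priority "crc32"; with PMULL present that
	 * resolves to "crc32-arm64-ce" (priority 200) */
	tfm = crypto_alloc_shash("crc32", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	ret = crypto_shash_setkey(tfm, (u8 *)&seed, sizeof(seed));
	if (!ret) {
		SHASH_DESC_ON_STACK(desc, tfm);

		desc->tfm = tfm;
		desc->flags = 0;
		ret = crypto_shash_digest(desc, buf, len, (u8 *)out);
	}
	crypto_free_shash(tfm);
	return ret;
}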
+ 392 - 0
arch/arm64/crypto/crct10dif-ce-core.S

@@ -0,0 +1,392 @@
+//
+// Accelerated CRC-T10DIF using arm64 NEON and Crypto Extensions instructions
+//
+// Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License version 2 as
+// published by the Free Software Foundation.
+//
+
+//
+// Implement fast CRC-T10DIF computation with SSE and PCLMULQDQ instructions
+//
+// Copyright (c) 2013, Intel Corporation
+//
+// Authors:
+//     Erdinc Ozturk <erdinc.ozturk@intel.com>
+//     Vinodh Gopal <vinodh.gopal@intel.com>
+//     James Guilford <james.guilford@intel.com>
+//     Tim Chen <tim.c.chen@linux.intel.com>
+//
+// This software is available to you under a choice of one of two
+// licenses.  You may choose to be licensed under the terms of the GNU
+// General Public License (GPL) Version 2, available from the file
+// COPYING in the main directory of this source tree, or the
+// OpenIB.org BSD license below:
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+//   notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+//   notice, this list of conditions and the following disclaimer in the
+//   documentation and/or other materials provided with the
+//   distribution.
+//
+// * Neither the name of the Intel Corporation nor the names of its
+//   contributors may be used to endorse or promote products derived from
+//   this software without specific prior written permission.
+//
+//
+// THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//       Function API:
+//       UINT16 crc_t10dif_pcl(
+//               UINT16 init_crc, //initial CRC value, 16 bits
+//               const unsigned char *buf, //buffer pointer to calculate CRC on
+//               UINT64 len //buffer length in bytes (64-bit data)
+//       );
+//
+//       Reference paper titled "Fast CRC Computation for Generic
+//	Polynomials Using PCLMULQDQ Instruction"
+//       URL: http://www.intel.com/content/dam/www/public/us/en/documents
+//  /white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
+//
+//
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+	.text
+	.cpu		generic+crypto
+
+	arg1_low32	.req	w0
+	arg2		.req	x1
+	arg3		.req	x2
+
+	vzr		.req	v13
+
+ENTRY(crc_t10dif_pmull)
+	movi		vzr.16b, #0		// init zero register
+
+	// adjust the 16-bit initial_crc value, scale it to 32 bits
+	lsl		arg1_low32, arg1_low32, #16
+
+	// check if smaller than 256
+	cmp		arg3, #256
+
+	// for sizes less than 256, we can't fold 64B at a time...
+	b.lt		_less_than_128
+
+	// load the initial crc value
+	// the crc value does not need to be byte-reflected, but it needs
+	// to be moved to the high part of the register because the data
+	// will be byte-reflected and will align with the initial crc at
+	// the correct place.
+	movi		v10.16b, #0
+	mov		v10.s[3], arg1_low32		// initial crc
+
+	// receive the initial 64B data, xor the initial crc value
+	ldp		q0, q1, [arg2]
+	ldp		q2, q3, [arg2, #0x20]
+	ldp		q4, q5, [arg2, #0x40]
+	ldp		q6, q7, [arg2, #0x60]
+	add		arg2, arg2, #0x80
+
+CPU_LE(	rev64		v0.16b, v0.16b			)
+CPU_LE(	rev64		v1.16b, v1.16b			)
+CPU_LE(	rev64		v2.16b, v2.16b			)
+CPU_LE(	rev64		v3.16b, v3.16b			)
+CPU_LE(	rev64		v4.16b, v4.16b			)
+CPU_LE(	rev64		v5.16b, v5.16b			)
+CPU_LE(	rev64		v6.16b, v6.16b			)
+CPU_LE(	rev64		v7.16b, v7.16b			)
+
+CPU_LE(	ext		v0.16b, v0.16b, v0.16b, #8	)
+CPU_LE(	ext		v1.16b, v1.16b, v1.16b, #8	)
+CPU_LE(	ext		v2.16b, v2.16b, v2.16b, #8	)
+CPU_LE(	ext		v3.16b, v3.16b, v3.16b, #8	)
+CPU_LE(	ext		v4.16b, v4.16b, v4.16b, #8	)
+CPU_LE(	ext		v5.16b, v5.16b, v5.16b, #8	)
+CPU_LE(	ext		v6.16b, v6.16b, v6.16b, #8	)
+CPU_LE(	ext		v7.16b, v7.16b, v7.16b, #8	)
+
+	// XOR the initial_crc value
+	eor		v0.16b, v0.16b, v10.16b
+
+	ldr		q10, rk3	// q10 has rk3 and rk4
+					// type of pmull instruction
+					// will determine which constant to use
+
+	//
+	// we subtract 256 instead of 128 to save one instruction from the loop
+	//
+	sub		arg3, arg3, #256
+
+	// at this point in the code, there are 64*x+y (0 <= y < 64) bytes
+	// of buffer remaining. The _fold_64_B_loop will fold 64B at a time
+	// until we have 64+y bytes of buffer left
+
+
+	// fold 64B at a time. This section of the code folds 4 vector
+	// registers in parallel
+_fold_64_B_loop:
+
+	.macro		fold64, reg1, reg2
+	ldp		q11, q12, [arg2], #0x20
+
+	pmull2		v8.1q, \reg1\().2d, v10.2d
+	pmull		\reg1\().1q, \reg1\().1d, v10.1d
+
+CPU_LE(	rev64		v11.16b, v11.16b		)
+CPU_LE(	rev64		v12.16b, v12.16b		)
+
+	pmull2		v9.1q, \reg2\().2d, v10.2d
+	pmull		\reg2\().1q, \reg2\().1d, v10.1d
+
+CPU_LE(	ext		v11.16b, v11.16b, v11.16b, #8	)
+CPU_LE(	ext		v12.16b, v12.16b, v12.16b, #8	)
+
+	eor		\reg1\().16b, \reg1\().16b, v8.16b
+	eor		\reg2\().16b, \reg2\().16b, v9.16b
+	eor		\reg1\().16b, \reg1\().16b, v11.16b
+	eor		\reg2\().16b, \reg2\().16b, v12.16b
+	.endm
+
+	fold64		v0, v1
+	fold64		v2, v3
+	fold64		v4, v5
+	fold64		v6, v7
+
+	subs		arg3, arg3, #128
+
+	// check if there is another 64B in the buffer to be able to fold
+	b.ge		_fold_64_B_loop
+
+	// at this point, the buffer pointer is pointing at the last y bytes
+	// of the buffer, and the 128B of folded data is held in 8 of the
+	// vector registers: v0-v7
+
+	// fold the 8 vector registers to 1 vector register with different
+	// constants
+
+	ldr		q10, rk9
+
+	.macro		fold16, reg, rk
+	pmull		v8.1q, \reg\().1d, v10.1d
+	pmull2		\reg\().1q, \reg\().2d, v10.2d
+	.ifnb		\rk
+	ldr		q10, \rk
+	.endif
+	eor		v7.16b, v7.16b, v8.16b
+	eor		v7.16b, v7.16b, \reg\().16b
+	.endm
+
+	fold16		v0, rk11
+	fold16		v1, rk13
+	fold16		v2, rk15
+	fold16		v3, rk17
+	fold16		v4, rk19
+	fold16		v5, rk1
+	fold16		v6
+
+	// instead of adding 128, we add 128-16 to the loop counter to save
+	// one instruction from the loop below; instead of a cmp instruction,
+	// we use the negative flag with the b.lt instruction
+	adds		arg3, arg3, #(128-16)
+	b.lt		_final_reduction_for_128
+
+	// now we have 16+y bytes left to reduce. 16 Bytes is in register v7
+	// and the rest is in memory. We can fold 16 bytes at a time if y>=16
+	// continue folding 16B at a time
+
+_16B_reduction_loop:
+	pmull		v8.1q, v7.1d, v10.1d
+	pmull2		v7.1q, v7.2d, v10.2d
+	eor		v7.16b, v7.16b, v8.16b
+
+	ldr		q0, [arg2], #16
+CPU_LE(	rev64		v0.16b, v0.16b			)
+CPU_LE(	ext		v0.16b, v0.16b, v0.16b, #8	)
+	eor		v7.16b, v7.16b, v0.16b
+	subs		arg3, arg3, #16
+
+	// instead of a cmp instruction, we utilize the flags with the
+	// b.ge instruction, equivalent of: cmp arg3, 16-16
+	// check if there is any more 16B in the buffer to be able to fold
+	b.ge		_16B_reduction_loop
+
+	// now we have 16+z bytes left to reduce, where 0 <= z < 16.
+	// first, we reduce the data in the v7 register
+
+_final_reduction_for_128:
+	// check if any more data to fold. If not, compute the CRC of
+	// the final 128 bits
+	adds		arg3, arg3, #16
+	b.eq		_128_done
+
+	// here we are dealing with less than 16 bytes of data.
+	// since we know that there was data before the pointer, we can
+	// offset the input pointer back before the current position so we
+	// load exactly 16 bytes. after that, the registers need adjusting.
+_get_last_two_regs:
+	add		arg2, arg2, arg3
+	ldr		q1, [arg2, #-16]
+CPU_LE(	rev64		v1.16b, v1.16b			)
+CPU_LE(	ext		v1.16b, v1.16b, v1.16b, #8	)
+
+	// get rid of the extra data that was loaded before
+	// load the shift constant
+	adr		x4, tbl_shf_table + 16
+	sub		x4, x4, arg3
+	ld1		{v0.16b}, [x4]
+
+	// shift the contents of v7 left by arg3 bytes into v2
+	tbl		v2.16b, {v7.16b}, v0.16b
+
+	// shift v7 to the right by 16-arg3 bytes
+	movi		v9.16b, #0x80
+	eor		v0.16b, v0.16b, v9.16b
+	tbl		v7.16b, {v7.16b}, v0.16b
+
+	// blend
+	sshr		v0.16b, v0.16b, #7	// convert to 8-bit mask
+	bsl		v0.16b, v2.16b, v1.16b
+
+	// fold 16 Bytes
+	pmull		v8.1q, v7.1d, v10.1d
+	pmull2		v7.1q, v7.2d, v10.2d
+	eor		v7.16b, v7.16b, v8.16b
+	eor		v7.16b, v7.16b, v0.16b
+
+_128_done:
+	// compute crc of a 128-bit value
+	ldr		q10, rk5		// rk5 and rk6 in q10
+
+	// 64b fold
+	ext		v0.16b, vzr.16b, v7.16b, #8
+	mov		v7.d[0], v7.d[1]
+	pmull		v7.1q, v7.1d, v10.1d
+	eor		v7.16b, v7.16b, v0.16b
+
+	// 32b fold
+	ext		v0.16b, v7.16b, vzr.16b, #4
+	mov		v7.s[3], vzr.s[0]
+	pmull2		v0.1q, v0.2d, v10.2d
+	eor		v7.16b, v7.16b, v0.16b
+
+	// barrett reduction
+_barrett:
+	ldr		q10, rk7
+	mov		v0.d[0], v7.d[1]
+
+	pmull		v0.1q, v0.1d, v10.1d
+	ext		v0.16b, vzr.16b, v0.16b, #12
+	pmull2		v0.1q, v0.2d, v10.2d
+	ext		v0.16b, vzr.16b, v0.16b, #12
+	eor		v7.16b, v7.16b, v0.16b
+	mov		w0, v7.s[1]
+
+_cleanup:
+	// scale the result back to 16 bits
+	lsr		x0, x0, #16
+	ret
+
+_less_than_128:
+	cbz		arg3, _cleanup
+
+	movi		v0.16b, #0
+	mov		v0.s[3], arg1_low32	// get the initial crc value
+
+	ldr		q7, [arg2], #0x10
+CPU_LE(	rev64		v7.16b, v7.16b			)
+CPU_LE(	ext		v7.16b, v7.16b, v7.16b, #8	)
+	eor		v7.16b, v7.16b, v0.16b	// xor the initial crc value
+
+	cmp		arg3, #16
+	b.eq		_128_done		// exactly 16 left
+	b.lt		_less_than_16_left
+
+	ldr		q10, rk1		// rk1 and rk2 in q10
+
+	// update the counter. subtract 32 instead of 16 to save one
+	// instruction from the loop
+	subs		arg3, arg3, #32
+	b.ge		_16B_reduction_loop
+
+	add		arg3, arg3, #16
+	b		_get_last_two_regs
+
+_less_than_16_left:
+	// look up the tbl shift constant for the remaining bytes
+	adr		x0, tbl_shf_table + 16
+	sub		x0, x0, arg3
+	ld1		{v0.16b}, [x0]
+	movi		v9.16b, #0x80
+	eor		v0.16b, v0.16b, v9.16b
+	tbl		v7.16b, {v7.16b}, v0.16b
+	b		_128_done
+ENDPROC(crc_t10dif_pmull)
+
+// precomputed constants
+// these constants are precomputed from the poly:
+// 0x8bb70000 (0x8bb7 scaled to 32 bits)
+	.align		4
+// Q = 0x18BB70000
+// rk1 = 2^(32*3) mod Q << 32
+// rk2 = 2^(32*5) mod Q << 32
+// rk3 = 2^(32*15) mod Q << 32
+// rk4 = 2^(32*17) mod Q << 32
+// rk5 = 2^(32*3) mod Q << 32
+// rk6 = 2^(32*2) mod Q << 32
+// rk7 = floor(2^64/Q)
+// rk8 = Q
+
+rk1:	.octa		0x06df0000000000002d56000000000000
+rk3:	.octa		0x7cf50000000000009d9d000000000000
+rk5:	.octa		0x13680000000000002d56000000000000
+rk7:	.octa		0x000000018bb7000000000001f65a57f8
+rk9:	.octa		0xbfd6000000000000ceae000000000000
+rk11:	.octa		0x713c0000000000001e16000000000000
+rk13:	.octa		0x80a6000000000000f7f9000000000000
+rk15:	.octa		0xe658000000000000044c000000000000
+rk17:	.octa		0xa497000000000000ad18000000000000
+rk19:	.octa		0xe7b50000000000006ee3000000000000
+
+tbl_shf_table:
+// use these values for shift constants for the tbl/tbx instruction
+// different alignments result in values as shown:
+//	DDQ 0x008f8e8d8c8b8a898887868584838281 # shl 15 (16-1) / shr1
+//	DDQ 0x01008f8e8d8c8b8a8988878685848382 # shl 14 (16-2) / shr2
+//	DDQ 0x0201008f8e8d8c8b8a89888786858483 # shl 13 (16-3) / shr3
+//	DDQ 0x030201008f8e8d8c8b8a898887868584 # shl 12 (16-4) / shr4
+//	DDQ 0x04030201008f8e8d8c8b8a8988878685 # shl 11 (16-5) / shr5
+//	DDQ 0x0504030201008f8e8d8c8b8a89888786 # shl 10 (16-6) / shr6
+//	DDQ 0x060504030201008f8e8d8c8b8a898887 # shl 9  (16-7) / shr7
+//	DDQ 0x07060504030201008f8e8d8c8b8a8988 # shl 8  (16-8) / shr8
+//	DDQ 0x0807060504030201008f8e8d8c8b8a89 # shl 7  (16-9) / shr9
+//	DDQ 0x090807060504030201008f8e8d8c8b8a # shl 6  (16-10) / shr10
+//	DDQ 0x0a090807060504030201008f8e8d8c8b # shl 5  (16-11) / shr11
+//	DDQ 0x0b0a090807060504030201008f8e8d8c # shl 4  (16-12) / shr12
+//	DDQ 0x0c0b0a090807060504030201008f8e8d # shl 3  (16-13) / shr13
+//	DDQ 0x0d0c0b0a090807060504030201008f8e # shl 2  (16-14) / shr14
+//	DDQ 0x0e0d0c0b0a090807060504030201008f # shl 1  (16-15) / shr15
+
+	.byte		 0x0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87
+	.byte		0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f
+	.byte		 0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
+	.byte		 0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe,  0x0

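For cross-checking the folding above: CRC-T10DIF is a non-reflected 16-bit CRC over the polynomial 0x8BB7, with a zero initial value and no final XOR, which is also what crc_t10dif_generic() computes via table lookup. A one-bit-at-a-time C sketch (illustrative):

#include <stddef.h>
#include <stdint.h>

/* CRC-T10DIF: 16 bits, MSB-first, poly 0x8BB7, init 0, no final XOR */
static uint16_t crc_t10dif_bitwise(uint16_t crc, const uint8_t *p, size_t len)
{
	while (len--) {
		crc ^= (uint16_t)(*p++) << 8;
		for (int i = 0; i < 8; i++)
			crc = (crc & 0x8000) ? (crc << 1) ^ 0x8bb7
					     : crc << 1;
	}
	return crc;
}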
+ 95 - 0
arch/arm64/crypto/crct10dif-ce-glue.c

@@ -0,0 +1,95 @@
+/*
+ * Accelerated CRC-T10DIF using arm64 NEON and Crypto Extensions instructions
+ *
+ * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/cpufeature.h>
+#include <linux/crc-t10dif.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+
+#include <crypto/internal/hash.h>
+
+#include <asm/neon.h>
+
+#define CRC_T10DIF_PMULL_CHUNK_SIZE	16U
+
+asmlinkage u16 crc_t10dif_pmull(u16 init_crc, const u8 buf[], u64 len);
+
+static int crct10dif_init(struct shash_desc *desc)
+{
+	u16 *crc = shash_desc_ctx(desc);
+
+	*crc = 0;
+	return 0;
+}
+
+static int crct10dif_update(struct shash_desc *desc, const u8 *data,
+			    unsigned int length)
+{
+	u16 *crc = shash_desc_ctx(desc);
+	unsigned int l;
+
+	if (unlikely((u64)data % CRC_T10DIF_PMULL_CHUNK_SIZE)) {
+		l = min_t(u32, length, CRC_T10DIF_PMULL_CHUNK_SIZE -
+			  ((u64)data % CRC_T10DIF_PMULL_CHUNK_SIZE));
+
+		*crc = crc_t10dif_generic(*crc, data, l);
+
+		length -= l;
+		data += l;
+	}
+
+	if (length > 0) {
+		kernel_neon_begin_partial(14);
+		*crc = crc_t10dif_pmull(*crc, data, length);
+		kernel_neon_end();
+	}
+
+	return 0;
+}
+
+static int crct10dif_final(struct shash_desc *desc, u8 *out)
+{
+	u16 *crc = shash_desc_ctx(desc);
+
+	*(u16 *)out = *crc;
+	return 0;
+}
+
+static struct shash_alg crc_t10dif_alg = {
+	.digestsize		= CRC_T10DIF_DIGEST_SIZE,
+	.init			= crct10dif_init,
+	.update			= crct10dif_update,
+	.final			= crct10dif_final,
+	.descsize		= CRC_T10DIF_DIGEST_SIZE,
+
+	.base.cra_name		= "crct10dif",
+	.base.cra_driver_name	= "crct10dif-arm64-ce",
+	.base.cra_priority	= 200,
+	.base.cra_blocksize	= CRC_T10DIF_BLOCK_SIZE,
+	.base.cra_module	= THIS_MODULE,
+};
+
+static int __init crc_t10dif_mod_init(void)
+{
+	return crypto_register_shash(&crc_t10dif_alg);
+}
+
+static void __exit crc_t10dif_mod_exit(void)
+{
+	crypto_unregister_shash(&crc_t10dif_alg);
+}
+
+module_cpu_feature_match(PMULL, crc_t10dif_mod_init);
+module_exit(crc_t10dif_mod_exit);
+
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");

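Most in-kernel users never touch this shash directly: they call crc_t10dif() from lib/, which digests through whichever "crct10dif" implementation holds the highest priority, so loading this module transparently accelerates existing callers such as the SCSI DIF path. A minimal sketch (buf/len are placeholders):

#include <linux/crc-t10dif.h>
#include <linux/types.h>

static __u16 dif_guard_example(const unsigned char *buf, size_t len)
{
	/* resolves to the registered "crct10dif" shash, i.e. the PMULL
	 * code above once this module is loaded */
	return crc_t10dif(buf, len);
}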
+ 3 - 3
arch/arm64/crypto/ghash-ce-core.S

@@ -29,8 +29,8 @@
 	 *			   struct ghash_key const *k, const char *head)
 	 */
 ENTRY(pmull_ghash_update)
-	ld1		{SHASH.16b}, [x3]
-	ld1		{XL.16b}, [x1]
+	ld1		{SHASH.2d}, [x3]
+	ld1		{XL.2d}, [x1]
 	movi		MASK.16b, #0xe1
 	ext		SHASH2.16b, SHASH.16b, SHASH.16b, #8
 	shl		MASK.2d, MASK.2d, #57
@@ -74,6 +74,6 @@ CPU_LE(	rev64		T1.16b, T1.16b	)
 
 	cbnz		w0, 0b
 
-	st1		{XL.16b}, [x1]
+	st1		{XL.2d}, [x1]
 	ret
 ENDPROC(pmull_ghash_update)

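The ghash hunk only changes the ld1/st1 element size, but that is the whole fix: on big-endian kernels the element size determines how bytes are swabbed into vector lanes, and the .2d form gives each 64-bit lane the numeric value the pmull arithmetic expects. A host-side C sketch of the distinction (purely illustrative; the two printed values differ only on a big-endian machine):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	uint8_t mem[16] = {  0,  1,  2,  3,  4,  5,  6,  7,
			     8,  9, 10, 11, 12, 13, 14, 15 };
	uint64_t as_bytes = 0, as_element;
	int i;

	/* ld1 {v.16b} puts mem[0] into the least significant byte of
	 * lane d0 regardless of CPU endianness */
	for (i = 7; i >= 0; i--)
		as_bytes = (as_bytes << 8) | mem[i];

	/* ld1 {v.2d} loads each doubleword with the CPU's own data
	 * endianness, i.e. what a plain C load sees */
	memcpy(&as_element, mem, sizeof(as_element));

	printf("%016llx vs %016llx\n", (unsigned long long)as_bytes,
	       (unsigned long long)as_element);
	return 0;
}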
+ 2 - 2
arch/arm64/crypto/sha1-ce-core.S

@@ -78,7 +78,7 @@ ENTRY(sha1_ce_transform)
 	ld1r		{k3.4s}, [x6]
 
 	/* load state */
-	ldr		dga, [x0]
+	ld1		{dgav.4s}, [x0]
 	ldr		dgb, [x0, #16]
 
 	/* load sha1_ce_state::finalize */
@@ -144,7 +144,7 @@ CPU_LE(	rev32		v11.16b, v11.16b	)
 	b		1b
 
 	/* store new state */
-3:	str		dga, [x0]
+3:	st1		{dgav.4s}, [x0]
 	str		dgb, [x0, #16]
 	ret
 ENDPROC(sha1_ce_transform)

+ 2 - 2
arch/arm64/crypto/sha2-ce-core.S

@@ -85,7 +85,7 @@ ENTRY(sha2_ce_transform)
 	ld1		{v12.4s-v15.4s}, [x8]
 
 	/* load state */
-	ldp		dga, dgb, [x0]
+	ld1		{dgav.4s, dgbv.4s}, [x0]
 
 	/* load sha256_ce_state::finalize */
 	ldr		w4, [x0, #:lo12:sha256_ce_offsetof_finalize]
@@ -148,6 +148,6 @@ CPU_LE(	rev32		v19.16b, v19.16b	)
 	b		1b
 
 	/* store new state */
-3:	stp		dga, dgb, [x0]
+3:	st1		{dgav.4s, dgbv.4s}, [x0]
 	ret
 ENDPROC(sha2_ce_transform)

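The OpenSSL-derived file below annotates nearly every scalar instruction with the SHA-256 round term it contributes to (Ch, Maj, Sigma0/1, sigma0/1). For reference while reading it, the FIPS 180-4 definitions in C; these are the standard functions, not code from the patch (the assembly computes Ch as (e&f)|(~e&g) and Maj via the (b^c)&(a^b) trick, both equivalent):

#include <stdint.h>

static inline uint32_t ror32(uint32_t x, unsigned int n)
{
	return (x >> n) | (x << (32 - n));
}

static inline uint32_t Ch(uint32_t e, uint32_t f, uint32_t g)
{
	return (e & f) ^ (~e & g);
}

static inline uint32_t Maj(uint32_t a, uint32_t b, uint32_t c)
{
	return (a & b) ^ (a & c) ^ (b & c);
}

static inline uint32_t Sigma0(uint32_t a)	/* ror 2/13/22 */
{
	return ror32(a, 2) ^ ror32(a, 13) ^ ror32(a, 22);
}

static inline uint32_t Sigma1(uint32_t e)	/* ror 6/11/25 */
{
	return ror32(e, 6) ^ ror32(e, 11) ^ ror32(e, 25);
}

static inline uint32_t sigma0(uint32_t x)	/* message schedule */
{
	return ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3);
}

static inline uint32_t sigma1(uint32_t x)
{
	return ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10);
}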
+ 2061 - 0
arch/arm64/crypto/sha256-core.S_shipped

@@ -0,0 +1,2061 @@
+// Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
+//
+// Licensed under the OpenSSL license (the "License").  You may not use
+// this file except in compliance with the License.  You can obtain a copy
+// in the file LICENSE in the source distribution or at
+// https://www.openssl.org/source/license.html
+
+// ====================================================================
+// Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+// project. The module is, however, dual licensed under OpenSSL and
+// CRYPTOGAMS licenses depending on where you obtain it. For further
+// details see http://www.openssl.org/~appro/cryptogams/.
+//
+// Permission to use under GPLv2 terms is granted.
+// ====================================================================
+//
+// SHA256/512 for ARMv8.
+//
+// Performance in cycles per processed byte and improvement coefficient
+// over code generated with "default" compiler:
+//
+//		SHA256-hw	SHA256(*)	SHA512
+// Apple A7	1.97		10.5 (+33%)	6.73 (-1%(**))
+// Cortex-A53	2.38		15.5 (+115%)	10.0 (+150%(***))
+// Cortex-A57	2.31		11.6 (+86%)	7.51 (+260%(***))
+// Denver	2.01		10.5 (+26%)	6.70 (+8%)
+// X-Gene			20.0 (+100%)	12.8 (+300%(***))
+// Mongoose	2.36		13.0 (+50%)	8.36 (+33%)
+//
+// (*)	Software SHA256 results are of lesser relevance, presented
+//	mostly for informational purposes.
+// (**)	The result is a trade-off: it's possible to improve it by
+//	10% (or by 1 cycle per round), but at the cost of 20% loss
+//	on Cortex-A53 (or by 4 cycles per round).
+// (***)	Super-impressive coefficients over gcc-generated code are
+//	an indication of some compiler "pathology"; most notably, code
+//	generated with -mgeneral-regs-only is significantly faster
+//	and the gap is then only 40-90%.
+//
+// October 2016.
+//
+// Originally it was reckoned that it made no sense to implement a NEON
+// version of SHA256 for 64-bit processors. This is because the performance
+// improvement on the most widespread Cortex-A5x processors was observed
+// to be marginal: the same on Cortex-A53 and ~10% on A57. But then it was
+// observed that 32-bit NEON SHA256 performs significantly better than
+// the 64-bit scalar version on *some* of the more recent processors. As
+// a result, a 64-bit NEON version of SHA256 was added to provide the best
+// all-round performance. For example, it executes ~30% faster on X-Gene
+// and Mongoose. [For reference, NEON version of SHA512 is bound to
+// deliver much less improvement, likely *negative* on Cortex-A5x.
+// Which is why NEON support is limited to SHA256.]
+
+#ifndef	__KERNEL__
+# include "arm_arch.h"
+#endif
+
+.text
+
+.extern	OPENSSL_armcap_P
+.globl	sha256_block_data_order
+.type	sha256_block_data_order,%function
+.align	6
+sha256_block_data_order:
+#ifndef	__KERNEL__
+# ifdef	__ILP32__
+	ldrsw	x16,.LOPENSSL_armcap_P
+# else
+	ldr	x16,.LOPENSSL_armcap_P
+# endif
+	adr	x17,.LOPENSSL_armcap_P
+	add	x16,x16,x17
+	ldr	w16,[x16]
+	tst	w16,#ARMV8_SHA256
+	b.ne	.Lv8_entry
+	tst	w16,#ARMV7_NEON
+	b.ne	.Lneon_entry
+#endif
+	stp	x29,x30,[sp,#-128]!
+	add	x29,sp,#0
+
+	stp	x19,x20,[sp,#16]
+	stp	x21,x22,[sp,#32]
+	stp	x23,x24,[sp,#48]
+	stp	x25,x26,[sp,#64]
+	stp	x27,x28,[sp,#80]
+	sub	sp,sp,#4*4
+
+	ldp	w20,w21,[x0]				// load context
+	ldp	w22,w23,[x0,#2*4]
+	ldp	w24,w25,[x0,#4*4]
+	add	x2,x1,x2,lsl#6	// end of input
+	ldp	w26,w27,[x0,#6*4]
+	adr	x30,.LK256
+	stp	x0,x2,[x29,#96]
+
+.Loop:
+	ldp	w3,w4,[x1],#2*4
+	ldr	w19,[x30],#4			// *K++
+	eor	w28,w21,w22				// magic seed
+	str	x1,[x29,#112]
+#ifndef	__AARCH64EB__
+	rev	w3,w3			// 0
+#endif
+	ror	w16,w24,#6
+	add	w27,w27,w19			// h+=K[i]
+	eor	w6,w24,w24,ror#14
+	and	w17,w25,w24
+	bic	w19,w26,w24
+	add	w27,w27,w3			// h+=X[i]
+	orr	w17,w17,w19			// Ch(e,f,g)
+	eor	w19,w20,w21			// a^b, b^c in next round
+	eor	w16,w16,w6,ror#11	// Sigma1(e)
+	ror	w6,w20,#2
+	add	w27,w27,w17			// h+=Ch(e,f,g)
+	eor	w17,w20,w20,ror#9
+	add	w27,w27,w16			// h+=Sigma1(e)
+	and	w28,w28,w19			// (b^c)&=(a^b)
+	add	w23,w23,w27			// d+=h
+	eor	w28,w28,w21			// Maj(a,b,c)
+	eor	w17,w6,w17,ror#13	// Sigma0(a)
+	add	w27,w27,w28			// h+=Maj(a,b,c)
+	ldr	w28,[x30],#4		// *K++, w19 in next round
+	//add	w27,w27,w17			// h+=Sigma0(a)
+#ifndef	__AARCH64EB__
+	rev	w4,w4			// 1
+#endif
+	ldp	w5,w6,[x1],#2*4
+	add	w27,w27,w17			// h+=Sigma0(a)
+	ror	w16,w23,#6
+	add	w26,w26,w28			// h+=K[i]
+	eor	w7,w23,w23,ror#14
+	and	w17,w24,w23
+	bic	w28,w25,w23
+	add	w26,w26,w4			// h+=X[i]
+	orr	w17,w17,w28			// Ch(e,f,g)
+	eor	w28,w27,w20			// a^b, b^c in next round
+	eor	w16,w16,w7,ror#11	// Sigma1(e)
+	ror	w7,w27,#2
+	add	w26,w26,w17			// h+=Ch(e,f,g)
+	eor	w17,w27,w27,ror#9
+	add	w26,w26,w16			// h+=Sigma1(e)
+	and	w19,w19,w28			// (b^c)&=(a^b)
+	add	w22,w22,w26			// d+=h
+	eor	w19,w19,w20			// Maj(a,b,c)
+	eor	w17,w7,w17,ror#13	// Sigma0(a)
+	add	w26,w26,w19			// h+=Maj(a,b,c)
+	ldr	w19,[x30],#4		// *K++, w28 in next round
+	//add	w26,w26,w17			// h+=Sigma0(a)
+#ifndef	__AARCH64EB__
+	rev	w5,w5			// 2
+#endif
+	add	w26,w26,w17			// h+=Sigma0(a)
+	ror	w16,w22,#6
+	add	w25,w25,w19			// h+=K[i]
+	eor	w8,w22,w22,ror#14
+	and	w17,w23,w22
+	bic	w19,w24,w22
+	add	w25,w25,w5			// h+=X[i]
+	orr	w17,w17,w19			// Ch(e,f,g)
+	eor	w19,w26,w27			// a^b, b^c in next round
+	eor	w16,w16,w8,ror#11	// Sigma1(e)
+	ror	w8,w26,#2
+	add	w25,w25,w17			// h+=Ch(e,f,g)
+	eor	w17,w26,w26,ror#9
+	add	w25,w25,w16			// h+=Sigma1(e)
+	and	w28,w28,w19			// (b^c)&=(a^b)
+	add	w21,w21,w25			// d+=h
+	eor	w28,w28,w27			// Maj(a,b,c)
+	eor	w17,w8,w17,ror#13	// Sigma0(a)
+	add	w25,w25,w28			// h+=Maj(a,b,c)
+	ldr	w28,[x30],#4		// *K++, w19 in next round
+	//add	w25,w25,w17			// h+=Sigma0(a)
+#ifndef	__AARCH64EB__
+	rev	w6,w6			// 3
+#endif
+	ldp	w7,w8,[x1],#2*4
+	add	w25,w25,w17			// h+=Sigma0(a)
+	ror	w16,w21,#6
+	add	w24,w24,w28			// h+=K[i]
+	eor	w9,w21,w21,ror#14
+	and	w17,w22,w21
+	bic	w28,w23,w21
+	add	w24,w24,w6			// h+=X[i]
+	orr	w17,w17,w28			// Ch(e,f,g)
+	eor	w28,w25,w26			// a^b, b^c in next round
+	eor	w16,w16,w9,ror#11	// Sigma1(e)
+	ror	w9,w25,#2
+	add	w24,w24,w17			// h+=Ch(e,f,g)
+	eor	w17,w25,w25,ror#9
+	add	w24,w24,w16			// h+=Sigma1(e)
+	and	w19,w19,w28			// (b^c)&=(a^b)
+	add	w20,w20,w24			// d+=h
+	eor	w19,w19,w26			// Maj(a,b,c)
+	eor	w17,w9,w17,ror#13	// Sigma0(a)
+	add	w24,w24,w19			// h+=Maj(a,b,c)
+	ldr	w19,[x30],#4		// *K++, w28 in next round
+	//add	w24,w24,w17			// h+=Sigma0(a)
+#ifndef	__AARCH64EB__
+	rev	w7,w7			// 4
+#endif
+	add	w24,w24,w17			// h+=Sigma0(a)
+	ror	w16,w20,#6
+	add	w23,w23,w19			// h+=K[i]
+	eor	w10,w20,w20,ror#14
+	and	w17,w21,w20
+	bic	w19,w22,w20
+	add	w23,w23,w7			// h+=X[i]
+	orr	w17,w17,w19			// Ch(e,f,g)
+	eor	w19,w24,w25			// a^b, b^c in next round
+	eor	w16,w16,w10,ror#11	// Sigma1(e)
+	ror	w10,w24,#2
+	add	w23,w23,w17			// h+=Ch(e,f,g)
+	eor	w17,w24,w24,ror#9
+	add	w23,w23,w16			// h+=Sigma1(e)
+	and	w28,w28,w19			// (b^c)&=(a^b)
+	add	w27,w27,w23			// d+=h
+	eor	w28,w28,w25			// Maj(a,b,c)
+	eor	w17,w10,w17,ror#13	// Sigma0(a)
+	add	w23,w23,w28			// h+=Maj(a,b,c)
+	ldr	w28,[x30],#4		// *K++, w19 in next round
+	//add	w23,w23,w17			// h+=Sigma0(a)
+#ifndef	__AARCH64EB__
+	rev	w8,w8			// 5
+#endif
+	ldp	w9,w10,[x1],#2*4
+	add	w23,w23,w17			// h+=Sigma0(a)
+	ror	w16,w27,#6
+	add	w22,w22,w28			// h+=K[i]
+	eor	w11,w27,w27,ror#14
+	and	w17,w20,w27
+	bic	w28,w21,w27
+	add	w22,w22,w8			// h+=X[i]
+	orr	w17,w17,w28			// Ch(e,f,g)
+	eor	w28,w23,w24			// a^b, b^c in next round
+	eor	w16,w16,w11,ror#11	// Sigma1(e)
+	ror	w11,w23,#2
+	add	w22,w22,w17			// h+=Ch(e,f,g)
+	eor	w17,w23,w23,ror#9
+	add	w22,w22,w16			// h+=Sigma1(e)
+	and	w19,w19,w28			// (b^c)&=(a^b)
+	add	w26,w26,w22			// d+=h
+	eor	w19,w19,w24			// Maj(a,b,c)
+	eor	w17,w11,w17,ror#13	// Sigma0(a)
+	add	w22,w22,w19			// h+=Maj(a,b,c)
+	ldr	w19,[x30],#4		// *K++, w28 in next round
+	//add	w22,w22,w17			// h+=Sigma0(a)
+#ifndef	__AARCH64EB__
+	rev	w9,w9			// 6
+#endif
+	add	w22,w22,w17			// h+=Sigma0(a)
+	ror	w16,w26,#6
+	add	w21,w21,w19			// h+=K[i]
+	eor	w12,w26,w26,ror#14
+	and	w17,w27,w26
+	bic	w19,w20,w26
+	add	w21,w21,w9			// h+=X[i]
+	orr	w17,w17,w19			// Ch(e,f,g)
+	eor	w19,w22,w23			// a^b, b^c in next round
+	eor	w16,w16,w12,ror#11	// Sigma1(e)
+	ror	w12,w22,#2
+	add	w21,w21,w17			// h+=Ch(e,f,g)
+	eor	w17,w22,w22,ror#9
+	add	w21,w21,w16			// h+=Sigma1(e)
+	and	w28,w28,w19			// (b^c)&=(a^b)
+	add	w25,w25,w21			// d+=h
+	eor	w28,w28,w23			// Maj(a,b,c)
+	eor	w17,w12,w17,ror#13	// Sigma0(a)
+	add	w21,w21,w28			// h+=Maj(a,b,c)
+	ldr	w28,[x30],#4		// *K++, w19 in next round
+	//add	w21,w21,w17			// h+=Sigma0(a)
+#ifndef	__AARCH64EB__
+	rev	w10,w10			// 7
+#endif
+	ldp	w11,w12,[x1],#2*4
+	add	w21,w21,w17			// h+=Sigma0(a)
+	ror	w16,w25,#6
+	add	w20,w20,w28			// h+=K[i]
+	eor	w13,w25,w25,ror#14
+	and	w17,w26,w25
+	bic	w28,w27,w25
+	add	w20,w20,w10			// h+=X[i]
+	orr	w17,w17,w28			// Ch(e,f,g)
+	eor	w28,w21,w22			// a^b, b^c in next round
+	eor	w16,w16,w13,ror#11	// Sigma1(e)
+	ror	w13,w21,#2
+	add	w20,w20,w17			// h+=Ch(e,f,g)
+	eor	w17,w21,w21,ror#9
+	add	w20,w20,w16			// h+=Sigma1(e)
+	and	w19,w19,w28			// (b^c)&=(a^b)
+	add	w24,w24,w20			// d+=h
+	eor	w19,w19,w22			// Maj(a,b,c)
+	eor	w17,w13,w17,ror#13	// Sigma0(a)
+	add	w20,w20,w19			// h+=Maj(a,b,c)
+	ldr	w19,[x30],#4		// *K++, w28 in next round
+	//add	w20,w20,w17			// h+=Sigma0(a)
+#ifndef	__AARCH64EB__
+	rev	w11,w11			// 8
+#endif
+	add	w20,w20,w17			// h+=Sigma0(a)
+	ror	w16,w24,#6
+	add	w27,w27,w19			// h+=K[i]
+	eor	w14,w24,w24,ror#14
+	and	w17,w25,w24
+	bic	w19,w26,w24
+	add	w27,w27,w11			// h+=X[i]
+	orr	w17,w17,w19			// Ch(e,f,g)
+	eor	w19,w20,w21			// a^b, b^c in next round
+	eor	w16,w16,w14,ror#11	// Sigma1(e)
+	ror	w14,w20,#2
+	add	w27,w27,w17			// h+=Ch(e,f,g)
+	eor	w17,w20,w20,ror#9
+	add	w27,w27,w16			// h+=Sigma1(e)
+	and	w28,w28,w19			// (b^c)&=(a^b)
+	add	w23,w23,w27			// d+=h
+	eor	w28,w28,w21			// Maj(a,b,c)
+	eor	w17,w14,w17,ror#13	// Sigma0(a)
+	add	w27,w27,w28			// h+=Maj(a,b,c)
+	ldr	w28,[x30],#4		// *K++, w19 in next round
+	//add	w27,w27,w17			// h+=Sigma0(a)
+#ifndef	__AARCH64EB__
+	rev	w12,w12			// 9
+#endif
+	ldp	w13,w14,[x1],#2*4
+	add	w27,w27,w17			// h+=Sigma0(a)
+	ror	w16,w23,#6
+	add	w26,w26,w28			// h+=K[i]
+	eor	w15,w23,w23,ror#14
+	and	w17,w24,w23
+	bic	w28,w25,w23
+	add	w26,w26,w12			// h+=X[i]
+	orr	w17,w17,w28			// Ch(e,f,g)
+	eor	w28,w27,w20			// a^b, b^c in next round
+	eor	w16,w16,w15,ror#11	// Sigma1(e)
+	ror	w15,w27,#2
+	add	w26,w26,w17			// h+=Ch(e,f,g)
+	eor	w17,w27,w27,ror#9
+	add	w26,w26,w16			// h+=Sigma1(e)
+	and	w19,w19,w28			// (b^c)&=(a^b)
+	add	w22,w22,w26			// d+=h
+	eor	w19,w19,w20			// Maj(a,b,c)
+	eor	w17,w15,w17,ror#13	// Sigma0(a)
+	add	w26,w26,w19			// h+=Maj(a,b,c)
+	ldr	w19,[x30],#4		// *K++, w28 in next round
+	//add	w26,w26,w17			// h+=Sigma0(a)
+#ifndef	__AARCH64EB__
+	rev	w13,w13			// 10
+#endif
+	add	w26,w26,w17			// h+=Sigma0(a)
+	ror	w16,w22,#6
+	add	w25,w25,w19			// h+=K[i]
+	eor	w0,w22,w22,ror#14
+	and	w17,w23,w22
+	bic	w19,w24,w22
+	add	w25,w25,w13			// h+=X[i]
+	orr	w17,w17,w19			// Ch(e,f,g)
+	eor	w19,w26,w27			// a^b, b^c in next round
+	eor	w16,w16,w0,ror#11	// Sigma1(e)
+	ror	w0,w26,#2
+	add	w25,w25,w17			// h+=Ch(e,f,g)
+	eor	w17,w26,w26,ror#9
+	add	w25,w25,w16			// h+=Sigma1(e)
+	and	w28,w28,w19			// (b^c)&=(a^b)
+	add	w21,w21,w25			// d+=h
+	eor	w28,w28,w27			// Maj(a,b,c)
+	eor	w17,w0,w17,ror#13	// Sigma0(a)
+	add	w25,w25,w28			// h+=Maj(a,b,c)
+	ldr	w28,[x30],#4		// *K++, w19 in next round
+	//add	w25,w25,w17			// h+=Sigma0(a)
+#ifndef	__AARCH64EB__
+	rev	w14,w14			// 11
+#endif
+	ldp	w15,w0,[x1],#2*4
+	add	w25,w25,w17			// h+=Sigma0(a)
+	str	w6,[sp,#12]
+	ror	w16,w21,#6
+	add	w24,w24,w28			// h+=K[i]
+	eor	w6,w21,w21,ror#14
+	and	w17,w22,w21
+	bic	w28,w23,w21
+	add	w24,w24,w14			// h+=X[i]
+	orr	w17,w17,w28			// Ch(e,f,g)
+	eor	w28,w25,w26			// a^b, b^c in next round
+	eor	w16,w16,w6,ror#11	// Sigma1(e)
+	ror	w6,w25,#2
+	add	w24,w24,w17			// h+=Ch(e,f,g)
+	eor	w17,w25,w25,ror#9
+	add	w24,w24,w16			// h+=Sigma1(e)
+	and	w19,w19,w28			// (b^c)&=(a^b)
+	add	w20,w20,w24			// d+=h
+	eor	w19,w19,w26			// Maj(a,b,c)
+	eor	w17,w6,w17,ror#13	// Sigma0(a)
+	add	w24,w24,w19			// h+=Maj(a,b,c)
+	ldr	w19,[x30],#4		// *K++, w28 in next round
+	//add	w24,w24,w17			// h+=Sigma0(a)
+#ifndef	__AARCH64EB__
+	rev	w15,w15			// 12
+#endif
+	add	w24,w24,w17			// h+=Sigma0(a)
+	str	w7,[sp,#0]
+	ror	w16,w20,#6
+	add	w23,w23,w19			// h+=K[i]
+	eor	w7,w20,w20,ror#14
+	and	w17,w21,w20
+	bic	w19,w22,w20
+	add	w23,w23,w15			// h+=X[i]
+	orr	w17,w17,w19			// Ch(e,f,g)
+	eor	w19,w24,w25			// a^b, b^c in next round
+	eor	w16,w16,w7,ror#11	// Sigma1(e)
+	ror	w7,w24,#2
+	add	w23,w23,w17			// h+=Ch(e,f,g)
+	eor	w17,w24,w24,ror#9
+	add	w23,w23,w16			// h+=Sigma1(e)
+	and	w28,w28,w19			// (b^c)&=(a^b)
+	add	w27,w27,w23			// d+=h
+	eor	w28,w28,w25			// Maj(a,b,c)
+	eor	w17,w7,w17,ror#13	// Sigma0(a)
+	add	w23,w23,w28			// h+=Maj(a,b,c)
+	ldr	w28,[x30],#4		// *K++, w19 in next round
+	//add	w23,w23,w17			// h+=Sigma0(a)
+#ifndef	__AARCH64EB__
+	rev	w0,w0			// 13
+#endif
+	ldp	w1,w2,[x1]
+	add	w23,w23,w17			// h+=Sigma0(a)
+	str	w8,[sp,#4]
+	ror	w16,w27,#6
+	add	w22,w22,w28			// h+=K[i]
+	eor	w8,w27,w27,ror#14
+	and	w17,w20,w27
+	bic	w28,w21,w27
+	add	w22,w22,w0			// h+=X[i]
+	orr	w17,w17,w28			// Ch(e,f,g)
+	eor	w28,w23,w24			// a^b, b^c in next round
+	eor	w16,w16,w8,ror#11	// Sigma1(e)
+	ror	w8,w23,#2
+	add	w22,w22,w17			// h+=Ch(e,f,g)
+	eor	w17,w23,w23,ror#9
+	add	w22,w22,w16			// h+=Sigma1(e)
+	and	w19,w19,w28			// (b^c)&=(a^b)
+	add	w26,w26,w22			// d+=h
+	eor	w19,w19,w24			// Maj(a,b,c)
+	eor	w17,w8,w17,ror#13	// Sigma0(a)
+	add	w22,w22,w19			// h+=Maj(a,b,c)
+	ldr	w19,[x30],#4		// *K++, w28 in next round
+	//add	w22,w22,w17			// h+=Sigma0(a)
+#ifndef	__AARCH64EB__
+	rev	w1,w1			// 14
+#endif
+	ldr	w6,[sp,#12]
+	add	w22,w22,w17			// h+=Sigma0(a)
+	str	w9,[sp,#8]
+	ror	w16,w26,#6
+	add	w21,w21,w19			// h+=K[i]
+	eor	w9,w26,w26,ror#14
+	and	w17,w27,w26
+	bic	w19,w20,w26
+	add	w21,w21,w1			// h+=X[i]
+	orr	w17,w17,w19			// Ch(e,f,g)
+	eor	w19,w22,w23			// a^b, b^c in next round
+	eor	w16,w16,w9,ror#11	// Sigma1(e)
+	ror	w9,w22,#2
+	add	w21,w21,w17			// h+=Ch(e,f,g)
+	eor	w17,w22,w22,ror#9
+	add	w21,w21,w16			// h+=Sigma1(e)
+	and	w28,w28,w19			// (b^c)&=(a^b)
+	add	w25,w25,w21			// d+=h
+	eor	w28,w28,w23			// Maj(a,b,c)
+	eor	w17,w9,w17,ror#13	// Sigma0(a)
+	add	w21,w21,w28			// h+=Maj(a,b,c)
+	ldr	w28,[x30],#4		// *K++, w19 in next round
+	//add	w21,w21,w17			// h+=Sigma0(a)
+#ifndef	__AARCH64EB__
+	rev	w2,w2			// 15
+#endif
+	ldr	w7,[sp,#0]
+	add	w21,w21,w17			// h+=Sigma0(a)
+	str	w10,[sp,#12]
+	ror	w16,w25,#6
+	add	w20,w20,w28			// h+=K[i]
+	ror	w9,w4,#7
+	and	w17,w26,w25
+	ror	w8,w1,#17
+	bic	w28,w27,w25
+	ror	w10,w21,#2
+	add	w20,w20,w2			// h+=X[i]
+	eor	w16,w16,w25,ror#11
+	eor	w9,w9,w4,ror#18
+	orr	w17,w17,w28			// Ch(e,f,g)
+	eor	w28,w21,w22			// a^b, b^c in next round
+	eor	w16,w16,w25,ror#25	// Sigma1(e)
+	eor	w10,w10,w21,ror#13
+	add	w20,w20,w17			// h+=Ch(e,f,g)
+	and	w19,w19,w28			// (b^c)&=(a^b)
+	eor	w8,w8,w1,ror#19
+	eor	w9,w9,w4,lsr#3	// sigma0(X[i+1])
+	add	w20,w20,w16			// h+=Sigma1(e)
+	eor	w19,w19,w22			// Maj(a,b,c)
+	eor	w17,w10,w21,ror#22	// Sigma0(a)
+	eor	w8,w8,w1,lsr#10	// sigma1(X[i+14])
+	add	w3,w3,w12
+	add	w24,w24,w20			// d+=h
+	add	w20,w20,w19			// h+=Maj(a,b,c)
+	ldr	w19,[x30],#4		// *K++, w28 in next round
+	add	w3,w3,w9
+	add	w20,w20,w17			// h+=Sigma0(a)
+	add	w3,w3,w8
+.Loop_16_xx:
+	ldr	w8,[sp,#4]
+	str	w11,[sp,#0]
+	ror	w16,w24,#6
+	add	w27,w27,w19			// h+=K[i]
+	ror	w10,w5,#7
+	and	w17,w25,w24
+	ror	w9,w2,#17
+	bic	w19,w26,w24
+	ror	w11,w20,#2
+	add	w27,w27,w3			// h+=X[i]
+	eor	w16,w16,w24,ror#11
+	eor	w10,w10,w5,ror#18
+	orr	w17,w17,w19			// Ch(e,f,g)
+	eor	w19,w20,w21			// a^b, b^c in next round
+	eor	w16,w16,w24,ror#25	// Sigma1(e)
+	eor	w11,w11,w20,ror#13
+	add	w27,w27,w17			// h+=Ch(e,f,g)
+	and	w28,w28,w19			// (b^c)&=(a^b)
+	eor	w9,w9,w2,ror#19
+	eor	w10,w10,w5,lsr#3	// sigma0(X[i+1])
+	add	w27,w27,w16			// h+=Sigma1(e)
+	eor	w28,w28,w21			// Maj(a,b,c)
+	eor	w17,w11,w20,ror#22	// Sigma0(a)
+	eor	w9,w9,w2,lsr#10	// sigma1(X[i+14])
+	add	w4,w4,w13
+	add	w23,w23,w27			// d+=h
+	add	w27,w27,w28			// h+=Maj(a,b,c)
+	ldr	w28,[x30],#4		// *K++, w19 in next round
+	add	w4,w4,w10
+	add	w27,w27,w17			// h+=Sigma0(a)
+	add	w4,w4,w9
+	ldr	w9,[sp,#8]
+	str	w12,[sp,#4]
+	ror	w16,w23,#6
+	add	w26,w26,w28			// h+=K[i]
+	ror	w11,w6,#7
+	and	w17,w24,w23
+	ror	w10,w3,#17
+	bic	w28,w25,w23
+	ror	w12,w27,#2
+	add	w26,w26,w4			// h+=X[i]
+	eor	w16,w16,w23,ror#11
+	eor	w11,w11,w6,ror#18
+	orr	w17,w17,w28			// Ch(e,f,g)
+	eor	w28,w27,w20			// a^b, b^c in next round
+	eor	w16,w16,w23,ror#25	// Sigma1(e)
+	eor	w12,w12,w27,ror#13
+	add	w26,w26,w17			// h+=Ch(e,f,g)
+	and	w19,w19,w28			// (b^c)&=(a^b)
+	eor	w10,w10,w3,ror#19
+	eor	w11,w11,w6,lsr#3	// sigma0(X[i+1])
+	add	w26,w26,w16			// h+=Sigma1(e)
+	eor	w19,w19,w20			// Maj(a,b,c)
+	eor	w17,w12,w27,ror#22	// Sigma0(a)
+	eor	w10,w10,w3,lsr#10	// sigma1(X[i+14])
+	add	w5,w5,w14
+	add	w22,w22,w26			// d+=h
+	add	w26,w26,w19			// h+=Maj(a,b,c)
+	ldr	w19,[x30],#4		// *K++, w28 in next round
+	add	w5,w5,w11
+	add	w26,w26,w17			// h+=Sigma0(a)
+	add	w5,w5,w10
+	ldr	w10,[sp,#12]
+	str	w13,[sp,#8]
+	ror	w16,w22,#6
+	add	w25,w25,w19			// h+=K[i]
+	ror	w12,w7,#7
+	and	w17,w23,w22
+	ror	w11,w4,#17
+	bic	w19,w24,w22
+	ror	w13,w26,#2
+	add	w25,w25,w5			// h+=X[i]
+	eor	w16,w16,w22,ror#11
+	eor	w12,w12,w7,ror#18
+	orr	w17,w17,w19			// Ch(e,f,g)
+	eor	w19,w26,w27			// a^b, b^c in next round
+	eor	w16,w16,w22,ror#25	// Sigma1(e)
+	eor	w13,w13,w26,ror#13
+	add	w25,w25,w17			// h+=Ch(e,f,g)
+	and	w28,w28,w19			// (b^c)&=(a^b)
+	eor	w11,w11,w4,ror#19
+	eor	w12,w12,w7,lsr#3	// sigma0(X[i+1])
+	add	w25,w25,w16			// h+=Sigma1(e)
+	eor	w28,w28,w27			// Maj(a,b,c)
+	eor	w17,w13,w26,ror#22	// Sigma0(a)
+	eor	w11,w11,w4,lsr#10	// sigma1(X[i+14])
+	add	w6,w6,w15
+	add	w21,w21,w25			// d+=h
+	add	w25,w25,w28			// h+=Maj(a,b,c)
+	ldr	w28,[x30],#4		// *K++, w19 in next round
+	add	w6,w6,w12
+	add	w25,w25,w17			// h+=Sigma0(a)
+	add	w6,w6,w11
+	ldr	w11,[sp,#0]
+	str	w14,[sp,#12]
+	ror	w16,w21,#6
+	add	w24,w24,w28			// h+=K[i]
+	ror	w13,w8,#7
+	and	w17,w22,w21
+	ror	w12,w5,#17
+	bic	w28,w23,w21
+	ror	w14,w25,#2
+	add	w24,w24,w6			// h+=X[i]
+	eor	w16,w16,w21,ror#11
+	eor	w13,w13,w8,ror#18
+	orr	w17,w17,w28			// Ch(e,f,g)
+	eor	w28,w25,w26			// a^b, b^c in next round
+	eor	w16,w16,w21,ror#25	// Sigma1(e)
+	eor	w14,w14,w25,ror#13
+	add	w24,w24,w17			// h+=Ch(e,f,g)
+	and	w19,w19,w28			// (b^c)&=(a^b)
+	eor	w12,w12,w5,ror#19
+	eor	w13,w13,w8,lsr#3	// sigma0(X[i+1])
+	add	w24,w24,w16			// h+=Sigma1(e)
+	eor	w19,w19,w26			// Maj(a,b,c)
+	eor	w17,w14,w25,ror#22	// Sigma0(a)
+	eor	w12,w12,w5,lsr#10	// sigma1(X[i+14])
+	add	w7,w7,w0
+	add	w20,w20,w24			// d+=h
+	add	w24,w24,w19			// h+=Maj(a,b,c)
+	ldr	w19,[x30],#4		// *K++, w28 in next round
+	add	w7,w7,w13
+	add	w24,w24,w17			// h+=Sigma0(a)
+	add	w7,w7,w12
+	ldr	w12,[sp,#4]
+	str	w15,[sp,#0]
+	ror	w16,w20,#6
+	add	w23,w23,w19			// h+=K[i]
+	ror	w14,w9,#7
+	and	w17,w21,w20
+	ror	w13,w6,#17
+	bic	w19,w22,w20
+	ror	w15,w24,#2
+	add	w23,w23,w7			// h+=X[i]
+	eor	w16,w16,w20,ror#11
+	eor	w14,w14,w9,ror#18
+	orr	w17,w17,w19			// Ch(e,f,g)
+	eor	w19,w24,w25			// a^b, b^c in next round
+	eor	w16,w16,w20,ror#25	// Sigma1(e)
+	eor	w15,w15,w24,ror#13
+	add	w23,w23,w17			// h+=Ch(e,f,g)
+	and	w28,w28,w19			// (b^c)&=(a^b)
+	eor	w13,w13,w6,ror#19
+	eor	w14,w14,w9,lsr#3	// sigma0(X[i+1])
+	add	w23,w23,w16			// h+=Sigma1(e)
+	eor	w28,w28,w25			// Maj(a,b,c)
+	eor	w17,w15,w24,ror#22	// Sigma0(a)
+	eor	w13,w13,w6,lsr#10	// sigma1(X[i+14])
+	add	w8,w8,w1
+	add	w27,w27,w23			// d+=h
+	add	w23,w23,w28			// h+=Maj(a,b,c)
+	ldr	w28,[x30],#4		// *K++, w19 in next round
+	add	w8,w8,w14
+	add	w23,w23,w17			// h+=Sigma0(a)
+	add	w8,w8,w13
+	ldr	w13,[sp,#8]
+	str	w0,[sp,#4]
+	ror	w16,w27,#6
+	add	w22,w22,w28			// h+=K[i]
+	ror	w15,w10,#7
+	and	w17,w20,w27
+	ror	w14,w7,#17
+	bic	w28,w21,w27
+	ror	w0,w23,#2
+	add	w22,w22,w8			// h+=X[i]
+	eor	w16,w16,w27,ror#11
+	eor	w15,w15,w10,ror#18
+	orr	w17,w17,w28			// Ch(e,f,g)
+	eor	w28,w23,w24			// a^b, b^c in next round
+	eor	w16,w16,w27,ror#25	// Sigma1(e)
+	eor	w0,w0,w23,ror#13
+	add	w22,w22,w17			// h+=Ch(e,f,g)
+	and	w19,w19,w28			// (b^c)&=(a^b)
+	eor	w14,w14,w7,ror#19
+	eor	w15,w15,w10,lsr#3	// sigma0(X[i+1])
+	add	w22,w22,w16			// h+=Sigma1(e)
+	eor	w19,w19,w24			// Maj(a,b,c)
+	eor	w17,w0,w23,ror#22	// Sigma0(a)
+	eor	w14,w14,w7,lsr#10	// sigma1(X[i+14])
+	add	w9,w9,w2
+	add	w26,w26,w22			// d+=h
+	add	w22,w22,w19			// h+=Maj(a,b,c)
+	ldr	w19,[x30],#4		// *K++, w28 in next round
+	add	w9,w9,w15
+	add	w22,w22,w17			// h+=Sigma0(a)
+	add	w9,w9,w14
+	ldr	w14,[sp,#12]
+	str	w1,[sp,#8]
+	ror	w16,w26,#6
+	add	w21,w21,w19			// h+=K[i]
+	ror	w0,w11,#7
+	and	w17,w27,w26
+	ror	w15,w8,#17
+	bic	w19,w20,w26
+	ror	w1,w22,#2
+	add	w21,w21,w9			// h+=X[i]
+	eor	w16,w16,w26,ror#11
+	eor	w0,w0,w11,ror#18
+	orr	w17,w17,w19			// Ch(e,f,g)
+	eor	w19,w22,w23			// a^b, b^c in next round
+	eor	w16,w16,w26,ror#25	// Sigma1(e)
+	eor	w1,w1,w22,ror#13
+	add	w21,w21,w17			// h+=Ch(e,f,g)
+	and	w28,w28,w19			// (b^c)&=(a^b)
+	eor	w15,w15,w8,ror#19
+	eor	w0,w0,w11,lsr#3	// sigma0(X[i+1])
+	add	w21,w21,w16			// h+=Sigma1(e)
+	eor	w28,w28,w23			// Maj(a,b,c)
+	eor	w17,w1,w22,ror#22	// Sigma0(a)
+	eor	w15,w15,w8,lsr#10	// sigma1(X[i+14])
+	add	w10,w10,w3
+	add	w25,w25,w21			// d+=h
+	add	w21,w21,w28			// h+=Maj(a,b,c)
+	ldr	w28,[x30],#4		// *K++, w19 in next round
+	add	w10,w10,w0
+	add	w21,w21,w17			// h+=Sigma0(a)
+	add	w10,w10,w15
+	ldr	w15,[sp,#0]
+	str	w2,[sp,#12]
+	ror	w16,w25,#6
+	add	w20,w20,w28			// h+=K[i]
+	ror	w1,w12,#7
+	and	w17,w26,w25
+	ror	w0,w9,#17
+	bic	w28,w27,w25
+	ror	w2,w21,#2
+	add	w20,w20,w10			// h+=X[i]
+	eor	w16,w16,w25,ror#11
+	eor	w1,w1,w12,ror#18
+	orr	w17,w17,w28			// Ch(e,f,g)
+	eor	w28,w21,w22			// a^b, b^c in next round
+	eor	w16,w16,w25,ror#25	// Sigma1(e)
+	eor	w2,w2,w21,ror#13
+	add	w20,w20,w17			// h+=Ch(e,f,g)
+	and	w19,w19,w28			// (b^c)&=(a^b)
+	eor	w0,w0,w9,ror#19
+	eor	w1,w1,w12,lsr#3	// sigma0(X[i+1])
+	add	w20,w20,w16			// h+=Sigma1(e)
+	eor	w19,w19,w22			// Maj(a,b,c)
+	eor	w17,w2,w21,ror#22	// Sigma0(a)
+	eor	w0,w0,w9,lsr#10	// sigma1(X[i+14])
+	add	w11,w11,w4
+	add	w24,w24,w20			// d+=h
+	add	w20,w20,w19			// h+=Maj(a,b,c)
+	ldr	w19,[x30],#4		// *K++, w28 in next round
+	add	w11,w11,w1
+	add	w20,w20,w17			// h+=Sigma0(a)
+	add	w11,w11,w0
+	ldr	w0,[sp,#4]
+	str	w3,[sp,#0]
+	ror	w16,w24,#6
+	add	w27,w27,w19			// h+=K[i]
+	ror	w2,w13,#7
+	and	w17,w25,w24
+	ror	w1,w10,#17
+	bic	w19,w26,w24
+	ror	w3,w20,#2
+	add	w27,w27,w11			// h+=X[i]
+	eor	w16,w16,w24,ror#11
+	eor	w2,w2,w13,ror#18
+	orr	w17,w17,w19			// Ch(e,f,g)
+	eor	w19,w20,w21			// a^b, b^c in next round
+	eor	w16,w16,w24,ror#25	// Sigma1(e)
+	eor	w3,w3,w20,ror#13
+	add	w27,w27,w17			// h+=Ch(e,f,g)
+	and	w28,w28,w19			// (b^c)&=(a^b)
+	eor	w1,w1,w10,ror#19
+	eor	w2,w2,w13,lsr#3	// sigma0(X[i+1])
+	add	w27,w27,w16			// h+=Sigma1(e)
+	eor	w28,w28,w21			// Maj(a,b,c)
+	eor	w17,w3,w20,ror#22	// Sigma0(a)
+	eor	w1,w1,w10,lsr#10	// sigma1(X[i+14])
+	add	w12,w12,w5
+	add	w23,w23,w27			// d+=h
+	add	w27,w27,w28			// h+=Maj(a,b,c)
+	ldr	w28,[x30],#4		// *K++, w19 in next round
+	add	w12,w12,w2
+	add	w27,w27,w17			// h+=Sigma0(a)
+	add	w12,w12,w1
+	ldr	w1,[sp,#8]
+	str	w4,[sp,#4]
+	ror	w16,w23,#6
+	add	w26,w26,w28			// h+=K[i]
+	ror	w3,w14,#7
+	and	w17,w24,w23
+	ror	w2,w11,#17
+	bic	w28,w25,w23
+	ror	w4,w27,#2
+	add	w26,w26,w12			// h+=X[i]
+	eor	w16,w16,w23,ror#11
+	eor	w3,w3,w14,ror#18
+	orr	w17,w17,w28			// Ch(e,f,g)
+	eor	w28,w27,w20			// a^b, b^c in next round
+	eor	w16,w16,w23,ror#25	// Sigma1(e)
+	eor	w4,w4,w27,ror#13
+	add	w26,w26,w17			// h+=Ch(e,f,g)
+	and	w19,w19,w28			// (b^c)&=(a^b)
+	eor	w2,w2,w11,ror#19
+	eor	w3,w3,w14,lsr#3	// sigma0(X[i+1])
+	add	w26,w26,w16			// h+=Sigma1(e)
+	eor	w19,w19,w20			// Maj(a,b,c)
+	eor	w17,w4,w27,ror#22	// Sigma0(a)
+	eor	w2,w2,w11,lsr#10	// sigma1(X[i+14])
+	add	w13,w13,w6
+	add	w22,w22,w26			// d+=h
+	add	w26,w26,w19			// h+=Maj(a,b,c)
+	ldr	w19,[x30],#4		// *K++, w28 in next round
+	add	w13,w13,w3
+	add	w26,w26,w17			// h+=Sigma0(a)
+	add	w13,w13,w2
+	ldr	w2,[sp,#12]
+	str	w5,[sp,#8]
+	ror	w16,w22,#6
+	add	w25,w25,w19			// h+=K[i]
+	ror	w4,w15,#7
+	and	w17,w23,w22
+	ror	w3,w12,#17
+	bic	w19,w24,w22
+	ror	w5,w26,#2
+	add	w25,w25,w13			// h+=X[i]
+	eor	w16,w16,w22,ror#11
+	eor	w4,w4,w15,ror#18
+	orr	w17,w17,w19			// Ch(e,f,g)
+	eor	w19,w26,w27			// a^b, b^c in next round
+	eor	w16,w16,w22,ror#25	// Sigma1(e)
+	eor	w5,w5,w26,ror#13
+	add	w25,w25,w17			// h+=Ch(e,f,g)
+	and	w28,w28,w19			// (b^c)&=(a^b)
+	eor	w3,w3,w12,ror#19
+	eor	w4,w4,w15,lsr#3	// sigma0(X[i+1])
+	add	w25,w25,w16			// h+=Sigma1(e)
+	eor	w28,w28,w27			// Maj(a,b,c)
+	eor	w17,w5,w26,ror#22	// Sigma0(a)
+	eor	w3,w3,w12,lsr#10	// sigma1(X[i+14])
+	add	w14,w14,w7
+	add	w21,w21,w25			// d+=h
+	add	w25,w25,w28			// h+=Maj(a,b,c)
+	ldr	w28,[x30],#4		// *K++, w19 in next round
+	add	w14,w14,w4
+	add	w25,w25,w17			// h+=Sigma0(a)
+	add	w14,w14,w3
+	ldr	w3,[sp,#0]
+	str	w6,[sp,#12]
+	ror	w16,w21,#6
+	add	w24,w24,w28			// h+=K[i]
+	ror	w5,w0,#7
+	and	w17,w22,w21
+	ror	w4,w13,#17
+	bic	w28,w23,w21
+	ror	w6,w25,#2
+	add	w24,w24,w14			// h+=X[i]
+	eor	w16,w16,w21,ror#11
+	eor	w5,w5,w0,ror#18
+	orr	w17,w17,w28			// Ch(e,f,g)
+	eor	w28,w25,w26			// a^b, b^c in next round
+	eor	w16,w16,w21,ror#25	// Sigma1(e)
+	eor	w6,w6,w25,ror#13
+	add	w24,w24,w17			// h+=Ch(e,f,g)
+	and	w19,w19,w28			// (b^c)&=(a^b)
+	eor	w4,w4,w13,ror#19
+	eor	w5,w5,w0,lsr#3	// sigma0(X[i+1])
+	add	w24,w24,w16			// h+=Sigma1(e)
+	eor	w19,w19,w26			// Maj(a,b,c)
+	eor	w17,w6,w25,ror#22	// Sigma0(a)
+	eor	w4,w4,w13,lsr#10	// sigma1(X[i+14])
+	add	w15,w15,w8
+	add	w20,w20,w24			// d+=h
+	add	w24,w24,w19			// h+=Maj(a,b,c)
+	ldr	w19,[x30],#4		// *K++, w28 in next round
+	add	w15,w15,w5
+	add	w24,w24,w17			// h+=Sigma0(a)
+	add	w15,w15,w4
+	ldr	w4,[sp,#4]
+	str	w7,[sp,#0]
+	ror	w16,w20,#6
+	add	w23,w23,w19			// h+=K[i]
+	ror	w6,w1,#7
+	and	w17,w21,w20
+	ror	w5,w14,#17
+	bic	w19,w22,w20
+	ror	w7,w24,#2
+	add	w23,w23,w15			// h+=X[i]
+	eor	w16,w16,w20,ror#11
+	eor	w6,w6,w1,ror#18
+	orr	w17,w17,w19			// Ch(e,f,g)
+	eor	w19,w24,w25			// a^b, b^c in next round
+	eor	w16,w16,w20,ror#25	// Sigma1(e)
+	eor	w7,w7,w24,ror#13
+	add	w23,w23,w17			// h+=Ch(e,f,g)
+	and	w28,w28,w19			// (b^c)&=(a^b)
+	eor	w5,w5,w14,ror#19
+	eor	w6,w6,w1,lsr#3	// sigma0(X[i+1])
+	add	w23,w23,w16			// h+=Sigma1(e)
+	eor	w28,w28,w25			// Maj(a,b,c)
+	eor	w17,w7,w24,ror#22	// Sigma0(a)
+	eor	w5,w5,w14,lsr#10	// sigma1(X[i+14])
+	add	w0,w0,w9
+	add	w27,w27,w23			// d+=h
+	add	w23,w23,w28			// h+=Maj(a,b,c)
+	ldr	w28,[x30],#4		// *K++, w19 in next round
+	add	w0,w0,w6
+	add	w23,w23,w17			// h+=Sigma0(a)
+	add	w0,w0,w5
+	ldr	w5,[sp,#8]
+	str	w8,[sp,#4]
+	ror	w16,w27,#6
+	add	w22,w22,w28			// h+=K[i]
+	ror	w7,w2,#7
+	and	w17,w20,w27
+	ror	w6,w15,#17
+	bic	w28,w21,w27
+	ror	w8,w23,#2
+	add	w22,w22,w0			// h+=X[i]
+	eor	w16,w16,w27,ror#11
+	eor	w7,w7,w2,ror#18
+	orr	w17,w17,w28			// Ch(e,f,g)
+	eor	w28,w23,w24			// a^b, b^c in next round
+	eor	w16,w16,w27,ror#25	// Sigma1(e)
+	eor	w8,w8,w23,ror#13
+	add	w22,w22,w17			// h+=Ch(e,f,g)
+	and	w19,w19,w28			// (b^c)&=(a^b)
+	eor	w6,w6,w15,ror#19
+	eor	w7,w7,w2,lsr#3	// sigma0(X[i+1])
+	add	w22,w22,w16			// h+=Sigma1(e)
+	eor	w19,w19,w24			// Maj(a,b,c)
+	eor	w17,w8,w23,ror#22	// Sigma0(a)
+	eor	w6,w6,w15,lsr#10	// sigma1(X[i+14])
+	add	w1,w1,w10
+	add	w26,w26,w22			// d+=h
+	add	w22,w22,w19			// h+=Maj(a,b,c)
+	ldr	w19,[x30],#4		// *K++, w28 in next round
+	add	w1,w1,w7
+	add	w22,w22,w17			// h+=Sigma0(a)
+	add	w1,w1,w6
+	ldr	w6,[sp,#12]
+	str	w9,[sp,#8]
+	ror	w16,w26,#6
+	add	w21,w21,w19			// h+=K[i]
+	ror	w8,w3,#7
+	and	w17,w27,w26
+	ror	w7,w0,#17
+	bic	w19,w20,w26
+	ror	w9,w22,#2
+	add	w21,w21,w1			// h+=X[i]
+	eor	w16,w16,w26,ror#11
+	eor	w8,w8,w3,ror#18
+	orr	w17,w17,w19			// Ch(e,f,g)
+	eor	w19,w22,w23			// a^b, b^c in next round
+	eor	w16,w16,w26,ror#25	// Sigma1(e)
+	eor	w9,w9,w22,ror#13
+	add	w21,w21,w17			// h+=Ch(e,f,g)
+	and	w28,w28,w19			// (b^c)&=(a^b)
+	eor	w7,w7,w0,ror#19
+	eor	w8,w8,w3,lsr#3	// sigma0(X[i+1])
+	add	w21,w21,w16			// h+=Sigma1(e)
+	eor	w28,w28,w23			// Maj(a,b,c)
+	eor	w17,w9,w22,ror#22	// Sigma0(a)
+	eor	w7,w7,w0,lsr#10	// sigma1(X[i+14])
+	add	w2,w2,w11
+	add	w25,w25,w21			// d+=h
+	add	w21,w21,w28			// h+=Maj(a,b,c)
+	ldr	w28,[x30],#4		// *K++, w19 in next round
+	add	w2,w2,w8
+	add	w21,w21,w17			// h+=Sigma0(a)
+	add	w2,w2,w7
+	ldr	w7,[sp,#0]
+	str	w10,[sp,#12]
+	ror	w16,w25,#6
+	add	w20,w20,w28			// h+=K[i]
+	ror	w9,w4,#7
+	and	w17,w26,w25
+	ror	w8,w1,#17
+	bic	w28,w27,w25
+	ror	w10,w21,#2
+	add	w20,w20,w2			// h+=X[i]
+	eor	w16,w16,w25,ror#11
+	eor	w9,w9,w4,ror#18
+	orr	w17,w17,w28			// Ch(e,f,g)
+	eor	w28,w21,w22			// a^b, b^c in next round
+	eor	w16,w16,w25,ror#25	// Sigma1(e)
+	eor	w10,w10,w21,ror#13
+	add	w20,w20,w17			// h+=Ch(e,f,g)
+	and	w19,w19,w28			// (b^c)&=(a^b)
+	eor	w8,w8,w1,ror#19
+	eor	w9,w9,w4,lsr#3	// sigma0(X[i+1])
+	add	w20,w20,w16			// h+=Sigma1(e)
+	eor	w19,w19,w22			// Maj(a,b,c)
+	eor	w17,w10,w21,ror#22	// Sigma0(a)
+	eor	w8,w8,w1,lsr#10	// sigma1(X[i+14])
+	add	w3,w3,w12
+	add	w24,w24,w20			// d+=h
+	add	w20,w20,w19			// h+=Maj(a,b,c)
+	ldr	w19,[x30],#4		// *K++, w28 in next round
+	add	w3,w3,w9
+	add	w20,w20,w17			// h+=Sigma0(a)
+	add	w3,w3,w8
+	cbnz	w19,.Loop_16_xx
+
+	ldp	x0,x2,[x29,#96]
+	ldr	x1,[x29,#112]
+	sub	x30,x30,#260		// rewind
+
+	ldp	w3,w4,[x0]
+	ldp	w5,w6,[x0,#2*4]
+	add	x1,x1,#14*4			// advance input pointer
+	ldp	w7,w8,[x0,#4*4]
+	add	w20,w20,w3
+	ldp	w9,w10,[x0,#6*4]
+	add	w21,w21,w4
+	add	w22,w22,w5
+	add	w23,w23,w6
+	stp	w20,w21,[x0]
+	add	w24,w24,w7
+	add	w25,w25,w8
+	stp	w22,w23,[x0,#2*4]
+	add	w26,w26,w9
+	add	w27,w27,w10
+	cmp	x1,x2
+	stp	w24,w25,[x0,#4*4]
+	stp	w26,w27,[x0,#6*4]
+	b.ne	.Loop
+
+	ldp	x19,x20,[x29,#16]
+	add	sp,sp,#4*4
+	ldp	x21,x22,[x29,#32]
+	ldp	x23,x24,[x29,#48]
+	ldp	x25,x26,[x29,#64]
+	ldp	x27,x28,[x29,#80]
+	ldp	x29,x30,[sp],#128
+	ret
+.size	sha256_block_data_order,.-sha256_block_data_order
+
+.align	6
+.type	.LK256,%object
+.LK256:
+	.long	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+	.long	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+	.long	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+	.long	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+	.long	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+	.long	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+	.long	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+	.long	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+	.long	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+	.long	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+	.long	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+	.long	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+	.long	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+	.long	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+	.long	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+	.long	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+	.long	0	//terminator
+.size	.LK256,.-.LK256
+#ifndef	__KERNEL__
+.align	3
+.LOPENSSL_armcap_P:
+# ifdef	__ILP32__
+	.long	OPENSSL_armcap_P-.
+# else
+	.quad	OPENSSL_armcap_P-.
+# endif
+#endif
+.asciz	"SHA256 block transform for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
+.align	2
+#ifndef	__KERNEL__
+.type	sha256_block_armv8,%function
+.align	6
+sha256_block_armv8:
+.Lv8_entry:
+	stp		x29,x30,[sp,#-16]!
+	add		x29,sp,#0
+
+	ld1		{v0.4s,v1.4s},[x0]
+	adr		x3,.LK256
+
+.Loop_hw:
+	ld1		{v4.16b-v7.16b},[x1],#64
+	sub		x2,x2,#1
+	ld1		{v16.4s},[x3],#16
+	rev32		v4.16b,v4.16b
+	rev32		v5.16b,v5.16b
+	rev32		v6.16b,v6.16b
+	rev32		v7.16b,v7.16b
+	orr		v18.16b,v0.16b,v0.16b		// offload
+	orr		v19.16b,v1.16b,v1.16b
+	ld1		{v17.4s},[x3],#16
+	add		v16.4s,v16.4s,v4.4s
+	.inst	0x5e2828a4	//sha256su0 v4.16b,v5.16b
+	orr		v2.16b,v0.16b,v0.16b
+	.inst	0x5e104020	//sha256h v0.16b,v1.16b,v16.4s
+	.inst	0x5e105041	//sha256h2 v1.16b,v2.16b,v16.4s
+	.inst	0x5e0760c4	//sha256su1 v4.16b,v6.16b,v7.16b
+	ld1		{v16.4s},[x3],#16
+	add		v17.4s,v17.4s,v5.4s
+	.inst	0x5e2828c5	//sha256su0 v5.16b,v6.16b
+	orr		v2.16b,v0.16b,v0.16b
+	.inst	0x5e114020	//sha256h v0.16b,v1.16b,v17.4s
+	.inst	0x5e115041	//sha256h2 v1.16b,v2.16b,v17.4s
+	.inst	0x5e0460e5	//sha256su1 v5.16b,v7.16b,v4.16b
+	ld1		{v17.4s},[x3],#16
+	add		v16.4s,v16.4s,v6.4s
+	.inst	0x5e2828e6	//sha256su0 v6.16b,v7.16b
+	orr		v2.16b,v0.16b,v0.16b
+	.inst	0x5e104020	//sha256h v0.16b,v1.16b,v16.4s
+	.inst	0x5e105041	//sha256h2 v1.16b,v2.16b,v16.4s
+	.inst	0x5e056086	//sha256su1 v6.16b,v4.16b,v5.16b
+	ld1		{v16.4s},[x3],#16
+	add		v17.4s,v17.4s,v7.4s
+	.inst	0x5e282887	//sha256su0 v7.16b,v4.16b
+	orr		v2.16b,v0.16b,v0.16b
+	.inst	0x5e114020	//sha256h v0.16b,v1.16b,v17.4s
+	.inst	0x5e115041	//sha256h2 v1.16b,v2.16b,v17.4s
+	.inst	0x5e0660a7	//sha256su1 v7.16b,v5.16b,v6.16b
+	ld1		{v17.4s},[x3],#16
+	add		v16.4s,v16.4s,v4.4s
+	.inst	0x5e2828a4	//sha256su0 v4.16b,v5.16b
+	orr		v2.16b,v0.16b,v0.16b
+	.inst	0x5e104020	//sha256h v0.16b,v1.16b,v16.4s
+	.inst	0x5e105041	//sha256h2 v1.16b,v2.16b,v16.4s
+	.inst	0x5e0760c4	//sha256su1 v4.16b,v6.16b,v7.16b
+	ld1		{v16.4s},[x3],#16
+	add		v17.4s,v17.4s,v5.4s
+	.inst	0x5e2828c5	//sha256su0 v5.16b,v6.16b
+	orr		v2.16b,v0.16b,v0.16b
+	.inst	0x5e114020	//sha256h v0.16b,v1.16b,v17.4s
+	.inst	0x5e115041	//sha256h2 v1.16b,v2.16b,v17.4s
+	.inst	0x5e0460e5	//sha256su1 v5.16b,v7.16b,v4.16b
+	ld1		{v17.4s},[x3],#16
+	add		v16.4s,v16.4s,v6.4s
+	.inst	0x5e2828e6	//sha256su0 v6.16b,v7.16b
+	orr		v2.16b,v0.16b,v0.16b
+	.inst	0x5e104020	//sha256h v0.16b,v1.16b,v16.4s
+	.inst	0x5e105041	//sha256h2 v1.16b,v2.16b,v16.4s
+	.inst	0x5e056086	//sha256su1 v6.16b,v4.16b,v5.16b
+	ld1		{v16.4s},[x3],#16
+	add		v17.4s,v17.4s,v7.4s
+	.inst	0x5e282887	//sha256su0 v7.16b,v4.16b
+	orr		v2.16b,v0.16b,v0.16b
+	.inst	0x5e114020	//sha256h v0.16b,v1.16b,v17.4s
+	.inst	0x5e115041	//sha256h2 v1.16b,v2.16b,v17.4s
+	.inst	0x5e0660a7	//sha256su1 v7.16b,v5.16b,v6.16b
+	ld1		{v17.4s},[x3],#16
+	add		v16.4s,v16.4s,v4.4s
+	.inst	0x5e2828a4	//sha256su0 v4.16b,v5.16b
+	orr		v2.16b,v0.16b,v0.16b
+	.inst	0x5e104020	//sha256h v0.16b,v1.16b,v16.4s
+	.inst	0x5e105041	//sha256h2 v1.16b,v2.16b,v16.4s
+	.inst	0x5e0760c4	//sha256su1 v4.16b,v6.16b,v7.16b
+	ld1		{v16.4s},[x3],#16
+	add		v17.4s,v17.4s,v5.4s
+	.inst	0x5e2828c5	//sha256su0 v5.16b,v6.16b
+	orr		v2.16b,v0.16b,v0.16b
+	.inst	0x5e114020	//sha256h v0.16b,v1.16b,v17.4s
+	.inst	0x5e115041	//sha256h2 v1.16b,v2.16b,v17.4s
+	.inst	0x5e0460e5	//sha256su1 v5.16b,v7.16b,v4.16b
+	ld1		{v17.4s},[x3],#16
+	add		v16.4s,v16.4s,v6.4s
+	.inst	0x5e2828e6	//sha256su0 v6.16b,v7.16b
+	orr		v2.16b,v0.16b,v0.16b
+	.inst	0x5e104020	//sha256h v0.16b,v1.16b,v16.4s
+	.inst	0x5e105041	//sha256h2 v1.16b,v2.16b,v16.4s
+	.inst	0x5e056086	//sha256su1 v6.16b,v4.16b,v5.16b
+	ld1		{v16.4s},[x3],#16
+	add		v17.4s,v17.4s,v7.4s
+	.inst	0x5e282887	//sha256su0 v7.16b,v4.16b
+	orr		v2.16b,v0.16b,v0.16b
+	.inst	0x5e114020	//sha256h v0.16b,v1.16b,v17.4s
+	.inst	0x5e115041	//sha256h2 v1.16b,v2.16b,v17.4s
+	.inst	0x5e0660a7	//sha256su1 v7.16b,v5.16b,v6.16b
+	ld1		{v17.4s},[x3],#16
+	add		v16.4s,v16.4s,v4.4s
+	orr		v2.16b,v0.16b,v0.16b
+	.inst	0x5e104020	//sha256h v0.16b,v1.16b,v16.4s
+	.inst	0x5e105041	//sha256h2 v1.16b,v2.16b,v16.4s
+
+	ld1		{v16.4s},[x3],#16
+	add		v17.4s,v17.4s,v5.4s
+	orr		v2.16b,v0.16b,v0.16b
+	.inst	0x5e114020	//sha256h v0.16b,v1.16b,v17.4s
+	.inst	0x5e115041	//sha256h2 v1.16b,v2.16b,v17.4s
+
+	ld1		{v17.4s},[x3]
+	add		v16.4s,v16.4s,v6.4s
+	sub		x3,x3,#64*4-16	// rewind
+	orr		v2.16b,v0.16b,v0.16b
+	.inst	0x5e104020	//sha256h v0.16b,v1.16b,v16.4s
+	.inst	0x5e105041	//sha256h2 v1.16b,v2.16b,v16.4s
+
+	add		v17.4s,v17.4s,v7.4s
+	orr		v2.16b,v0.16b,v0.16b
+	.inst	0x5e114020	//sha256h v0.16b,v1.16b,v17.4s
+	.inst	0x5e115041	//sha256h2 v1.16b,v2.16b,v17.4s
+
+	add		v0.4s,v0.4s,v18.4s
+	add		v1.4s,v1.4s,v19.4s
+
+	cbnz		x2,.Loop_hw
+
+	st1		{v0.4s,v1.4s},[x0]
+
+	ldr		x29,[sp],#16
+	ret
+.size	sha256_block_armv8,.-sha256_block_armv8
+#endif
+#ifdef	__KERNEL__
+.globl	sha256_block_neon
+#endif
+.type	sha256_block_neon,%function
+.align	4
+sha256_block_neon:
+.Lneon_entry:
+	stp	x29, x30, [sp, #-16]!
+	mov	x29, sp
+	sub	sp,sp,#16*4
+
+	adr	x16,.LK256
+	add	x2,x1,x2,lsl#6	// len to point at the end of inp
+
+	ld1	{v0.16b},[x1], #16
+	ld1	{v1.16b},[x1], #16
+	ld1	{v2.16b},[x1], #16
+	ld1	{v3.16b},[x1], #16
+	ld1	{v4.4s},[x16], #16
+	ld1	{v5.4s},[x16], #16
+	ld1	{v6.4s},[x16], #16
+	ld1	{v7.4s},[x16], #16
+	rev32	v0.16b,v0.16b		// yes, even on
+	rev32	v1.16b,v1.16b		// big-endian
+	rev32	v2.16b,v2.16b
+	rev32	v3.16b,v3.16b
+	mov	x17,sp
+	add	v4.4s,v4.4s,v0.4s
+	add	v5.4s,v5.4s,v1.4s
+	add	v6.4s,v6.4s,v2.4s
+	st1	{v4.4s-v5.4s},[x17], #32
+	add	v7.4s,v7.4s,v3.4s
+	st1	{v6.4s-v7.4s},[x17]
+	sub	x17,x17,#32
+
+	ldp	w3,w4,[x0]
+	ldp	w5,w6,[x0,#8]
+	ldp	w7,w8,[x0,#16]
+	ldp	w9,w10,[x0,#24]
+	ldr	w12,[sp,#0]
+	mov	w13,wzr
+	eor	w14,w4,w5
+	mov	w15,wzr
+	b	.L_00_48
+
+.align	4
+.L_00_48:
+	ext	v4.16b,v0.16b,v1.16b,#4
+	add	w10,w10,w12
+	add	w3,w3,w15
+	and	w12,w8,w7
+	bic	w15,w9,w7
+	ext	v7.16b,v2.16b,v3.16b,#4
+	eor	w11,w7,w7,ror#5
+	add	w3,w3,w13
+	mov	d19,v3.d[1]
+	orr	w12,w12,w15
+	eor	w11,w11,w7,ror#19
+	ushr	v6.4s,v4.4s,#7
+	eor	w15,w3,w3,ror#11
+	ushr	v5.4s,v4.4s,#3
+	add	w10,w10,w12
+	add	v0.4s,v0.4s,v7.4s
+	ror	w11,w11,#6
+	sli	v6.4s,v4.4s,#25
+	eor	w13,w3,w4
+	eor	w15,w15,w3,ror#20
+	ushr	v7.4s,v4.4s,#18
+	add	w10,w10,w11
+	ldr	w12,[sp,#4]
+	and	w14,w14,w13
+	eor	v5.16b,v5.16b,v6.16b
+	ror	w15,w15,#2
+	add	w6,w6,w10
+	sli	v7.4s,v4.4s,#14
+	eor	w14,w14,w4
+	ushr	v16.4s,v19.4s,#17
+	add	w9,w9,w12
+	add	w10,w10,w15
+	and	w12,w7,w6
+	eor	v5.16b,v5.16b,v7.16b
+	bic	w15,w8,w6
+	eor	w11,w6,w6,ror#5
+	sli	v16.4s,v19.4s,#15
+	add	w10,w10,w14
+	orr	w12,w12,w15
+	ushr	v17.4s,v19.4s,#10
+	eor	w11,w11,w6,ror#19
+	eor	w15,w10,w10,ror#11
+	ushr	v7.4s,v19.4s,#19
+	add	w9,w9,w12
+	ror	w11,w11,#6
+	add	v0.4s,v0.4s,v5.4s
+	eor	w14,w10,w3
+	eor	w15,w15,w10,ror#20
+	sli	v7.4s,v19.4s,#13
+	add	w9,w9,w11
+	ldr	w12,[sp,#8]
+	and	w13,w13,w14
+	eor	v17.16b,v17.16b,v16.16b
+	ror	w15,w15,#2
+	add	w5,w5,w9
+	eor	w13,w13,w3
+	eor	v17.16b,v17.16b,v7.16b
+	add	w8,w8,w12
+	add	w9,w9,w15
+	and	w12,w6,w5
+	add	v0.4s,v0.4s,v17.4s
+	bic	w15,w7,w5
+	eor	w11,w5,w5,ror#5
+	add	w9,w9,w13
+	ushr	v18.4s,v0.4s,#17
+	orr	w12,w12,w15
+	ushr	v19.4s,v0.4s,#10
+	eor	w11,w11,w5,ror#19
+	eor	w15,w9,w9,ror#11
+	sli	v18.4s,v0.4s,#15
+	add	w8,w8,w12
+	ushr	v17.4s,v0.4s,#19
+	ror	w11,w11,#6
+	eor	w13,w9,w10
+	eor	v19.16b,v19.16b,v18.16b
+	eor	w15,w15,w9,ror#20
+	add	w8,w8,w11
+	sli	v17.4s,v0.4s,#13
+	ldr	w12,[sp,#12]
+	and	w14,w14,w13
+	ror	w15,w15,#2
+	ld1	{v4.4s},[x16], #16
+	add	w4,w4,w8
+	eor	v19.16b,v19.16b,v17.16b
+	eor	w14,w14,w10
+	eor	v17.16b,v17.16b,v17.16b
+	add	w7,w7,w12
+	add	w8,w8,w15
+	and	w12,w5,w4
+	mov	v17.d[1],v19.d[0]
+	bic	w15,w6,w4
+	eor	w11,w4,w4,ror#5
+	add	w8,w8,w14
+	add	v0.4s,v0.4s,v17.4s
+	orr	w12,w12,w15
+	eor	w11,w11,w4,ror#19
+	eor	w15,w8,w8,ror#11
+	add	v4.4s,v4.4s,v0.4s
+	add	w7,w7,w12
+	ror	w11,w11,#6
+	eor	w14,w8,w9
+	eor	w15,w15,w8,ror#20
+	add	w7,w7,w11
+	ldr	w12,[sp,#16]
+	and	w13,w13,w14
+	ror	w15,w15,#2
+	add	w3,w3,w7
+	eor	w13,w13,w9
+	st1	{v4.4s},[x17], #16
+	ext	v4.16b,v1.16b,v2.16b,#4
+	add	w6,w6,w12
+	add	w7,w7,w15
+	and	w12,w4,w3
+	bic	w15,w5,w3
+	ext	v7.16b,v3.16b,v0.16b,#4
+	eor	w11,w3,w3,ror#5
+	add	w7,w7,w13
+	mov	d19,v0.d[1]
+	orr	w12,w12,w15
+	eor	w11,w11,w3,ror#19
+	ushr	v6.4s,v4.4s,#7
+	eor	w15,w7,w7,ror#11
+	ushr	v5.4s,v4.4s,#3
+	add	w6,w6,w12
+	add	v1.4s,v1.4s,v7.4s
+	ror	w11,w11,#6
+	sli	v6.4s,v4.4s,#25
+	eor	w13,w7,w8
+	eor	w15,w15,w7,ror#20
+	ushr	v7.4s,v4.4s,#18
+	add	w6,w6,w11
+	ldr	w12,[sp,#20]
+	and	w14,w14,w13
+	eor	v5.16b,v5.16b,v6.16b
+	ror	w15,w15,#2
+	add	w10,w10,w6
+	sli	v7.4s,v4.4s,#14
+	eor	w14,w14,w8
+	ushr	v16.4s,v19.4s,#17
+	add	w5,w5,w12
+	add	w6,w6,w15
+	and	w12,w3,w10
+	eor	v5.16b,v5.16b,v7.16b
+	bic	w15,w4,w10
+	eor	w11,w10,w10,ror#5
+	sli	v16.4s,v19.4s,#15
+	add	w6,w6,w14
+	orr	w12,w12,w15
+	ushr	v17.4s,v19.4s,#10
+	eor	w11,w11,w10,ror#19
+	eor	w15,w6,w6,ror#11
+	ushr	v7.4s,v19.4s,#19
+	add	w5,w5,w12
+	ror	w11,w11,#6
+	add	v1.4s,v1.4s,v5.4s
+	eor	w14,w6,w7
+	eor	w15,w15,w6,ror#20
+	sli	v7.4s,v19.4s,#13
+	add	w5,w5,w11
+	ldr	w12,[sp,#24]
+	and	w13,w13,w14
+	eor	v17.16b,v17.16b,v16.16b
+	ror	w15,w15,#2
+	add	w9,w9,w5
+	eor	w13,w13,w7
+	eor	v17.16b,v17.16b,v7.16b
+	add	w4,w4,w12
+	add	w5,w5,w15
+	and	w12,w10,w9
+	add	v1.4s,v1.4s,v17.4s
+	bic	w15,w3,w9
+	eor	w11,w9,w9,ror#5
+	add	w5,w5,w13
+	ushr	v18.4s,v1.4s,#17
+	orr	w12,w12,w15
+	ushr	v19.4s,v1.4s,#10
+	eor	w11,w11,w9,ror#19
+	eor	w15,w5,w5,ror#11
+	sli	v18.4s,v1.4s,#15
+	add	w4,w4,w12
+	ushr	v17.4s,v1.4s,#19
+	ror	w11,w11,#6
+	eor	w13,w5,w6
+	eor	v19.16b,v19.16b,v18.16b
+	eor	w15,w15,w5,ror#20
+	add	w4,w4,w11
+	sli	v17.4s,v1.4s,#13
+	ldr	w12,[sp,#28]
+	and	w14,w14,w13
+	ror	w15,w15,#2
+	ld1	{v4.4s},[x16], #16
+	add	w8,w8,w4
+	eor	v19.16b,v19.16b,v17.16b
+	eor	w14,w14,w6
+	eor	v17.16b,v17.16b,v17.16b
+	add	w3,w3,w12
+	add	w4,w4,w15
+	and	w12,w9,w8
+	mov	v17.d[1],v19.d[0]
+	bic	w15,w10,w8
+	eor	w11,w8,w8,ror#5
+	add	w4,w4,w14
+	add	v1.4s,v1.4s,v17.4s
+	orr	w12,w12,w15
+	eor	w11,w11,w8,ror#19
+	eor	w15,w4,w4,ror#11
+	add	v4.4s,v4.4s,v1.4s
+	add	w3,w3,w12
+	ror	w11,w11,#6
+	eor	w14,w4,w5
+	eor	w15,w15,w4,ror#20
+	add	w3,w3,w11
+	ldr	w12,[sp,#32]
+	and	w13,w13,w14
+	ror	w15,w15,#2
+	add	w7,w7,w3
+	eor	w13,w13,w5
+	st1	{v4.4s},[x17], #16
+	ext	v4.16b,v2.16b,v3.16b,#4
+	add	w10,w10,w12
+	add	w3,w3,w15
+	and	w12,w8,w7
+	bic	w15,w9,w7
+	ext	v7.16b,v0.16b,v1.16b,#4
+	eor	w11,w7,w7,ror#5
+	add	w3,w3,w13
+	mov	d19,v1.d[1]
+	orr	w12,w12,w15
+	eor	w11,w11,w7,ror#19
+	ushr	v6.4s,v4.4s,#7
+	eor	w15,w3,w3,ror#11
+	ushr	v5.4s,v4.4s,#3
+	add	w10,w10,w12
+	add	v2.4s,v2.4s,v7.4s
+	ror	w11,w11,#6
+	sli	v6.4s,v4.4s,#25
+	eor	w13,w3,w4
+	eor	w15,w15,w3,ror#20
+	ushr	v7.4s,v4.4s,#18
+	add	w10,w10,w11
+	ldr	w12,[sp,#36]
+	and	w14,w14,w13
+	eor	v5.16b,v5.16b,v6.16b
+	ror	w15,w15,#2
+	add	w6,w6,w10
+	sli	v7.4s,v4.4s,#14
+	eor	w14,w14,w4
+	ushr	v16.4s,v19.4s,#17
+	add	w9,w9,w12
+	add	w10,w10,w15
+	and	w12,w7,w6
+	eor	v5.16b,v5.16b,v7.16b
+	bic	w15,w8,w6
+	eor	w11,w6,w6,ror#5
+	sli	v16.4s,v19.4s,#15
+	add	w10,w10,w14
+	orr	w12,w12,w15
+	ushr	v17.4s,v19.4s,#10
+	eor	w11,w11,w6,ror#19
+	eor	w15,w10,w10,ror#11
+	ushr	v7.4s,v19.4s,#19
+	add	w9,w9,w12
+	ror	w11,w11,#6
+	add	v2.4s,v2.4s,v5.4s
+	eor	w14,w10,w3
+	eor	w15,w15,w10,ror#20
+	sli	v7.4s,v19.4s,#13
+	add	w9,w9,w11
+	ldr	w12,[sp,#40]
+	and	w13,w13,w14
+	eor	v17.16b,v17.16b,v16.16b
+	ror	w15,w15,#2
+	add	w5,w5,w9
+	eor	w13,w13,w3
+	eor	v17.16b,v17.16b,v7.16b
+	add	w8,w8,w12
+	add	w9,w9,w15
+	and	w12,w6,w5
+	add	v2.4s,v2.4s,v17.4s
+	bic	w15,w7,w5
+	eor	w11,w5,w5,ror#5
+	add	w9,w9,w13
+	ushr	v18.4s,v2.4s,#17
+	orr	w12,w12,w15
+	ushr	v19.4s,v2.4s,#10
+	eor	w11,w11,w5,ror#19
+	eor	w15,w9,w9,ror#11
+	sli	v18.4s,v2.4s,#15
+	add	w8,w8,w12
+	ushr	v17.4s,v2.4s,#19
+	ror	w11,w11,#6
+	eor	w13,w9,w10
+	eor	v19.16b,v19.16b,v18.16b
+	eor	w15,w15,w9,ror#20
+	add	w8,w8,w11
+	sli	v17.4s,v2.4s,#13
+	ldr	w12,[sp,#44]
+	and	w14,w14,w13
+	ror	w15,w15,#2
+	ld1	{v4.4s},[x16], #16
+	add	w4,w4,w8
+	eor	v19.16b,v19.16b,v17.16b
+	eor	w14,w14,w10
+	eor	v17.16b,v17.16b,v17.16b
+	add	w7,w7,w12
+	add	w8,w8,w15
+	and	w12,w5,w4
+	mov	v17.d[1],v19.d[0]
+	bic	w15,w6,w4
+	eor	w11,w4,w4,ror#5
+	add	w8,w8,w14
+	add	v2.4s,v2.4s,v17.4s
+	orr	w12,w12,w15
+	eor	w11,w11,w4,ror#19
+	eor	w15,w8,w8,ror#11
+	add	v4.4s,v4.4s,v2.4s
+	add	w7,w7,w12
+	ror	w11,w11,#6
+	eor	w14,w8,w9
+	eor	w15,w15,w8,ror#20
+	add	w7,w7,w11
+	ldr	w12,[sp,#48]
+	and	w13,w13,w14
+	ror	w15,w15,#2
+	add	w3,w3,w7
+	eor	w13,w13,w9
+	st1	{v4.4s},[x17], #16
+	ext	v4.16b,v3.16b,v0.16b,#4
+	add	w6,w6,w12
+	add	w7,w7,w15
+	and	w12,w4,w3
+	bic	w15,w5,w3
+	ext	v7.16b,v1.16b,v2.16b,#4
+	eor	w11,w3,w3,ror#5
+	add	w7,w7,w13
+	mov	d19,v2.d[1]
+	orr	w12,w12,w15
+	eor	w11,w11,w3,ror#19
+	ushr	v6.4s,v4.4s,#7
+	eor	w15,w7,w7,ror#11
+	ushr	v5.4s,v4.4s,#3
+	add	w6,w6,w12
+	add	v3.4s,v3.4s,v7.4s
+	ror	w11,w11,#6
+	sli	v6.4s,v4.4s,#25
+	eor	w13,w7,w8
+	eor	w15,w15,w7,ror#20
+	ushr	v7.4s,v4.4s,#18
+	add	w6,w6,w11
+	ldr	w12,[sp,#52]
+	and	w14,w14,w13
+	eor	v5.16b,v5.16b,v6.16b
+	ror	w15,w15,#2
+	add	w10,w10,w6
+	sli	v7.4s,v4.4s,#14
+	eor	w14,w14,w8
+	ushr	v16.4s,v19.4s,#17
+	add	w5,w5,w12
+	add	w6,w6,w15
+	and	w12,w3,w10
+	eor	v5.16b,v5.16b,v7.16b
+	bic	w15,w4,w10
+	eor	w11,w10,w10,ror#5
+	sli	v16.4s,v19.4s,#15
+	add	w6,w6,w14
+	orr	w12,w12,w15
+	ushr	v17.4s,v19.4s,#10
+	eor	w11,w11,w10,ror#19
+	eor	w15,w6,w6,ror#11
+	ushr	v7.4s,v19.4s,#19
+	add	w5,w5,w12
+	ror	w11,w11,#6
+	add	v3.4s,v3.4s,v5.4s
+	eor	w14,w6,w7
+	eor	w15,w15,w6,ror#20
+	sli	v7.4s,v19.4s,#13
+	add	w5,w5,w11
+	ldr	w12,[sp,#56]
+	and	w13,w13,w14
+	eor	v17.16b,v17.16b,v16.16b
+	ror	w15,w15,#2
+	add	w9,w9,w5
+	eor	w13,w13,w7
+	eor	v17.16b,v17.16b,v7.16b
+	add	w4,w4,w12
+	add	w5,w5,w15
+	and	w12,w10,w9
+	add	v3.4s,v3.4s,v17.4s
+	bic	w15,w3,w9
+	eor	w11,w9,w9,ror#5
+	add	w5,w5,w13
+	ushr	v18.4s,v3.4s,#17
+	orr	w12,w12,w15
+	ushr	v19.4s,v3.4s,#10
+	eor	w11,w11,w9,ror#19
+	eor	w15,w5,w5,ror#11
+	sli	v18.4s,v3.4s,#15
+	add	w4,w4,w12
+	ushr	v17.4s,v3.4s,#19
+	ror	w11,w11,#6
+	eor	w13,w5,w6
+	eor	v19.16b,v19.16b,v18.16b
+	eor	w15,w15,w5,ror#20
+	add	w4,w4,w11
+	sli	v17.4s,v3.4s,#13
+	ldr	w12,[sp,#60]
+	and	w14,w14,w13
+	ror	w15,w15,#2
+	ld1	{v4.4s},[x16], #16
+	add	w8,w8,w4
+	eor	v19.16b,v19.16b,v17.16b
+	eor	w14,w14,w6
+	eor	v17.16b,v17.16b,v17.16b
+	add	w3,w3,w12
+	add	w4,w4,w15
+	and	w12,w9,w8
+	mov	v17.d[1],v19.d[0]
+	bic	w15,w10,w8
+	eor	w11,w8,w8,ror#5
+	add	w4,w4,w14
+	add	v3.4s,v3.4s,v17.4s
+	orr	w12,w12,w15
+	eor	w11,w11,w8,ror#19
+	eor	w15,w4,w4,ror#11
+	add	v4.4s,v4.4s,v3.4s
+	add	w3,w3,w12
+	ror	w11,w11,#6
+	eor	w14,w4,w5
+	eor	w15,w15,w4,ror#20
+	add	w3,w3,w11
+	ldr	w12,[x16]
+	and	w13,w13,w14
+	ror	w15,w15,#2
+	add	w7,w7,w3
+	eor	w13,w13,w5
+	st1	{v4.4s},[x17], #16
+	cmp	w12,#0				// check for K256 terminator
+	ldr	w12,[sp,#0]
+	sub	x17,x17,#64
+	bne	.L_00_48
+
+	sub	x16,x16,#256		// rewind x16
+	cmp	x1,x2
+	mov	x17, #64
+	csel	x17, x17, xzr, eq
+	sub	x1,x1,x17			// avoid SEGV
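+	// (on the final block x1 is stepped back by 64, so the preloads
+	// below re-read the last block instead of running past the input;
+	// the dummy schedule this produces is never consumed because the
+	// closing b.ne still sees the flags from the cmp x1,x2 above)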
+	mov	x17,sp
+	add	w10,w10,w12
+	add	w3,w3,w15
+	and	w12,w8,w7
+	ld1	{v0.16b},[x1],#16
+	bic	w15,w9,w7
+	eor	w11,w7,w7,ror#5
+	ld1	{v4.4s},[x16],#16
+	add	w3,w3,w13
+	orr	w12,w12,w15
+	eor	w11,w11,w7,ror#19
+	eor	w15,w3,w3,ror#11
+	rev32	v0.16b,v0.16b
+	add	w10,w10,w12
+	ror	w11,w11,#6
+	eor	w13,w3,w4
+	eor	w15,w15,w3,ror#20
+	add	v4.4s,v4.4s,v0.4s
+	add	w10,w10,w11
+	ldr	w12,[sp,#4]
+	and	w14,w14,w13
+	ror	w15,w15,#2
+	add	w6,w6,w10
+	eor	w14,w14,w4
+	add	w9,w9,w12
+	add	w10,w10,w15
+	and	w12,w7,w6
+	bic	w15,w8,w6
+	eor	w11,w6,w6,ror#5
+	add	w10,w10,w14
+	orr	w12,w12,w15
+	eor	w11,w11,w6,ror#19
+	eor	w15,w10,w10,ror#11
+	add	w9,w9,w12
+	ror	w11,w11,#6
+	eor	w14,w10,w3
+	eor	w15,w15,w10,ror#20
+	add	w9,w9,w11
+	ldr	w12,[sp,#8]
+	and	w13,w13,w14
+	ror	w15,w15,#2
+	add	w5,w5,w9
+	eor	w13,w13,w3
+	add	w8,w8,w12
+	add	w9,w9,w15
+	and	w12,w6,w5
+	bic	w15,w7,w5
+	eor	w11,w5,w5,ror#5
+	add	w9,w9,w13
+	orr	w12,w12,w15
+	eor	w11,w11,w5,ror#19
+	eor	w15,w9,w9,ror#11
+	add	w8,w8,w12
+	ror	w11,w11,#6
+	eor	w13,w9,w10
+	eor	w15,w15,w9,ror#20
+	add	w8,w8,w11
+	ldr	w12,[sp,#12]
+	and	w14,w14,w13
+	ror	w15,w15,#2
+	add	w4,w4,w8
+	eor	w14,w14,w10
+	add	w7,w7,w12
+	add	w8,w8,w15
+	and	w12,w5,w4
+	bic	w15,w6,w4
+	eor	w11,w4,w4,ror#5
+	add	w8,w8,w14
+	orr	w12,w12,w15
+	eor	w11,w11,w4,ror#19
+	eor	w15,w8,w8,ror#11
+	add	w7,w7,w12
+	ror	w11,w11,#6
+	eor	w14,w8,w9
+	eor	w15,w15,w8,ror#20
+	add	w7,w7,w11
+	ldr	w12,[sp,#16]
+	and	w13,w13,w14
+	ror	w15,w15,#2
+	add	w3,w3,w7
+	eor	w13,w13,w9
+	st1	{v4.4s},[x17], #16
+	add	w6,w6,w12
+	add	w7,w7,w15
+	and	w12,w4,w3
+	ld1	{v1.16b},[x1],#16
+	bic	w15,w5,w3
+	eor	w11,w3,w3,ror#5
+	ld1	{v4.4s},[x16],#16
+	add	w7,w7,w13
+	orr	w12,w12,w15
+	eor	w11,w11,w3,ror#19
+	eor	w15,w7,w7,ror#11
+	rev32	v1.16b,v1.16b
+	add	w6,w6,w12
+	ror	w11,w11,#6
+	eor	w13,w7,w8
+	eor	w15,w15,w7,ror#20
+	add	v4.4s,v4.4s,v1.4s
+	add	w6,w6,w11
+	ldr	w12,[sp,#20]
+	and	w14,w14,w13
+	ror	w15,w15,#2
+	add	w10,w10,w6
+	eor	w14,w14,w8
+	add	w5,w5,w12
+	add	w6,w6,w15
+	and	w12,w3,w10
+	bic	w15,w4,w10
+	eor	w11,w10,w10,ror#5
+	add	w6,w6,w14
+	orr	w12,w12,w15
+	eor	w11,w11,w10,ror#19
+	eor	w15,w6,w6,ror#11
+	add	w5,w5,w12
+	ror	w11,w11,#6
+	eor	w14,w6,w7
+	eor	w15,w15,w6,ror#20
+	add	w5,w5,w11
+	ldr	w12,[sp,#24]
+	and	w13,w13,w14
+	ror	w15,w15,#2
+	add	w9,w9,w5
+	eor	w13,w13,w7
+	add	w4,w4,w12
+	add	w5,w5,w15
+	and	w12,w10,w9
+	bic	w15,w3,w9
+	eor	w11,w9,w9,ror#5
+	add	w5,w5,w13
+	orr	w12,w12,w15
+	eor	w11,w11,w9,ror#19
+	eor	w15,w5,w5,ror#11
+	add	w4,w4,w12
+	ror	w11,w11,#6
+	eor	w13,w5,w6
+	eor	w15,w15,w5,ror#20
+	add	w4,w4,w11
+	ldr	w12,[sp,#28]
+	and	w14,w14,w13
+	ror	w15,w15,#2
+	add	w8,w8,w4
+	eor	w14,w14,w6
+	add	w3,w3,w12
+	add	w4,w4,w15
+	and	w12,w9,w8
+	bic	w15,w10,w8
+	eor	w11,w8,w8,ror#5
+	add	w4,w4,w14
+	orr	w12,w12,w15
+	eor	w11,w11,w8,ror#19
+	eor	w15,w4,w4,ror#11
+	add	w3,w3,w12
+	ror	w11,w11,#6
+	eor	w14,w4,w5
+	eor	w15,w15,w4,ror#20
+	add	w3,w3,w11
+	ldr	w12,[sp,#32]
+	and	w13,w13,w14
+	ror	w15,w15,#2
+	add	w7,w7,w3
+	eor	w13,w13,w5
+	st1	{v4.4s},[x17], #16
+	add	w10,w10,w12
+	add	w3,w3,w15
+	and	w12,w8,w7
+	ld1	{v2.16b},[x1],#16
+	bic	w15,w9,w7
+	eor	w11,w7,w7,ror#5
+	ld1	{v4.4s},[x16],#16
+	add	w3,w3,w13
+	orr	w12,w12,w15
+	eor	w11,w11,w7,ror#19
+	eor	w15,w3,w3,ror#11
+	rev32	v2.16b,v2.16b
+	add	w10,w10,w12
+	ror	w11,w11,#6
+	eor	w13,w3,w4
+	eor	w15,w15,w3,ror#20
+	add	v4.4s,v4.4s,v2.4s
+	add	w10,w10,w11
+	ldr	w12,[sp,#36]
+	and	w14,w14,w13
+	ror	w15,w15,#2
+	add	w6,w6,w10
+	eor	w14,w14,w4
+	add	w9,w9,w12
+	add	w10,w10,w15
+	and	w12,w7,w6
+	bic	w15,w8,w6
+	eor	w11,w6,w6,ror#5
+	add	w10,w10,w14
+	orr	w12,w12,w15
+	eor	w11,w11,w6,ror#19
+	eor	w15,w10,w10,ror#11
+	add	w9,w9,w12
+	ror	w11,w11,#6
+	eor	w14,w10,w3
+	eor	w15,w15,w10,ror#20
+	add	w9,w9,w11
+	ldr	w12,[sp,#40]
+	and	w13,w13,w14
+	ror	w15,w15,#2
+	add	w5,w5,w9
+	eor	w13,w13,w3
+	add	w8,w8,w12
+	add	w9,w9,w15
+	and	w12,w6,w5
+	bic	w15,w7,w5
+	eor	w11,w5,w5,ror#5
+	add	w9,w9,w13
+	orr	w12,w12,w15
+	eor	w11,w11,w5,ror#19
+	eor	w15,w9,w9,ror#11
+	add	w8,w8,w12
+	ror	w11,w11,#6
+	eor	w13,w9,w10
+	eor	w15,w15,w9,ror#20
+	add	w8,w8,w11
+	ldr	w12,[sp,#44]
+	and	w14,w14,w13
+	ror	w15,w15,#2
+	add	w4,w4,w8
+	eor	w14,w14,w10
+	add	w7,w7,w12
+	add	w8,w8,w15
+	and	w12,w5,w4
+	bic	w15,w6,w4
+	eor	w11,w4,w4,ror#5
+	add	w8,w8,w14
+	orr	w12,w12,w15
+	eor	w11,w11,w4,ror#19
+	eor	w15,w8,w8,ror#11
+	add	w7,w7,w12
+	ror	w11,w11,#6
+	eor	w14,w8,w9
+	eor	w15,w15,w8,ror#20
+	add	w7,w7,w11
+	ldr	w12,[sp,#48]
+	and	w13,w13,w14
+	ror	w15,w15,#2
+	add	w3,w3,w7
+	eor	w13,w13,w9
+	st1	{v4.4s},[x17], #16
+	add	w6,w6,w12
+	add	w7,w7,w15
+	and	w12,w4,w3
+	ld1	{v3.16b},[x1],#16
+	bic	w15,w5,w3
+	eor	w11,w3,w3,ror#5
+	ld1	{v4.4s},[x16],#16
+	add	w7,w7,w13
+	orr	w12,w12,w15
+	eor	w11,w11,w3,ror#19
+	eor	w15,w7,w7,ror#11
+	rev32	v3.16b,v3.16b
+	add	w6,w6,w12
+	ror	w11,w11,#6
+	eor	w13,w7,w8
+	eor	w15,w15,w7,ror#20
+	add	v4.4s,v4.4s,v3.4s
+	add	w6,w6,w11
+	ldr	w12,[sp,#52]
+	and	w14,w14,w13
+	ror	w15,w15,#2
+	add	w10,w10,w6
+	eor	w14,w14,w8
+	add	w5,w5,w12
+	add	w6,w6,w15
+	and	w12,w3,w10
+	bic	w15,w4,w10
+	eor	w11,w10,w10,ror#5
+	add	w6,w6,w14
+	orr	w12,w12,w15
+	eor	w11,w11,w10,ror#19
+	eor	w15,w6,w6,ror#11
+	add	w5,w5,w12
+	ror	w11,w11,#6
+	eor	w14,w6,w7
+	eor	w15,w15,w6,ror#20
+	add	w5,w5,w11
+	ldr	w12,[sp,#56]
+	and	w13,w13,w14
+	ror	w15,w15,#2
+	add	w9,w9,w5
+	eor	w13,w13,w7
+	add	w4,w4,w12
+	add	w5,w5,w15
+	and	w12,w10,w9
+	bic	w15,w3,w9
+	eor	w11,w9,w9,ror#5
+	add	w5,w5,w13
+	orr	w12,w12,w15
+	eor	w11,w11,w9,ror#19
+	eor	w15,w5,w5,ror#11
+	add	w4,w4,w12
+	ror	w11,w11,#6
+	eor	w13,w5,w6
+	eor	w15,w15,w5,ror#20
+	add	w4,w4,w11
+	ldr	w12,[sp,#60]
+	and	w14,w14,w13
+	ror	w15,w15,#2
+	add	w8,w8,w4
+	eor	w14,w14,w6
+	add	w3,w3,w12
+	add	w4,w4,w15
+	and	w12,w9,w8
+	bic	w15,w10,w8
+	eor	w11,w8,w8,ror#5
+	add	w4,w4,w14
+	orr	w12,w12,w15
+	eor	w11,w11,w8,ror#19
+	eor	w15,w4,w4,ror#11
+	add	w3,w3,w12
+	ror	w11,w11,#6
+	eor	w14,w4,w5
+	eor	w15,w15,w4,ror#20
+	add	w3,w3,w11
+	and	w13,w13,w14
+	ror	w15,w15,#2
+	add	w7,w7,w3
+	eor	w13,w13,w5
+	st1	{v4.4s},[x17], #16
+	add	w3,w3,w15			// h+=Sigma0(a) from the past
+	ldp	w11,w12,[x0,#0]
+	add	w3,w3,w13			// h+=Maj(a,b,c) from the past
+	ldp	w13,w14,[x0,#8]
+	add	w3,w3,w11			// accumulate
+	add	w4,w4,w12
+	ldp	w11,w12,[x0,#16]
+	add	w5,w5,w13
+	add	w6,w6,w14
+	ldp	w13,w14,[x0,#24]
+	add	w7,w7,w11
+	add	w8,w8,w12
+	 ldr	w12,[sp,#0]
+	stp	w3,w4,[x0,#0]
+	add	w9,w9,w13
+	 mov	w13,wzr
+	stp	w5,w6,[x0,#8]
+	add	w10,w10,w14
+	stp	w7,w8,[x0,#16]
+	 eor	w14,w4,w5
+	stp	w9,w10,[x0,#24]
+	 mov	w15,wzr
+	 mov	x17,sp
+	b.ne	.L_00_48
+
+	ldr	x29,[x29]
+	add	sp,sp,#16*4+16
+	ret
+.size	sha256_block_neon,.-sha256_block_neon
+#ifndef	__KERNEL__
+.comm	OPENSSL_armcap_P,4,4
+#endif
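
The scalar .Loop_16_xx body above keeps no round counter at all: .LK256 ends
in a zero word (every real K256 constant is non-zero), each round fetches the
next constant with "ldr w19,[x30],#4", and "cbnz w19,.Loop_16_xx" exits once
the terminator comes up. A minimal C sketch of that sentinel-driven loop,
purely illustrative and not part of the patch (simplified to test every
round, where the assembly only tests once per 16-round batch):

	#include <stdint.h>

	/* Illustrative only: k256 points at the 64 round constants
	 * followed by the 0 terminator, exactly as .LK256 is laid out. */
	static uint32_t walk_k256(const uint32_t *k256)
	{
		uint32_t k = *k256++;	/* K[0] is preloaded before .Loop */
		uint32_t acc = 0;

		do {
			acc += k;	/* stands in for one SHA-256 round */
			k = *k256++;	/* "*K++"; fetching 0 ends the loop */
		} while (k != 0);

		return acc;
	}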

185 0
arch/arm64/crypto/sha256-glue.c

@@ -0,0 +1,185 @@
+/*
+ * Linux/arm64 port of the OpenSSL SHA256 implementation for AArch64
+ *
+ * Copyright (c) 2016 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+#include <asm/simd.h>
+#include <crypto/internal/hash.h>
+#include <crypto/sha.h>
+#include <crypto/sha256_base.h>
+#include <linux/cryptohash.h>
+#include <linux/types.h>
+#include <linux/string.h>
+
+MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash for arm64");
+MODULE_AUTHOR("Andy Polyakov <appro@openssl.org>");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("sha224");
+MODULE_ALIAS_CRYPTO("sha256");
+
+asmlinkage void sha256_block_data_order(u32 *digest, const void *data,
+					unsigned int num_blks);
+
+asmlinkage void sha256_block_neon(u32 *digest, const void *data,
+				  unsigned int num_blks);
+
+static int sha256_update(struct shash_desc *desc, const u8 *data,
+			 unsigned int len)
+{
+	return sha256_base_do_update(desc, data, len,
+				(sha256_block_fn *)sha256_block_data_order);
+}
+
+static int sha256_finup(struct shash_desc *desc, const u8 *data,
+			unsigned int len, u8 *out)
+{
+	if (len)
+		sha256_base_do_update(desc, data, len,
+				(sha256_block_fn *)sha256_block_data_order);
+	sha256_base_do_finalize(desc,
+				(sha256_block_fn *)sha256_block_data_order);
+
+	return sha256_base_finish(desc, out);
+}
+
+static int sha256_final(struct shash_desc *desc, u8 *out)
+{
+	return sha256_finup(desc, NULL, 0, out);
+}
+
+static struct shash_alg algs[] = { {
+	.digestsize		= SHA256_DIGEST_SIZE,
+	.init			= sha256_base_init,
+	.update			= sha256_update,
+	.final			= sha256_final,
+	.finup			= sha256_finup,
+	.descsize		= sizeof(struct sha256_state),
+	.base.cra_name		= "sha256",
+	.base.cra_driver_name	= "sha256-arm64",
+	.base.cra_priority	= 100,
+	.base.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
+	.base.cra_blocksize	= SHA256_BLOCK_SIZE,
+	.base.cra_module	= THIS_MODULE,
+}, {
+	.digestsize		= SHA224_DIGEST_SIZE,
+	.init			= sha224_base_init,
+	.update			= sha256_update,
+	.final			= sha256_final,
+	.finup			= sha256_finup,
+	.descsize		= sizeof(struct sha256_state),
+	.base.cra_name		= "sha224",
+	.base.cra_driver_name	= "sha224-arm64",
+	.base.cra_priority	= 100,
+	.base.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
+	.base.cra_blocksize	= SHA224_BLOCK_SIZE,
+	.base.cra_module	= THIS_MODULE,
+} };
+
+static int sha256_update_neon(struct shash_desc *desc, const u8 *data,
+			      unsigned int len)
+{
+	/*
+	 * Stacking and unstacking a substantial slice of the NEON register
+	 * file may significantly affect performance for small updates when
+	 * executing in interrupt context, so fall back to the scalar code
+	 * in that case.
+	 */
+	if (!may_use_simd())
+		return sha256_base_do_update(desc, data, len,
+				(sha256_block_fn *)sha256_block_data_order);
+
+	kernel_neon_begin();
+	sha256_base_do_update(desc, data, len,
+				(sha256_block_fn *)sha256_block_neon);
+	kernel_neon_end();
+
+	return 0;
+}
+
+static int sha256_finup_neon(struct shash_desc *desc, const u8 *data,
+			     unsigned int len, u8 *out)
+{
+	if (!may_use_simd()) {
+		if (len)
+			sha256_base_do_update(desc, data, len,
+				(sha256_block_fn *)sha256_block_data_order);
+		sha256_base_do_finalize(desc,
+				(sha256_block_fn *)sha256_block_data_order);
+	} else {
+		kernel_neon_begin();
+		if (len)
+			sha256_base_do_update(desc, data, len,
+				(sha256_block_fn *)sha256_block_neon);
+		sha256_base_do_finalize(desc,
+				(sha256_block_fn *)sha256_block_neon);
+		kernel_neon_end();
+	}
+	return sha256_base_finish(desc, out);
+}
+
+static int sha256_final_neon(struct shash_desc *desc, u8 *out)
+{
+	return sha256_finup_neon(desc, NULL, 0, out);
+}
+
+static struct shash_alg neon_algs[] = { {
+	.digestsize		= SHA256_DIGEST_SIZE,
+	.init			= sha256_base_init,
+	.update			= sha256_update_neon,
+	.final			= sha256_final_neon,
+	.finup			= sha256_finup_neon,
+	.descsize		= sizeof(struct sha256_state),
+	.base.cra_name		= "sha256",
+	.base.cra_driver_name	= "sha256-arm64-neon",
+	.base.cra_priority	= 150,
+	.base.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
+	.base.cra_blocksize	= SHA256_BLOCK_SIZE,
+	.base.cra_module	= THIS_MODULE,
+}, {
+	.digestsize		= SHA224_DIGEST_SIZE,
+	.init			= sha224_base_init,
+	.update			= sha256_update_neon,
+	.final			= sha256_final_neon,
+	.finup			= sha256_finup_neon,
+	.descsize		= sizeof(struct sha256_state),
+	.base.cra_name		= "sha224",
+	.base.cra_driver_name	= "sha224-arm64-neon",
+	.base.cra_priority	= 150,
+	.base.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
+	.base.cra_blocksize	= SHA224_BLOCK_SIZE,
+	.base.cra_module	= THIS_MODULE,
+} };
+
+static int __init sha256_mod_init(void)
+{
+	int ret = crypto_register_shashes(algs, ARRAY_SIZE(algs));
+	if (ret)
+		return ret;
+
+	if (elf_hwcap & HWCAP_ASIMD) {
+		ret = crypto_register_shashes(neon_algs, ARRAY_SIZE(neon_algs));
+		if (ret)
+			crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
+	}
+	return ret;
+}
+
+static void __exit sha256_mod_fini(void)
+{
+	if (elf_hwcap & HWCAP_ASIMD)
+		crypto_unregister_shashes(neon_algs, ARRAY_SIZE(neon_algs));
+	crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
+}
+
+module_init(sha256_mod_init);
+module_exit(sha256_mod_fini);
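
The scalar and NEON variants register under the same generic "sha256"/"sha224"
names, and the crypto core picks by cra_priority (150 beats 100), so callers
transparently get the NEON code wherever it was registered. A hedged sketch of
in-kernel usage through the shash API (example_sha256 is a made-up helper name;
the crypto_* calls are the real API):

	#include <crypto/hash.h>
	#include <crypto/sha.h>
	#include <linux/err.h>

	/* Hypothetical helper, not part of the patch: one-shot SHA-256
	 * over a buffer. "sha256" resolves to the highest-priority
	 * registered implementation, i.e. sha256-arm64-neon when the
	 * HWCAP_ASIMD branch above registered it. */
	static int example_sha256(const u8 *data, unsigned int len,
				  u8 out[SHA256_DIGEST_SIZE])
	{
		struct crypto_shash *tfm;
		int ret;

		tfm = crypto_alloc_shash("sha256", 0, 0);
		if (IS_ERR(tfm))
			return PTR_ERR(tfm);

		{
			SHASH_DESC_ON_STACK(desc, tfm);

			desc->tfm = tfm;
			ret = crypto_shash_digest(desc, data, len, out);
		}

		crypto_free_shash(tfm);
		return ret;
	}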

778 0
arch/arm64/crypto/sha512-armv8.pl

@@ -0,0 +1,778 @@
+#! /usr/bin/env perl
+# Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License").  You may not use
+# this file except in compliance with the License.  You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+#
+# Permission to use under GPLv2 terms is granted.
+# ====================================================================
+#
+# SHA256/512 for ARMv8.
+#
+# Performance in cycles per processed byte and improvement coefficient
+# over code generated with "default" compiler:
+#
+#		SHA256-hw	SHA256(*)	SHA512
+# Apple A7	1.97		10.5 (+33%)	6.73 (-1%(**))
+# Cortex-A53	2.38		15.5 (+115%)	10.0 (+150%(***))
+# Cortex-A57	2.31		11.6 (+86%)	7.51 (+260%(***))
+# Denver	2.01		10.5 (+26%)	6.70 (+8%)
+# X-Gene			20.0 (+100%)	12.8 (+300%(***))
+# Mongoose	2.36		13.0 (+50%)	8.36 (+33%)
+#
+# (*)	Software SHA256 results are of lesser relevance, presented
+#	mostly for informational purposes.
+# (**)	The result is a trade-off: it's possible to improve it by
+#	10% (or by 1 cycle per round), but at the cost of 20% loss
+#	on Cortex-A53 (or by 4 cycles per round).
+# (***)	Super-impressive coefficients over gcc-generated code are an
+#	indication of some compiler "pathology"; most notably, code
+#	generated with -mgeneral-regs-only is significantly faster
+#	and the gap is only 40-90%.
+#
+# October 2016.
+#
+# Originally it was reckoned that it makes no sense to implement NEON
+# version of SHA256 for 64-bit processors. This is because performance
+# improvement on most wide-spread Cortex-A5x processors was observed
+# to be marginal, same on Cortex-A53 and ~10% on A57. But then it was
+# observed that 32-bit NEON SHA256 performs significantly better than
+# 64-bit scalar version on *some* of the more recent processors. As a
+# result, a 64-bit NEON version of SHA256 was added to provide the best
+# all-round performance. For example it executes ~30% faster on X-Gene
+# and Mongoose. [For reference, NEON version of SHA512 is bound to
+# deliver much less improvement, likely *negative* on Cortex-A5x.
+# Which is why NEON support is limited to SHA256.]
+
+$output=pop;
+$flavour=pop;
+
+if ($flavour && $flavour ne "void") {
+    $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+    ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
+    ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
+    die "can't locate arm-xlate.pl";
+
+    open OUT,"| \"$^X\" $xlate $flavour $output";
+    *STDOUT=*OUT;
+} else {
+    open STDOUT,">$output";
+}
+
+if ($output =~ /512/) {
+	$BITS=512;
+	$SZ=8;
+	@Sigma0=(28,34,39);
+	@Sigma1=(14,18,41);
+	@sigma0=(1,  8, 7);
+	@sigma1=(19,61, 6);
+	$rounds=80;
+	$reg_t="x";
+} else {
+	$BITS=256;
+	$SZ=4;
+	@Sigma0=( 2,13,22);
+	@Sigma1=( 6,11,25);
+	@sigma0=( 7,18, 3);
+	@sigma1=(17,19,10);
+	$rounds=64;
+	$reg_t="w";
+}
+
+$func="sha${BITS}_block_data_order";
+
+($ctx,$inp,$num,$Ktbl)=map("x$_",(0..2,30));
+
+@X=map("$reg_t$_",(3..15,0..2));
+@V=($A,$B,$C,$D,$E,$F,$G,$H)=map("$reg_t$_",(20..27));
+($t0,$t1,$t2,$t3)=map("$reg_t$_",(16,17,19,28));
+
+sub BODY_00_xx {
+my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
+my $j=($i+1)&15;
+my ($T0,$T1,$T2)=(@X[($i-8)&15],@X[($i-9)&15],@X[($i-10)&15]);
+   $T0=@X[$i+3] if ($i<11);
+
+$code.=<<___	if ($i<16);
+#ifndef	__AARCH64EB__
+	rev	@X[$i],@X[$i]			// $i
+#endif
+___
+$code.=<<___	if ($i<13 && ($i&1));
+	ldp	@X[$i+1],@X[$i+2],[$inp],#2*$SZ
+___
+$code.=<<___	if ($i==13);
+	ldp	@X[14],@X[15],[$inp]
+___
+$code.=<<___	if ($i>=14);
+	ldr	@X[($i-11)&15],[sp,#`$SZ*(($i-11)%4)`]
+___
+$code.=<<___	if ($i>0 && $i<16);
+	add	$a,$a,$t1			// h+=Sigma0(a)
+___
+$code.=<<___	if ($i>=11);
+	str	@X[($i-8)&15],[sp,#`$SZ*(($i-8)%4)`]
+___
+# While ARMv8 specifies merged rotate-and-logical operations such as
+# 'eor x,y,z,ror#n', they were found to negatively affect performance
+# on Apple A7. The reason seems to be that they require even 'y' to
+# be available earlier. This means that such a merged instruction is
+# not necessarily the best choice on the critical path... On the other
+# hand, Cortex-A5x handles merged instructions much better than disjoint
+# rotate and logical... See the (**) footnote above.
+$code.=<<___	if ($i<15);
+	ror	$t0,$e,#$Sigma1[0]
+	add	$h,$h,$t2			// h+=K[i]
+	eor	$T0,$e,$e,ror#`$Sigma1[2]-$Sigma1[1]`
+	and	$t1,$f,$e
+	bic	$t2,$g,$e
+	add	$h,$h,@X[$i&15]			// h+=X[i]
+	orr	$t1,$t1,$t2			// Ch(e,f,g)
+	eor	$t2,$a,$b			// a^b, b^c in next round
+	eor	$t0,$t0,$T0,ror#$Sigma1[1]	// Sigma1(e)
+	ror	$T0,$a,#$Sigma0[0]
+	add	$h,$h,$t1			// h+=Ch(e,f,g)
+	eor	$t1,$a,$a,ror#`$Sigma0[2]-$Sigma0[1]`
+	add	$h,$h,$t0			// h+=Sigma1(e)
+	and	$t3,$t3,$t2			// (b^c)&=(a^b)
+	add	$d,$d,$h			// d+=h
+	eor	$t3,$t3,$b			// Maj(a,b,c)
+	eor	$t1,$T0,$t1,ror#$Sigma0[1]	// Sigma0(a)
+	add	$h,$h,$t3			// h+=Maj(a,b,c)
+	ldr	$t3,[$Ktbl],#$SZ		// *K++, $t2 in next round
+	//add	$h,$h,$t1			// h+=Sigma0(a)
+___
+$code.=<<___	if ($i>=15);
+	ror	$t0,$e,#$Sigma1[0]
+	add	$h,$h,$t2			// h+=K[i]
+	ror	$T1,@X[($j+1)&15],#$sigma0[0]
+	and	$t1,$f,$e
+	ror	$T2,@X[($j+14)&15],#$sigma1[0]
+	bic	$t2,$g,$e
+	ror	$T0,$a,#$Sigma0[0]
+	add	$h,$h,@X[$i&15]			// h+=X[i]
+	eor	$t0,$t0,$e,ror#$Sigma1[1]
+	eor	$T1,$T1,@X[($j+1)&15],ror#$sigma0[1]
+	orr	$t1,$t1,$t2			// Ch(e,f,g)
+	eor	$t2,$a,$b			// a^b, b^c in next round
+	eor	$t0,$t0,$e,ror#$Sigma1[2]	// Sigma1(e)
+	eor	$T0,$T0,$a,ror#$Sigma0[1]
+	add	$h,$h,$t1			// h+=Ch(e,f,g)
+	and	$t3,$t3,$t2			// (b^c)&=(a^b)
+	eor	$T2,$T2,@X[($j+14)&15],ror#$sigma1[1]
+	eor	$T1,$T1,@X[($j+1)&15],lsr#$sigma0[2]	// sigma0(X[i+1])
+	add	$h,$h,$t0			// h+=Sigma1(e)
+	eor	$t3,$t3,$b			// Maj(a,b,c)
+	eor	$t1,$T0,$a,ror#$Sigma0[2]	// Sigma0(a)
+	eor	$T2,$T2,@X[($j+14)&15],lsr#$sigma1[2]	// sigma1(X[i+14])
+	add	@X[$j],@X[$j],@X[($j+9)&15]
+	add	$d,$d,$h			// d+=h
+	add	$h,$h,$t3			// h+=Maj(a,b,c)
+	ldr	$t3,[$Ktbl],#$SZ		// *K++, $t2 in next round
+	add	@X[$j],@X[$j],$T1
+	add	$h,$h,$t1			// h+=Sigma0(a)
+	add	@X[$j],@X[$j],$T2
+___
+	($t2,$t3)=($t3,$t2);
+}
+
+$code.=<<___;
+#ifndef	__KERNEL__
+# include "arm_arch.h"
+#endif
+
+.text
+
+.extern	OPENSSL_armcap_P
+.globl	$func
+.type	$func,%function
+.align	6
+$func:
+___
+$code.=<<___	if ($SZ==4);
+#ifndef	__KERNEL__
+# ifdef	__ILP32__
+	ldrsw	x16,.LOPENSSL_armcap_P
+# else
+	ldr	x16,.LOPENSSL_armcap_P
+# endif
+	adr	x17,.LOPENSSL_armcap_P
+	add	x16,x16,x17
+	ldr	w16,[x16]
+	tst	w16,#ARMV8_SHA256
+	b.ne	.Lv8_entry
+	tst	w16,#ARMV7_NEON
+	b.ne	.Lneon_entry
+#endif
+___
+$code.=<<___;
+	stp	x29,x30,[sp,#-128]!
+	add	x29,sp,#0
+
+	stp	x19,x20,[sp,#16]
+	stp	x21,x22,[sp,#32]
+	stp	x23,x24,[sp,#48]
+	stp	x25,x26,[sp,#64]
+	stp	x27,x28,[sp,#80]
+	sub	sp,sp,#4*$SZ
+
+	ldp	$A,$B,[$ctx]				// load context
+	ldp	$C,$D,[$ctx,#2*$SZ]
+	ldp	$E,$F,[$ctx,#4*$SZ]
+	add	$num,$inp,$num,lsl#`log(16*$SZ)/log(2)`	// end of input
+	ldp	$G,$H,[$ctx,#6*$SZ]
+	adr	$Ktbl,.LK$BITS
+	stp	$ctx,$num,[x29,#96]
+
+.Loop:
+	ldp	@X[0],@X[1],[$inp],#2*$SZ
+	ldr	$t2,[$Ktbl],#$SZ			// *K++
+	eor	$t3,$B,$C				// magic seed
+	str	$inp,[x29,#112]
+___
+for ($i=0;$i<16;$i++)	{ &BODY_00_xx($i,@V); unshift(@V,pop(@V)); }
+$code.=".Loop_16_xx:\n";
+for (;$i<32;$i++)	{ &BODY_00_xx($i,@V); unshift(@V,pop(@V)); }
+$code.=<<___;
+	cbnz	$t2,.Loop_16_xx
+
+	ldp	$ctx,$num,[x29,#96]
+	ldr	$inp,[x29,#112]
+	sub	$Ktbl,$Ktbl,#`$SZ*($rounds+1)`		// rewind
+
+	ldp	@X[0],@X[1],[$ctx]
+	ldp	@X[2],@X[3],[$ctx,#2*$SZ]
+	add	$inp,$inp,#14*$SZ			// advance input pointer
+	ldp	@X[4],@X[5],[$ctx,#4*$SZ]
+	add	$A,$A,@X[0]
+	ldp	@X[6],@X[7],[$ctx,#6*$SZ]
+	add	$B,$B,@X[1]
+	add	$C,$C,@X[2]
+	add	$D,$D,@X[3]
+	stp	$A,$B,[$ctx]
+	add	$E,$E,@X[4]
+	add	$F,$F,@X[5]
+	stp	$C,$D,[$ctx,#2*$SZ]
+	add	$G,$G,@X[6]
+	add	$H,$H,@X[7]
+	cmp	$inp,$num
+	stp	$E,$F,[$ctx,#4*$SZ]
+	stp	$G,$H,[$ctx,#6*$SZ]
+	b.ne	.Loop
+
+	ldp	x19,x20,[x29,#16]
+	add	sp,sp,#4*$SZ
+	ldp	x21,x22,[x29,#32]
+	ldp	x23,x24,[x29,#48]
+	ldp	x25,x26,[x29,#64]
+	ldp	x27,x28,[x29,#80]
+	ldp	x29,x30,[sp],#128
+	ret
+.size	$func,.-$func
+
+.align	6
+.type	.LK$BITS,%object
+.LK$BITS:
+___
+$code.=<<___ if ($SZ==8);
+	.quad	0x428a2f98d728ae22,0x7137449123ef65cd
+	.quad	0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
+	.quad	0x3956c25bf348b538,0x59f111f1b605d019
+	.quad	0x923f82a4af194f9b,0xab1c5ed5da6d8118
+	.quad	0xd807aa98a3030242,0x12835b0145706fbe
+	.quad	0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
+	.quad	0x72be5d74f27b896f,0x80deb1fe3b1696b1
+	.quad	0x9bdc06a725c71235,0xc19bf174cf692694
+	.quad	0xe49b69c19ef14ad2,0xefbe4786384f25e3
+	.quad	0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
+	.quad	0x2de92c6f592b0275,0x4a7484aa6ea6e483
+	.quad	0x5cb0a9dcbd41fbd4,0x76f988da831153b5
+	.quad	0x983e5152ee66dfab,0xa831c66d2db43210
+	.quad	0xb00327c898fb213f,0xbf597fc7beef0ee4
+	.quad	0xc6e00bf33da88fc2,0xd5a79147930aa725
+	.quad	0x06ca6351e003826f,0x142929670a0e6e70
+	.quad	0x27b70a8546d22ffc,0x2e1b21385c26c926
+	.quad	0x4d2c6dfc5ac42aed,0x53380d139d95b3df
+	.quad	0x650a73548baf63de,0x766a0abb3c77b2a8
+	.quad	0x81c2c92e47edaee6,0x92722c851482353b
+	.quad	0xa2bfe8a14cf10364,0xa81a664bbc423001
+	.quad	0xc24b8b70d0f89791,0xc76c51a30654be30
+	.quad	0xd192e819d6ef5218,0xd69906245565a910
+	.quad	0xf40e35855771202a,0x106aa07032bbd1b8
+	.quad	0x19a4c116b8d2d0c8,0x1e376c085141ab53
+	.quad	0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
+	.quad	0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
+	.quad	0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
+	.quad	0x748f82ee5defb2fc,0x78a5636f43172f60
+	.quad	0x84c87814a1f0ab72,0x8cc702081a6439ec
+	.quad	0x90befffa23631e28,0xa4506cebde82bde9
+	.quad	0xbef9a3f7b2c67915,0xc67178f2e372532b
+	.quad	0xca273eceea26619c,0xd186b8c721c0c207
+	.quad	0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
+	.quad	0x06f067aa72176fba,0x0a637dc5a2c898a6
+	.quad	0x113f9804bef90dae,0x1b710b35131c471b
+	.quad	0x28db77f523047d84,0x32caab7b40c72493
+	.quad	0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
+	.quad	0x4cc5d4becb3e42b6,0x597f299cfc657e2a
+	.quad	0x5fcb6fab3ad6faec,0x6c44198c4a475817
+	.quad	0	// terminator
+___
+$code.=<<___ if ($SZ==4);
+	.long	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+	.long	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+	.long	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+	.long	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+	.long	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+	.long	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+	.long	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+	.long	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+	.long	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+	.long	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+	.long	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+	.long	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+	.long	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+	.long	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+	.long	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+	.long	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+	.long	0	//terminator
+___
+$code.=<<___;
+.size	.LK$BITS,.-.LK$BITS
+#ifndef	__KERNEL__
+.align	3
+.LOPENSSL_armcap_P:
+# ifdef	__ILP32__
+	.long	OPENSSL_armcap_P-.
+# else
+	.quad	OPENSSL_armcap_P-.
+# endif
+#endif
+.asciz	"SHA$BITS block transform for ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
+.align	2
+___
+
+if ($SZ==4) {
+my $Ktbl="x3";
+
+my ($ABCD,$EFGH,$abcd)=map("v$_.16b",(0..2));
+my @MSG=map("v$_.16b",(4..7));
+my ($W0,$W1)=("v16.4s","v17.4s");
+my ($ABCD_SAVE,$EFGH_SAVE)=("v18.16b","v19.16b");
+
+$code.=<<___;
+#ifndef	__KERNEL__
+.type	sha256_block_armv8,%function
+.align	6
+sha256_block_armv8:
+.Lv8_entry:
+	stp		x29,x30,[sp,#-16]!
+	add		x29,sp,#0
+
+	ld1.32		{$ABCD,$EFGH},[$ctx]
+	adr		$Ktbl,.LK256
+
+.Loop_hw:
+	ld1		{@MSG[0]-@MSG[3]},[$inp],#64
+	sub		$num,$num,#1
+	ld1.32		{$W0},[$Ktbl],#16
+	rev32		@MSG[0],@MSG[0]
+	rev32		@MSG[1],@MSG[1]
+	rev32		@MSG[2],@MSG[2]
+	rev32		@MSG[3],@MSG[3]
+	orr		$ABCD_SAVE,$ABCD,$ABCD		// offload
+	orr		$EFGH_SAVE,$EFGH,$EFGH
+___
+for($i=0;$i<12;$i++) {
+$code.=<<___;
+	ld1.32		{$W1},[$Ktbl],#16
+	add.i32		$W0,$W0,@MSG[0]
+	sha256su0	@MSG[0],@MSG[1]
+	orr		$abcd,$ABCD,$ABCD
+	sha256h		$ABCD,$EFGH,$W0
+	sha256h2	$EFGH,$abcd,$W0
+	sha256su1	@MSG[0],@MSG[2],@MSG[3]
+___
+	($W0,$W1)=($W1,$W0);	push(@MSG,shift(@MSG));
+}
+$code.=<<___;
+	ld1.32		{$W1},[$Ktbl],#16
+	add.i32		$W0,$W0,@MSG[0]
+	orr		$abcd,$ABCD,$ABCD
+	sha256h		$ABCD,$EFGH,$W0
+	sha256h2	$EFGH,$abcd,$W0
+
+	ld1.32		{$W0},[$Ktbl],#16
+	add.i32		$W1,$W1,@MSG[1]
+	orr		$abcd,$ABCD,$ABCD
+	sha256h		$ABCD,$EFGH,$W1
+	sha256h2	$EFGH,$abcd,$W1
+
+	ld1.32		{$W1},[$Ktbl]
+	add.i32		$W0,$W0,@MSG[2]
+	sub		$Ktbl,$Ktbl,#$rounds*$SZ-16	// rewind
+	orr		$abcd,$ABCD,$ABCD
+	sha256h		$ABCD,$EFGH,$W0
+	sha256h2	$EFGH,$abcd,$W0
+
+	add.i32		$W1,$W1,@MSG[3]
+	orr		$abcd,$ABCD,$ABCD
+	sha256h		$ABCD,$EFGH,$W1
+	sha256h2	$EFGH,$abcd,$W1
+
+	add.i32		$ABCD,$ABCD,$ABCD_SAVE
+	add.i32		$EFGH,$EFGH,$EFGH_SAVE
+
+	cbnz		$num,.Loop_hw
+
+	st1.32		{$ABCD,$EFGH},[$ctx]
+
+	ldr		x29,[sp],#16
+	ret
+.size	sha256_block_armv8,.-sha256_block_armv8
+#endif
+___
+}
+
+if ($SZ==4) {	######################################### NEON stuff #
+# You'll surely note a lot of similarities with sha256-armv4 module,
+# and of course it's not a coincidence. sha256-armv4 was used as
+# initial template, but was adapted for ARMv8 instruction set and
+# extensively re-tuned for all-round performance.
+
+my @V = ($A,$B,$C,$D,$E,$F,$G,$H) = map("w$_",(3..10));
+my ($t0,$t1,$t2,$t3,$t4) = map("w$_",(11..15));
+my $Ktbl="x16";
+my $Xfer="x17";
+my @X = map("q$_",(0..3));
+my ($T0,$T1,$T2,$T3,$T4,$T5,$T6,$T7) = map("q$_",(4..7,16..19));
+my $j=0;
+
+sub AUTOLOAD()          # thunk [simplified] x86-style perlasm
+{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; $opcode =~ s/_/\./;
+  my $arg = pop;
+    $arg = "#$arg" if ($arg*1 eq $arg);
+    $code .= "\t$opcode\t".join(',',@_,$arg)."\n";
+}
+
+sub Dscalar { shift =~ m|[qv]([0-9]+)|?"d$1":""; }
+sub Dlo     { shift =~ m|[qv]([0-9]+)|?"v$1.d[0]":""; }
+sub Dhi     { shift =~ m|[qv]([0-9]+)|?"v$1.d[1]":""; }
+
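+# Xupdate computes the message-schedule update for four X[] words with
+# NEON (the sigma0/sigma1 rotates are built from ushr+sli pairs, i.e.
+# shift-right plus shift-left-insert standing in for a 32-bit rotate),
+# draining four scalar round bodies from @insns in between so the
+# vector latency hides behind the integer rounds.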
+sub Xupdate()
+{ use integer;
+  my $body = shift;
+  my @insns = (&$body,&$body,&$body,&$body);
+  my ($a,$b,$c,$d,$e,$f,$g,$h);
+
+	&ext_8		($T0,@X[0],@X[1],4);	# X[1..4]
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	&ext_8		($T3,@X[2],@X[3],4);	# X[9..12]
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	&mov		(&Dscalar($T7),&Dhi(@X[3]));	# X[14..15]
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	&ushr_32	($T2,$T0,$sigma0[0]);
+	 eval(shift(@insns));
+	&ushr_32	($T1,$T0,$sigma0[2]);
+	 eval(shift(@insns));
+	&add_32 	(@X[0],@X[0],$T3);	# X[0..3] += X[9..12]
+	 eval(shift(@insns));
+	&sli_32		($T2,$T0,32-$sigma0[0]);
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	&ushr_32	($T3,$T0,$sigma0[1]);
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	&eor_8		($T1,$T1,$T2);
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	&sli_32		($T3,$T0,32-$sigma0[1]);
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	  &ushr_32	($T4,$T7,$sigma1[0]);
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	&eor_8		($T1,$T1,$T3);		# sigma0(X[1..4])
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	  &sli_32	($T4,$T7,32-$sigma1[0]);
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	  &ushr_32	($T5,$T7,$sigma1[2]);
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	  &ushr_32	($T3,$T7,$sigma1[1]);
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	&add_32		(@X[0],@X[0],$T1);	# X[0..3] += sigma0(X[1..4])
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	  &sli_u32	($T3,$T7,32-$sigma1[1]);
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	  &eor_8	($T5,$T5,$T4);
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	  &eor_8	($T5,$T5,$T3);		# sigma1(X[14..15])
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	&add_32		(@X[0],@X[0],$T5);	# X[0..1] += sigma1(X[14..15])
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	  &ushr_32	($T6,@X[0],$sigma1[0]);
+	 eval(shift(@insns));
+	  &ushr_32	($T7,@X[0],$sigma1[2]);
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	  &sli_32	($T6,@X[0],32-$sigma1[0]);
+	 eval(shift(@insns));
+	  &ushr_32	($T5,@X[0],$sigma1[1]);
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	  &eor_8	($T7,$T7,$T6);
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	  &sli_32	($T5,@X[0],32-$sigma1[1]);
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	&ld1_32		("{$T0}","[$Ktbl], #16");
+	 eval(shift(@insns));
+	  &eor_8	($T7,$T7,$T5);		# sigma1(X[16..17])
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	&eor_8		($T5,$T5,$T5);
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	&mov		(&Dhi($T5), &Dlo($T7));
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	&add_32		(@X[0],@X[0],$T5);	# X[2..3] += sigma1(X[16..17])
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	&add_32		($T0,$T0,@X[0]);
+	 while($#insns>=1) { eval(shift(@insns)); }
+	&st1_32		("{$T0}","[$Xfer], #16");
+	 eval(shift(@insns));
+
+	push(@X,shift(@X));		# "rotate" X[]
+}
+
+sub Xpreload()
+{ use integer;
+  my $body = shift;
+  my @insns = (&$body,&$body,&$body,&$body);
+  my ($a,$b,$c,$d,$e,$f,$g,$h);
+
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	&ld1_8		("{@X[0]}","[$inp],#16");
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	&ld1_32		("{$T0}","[$Ktbl],#16");
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	&rev32		(@X[0],@X[0]);
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	&add_32		($T0,$T0,@X[0]);
+	 foreach (@insns) { eval; }	# remaining instructions
+	&st1_32		("{$T0}","[$Xfer], #16");
+
+	push(@X,shift(@X));		# "rotate" X[]
+}
+
+sub body_00_15 () {
+	(
+	'($a,$b,$c,$d,$e,$f,$g,$h)=@V;'.
+	'&add	($h,$h,$t1)',			# h+=X[i]+K[i]
+	'&add	($a,$a,$t4);'.			# h+=Sigma0(a) from the past
+	'&and	($t1,$f,$e)',
+	'&bic	($t4,$g,$e)',
+	'&eor	($t0,$e,$e,"ror#".($Sigma1[1]-$Sigma1[0]))',
+	'&add	($a,$a,$t2)',			# h+=Maj(a,b,c) from the past
+	'&orr	($t1,$t1,$t4)',			# Ch(e,f,g)
+	'&eor	($t0,$t0,$e,"ror#".($Sigma1[2]-$Sigma1[0]))',	# Sigma1(e)
+	'&eor	($t4,$a,$a,"ror#".($Sigma0[1]-$Sigma0[0]))',
+	'&add	($h,$h,$t1)',			# h+=Ch(e,f,g)
+	'&ror	($t0,$t0,"#$Sigma1[0]")',
+	'&eor	($t2,$a,$b)',			# a^b, b^c in next round
+	'&eor	($t4,$t4,$a,"ror#".($Sigma0[2]-$Sigma0[0]))',	# Sigma0(a)
+	'&add	($h,$h,$t0)',			# h+=Sigma1(e)
+	'&ldr	($t1,sprintf "[sp,#%d]",4*(($j+1)&15))	if (($j&15)!=15);'.
+	'&ldr	($t1,"[$Ktbl]")				if ($j==15);'.
+	'&and	($t3,$t3,$t2)',			# (b^c)&=(a^b)
+	'&ror	($t4,$t4,"#$Sigma0[0]")',
+	'&add	($d,$d,$h)',			# d+=h
+	'&eor	($t3,$t3,$b)',			# Maj(a,b,c)
+	'$j++;	unshift(@V,pop(@V)); ($t2,$t3)=($t3,$t2);'
+	)
+}
+
+$code.=<<___;
+#ifdef	__KERNEL__
+.globl	sha256_block_neon
+#endif
+.type	sha256_block_neon,%function
+.align	4
+sha256_block_neon:
+.Lneon_entry:
+	stp	x29, x30, [sp, #-16]!
+	mov	x29, sp
+	sub	sp,sp,#16*4
+
+	adr	$Ktbl,.LK256
+	add	$num,$inp,$num,lsl#6	// len to point at the end of inp
+
+	ld1.8	{@X[0]},[$inp], #16
+	ld1.8	{@X[1]},[$inp], #16
+	ld1.8	{@X[2]},[$inp], #16
+	ld1.8	{@X[3]},[$inp], #16
+	ld1.32	{$T0},[$Ktbl], #16
+	ld1.32	{$T1},[$Ktbl], #16
+	ld1.32	{$T2},[$Ktbl], #16
+	ld1.32	{$T3},[$Ktbl], #16
+	rev32	@X[0],@X[0]		// yes, even on
+	rev32	@X[1],@X[1]		// big-endian
+	rev32	@X[2],@X[2]
+	rev32	@X[3],@X[3]
+	mov	$Xfer,sp
+	add.32	$T0,$T0,@X[0]
+	add.32	$T1,$T1,@X[1]
+	add.32	$T2,$T2,@X[2]
+	st1.32	{$T0-$T1},[$Xfer], #32
+	add.32	$T3,$T3,@X[3]
+	st1.32	{$T2-$T3},[$Xfer]
+	sub	$Xfer,$Xfer,#32
+
+	ldp	$A,$B,[$ctx]
+	ldp	$C,$D,[$ctx,#8]
+	ldp	$E,$F,[$ctx,#16]
+	ldp	$G,$H,[$ctx,#24]
+	ldr	$t1,[sp,#0]
+	mov	$t2,wzr
+	eor	$t3,$B,$C
+	mov	$t4,wzr
+	b	.L_00_48
+
+.align	4
+.L_00_48:
+___
+	&Xupdate(\&body_00_15);
+	&Xupdate(\&body_00_15);
+	&Xupdate(\&body_00_15);
+	&Xupdate(\&body_00_15);
+$code.=<<___;
+	cmp	$t1,#0				// check for K256 terminator
+	ldr	$t1,[sp,#0]
+	sub	$Xfer,$Xfer,#64
+	bne	.L_00_48
+
+	sub	$Ktbl,$Ktbl,#256		// rewind $Ktbl
+	cmp	$inp,$num
+	mov	$Xfer, #64
+	csel	$Xfer, $Xfer, xzr, eq
+	sub	$inp,$inp,$Xfer			// avoid SEGV
+	mov	$Xfer,sp
+___
+	&Xpreload(\&body_00_15);
+	&Xpreload(\&body_00_15);
+	&Xpreload(\&body_00_15);
+	&Xpreload(\&body_00_15);
+$code.=<<___;
+	add	$A,$A,$t4			// h+=Sigma0(a) from the past
+	ldp	$t0,$t1,[$ctx,#0]
+	add	$A,$A,$t2			// h+=Maj(a,b,c) from the past
+	ldp	$t2,$t3,[$ctx,#8]
+	add	$A,$A,$t0			// accumulate
+	add	$B,$B,$t1
+	ldp	$t0,$t1,[$ctx,#16]
+	add	$C,$C,$t2
+	add	$D,$D,$t3
+	ldp	$t2,$t3,[$ctx,#24]
+	add	$E,$E,$t0
+	add	$F,$F,$t1
+	 ldr	$t1,[sp,#0]
+	stp	$A,$B,[$ctx,#0]
+	add	$G,$G,$t2
+	 mov	$t2,wzr
+	stp	$C,$D,[$ctx,#8]
+	add	$H,$H,$t3
+	stp	$E,$F,[$ctx,#16]
+	 eor	$t3,$B,$C
+	stp	$G,$H,[$ctx,#24]
+	 mov	$t4,wzr
+	 mov	$Xfer,sp
+	b.ne	.L_00_48
+
+	ldr	x29,[x29]
+	add	sp,sp,#16*4+16
+	ret
+.size	sha256_block_neon,.-sha256_block_neon
+___
+}
+
+$code.=<<___;
+#ifndef	__KERNEL__
+.comm	OPENSSL_armcap_P,4,4
+#endif
+___
+
+{   my  %opcode = (
+	"sha256h"	=> 0x5e004000,	"sha256h2"	=> 0x5e005000,
+	"sha256su0"	=> 0x5e282800,	"sha256su1"	=> 0x5e006000	);
+
+    sub unsha256 {
+	my ($mnemonic,$arg)=@_;
+
+	$arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)[^,]*(?:,\s*[qv]([0-9]+))?/o
+	&&
+	sprintf ".inst\t0x%08x\t//%s %s",
+			$opcode{$mnemonic}|$1|($2<<5)|($3<<16),
+			$mnemonic,$arg;
+    }
+}
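+# Worked example of the encoding above: "sha256h v0.16b,v1.16b,v16.4s"
+# yields 0x5e004000 | 0 | (1<<5) | (16<<16) = 0x5e104020, matching the
+# ".inst 0x5e104020 //sha256h" words in the shipped sha256-core.S; raw
+# .inst words keep the file assembling on toolchains that lack the
+# Crypto Extensions mnemonics.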
+
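+# Copy this script's leading comment block into the output (rewriting
+# "#" to "//"), so the license and performance notes above survive
+# into the generated/shipped .S file.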
+open SELF,$0;
+while(<SELF>) {
+        next if (/^#!/);
+        last if (!s/^#/\/\// and !/^$/);
+        print;
+}
+close SELF;
+
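+# Post-process the accumulated $code: evaluate `...` arithmetic,
+# convert sha256* mnemonics into raw .inst words via unsha256(),
+# rewrite legacy q-register names as v-registers, and map the
+# perlasm .8/.32 type suffixes onto the proper arrangement specifiers.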
+foreach(split("\n",$code)) {
+
+	s/\`([^\`]*)\`/eval($1)/ge;
+
+	s/\b(sha256\w+)\s+([qv].*)/unsha256($1,$2)/ge;
+
+	s/\bq([0-9]+)\b/v$1.16b/g;		# old->new registers
+
+	s/\.[ui]?8(\s)/$1/;
+	s/\.\w?32\b//		and s/\.16b/\.4s/g;
+	m/(ld|st)1[^\[]+\[0\]/	and s/\.4s/\.s/g;
+
+	print $_,"\n";
+}
+
+close STDOUT;

1085 0
arch/arm64/crypto/sha512-core.S_shipped

@@ -0,0 +1,1085 @@
+// Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
+//
+// Licensed under the OpenSSL license (the "License").  You may not use
+// this file except in compliance with the License.  You can obtain a copy
+// in the file LICENSE in the source distribution or at
+// https://www.openssl.org/source/license.html
+
+// ====================================================================
+// Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+// project. The module is, however, dual licensed under OpenSSL and
+// CRYPTOGAMS licenses depending on where you obtain it. For further
+// details see http://www.openssl.org/~appro/cryptogams/.
+//
+// Permission to use under GPLv2 terms is granted.
+// ====================================================================
+//
+// SHA256/512 for ARMv8.
+//
+// Performance in cycles per processed byte and improvement coefficient
+// over code generated with "default" compiler:
+//
+//		SHA256-hw	SHA256(*)	SHA512
+// Apple A7	1.97		10.5 (+33%)	6.73 (-1%(**))
+// Cortex-A53	2.38		15.5 (+115%)	10.0 (+150%(***))
+// Cortex-A57	2.31		11.6 (+86%)	7.51 (+260%(***))
+// Denver	2.01		10.5 (+26%)	6.70 (+8%)
+// X-Gene			20.0 (+100%)	12.8 (+300%(***))
+// Mongoose	2.36		13.0 (+50%)	8.36 (+33%)
+//
+// (*)	Software SHA256 results are of lesser relevance, presented
+//	mostly for informational purposes.
+// (**)	The result is a trade-off: it's possible to improve it by
+//	10% (or by 1 cycle per round), but at the cost of 20% loss
+//	on Cortex-A53 (or by 4 cycles per round).
+// (***)	Super-impressive coefficients over gcc-generated code are an
+//	indication of some compiler "pathology"; most notably, code
+//	generated with -mgeneral-regs-only is significantly faster
+//	and the gap is only 40-90%.
+//
+// October 2016.
+//
+// Originally it was reckoned that it makes no sense to implement NEON
+// version of SHA256 for 64-bit processors. This is because performance
+// improvement on most wide-spread Cortex-A5x processors was observed
+// to be marginal, same on Cortex-A53 and ~10% on A57. But then it was
+// observed that 32-bit NEON SHA256 performs significantly better than
+// 64-bit scalar version on *some* of the more recent processors. As a
+// result, a 64-bit NEON version of SHA256 was added to provide the best
+// all-round performance. For example it executes ~30% faster on X-Gene
+// and Mongoose. [For reference, NEON version of SHA512 is bound to
+// deliver much less improvement, likely *negative* on Cortex-A5x.
+// Which is why NEON support is limited to SHA256.]
+
+#ifndef	__KERNEL__
+# include "arm_arch.h"
+#endif
+
+.text
+
+.extern	OPENSSL_armcap_P
+.globl	sha512_block_data_order
+.type	sha512_block_data_order,%function
+.align	6
+sha512_block_data_order:
+	stp	x29,x30,[sp,#-128]!
+	add	x29,sp,#0
+
+	stp	x19,x20,[sp,#16]
+	stp	x21,x22,[sp,#32]
+	stp	x23,x24,[sp,#48]
+	stp	x25,x26,[sp,#64]
+	stp	x27,x28,[sp,#80]
+	sub	sp,sp,#4*8
+
+	ldp	x20,x21,[x0]				// load context
+	ldp	x22,x23,[x0,#2*8]
+	ldp	x24,x25,[x0,#4*8]
+	add	x2,x1,x2,lsl#7	// end of input
+	ldp	x26,x27,[x0,#6*8]
+	adr	x30,.LK512
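+	// x30 (the link register) doubles as the K-table pointer from
+	// here on; the real return address was saved to the frame above
+	// and is reloaded on exit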
+	stp	x0,x2,[x29,#96]
+
+.Loop:
+	ldp	x3,x4,[x1],#2*8
+	ldr	x19,[x30],#8			// *K++
+	eor	x28,x21,x22				// magic seed
+	str	x1,[x29,#112]
+#ifndef	__AARCH64EB__
+	rev	x3,x3			// 0
+#endif
+	ror	x16,x24,#14
+	add	x27,x27,x19			// h+=K[i]
+	eor	x6,x24,x24,ror#23
+	and	x17,x25,x24
+	bic	x19,x26,x24
+	add	x27,x27,x3			// h+=X[i]
+	orr	x17,x17,x19			// Ch(e,f,g)
+	eor	x19,x20,x21			// a^b, b^c in next round
+	eor	x16,x16,x6,ror#18	// Sigma1(e)
+	ror	x6,x20,#28
+	add	x27,x27,x17			// h+=Ch(e,f,g)
+	eor	x17,x20,x20,ror#5
+	add	x27,x27,x16			// h+=Sigma1(e)
+	and	x28,x28,x19			// (b^c)&=(a^b)
+	add	x23,x23,x27			// d+=h
+	eor	x28,x28,x21			// Maj(a,b,c)
+	eor	x17,x6,x17,ror#34	// Sigma0(a)
+	add	x27,x27,x28			// h+=Maj(a,b,c)
+	ldr	x28,[x30],#8		// *K++, x19 in next round
+	//add	x27,x27,x17			// h+=Sigma0(a)
+#ifndef	__AARCH64EB__
+	rev	x4,x4			// 1
+#endif
+	ldp	x5,x6,[x1],#2*8
+	add	x27,x27,x17			// h+=Sigma0(a)
+	ror	x16,x23,#14
+	add	x26,x26,x28			// h+=K[i]
+	eor	x7,x23,x23,ror#23
+	and	x17,x24,x23
+	bic	x28,x25,x23
+	add	x26,x26,x4			// h+=X[i]
+	orr	x17,x17,x28			// Ch(e,f,g)
+	eor	x28,x27,x20			// a^b, b^c in next round
+	eor	x16,x16,x7,ror#18	// Sigma1(e)
+	ror	x7,x27,#28
+	add	x26,x26,x17			// h+=Ch(e,f,g)
+	eor	x17,x27,x27,ror#5
+	add	x26,x26,x16			// h+=Sigma1(e)
+	and	x19,x19,x28			// (b^c)&=(a^b)
+	add	x22,x22,x26			// d+=h
+	eor	x19,x19,x20			// Maj(a,b,c)
+	eor	x17,x7,x17,ror#34	// Sigma0(a)
+	add	x26,x26,x19			// h+=Maj(a,b,c)
+	ldr	x19,[x30],#8		// *K++, x28 in next round
+	//add	x26,x26,x17			// h+=Sigma0(a)
+#ifndef	__AARCH64EB__
+	rev	x5,x5			// 2
+#endif
+	add	x26,x26,x17			// h+=Sigma0(a)
+	ror	x16,x22,#14
+	add	x25,x25,x19			// h+=K[i]
+	eor	x8,x22,x22,ror#23
+	and	x17,x23,x22
+	bic	x19,x24,x22
+	add	x25,x25,x5			// h+=X[i]
+	orr	x17,x17,x19			// Ch(e,f,g)
+	eor	x19,x26,x27			// a^b, b^c in next round
+	eor	x16,x16,x8,ror#18	// Sigma1(e)
+	ror	x8,x26,#28
+	add	x25,x25,x17			// h+=Ch(e,f,g)
+	eor	x17,x26,x26,ror#5
+	add	x25,x25,x16			// h+=Sigma1(e)
+	and	x28,x28,x19			// (b^c)&=(a^b)
+	add	x21,x21,x25			// d+=h
+	eor	x28,x28,x27			// Maj(a,b,c)
+	eor	x17,x8,x17,ror#34	// Sigma0(a)
+	add	x25,x25,x28			// h+=Maj(a,b,c)
+	ldr	x28,[x30],#8		// *K++, x19 in next round
+	//add	x25,x25,x17			// h+=Sigma0(a)
+#ifndef	__AARCH64EB__
+	rev	x6,x6			// 3
+#endif
+	ldp	x7,x8,[x1],#2*8
+	add	x25,x25,x17			// h+=Sigma0(a)
+	ror	x16,x21,#14
+	add	x24,x24,x28			// h+=K[i]
+	eor	x9,x21,x21,ror#23
+	and	x17,x22,x21
+	bic	x28,x23,x21
+	add	x24,x24,x6			// h+=X[i]
+	orr	x17,x17,x28			// Ch(e,f,g)
+	eor	x28,x25,x26			// a^b, b^c in next round
+	eor	x16,x16,x9,ror#18	// Sigma1(e)
+	ror	x9,x25,#28
+	add	x24,x24,x17			// h+=Ch(e,f,g)
+	eor	x17,x25,x25,ror#5
+	add	x24,x24,x16			// h+=Sigma1(e)
+	and	x19,x19,x28			// (b^c)&=(a^b)
+	add	x20,x20,x24			// d+=h
+	eor	x19,x19,x26			// Maj(a,b,c)
+	eor	x17,x9,x17,ror#34	// Sigma0(a)
+	add	x24,x24,x19			// h+=Maj(a,b,c)
+	ldr	x19,[x30],#8		// *K++, x28 in next round
+	//add	x24,x24,x17			// h+=Sigma0(a)
+#ifndef	__AARCH64EB__
+	rev	x7,x7			// 4
+#endif
+	add	x24,x24,x17			// h+=Sigma0(a)
+	ror	x16,x20,#14
+	add	x23,x23,x19			// h+=K[i]
+	eor	x10,x20,x20,ror#23
+	and	x17,x21,x20
+	bic	x19,x22,x20
+	add	x23,x23,x7			// h+=X[i]
+	orr	x17,x17,x19			// Ch(e,f,g)
+	eor	x19,x24,x25			// a^b, b^c in next round
+	eor	x16,x16,x10,ror#18	// Sigma1(e)
+	ror	x10,x24,#28
+	add	x23,x23,x17			// h+=Ch(e,f,g)
+	eor	x17,x24,x24,ror#5
+	add	x23,x23,x16			// h+=Sigma1(e)
+	and	x28,x28,x19			// (b^c)&=(a^b)
+	add	x27,x27,x23			// d+=h
+	eor	x28,x28,x25			// Maj(a,b,c)
+	eor	x17,x10,x17,ror#34	// Sigma0(a)
+	add	x23,x23,x28			// h+=Maj(a,b,c)
+	ldr	x28,[x30],#8		// *K++, x19 in next round
+	//add	x23,x23,x17			// h+=Sigma0(a)
+#ifndef	__AARCH64EB__
+	rev	x8,x8			// 5
+#endif
+	ldp	x9,x10,[x1],#2*8
+	add	x23,x23,x17			// h+=Sigma0(a)
+	ror	x16,x27,#14
+	add	x22,x22,x28			// h+=K[i]
+	eor	x11,x27,x27,ror#23
+	and	x17,x20,x27
+	bic	x28,x21,x27
+	add	x22,x22,x8			// h+=X[i]
+	orr	x17,x17,x28			// Ch(e,f,g)
+	eor	x28,x23,x24			// a^b, b^c in next round
+	eor	x16,x16,x11,ror#18	// Sigma1(e)
+	ror	x11,x23,#28
+	add	x22,x22,x17			// h+=Ch(e,f,g)
+	eor	x17,x23,x23,ror#5
+	add	x22,x22,x16			// h+=Sigma1(e)
+	and	x19,x19,x28			// (b^c)&=(a^b)
+	add	x26,x26,x22			// d+=h
+	eor	x19,x19,x24			// Maj(a,b,c)
+	eor	x17,x11,x17,ror#34	// Sigma0(a)
+	add	x22,x22,x19			// h+=Maj(a,b,c)
+	ldr	x19,[x30],#8		// *K++, x28 in next round
+	//add	x22,x22,x17			// h+=Sigma0(a)
+#ifndef	__AARCH64EB__
+	rev	x9,x9			// 6
+#endif
+	add	x22,x22,x17			// h+=Sigma0(a)
+	ror	x16,x26,#14
+	add	x21,x21,x19			// h+=K[i]
+	eor	x12,x26,x26,ror#23
+	and	x17,x27,x26
+	bic	x19,x20,x26
+	add	x21,x21,x9			// h+=X[i]
+	orr	x17,x17,x19			// Ch(e,f,g)
+	eor	x19,x22,x23			// a^b, b^c in next round
+	eor	x16,x16,x12,ror#18	// Sigma1(e)
+	ror	x12,x22,#28
+	add	x21,x21,x17			// h+=Ch(e,f,g)
+	eor	x17,x22,x22,ror#5
+	add	x21,x21,x16			// h+=Sigma1(e)
+	and	x28,x28,x19			// (b^c)&=(a^b)
+	add	x25,x25,x21			// d+=h
+	eor	x28,x28,x23			// Maj(a,b,c)
+	eor	x17,x12,x17,ror#34	// Sigma0(a)
+	add	x21,x21,x28			// h+=Maj(a,b,c)
+	ldr	x28,[x30],#8		// *K++, x19 in next round
+	//add	x21,x21,x17			// h+=Sigma0(a)
+#ifndef	__AARCH64EB__
+	rev	x10,x10			// 7
+#endif
+	ldp	x11,x12,[x1],#2*8
+	add	x21,x21,x17			// h+=Sigma0(a)
+	ror	x16,x25,#14
+	add	x20,x20,x28			// h+=K[i]
+	eor	x13,x25,x25,ror#23
+	and	x17,x26,x25
+	bic	x28,x27,x25
+	add	x20,x20,x10			// h+=X[i]
+	orr	x17,x17,x28			// Ch(e,f,g)
+	eor	x28,x21,x22			// a^b, b^c in next round
+	eor	x16,x16,x13,ror#18	// Sigma1(e)
+	ror	x13,x21,#28
+	add	x20,x20,x17			// h+=Ch(e,f,g)
+	eor	x17,x21,x21,ror#5
+	add	x20,x20,x16			// h+=Sigma1(e)
+	and	x19,x19,x28			// (b^c)&=(a^b)
+	add	x24,x24,x20			// d+=h
+	eor	x19,x19,x22			// Maj(a,b,c)
+	eor	x17,x13,x17,ror#34	// Sigma0(a)
+	add	x20,x20,x19			// h+=Maj(a,b,c)
+	ldr	x19,[x30],#8		// *K++, x28 in next round
+	//add	x20,x20,x17			// h+=Sigma0(a)
+#ifndef	__AARCH64EB__
+	rev	x11,x11			// 8
+#endif
+	add	x20,x20,x17			// h+=Sigma0(a)
+	ror	x16,x24,#14
+	add	x27,x27,x19			// h+=K[i]
+	eor	x14,x24,x24,ror#23
+	and	x17,x25,x24
+	bic	x19,x26,x24
+	add	x27,x27,x11			// h+=X[i]
+	orr	x17,x17,x19			// Ch(e,f,g)
+	eor	x19,x20,x21			// a^b, b^c in next round
+	eor	x16,x16,x14,ror#18	// Sigma1(e)
+	ror	x14,x20,#28
+	add	x27,x27,x17			// h+=Ch(e,f,g)
+	eor	x17,x20,x20,ror#5
+	add	x27,x27,x16			// h+=Sigma1(e)
+	and	x28,x28,x19			// (b^c)&=(a^b)
+	add	x23,x23,x27			// d+=h
+	eor	x28,x28,x21			// Maj(a,b,c)
+	eor	x17,x14,x17,ror#34	// Sigma0(a)
+	add	x27,x27,x28			// h+=Maj(a,b,c)
+	ldr	x28,[x30],#8		// *K++, x19 in next round
+	//add	x27,x27,x17			// h+=Sigma0(a)
+#ifndef	__AARCH64EB__
+	rev	x12,x12			// 9
+#endif
+	ldp	x13,x14,[x1],#2*8
+	add	x27,x27,x17			// h+=Sigma0(a)
+	ror	x16,x23,#14
+	add	x26,x26,x28			// h+=K[i]
+	eor	x15,x23,x23,ror#23
+	and	x17,x24,x23
+	bic	x28,x25,x23
+	add	x26,x26,x12			// h+=X[i]
+	orr	x17,x17,x28			// Ch(e,f,g)
+	eor	x28,x27,x20			// a^b, b^c in next round
+	eor	x16,x16,x15,ror#18	// Sigma1(e)
+	ror	x15,x27,#28
+	add	x26,x26,x17			// h+=Ch(e,f,g)
+	eor	x17,x27,x27,ror#5
+	add	x26,x26,x16			// h+=Sigma1(e)
+	and	x19,x19,x28			// (b^c)&=(a^b)
+	add	x22,x22,x26			// d+=h
+	eor	x19,x19,x20			// Maj(a,b,c)
+	eor	x17,x15,x17,ror#34	// Sigma0(a)
+	add	x26,x26,x19			// h+=Maj(a,b,c)
+	ldr	x19,[x30],#8		// *K++, x28 in next round
+	//add	x26,x26,x17			// h+=Sigma0(a)
+#ifndef	__AARCH64EB__
+	rev	x13,x13			// 10
+#endif
+	add	x26,x26,x17			// h+=Sigma0(a)
+	ror	x16,x22,#14
+	add	x25,x25,x19			// h+=K[i]
+	eor	x0,x22,x22,ror#23
+	and	x17,x23,x22
+	bic	x19,x24,x22
+	add	x25,x25,x13			// h+=X[i]
+	orr	x17,x17,x19			// Ch(e,f,g)
+	eor	x19,x26,x27			// a^b, b^c in next round
+	eor	x16,x16,x0,ror#18	// Sigma1(e)
+	ror	x0,x26,#28
+	add	x25,x25,x17			// h+=Ch(e,f,g)
+	eor	x17,x26,x26,ror#5
+	add	x25,x25,x16			// h+=Sigma1(e)
+	and	x28,x28,x19			// (b^c)&=(a^b)
+	add	x21,x21,x25			// d+=h
+	eor	x28,x28,x27			// Maj(a,b,c)
+	eor	x17,x0,x17,ror#34	// Sigma0(a)
+	add	x25,x25,x28			// h+=Maj(a,b,c)
+	ldr	x28,[x30],#8		// *K++, x19 in next round
+	//add	x25,x25,x17			// h+=Sigma0(a)
+#ifndef	__AARCH64EB__
+	rev	x14,x14			// 11
+#endif
+	ldp	x15,x0,[x1],#2*8
+	add	x25,x25,x17			// h+=Sigma0(a)
+	str	x6,[sp,#24]
+	ror	x16,x21,#14
+	add	x24,x24,x28			// h+=K[i]
+	eor	x6,x21,x21,ror#23
+	and	x17,x22,x21
+	bic	x28,x23,x21
+	add	x24,x24,x14			// h+=X[i]
+	orr	x17,x17,x28			// Ch(e,f,g)
+	eor	x28,x25,x26			// a^b, b^c in next round
+	eor	x16,x16,x6,ror#18	// Sigma1(e)
+	ror	x6,x25,#28
+	add	x24,x24,x17			// h+=Ch(e,f,g)
+	eor	x17,x25,x25,ror#5
+	add	x24,x24,x16			// h+=Sigma1(e)
+	and	x19,x19,x28			// (b^c)&=(a^b)
+	add	x20,x20,x24			// d+=h
+	eor	x19,x19,x26			// Maj(a,b,c)
+	eor	x17,x6,x17,ror#34	// Sigma0(a)
+	add	x24,x24,x19			// h+=Maj(a,b,c)
+	ldr	x19,[x30],#8		// *K++, x28 in next round
+	//add	x24,x24,x17			// h+=Sigma0(a)
+#ifndef	__AARCH64EB__
+	rev	x15,x15			// 12
+#endif
+	add	x24,x24,x17			// h+=Sigma0(a)
+	str	x7,[sp,#0]
+	ror	x16,x20,#14
+	add	x23,x23,x19			// h+=K[i]
+	eor	x7,x20,x20,ror#23
+	and	x17,x21,x20
+	bic	x19,x22,x20
+	add	x23,x23,x15			// h+=X[i]
+	orr	x17,x17,x19			// Ch(e,f,g)
+	eor	x19,x24,x25			// a^b, b^c in next round
+	eor	x16,x16,x7,ror#18	// Sigma1(e)
+	ror	x7,x24,#28
+	add	x23,x23,x17			// h+=Ch(e,f,g)
+	eor	x17,x24,x24,ror#5
+	add	x23,x23,x16			// h+=Sigma1(e)
+	and	x28,x28,x19			// (b^c)&=(a^b)
+	add	x27,x27,x23			// d+=h
+	eor	x28,x28,x25			// Maj(a,b,c)
+	eor	x17,x7,x17,ror#34	// Sigma0(a)
+	add	x23,x23,x28			// h+=Maj(a,b,c)
+	ldr	x28,[x30],#8		// *K++, x19 in next round
+	//add	x23,x23,x17			// h+=Sigma0(a)
+#ifndef	__AARCH64EB__
+	rev	x0,x0			// 13
+#endif
+	ldp	x1,x2,[x1]
+	add	x23,x23,x17			// h+=Sigma0(a)
+	str	x8,[sp,#8]
+	ror	x16,x27,#14
+	add	x22,x22,x28			// h+=K[i]
+	eor	x8,x27,x27,ror#23
+	and	x17,x20,x27
+	bic	x28,x21,x27
+	add	x22,x22,x0			// h+=X[i]
+	orr	x17,x17,x28			// Ch(e,f,g)
+	eor	x28,x23,x24			// a^b, b^c in next round
+	eor	x16,x16,x8,ror#18	// Sigma1(e)
+	ror	x8,x23,#28
+	add	x22,x22,x17			// h+=Ch(e,f,g)
+	eor	x17,x23,x23,ror#5
+	add	x22,x22,x16			// h+=Sigma1(e)
+	and	x19,x19,x28			// (b^c)&=(a^b)
+	add	x26,x26,x22			// d+=h
+	eor	x19,x19,x24			// Maj(a,b,c)
+	eor	x17,x8,x17,ror#34	// Sigma0(a)
+	add	x22,x22,x19			// h+=Maj(a,b,c)
+	ldr	x19,[x30],#8		// *K++, x28 in next round
+	//add	x22,x22,x17			// h+=Sigma0(a)
+#ifndef	__AARCH64EB__
+	rev	x1,x1			// 14
+#endif
+	ldr	x6,[sp,#24]
+	add	x22,x22,x17			// h+=Sigma0(a)
+	str	x9,[sp,#16]
+	ror	x16,x26,#14
+	add	x21,x21,x19			// h+=K[i]
+	eor	x9,x26,x26,ror#23
+	and	x17,x27,x26
+	bic	x19,x20,x26
+	add	x21,x21,x1			// h+=X[i]
+	orr	x17,x17,x19			// Ch(e,f,g)
+	eor	x19,x22,x23			// a^b, b^c in next round
+	eor	x16,x16,x9,ror#18	// Sigma1(e)
+	ror	x9,x22,#28
+	add	x21,x21,x17			// h+=Ch(e,f,g)
+	eor	x17,x22,x22,ror#5
+	add	x21,x21,x16			// h+=Sigma1(e)
+	and	x28,x28,x19			// (b^c)&=(a^b)
+	add	x25,x25,x21			// d+=h
+	eor	x28,x28,x23			// Maj(a,b,c)
+	eor	x17,x9,x17,ror#34	// Sigma0(a)
+	add	x21,x21,x28			// h+=Maj(a,b,c)
+	ldr	x28,[x30],#8		// *K++, x19 in next round
+	//add	x21,x21,x17			// h+=Sigma0(a)
+#ifndef	__AARCH64EB__
+	rev	x2,x2			// 15
+#endif
+	ldr	x7,[sp,#0]
+	add	x21,x21,x17			// h+=Sigma0(a)
+	str	x10,[sp,#24]
+	ror	x16,x25,#14
+	add	x20,x20,x28			// h+=K[i]
+	ror	x9,x4,#1
+	and	x17,x26,x25
+	ror	x8,x1,#19
+	bic	x28,x27,x25
+	ror	x10,x21,#28
+	add	x20,x20,x2			// h+=X[i]
+	eor	x16,x16,x25,ror#18
+	eor	x9,x9,x4,ror#8
+	orr	x17,x17,x28			// Ch(e,f,g)
+	eor	x28,x21,x22			// a^b, b^c in next round
+	eor	x16,x16,x25,ror#41	// Sigma1(e)
+	eor	x10,x10,x21,ror#34
+	add	x20,x20,x17			// h+=Ch(e,f,g)
+	and	x19,x19,x28			// (b^c)&=(a^b)
+	eor	x8,x8,x1,ror#61
+	eor	x9,x9,x4,lsr#7	// sigma0(X[i+1])
+	add	x20,x20,x16			// h+=Sigma1(e)
+	eor	x19,x19,x22			// Maj(a,b,c)
+	eor	x17,x10,x21,ror#39	// Sigma0(a)
+	eor	x8,x8,x1,lsr#6	// sigma1(X[i+14])
+	add	x3,x3,x12
+	add	x24,x24,x20			// d+=h
+	add	x20,x20,x19			// h+=Maj(a,b,c)
+	ldr	x19,[x30],#8		// *K++, x28 in next round
+	add	x3,x3,x9
+	add	x20,x20,x17			// h+=Sigma0(a)
+	add	x3,x3,x8
+.Loop_16_xx:
+	ldr	x8,[sp,#8]
+	str	x11,[sp,#0]
+	ror	x16,x24,#14
+	add	x27,x27,x19			// h+=K[i]
+	ror	x10,x5,#1
+	and	x17,x25,x24
+	ror	x9,x2,#19
+	bic	x19,x26,x24
+	ror	x11,x20,#28
+	add	x27,x27,x3			// h+=X[i]
+	eor	x16,x16,x24,ror#18
+	eor	x10,x10,x5,ror#8
+	orr	x17,x17,x19			// Ch(e,f,g)
+	eor	x19,x20,x21			// a^b, b^c in next round
+	eor	x16,x16,x24,ror#41	// Sigma1(e)
+	eor	x11,x11,x20,ror#34
+	add	x27,x27,x17			// h+=Ch(e,f,g)
+	and	x28,x28,x19			// (b^c)&=(a^b)
+	eor	x9,x9,x2,ror#61
+	eor	x10,x10,x5,lsr#7	// sigma0(X[i+1])
+	add	x27,x27,x16			// h+=Sigma1(e)
+	eor	x28,x28,x21			// Maj(a,b,c)
+	eor	x17,x11,x20,ror#39	// Sigma0(a)
+	eor	x9,x9,x2,lsr#6	// sigma1(X[i+14])
+	add	x4,x4,x13
+	add	x23,x23,x27			// d+=h
+	add	x27,x27,x28			// h+=Maj(a,b,c)
+	ldr	x28,[x30],#8		// *K++, x19 in next round
+	add	x4,x4,x10
+	add	x27,x27,x17			// h+=Sigma0(a)
+	add	x4,x4,x9
+	ldr	x9,[sp,#16]
+	str	x12,[sp,#8]
+	ror	x16,x23,#14
+	add	x26,x26,x28			// h+=K[i]
+	ror	x11,x6,#1
+	and	x17,x24,x23
+	ror	x10,x3,#19
+	bic	x28,x25,x23
+	ror	x12,x27,#28
+	add	x26,x26,x4			// h+=X[i]
+	eor	x16,x16,x23,ror#18
+	eor	x11,x11,x6,ror#8
+	orr	x17,x17,x28			// Ch(e,f,g)
+	eor	x28,x27,x20			// a^b, b^c in next round
+	eor	x16,x16,x23,ror#41	// Sigma1(e)
+	eor	x12,x12,x27,ror#34
+	add	x26,x26,x17			// h+=Ch(e,f,g)
+	and	x19,x19,x28			// (b^c)&=(a^b)
+	eor	x10,x10,x3,ror#61
+	eor	x11,x11,x6,lsr#7	// sigma0(X[i+1])
+	add	x26,x26,x16			// h+=Sigma1(e)
+	eor	x19,x19,x20			// Maj(a,b,c)
+	eor	x17,x12,x27,ror#39	// Sigma0(a)
+	eor	x10,x10,x3,lsr#6	// sigma1(X[i+14])
+	add	x5,x5,x14
+	add	x22,x22,x26			// d+=h
+	add	x26,x26,x19			// h+=Maj(a,b,c)
+	ldr	x19,[x30],#8		// *K++, x28 in next round
+	add	x5,x5,x11
+	add	x26,x26,x17			// h+=Sigma0(a)
+	add	x5,x5,x10
+	ldr	x10,[sp,#24]
+	str	x13,[sp,#16]
+	ror	x16,x22,#14
+	add	x25,x25,x19			// h+=K[i]
+	ror	x12,x7,#1
+	and	x17,x23,x22
+	ror	x11,x4,#19
+	bic	x19,x24,x22
+	ror	x13,x26,#28
+	add	x25,x25,x5			// h+=X[i]
+	eor	x16,x16,x22,ror#18
+	eor	x12,x12,x7,ror#8
+	orr	x17,x17,x19			// Ch(e,f,g)
+	eor	x19,x26,x27			// a^b, b^c in next round
+	eor	x16,x16,x22,ror#41	// Sigma1(e)
+	eor	x13,x13,x26,ror#34
+	add	x25,x25,x17			// h+=Ch(e,f,g)
+	and	x28,x28,x19			// (b^c)&=(a^b)
+	eor	x11,x11,x4,ror#61
+	eor	x12,x12,x7,lsr#7	// sigma0(X[i+1])
+	add	x25,x25,x16			// h+=Sigma1(e)
+	eor	x28,x28,x27			// Maj(a,b,c)
+	eor	x17,x13,x26,ror#39	// Sigma0(a)
+	eor	x11,x11,x4,lsr#6	// sigma1(X[i+14])
+	add	x6,x6,x15
+	add	x21,x21,x25			// d+=h
+	add	x25,x25,x28			// h+=Maj(a,b,c)
+	ldr	x28,[x30],#8		// *K++, x19 in next round
+	add	x6,x6,x12
+	add	x25,x25,x17			// h+=Sigma0(a)
+	add	x6,x6,x11
+	ldr	x11,[sp,#0]
+	str	x14,[sp,#24]
+	ror	x16,x21,#14
+	add	x24,x24,x28			// h+=K[i]
+	ror	x13,x8,#1
+	and	x17,x22,x21
+	ror	x12,x5,#19
+	bic	x28,x23,x21
+	ror	x14,x25,#28
+	add	x24,x24,x6			// h+=X[i]
+	eor	x16,x16,x21,ror#18
+	eor	x13,x13,x8,ror#8
+	orr	x17,x17,x28			// Ch(e,f,g)
+	eor	x28,x25,x26			// a^b, b^c in next round
+	eor	x16,x16,x21,ror#41	// Sigma1(e)
+	eor	x14,x14,x25,ror#34
+	add	x24,x24,x17			// h+=Ch(e,f,g)
+	and	x19,x19,x28			// (b^c)&=(a^b)
+	eor	x12,x12,x5,ror#61
+	eor	x13,x13,x8,lsr#7	// sigma0(X[i+1])
+	add	x24,x24,x16			// h+=Sigma1(e)
+	eor	x19,x19,x26			// Maj(a,b,c)
+	eor	x17,x14,x25,ror#39	// Sigma0(a)
+	eor	x12,x12,x5,lsr#6	// sigma1(X[i+14])
+	add	x7,x7,x0
+	add	x20,x20,x24			// d+=h
+	add	x24,x24,x19			// h+=Maj(a,b,c)
+	ldr	x19,[x30],#8		// *K++, x28 in next round
+	add	x7,x7,x13
+	add	x24,x24,x17			// h+=Sigma0(a)
+	add	x7,x7,x12
+	ldr	x12,[sp,#8]
+	str	x15,[sp,#0]
+	ror	x16,x20,#14
+	add	x23,x23,x19			// h+=K[i]
+	ror	x14,x9,#1
+	and	x17,x21,x20
+	ror	x13,x6,#19
+	bic	x19,x22,x20
+	ror	x15,x24,#28
+	add	x23,x23,x7			// h+=X[i]
+	eor	x16,x16,x20,ror#18
+	eor	x14,x14,x9,ror#8
+	orr	x17,x17,x19			// Ch(e,f,g)
+	eor	x19,x24,x25			// a^b, b^c in next round
+	eor	x16,x16,x20,ror#41	// Sigma1(e)
+	eor	x15,x15,x24,ror#34
+	add	x23,x23,x17			// h+=Ch(e,f,g)
+	and	x28,x28,x19			// (b^c)&=(a^b)
+	eor	x13,x13,x6,ror#61
+	eor	x14,x14,x9,lsr#7	// sigma0(X[i+1])
+	add	x23,x23,x16			// h+=Sigma1(e)
+	eor	x28,x28,x25			// Maj(a,b,c)
+	eor	x17,x15,x24,ror#39	// Sigma0(a)
+	eor	x13,x13,x6,lsr#6	// sigma1(X[i+14])
+	add	x8,x8,x1
+	add	x27,x27,x23			// d+=h
+	add	x23,x23,x28			// h+=Maj(a,b,c)
+	ldr	x28,[x30],#8		// *K++, x19 in next round
+	add	x8,x8,x14
+	add	x23,x23,x17			// h+=Sigma0(a)
+	add	x8,x8,x13
+	ldr	x13,[sp,#16]
+	str	x0,[sp,#8]
+	ror	x16,x27,#14
+	add	x22,x22,x28			// h+=K[i]
+	ror	x15,x10,#1
+	and	x17,x20,x27
+	ror	x14,x7,#19
+	bic	x28,x21,x27
+	ror	x0,x23,#28
+	add	x22,x22,x8			// h+=X[i]
+	eor	x16,x16,x27,ror#18
+	eor	x15,x15,x10,ror#8
+	orr	x17,x17,x28			// Ch(e,f,g)
+	eor	x28,x23,x24			// a^b, b^c in next round
+	eor	x16,x16,x27,ror#41	// Sigma1(e)
+	eor	x0,x0,x23,ror#34
+	add	x22,x22,x17			// h+=Ch(e,f,g)
+	and	x19,x19,x28			// (b^c)&=(a^b)
+	eor	x14,x14,x7,ror#61
+	eor	x15,x15,x10,lsr#7	// sigma0(X[i+1])
+	add	x22,x22,x16			// h+=Sigma1(e)
+	eor	x19,x19,x24			// Maj(a,b,c)
+	eor	x17,x0,x23,ror#39	// Sigma0(a)
+	eor	x14,x14,x7,lsr#6	// sigma1(X[i+14])
+	add	x9,x9,x2
+	add	x26,x26,x22			// d+=h
+	add	x22,x22,x19			// h+=Maj(a,b,c)
+	ldr	x19,[x30],#8		// *K++, x28 in next round
+	add	x9,x9,x15
+	add	x22,x22,x17			// h+=Sigma0(a)
+	add	x9,x9,x14
+	ldr	x14,[sp,#24]
+	str	x1,[sp,#16]
+	ror	x16,x26,#14
+	add	x21,x21,x19			// h+=K[i]
+	ror	x0,x11,#1
+	and	x17,x27,x26
+	ror	x15,x8,#19
+	bic	x19,x20,x26
+	ror	x1,x22,#28
+	add	x21,x21,x9			// h+=X[i]
+	eor	x16,x16,x26,ror#18
+	eor	x0,x0,x11,ror#8
+	orr	x17,x17,x19			// Ch(e,f,g)
+	eor	x19,x22,x23			// a^b, b^c in next round
+	eor	x16,x16,x26,ror#41	// Sigma1(e)
+	eor	x1,x1,x22,ror#34
+	add	x21,x21,x17			// h+=Ch(e,f,g)
+	and	x28,x28,x19			// (b^c)&=(a^b)
+	eor	x15,x15,x8,ror#61
+	eor	x0,x0,x11,lsr#7	// sigma0(X[i+1])
+	add	x21,x21,x16			// h+=Sigma1(e)
+	eor	x28,x28,x23			// Maj(a,b,c)
+	eor	x17,x1,x22,ror#39	// Sigma0(a)
+	eor	x15,x15,x8,lsr#6	// sigma1(X[i+14])
+	add	x10,x10,x3
+	add	x25,x25,x21			// d+=h
+	add	x21,x21,x28			// h+=Maj(a,b,c)
+	ldr	x28,[x30],#8		// *K++, x19 in next round
+	add	x10,x10,x0
+	add	x21,x21,x17			// h+=Sigma0(a)
+	add	x10,x10,x15
+	ldr	x15,[sp,#0]
+	str	x2,[sp,#24]
+	ror	x16,x25,#14
+	add	x20,x20,x28			// h+=K[i]
+	ror	x1,x12,#1
+	and	x17,x26,x25
+	ror	x0,x9,#19
+	bic	x28,x27,x25
+	ror	x2,x21,#28
+	add	x20,x20,x10			// h+=X[i]
+	eor	x16,x16,x25,ror#18
+	eor	x1,x1,x12,ror#8
+	orr	x17,x17,x28			// Ch(e,f,g)
+	eor	x28,x21,x22			// a^b, b^c in next round
+	eor	x16,x16,x25,ror#41	// Sigma1(e)
+	eor	x2,x2,x21,ror#34
+	add	x20,x20,x17			// h+=Ch(e,f,g)
+	and	x19,x19,x28			// (b^c)&=(a^b)
+	eor	x0,x0,x9,ror#61
+	eor	x1,x1,x12,lsr#7	// sigma0(X[i+1])
+	add	x20,x20,x16			// h+=Sigma1(e)
+	eor	x19,x19,x22			// Maj(a,b,c)
+	eor	x17,x2,x21,ror#39	// Sigma0(a)
+	eor	x0,x0,x9,lsr#6	// sigma1(X[i+14])
+	add	x11,x11,x4
+	add	x24,x24,x20			// d+=h
+	add	x20,x20,x19			// h+=Maj(a,b,c)
+	ldr	x19,[x30],#8		// *K++, x28 in next round
+	add	x11,x11,x1
+	add	x20,x20,x17			// h+=Sigma0(a)
+	add	x11,x11,x0
+	ldr	x0,[sp,#8]
+	str	x3,[sp,#0]
+	ror	x16,x24,#14
+	add	x27,x27,x19			// h+=K[i]
+	ror	x2,x13,#1
+	and	x17,x25,x24
+	ror	x1,x10,#19
+	bic	x19,x26,x24
+	ror	x3,x20,#28
+	add	x27,x27,x11			// h+=X[i]
+	eor	x16,x16,x24,ror#18
+	eor	x2,x2,x13,ror#8
+	orr	x17,x17,x19			// Ch(e,f,g)
+	eor	x19,x20,x21			// a^b, b^c in next round
+	eor	x16,x16,x24,ror#41	// Sigma1(e)
+	eor	x3,x3,x20,ror#34
+	add	x27,x27,x17			// h+=Ch(e,f,g)
+	and	x28,x28,x19			// (b^c)&=(a^b)
+	eor	x1,x1,x10,ror#61
+	eor	x2,x2,x13,lsr#7	// sigma0(X[i+1])
+	add	x27,x27,x16			// h+=Sigma1(e)
+	eor	x28,x28,x21			// Maj(a,b,c)
+	eor	x17,x3,x20,ror#39	// Sigma0(a)
+	eor	x1,x1,x10,lsr#6	// sigma1(X[i+14])
+	add	x12,x12,x5
+	add	x23,x23,x27			// d+=h
+	add	x27,x27,x28			// h+=Maj(a,b,c)
+	ldr	x28,[x30],#8		// *K++, x19 in next round
+	add	x12,x12,x2
+	add	x27,x27,x17			// h+=Sigma0(a)
+	add	x12,x12,x1
+	ldr	x1,[sp,#16]
+	str	x4,[sp,#8]
+	ror	x16,x23,#14
+	add	x26,x26,x28			// h+=K[i]
+	ror	x3,x14,#1
+	and	x17,x24,x23
+	ror	x2,x11,#19
+	bic	x28,x25,x23
+	ror	x4,x27,#28
+	add	x26,x26,x12			// h+=X[i]
+	eor	x16,x16,x23,ror#18
+	eor	x3,x3,x14,ror#8
+	orr	x17,x17,x28			// Ch(e,f,g)
+	eor	x28,x27,x20			// a^b, b^c in next round
+	eor	x16,x16,x23,ror#41	// Sigma1(e)
+	eor	x4,x4,x27,ror#34
+	add	x26,x26,x17			// h+=Ch(e,f,g)
+	and	x19,x19,x28			// (b^c)&=(a^b)
+	eor	x2,x2,x11,ror#61
+	eor	x3,x3,x14,lsr#7	// sigma0(X[i+1])
+	add	x26,x26,x16			// h+=Sigma1(e)
+	eor	x19,x19,x20			// Maj(a,b,c)
+	eor	x17,x4,x27,ror#39	// Sigma0(a)
+	eor	x2,x2,x11,lsr#6	// sigma1(X[i+14])
+	add	x13,x13,x6
+	add	x22,x22,x26			// d+=h
+	add	x26,x26,x19			// h+=Maj(a,b,c)
+	ldr	x19,[x30],#8		// *K++, x28 in next round
+	add	x13,x13,x3
+	add	x26,x26,x17			// h+=Sigma0(a)
+	add	x13,x13,x2
+	ldr	x2,[sp,#24]
+	str	x5,[sp,#16]
+	ror	x16,x22,#14
+	add	x25,x25,x19			// h+=K[i]
+	ror	x4,x15,#1
+	and	x17,x23,x22
+	ror	x3,x12,#19
+	bic	x19,x24,x22
+	ror	x5,x26,#28
+	add	x25,x25,x13			// h+=X[i]
+	eor	x16,x16,x22,ror#18
+	eor	x4,x4,x15,ror#8
+	orr	x17,x17,x19			// Ch(e,f,g)
+	eor	x19,x26,x27			// a^b, b^c in next round
+	eor	x16,x16,x22,ror#41	// Sigma1(e)
+	eor	x5,x5,x26,ror#34
+	add	x25,x25,x17			// h+=Ch(e,f,g)
+	and	x28,x28,x19			// (b^c)&=(a^b)
+	eor	x3,x3,x12,ror#61
+	eor	x4,x4,x15,lsr#7	// sigma0(X[i+1])
+	add	x25,x25,x16			// h+=Sigma1(e)
+	eor	x28,x28,x27			// Maj(a,b,c)
+	eor	x17,x5,x26,ror#39	// Sigma0(a)
+	eor	x3,x3,x12,lsr#6	// sigma1(X[i+14])
+	add	x14,x14,x7
+	add	x21,x21,x25			// d+=h
+	add	x25,x25,x28			// h+=Maj(a,b,c)
+	ldr	x28,[x30],#8		// *K++, x19 in next round
+	add	x14,x14,x4
+	add	x25,x25,x17			// h+=Sigma0(a)
+	add	x14,x14,x3
+	ldr	x3,[sp,#0]
+	str	x6,[sp,#24]
+	ror	x16,x21,#14
+	add	x24,x24,x28			// h+=K[i]
+	ror	x5,x0,#1
+	and	x17,x22,x21
+	ror	x4,x13,#19
+	bic	x28,x23,x21
+	ror	x6,x25,#28
+	add	x24,x24,x14			// h+=X[i]
+	eor	x16,x16,x21,ror#18
+	eor	x5,x5,x0,ror#8
+	orr	x17,x17,x28			// Ch(e,f,g)
+	eor	x28,x25,x26			// a^b, b^c in next round
+	eor	x16,x16,x21,ror#41	// Sigma1(e)
+	eor	x6,x6,x25,ror#34
+	add	x24,x24,x17			// h+=Ch(e,f,g)
+	and	x19,x19,x28			// (b^c)&=(a^b)
+	eor	x4,x4,x13,ror#61
+	eor	x5,x5,x0,lsr#7	// sigma0(X[i+1])
+	add	x24,x24,x16			// h+=Sigma1(e)
+	eor	x19,x19,x26			// Maj(a,b,c)
+	eor	x17,x6,x25,ror#39	// Sigma0(a)
+	eor	x4,x4,x13,lsr#6	// sigma1(X[i+14])
+	add	x15,x15,x8
+	add	x20,x20,x24			// d+=h
+	add	x24,x24,x19			// h+=Maj(a,b,c)
+	ldr	x19,[x30],#8		// *K++, x28 in next round
+	add	x15,x15,x5
+	add	x24,x24,x17			// h+=Sigma0(a)
+	add	x15,x15,x4
+	ldr	x4,[sp,#8]
+	str	x7,[sp,#0]
+	ror	x16,x20,#14
+	add	x23,x23,x19			// h+=K[i]
+	ror	x6,x1,#1
+	and	x17,x21,x20
+	ror	x5,x14,#19
+	bic	x19,x22,x20
+	ror	x7,x24,#28
+	add	x23,x23,x15			// h+=X[i]
+	eor	x16,x16,x20,ror#18
+	eor	x6,x6,x1,ror#8
+	orr	x17,x17,x19			// Ch(e,f,g)
+	eor	x19,x24,x25			// a^b, b^c in next round
+	eor	x16,x16,x20,ror#41	// Sigma1(e)
+	eor	x7,x7,x24,ror#34
+	add	x23,x23,x17			// h+=Ch(e,f,g)
+	and	x28,x28,x19			// (b^c)&=(a^b)
+	eor	x5,x5,x14,ror#61
+	eor	x6,x6,x1,lsr#7	// sigma0(X[i+1])
+	add	x23,x23,x16			// h+=Sigma1(e)
+	eor	x28,x28,x25			// Maj(a,b,c)
+	eor	x17,x7,x24,ror#39	// Sigma0(a)
+	eor	x5,x5,x14,lsr#6	// sigma1(X[i+14])
+	add	x0,x0,x9
+	add	x27,x27,x23			// d+=h
+	add	x23,x23,x28			// h+=Maj(a,b,c)
+	ldr	x28,[x30],#8		// *K++, x19 in next round
+	add	x0,x0,x6
+	add	x23,x23,x17			// h+=Sigma0(a)
+	add	x0,x0,x5
+	ldr	x5,[sp,#16]
+	str	x8,[sp,#8]
+	ror	x16,x27,#14
+	add	x22,x22,x28			// h+=K[i]
+	ror	x7,x2,#1
+	and	x17,x20,x27
+	ror	x6,x15,#19
+	bic	x28,x21,x27
+	ror	x8,x23,#28
+	add	x22,x22,x0			// h+=X[i]
+	eor	x16,x16,x27,ror#18
+	eor	x7,x7,x2,ror#8
+	orr	x17,x17,x28			// Ch(e,f,g)
+	eor	x28,x23,x24			// a^b, b^c in next round
+	eor	x16,x16,x27,ror#41	// Sigma1(e)
+	eor	x8,x8,x23,ror#34
+	add	x22,x22,x17			// h+=Ch(e,f,g)
+	and	x19,x19,x28			// (b^c)&=(a^b)
+	eor	x6,x6,x15,ror#61
+	eor	x7,x7,x2,lsr#7	// sigma0(X[i+1])
+	add	x22,x22,x16			// h+=Sigma1(e)
+	eor	x19,x19,x24			// Maj(a,b,c)
+	eor	x17,x8,x23,ror#39	// Sigma0(a)
+	eor	x6,x6,x15,lsr#6	// sigma1(X[i+14])
+	add	x1,x1,x10
+	add	x26,x26,x22			// d+=h
+	add	x22,x22,x19			// h+=Maj(a,b,c)
+	ldr	x19,[x30],#8		// *K++, x28 in next round
+	add	x1,x1,x7
+	add	x22,x22,x17			// h+=Sigma0(a)
+	add	x1,x1,x6
+	ldr	x6,[sp,#24]
+	str	x9,[sp,#16]
+	ror	x16,x26,#14
+	add	x21,x21,x19			// h+=K[i]
+	ror	x8,x3,#1
+	and	x17,x27,x26
+	ror	x7,x0,#19
+	bic	x19,x20,x26
+	ror	x9,x22,#28
+	add	x21,x21,x1			// h+=X[i]
+	eor	x16,x16,x26,ror#18
+	eor	x8,x8,x3,ror#8
+	orr	x17,x17,x19			// Ch(e,f,g)
+	eor	x19,x22,x23			// a^b, b^c in next round
+	eor	x16,x16,x26,ror#41	// Sigma1(e)
+	eor	x9,x9,x22,ror#34
+	add	x21,x21,x17			// h+=Ch(e,f,g)
+	and	x28,x28,x19			// (b^c)&=(a^b)
+	eor	x7,x7,x0,ror#61
+	eor	x8,x8,x3,lsr#7	// sigma0(X[i+1])
+	add	x21,x21,x16			// h+=Sigma1(e)
+	eor	x28,x28,x23			// Maj(a,b,c)
+	eor	x17,x9,x22,ror#39	// Sigma0(a)
+	eor	x7,x7,x0,lsr#6	// sigma1(X[i+14])
+	add	x2,x2,x11
+	add	x25,x25,x21			// d+=h
+	add	x21,x21,x28			// h+=Maj(a,b,c)
+	ldr	x28,[x30],#8		// *K++, x19 in next round
+	add	x2,x2,x8
+	add	x21,x21,x17			// h+=Sigma0(a)
+	add	x2,x2,x7
+	ldr	x7,[sp,#0]
+	str	x10,[sp,#24]
+	ror	x16,x25,#14
+	add	x20,x20,x28			// h+=K[i]
+	ror	x9,x4,#1
+	and	x17,x26,x25
+	ror	x8,x1,#19
+	bic	x28,x27,x25
+	ror	x10,x21,#28
+	add	x20,x20,x2			// h+=X[i]
+	eor	x16,x16,x25,ror#18
+	eor	x9,x9,x4,ror#8
+	orr	x17,x17,x28			// Ch(e,f,g)
+	eor	x28,x21,x22			// a^b, b^c in next round
+	eor	x16,x16,x25,ror#41	// Sigma1(e)
+	eor	x10,x10,x21,ror#34
+	add	x20,x20,x17			// h+=Ch(e,f,g)
+	and	x19,x19,x28			// (b^c)&=(a^b)
+	eor	x8,x8,x1,ror#61
+	eor	x9,x9,x4,lsr#7	// sigma0(X[i+1])
+	add	x20,x20,x16			// h+=Sigma1(e)
+	eor	x19,x19,x22			// Maj(a,b,c)
+	eor	x17,x10,x21,ror#39	// Sigma0(a)
+	eor	x8,x8,x1,lsr#6	// sigma1(X[i+14])
+	add	x3,x3,x12
+	add	x24,x24,x20			// d+=h
+	add	x20,x20,x19			// h+=Maj(a,b,c)
+	ldr	x19,[x30],#8		// *K++, x28 in next round
+	add	x3,x3,x9
+	add	x20,x20,x17			// h+=Sigma0(a)
+	add	x3,x3,x8
+	cbnz	x19,.Loop_16_xx
+
+	ldp	x0,x2,[x29,#96]
+	ldr	x1,[x29,#112]
+	sub	x30,x30,#648		// rewind
+
+	ldp	x3,x4,[x0]
+	ldp	x5,x6,[x0,#2*8]
+	add	x1,x1,#14*8			// advance input pointer
+	ldp	x7,x8,[x0,#4*8]
+	add	x20,x20,x3
+	ldp	x9,x10,[x0,#6*8]
+	add	x21,x21,x4
+	add	x22,x22,x5
+	add	x23,x23,x6
+	stp	x20,x21,[x0]
+	add	x24,x24,x7
+	add	x25,x25,x8
+	stp	x22,x23,[x0,#2*8]
+	add	x26,x26,x9
+	add	x27,x27,x10
+	cmp	x1,x2
+	stp	x24,x25,[x0,#4*8]
+	stp	x26,x27,[x0,#6*8]
+	b.ne	.Loop
+
+	ldp	x19,x20,[x29,#16]
+	add	sp,sp,#4*8
+	ldp	x21,x22,[x29,#32]
+	ldp	x23,x24,[x29,#48]
+	ldp	x25,x26,[x29,#64]
+	ldp	x27,x28,[x29,#80]
+	ldp	x29,x30,[sp],#128
+	ret
+.size	sha512_block_data_order,.-sha512_block_data_order
+
+.align	6
+.type	.LK512,%object
+.LK512:
+	.quad	0x428a2f98d728ae22,0x7137449123ef65cd
+	.quad	0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
+	.quad	0x3956c25bf348b538,0x59f111f1b605d019
+	.quad	0x923f82a4af194f9b,0xab1c5ed5da6d8118
+	.quad	0xd807aa98a3030242,0x12835b0145706fbe
+	.quad	0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
+	.quad	0x72be5d74f27b896f,0x80deb1fe3b1696b1
+	.quad	0x9bdc06a725c71235,0xc19bf174cf692694
+	.quad	0xe49b69c19ef14ad2,0xefbe4786384f25e3
+	.quad	0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
+	.quad	0x2de92c6f592b0275,0x4a7484aa6ea6e483
+	.quad	0x5cb0a9dcbd41fbd4,0x76f988da831153b5
+	.quad	0x983e5152ee66dfab,0xa831c66d2db43210
+	.quad	0xb00327c898fb213f,0xbf597fc7beef0ee4
+	.quad	0xc6e00bf33da88fc2,0xd5a79147930aa725
+	.quad	0x06ca6351e003826f,0x142929670a0e6e70
+	.quad	0x27b70a8546d22ffc,0x2e1b21385c26c926
+	.quad	0x4d2c6dfc5ac42aed,0x53380d139d95b3df
+	.quad	0x650a73548baf63de,0x766a0abb3c77b2a8
+	.quad	0x81c2c92e47edaee6,0x92722c851482353b
+	.quad	0xa2bfe8a14cf10364,0xa81a664bbc423001
+	.quad	0xc24b8b70d0f89791,0xc76c51a30654be30
+	.quad	0xd192e819d6ef5218,0xd69906245565a910
+	.quad	0xf40e35855771202a,0x106aa07032bbd1b8
+	.quad	0x19a4c116b8d2d0c8,0x1e376c085141ab53
+	.quad	0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
+	.quad	0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
+	.quad	0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
+	.quad	0x748f82ee5defb2fc,0x78a5636f43172f60
+	.quad	0x84c87814a1f0ab72,0x8cc702081a6439ec
+	.quad	0x90befffa23631e28,0xa4506cebde82bde9
+	.quad	0xbef9a3f7b2c67915,0xc67178f2e372532b
+	.quad	0xca273eceea26619c,0xd186b8c721c0c207
+	.quad	0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
+	.quad	0x06f067aa72176fba,0x0a637dc5a2c898a6
+	.quad	0x113f9804bef90dae,0x1b710b35131c471b
+	.quad	0x28db77f523047d84,0x32caab7b40c72493
+	.quad	0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
+	.quad	0x4cc5d4becb3e42b6,0x597f299cfc657e2a
+	.quad	0x5fcb6fab3ad6faec,0x6c44198c4a475817
+	.quad	0	// terminator
+.size	.LK512,.-.LK512
+#ifndef	__KERNEL__
+.align	3
+.LOPENSSL_armcap_P:
+# ifdef	__ILP32__
+	.long	OPENSSL_armcap_P-.
+# else
+	.quad	OPENSSL_armcap_P-.
+# endif
+#endif
+.asciz	"SHA512 block transform for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
+.align	2
+#ifndef	__KERNEL__
+.comm	OPENSSL_armcap_P,4,4
+#endif
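
The interleaved rounds above are easier to audit against a plain statement of the SHA-512 compression step. The C sketch below shows one round as the scheduled assembly computes it; every name in it is illustrative only and not part of the patch. Note the folding trick visible in the listing: each big Sigma is built from two eor/ror pairs, e.g. ror(e,14) ^ ror(e ^ ror(e,23), 18) == ror(e,14) ^ ror(e,18) ^ ror(e,41).

    #include <stdint.h>

    static inline uint64_t ror64(uint64_t x, unsigned int n)
    {
        return (x >> n) | (x << (64 - n));
    }

    /* One SHA-512 round over state a..h (a sketch; the kernel runs the
     * CRYPTOGAMS assembly above, not this C). Ki is the .LK512 constant,
     * Wi the byte-swapped message/schedule word for this round. */
    static void sha512_round(uint64_t s[8], uint64_t Ki, uint64_t Wi)
    {
        uint64_t a = s[0], b = s[1], c = s[2], d = s[3];
        uint64_t e = s[4], f = s[5], g = s[6], h = s[7];

        /* Sigma1(e) = ror#14 ^ ror#18 ^ ror#41 */
        uint64_t S1 = ror64(e, 14) ^ ror64(e, 18) ^ ror64(e, 41);
        /* Ch(e,f,g), the and/bic/orr triple in the listing */
        uint64_t ch = (e & f) | (~e & g);
        uint64_t T1 = h + S1 + ch + Ki + Wi;   /* "h+=K[i]", "h+=X[i]", ... */

        /* Sigma0(a) = ror#28 ^ ror#34 ^ ror#39 */
        uint64_t S0 = ror64(a, 28) ^ ror64(a, 34) ^ ror64(a, 39);
        /* Maj(a,b,c) as ((a^b) & (b^c)) ^ b; the "a^b, b^c in next round"
         * comments show that eor being reused across adjacent rounds */
        uint64_t maj = ((a ^ b) & (b ^ c)) ^ b;

        s[7] = g; s[6] = f; s[5] = e;
        s[4] = d + T1;                         /* "d+=h" */
        s[3] = c; s[2] = b; s[1] = a;
        s[0] = T1 + S0 + maj;                  /* "h+=Sigma0(a)" */
    }

In the .Loop_16_xx section the same rounds are fused with the message schedule, whose small sigmas are likewise visible in the ror/lsr lines: sigma0(x) = ror(x,1) ^ ror(x,8) ^ (x >> 7) and sigma1(x) = ror(x,19) ^ ror(x,61) ^ (x >> 6).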

+ 94 - 0
arch/arm64/crypto/sha512-glue.c

@@ -0,0 +1,94 @@
+/*
+ * Linux/arm64 port of the OpenSSL SHA512 implementation for AArch64
+ *
+ * Copyright (c) 2016 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <crypto/internal/hash.h>
+#include <linux/cryptohash.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <crypto/sha.h>
+#include <crypto/sha512_base.h>
+#include <asm/neon.h>
+
+MODULE_DESCRIPTION("SHA-384/SHA-512 secure hash for arm64");
+MODULE_AUTHOR("Andy Polyakov <appro@openssl.org>");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("sha384");
+MODULE_ALIAS_CRYPTO("sha512");
+
+asmlinkage void sha512_block_data_order(u64 *digest, const void *data,
+					unsigned int num_blks);
+
+static int sha512_update(struct shash_desc *desc, const u8 *data,
+			 unsigned int len)
+{
+	return sha512_base_do_update(desc, data, len,
+			(sha512_block_fn *)sha512_block_data_order);
+}
+
+static int sha512_finup(struct shash_desc *desc, const u8 *data,
+			unsigned int len, u8 *out)
+{
+	if (len)
+		sha512_base_do_update(desc, data, len,
+			(sha512_block_fn *)sha512_block_data_order);
+	sha512_base_do_finalize(desc,
+			(sha512_block_fn *)sha512_block_data_order);
+
+	return sha512_base_finish(desc, out);
+}
+
+static int sha512_final(struct shash_desc *desc, u8 *out)
+{
+	return sha512_finup(desc, NULL, 0, out);
+}
+
+static struct shash_alg algs[] = { {
+	.digestsize		= SHA512_DIGEST_SIZE,
+	.init			= sha512_base_init,
+	.update			= sha512_update,
+	.final			= sha512_final,
+	.finup			= sha512_finup,
+	.descsize		= sizeof(struct sha512_state),
+	.base.cra_name		= "sha512",
+	.base.cra_driver_name	= "sha512-arm64",
+	.base.cra_priority	= 150,
+	.base.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
+	.base.cra_blocksize	= SHA512_BLOCK_SIZE,
+	.base.cra_module	= THIS_MODULE,
+}, {
+	.digestsize		= SHA384_DIGEST_SIZE,
+	.init			= sha384_base_init,
+	.update			= sha512_update,
+	.final			= sha512_final,
+	.finup			= sha512_finup,
+	.descsize		= sizeof(struct sha512_state),
+	.base.cra_name		= "sha384",
+	.base.cra_driver_name	= "sha384-arm64",
+	.base.cra_priority	= 150,
+	.base.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
+	.base.cra_blocksize	= SHA384_BLOCK_SIZE,
+	.base.cra_module	= THIS_MODULE,
+} };
+
+static int __init sha512_mod_init(void)
+{
+	return crypto_register_shashes(algs, ARRAY_SIZE(algs));
+}
+
+static void __exit sha512_mod_fini(void)
+{
+	crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
+}
+
+module_init(sha512_mod_init);
+module_exit(sha512_mod_fini);
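
The glue file registers the two shash algorithms but exposes no entry points of its own; consumers reach it through the generic hash API. A hypothetical kernel-side caller (not part of this patch) could look like the sketch below; the crypto core resolves "sha512" to the highest-priority registered implementation, which after this patch is "sha512-arm64" at priority 150.

    #include <crypto/hash.h>
    #include <crypto/sha.h>
    #include <linux/err.h>

    /* Hypothetical one-shot helper built on the shash API. */
    static int sha512_digest_buf(const u8 *buf, unsigned int len,
                                 u8 out[SHA512_DIGEST_SIZE])
    {
        struct crypto_shash *tfm;
        int err;

        tfm = crypto_alloc_shash("sha512", 0, 0);
        if (IS_ERR(tfm))
            return PTR_ERR(tfm);

        {
            SHASH_DESC_ON_STACK(desc, tfm);

            desc->tfm = tfm;
            desc->flags = 0;    /* the caller may not sleep */
            err = crypto_shash_digest(desc, buf, len, out);
        }

        crypto_free_shash(tfm);
        return err;
    }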

+ 1 - 1
arch/powerpc/crypto/Makefile

@@ -9,7 +9,7 @@ obj-$(CONFIG_CRYPTO_MD5_PPC) += md5-ppc.o
 obj-$(CONFIG_CRYPTO_SHA1_PPC) += sha1-powerpc.o
 obj-$(CONFIG_CRYPTO_SHA1_PPC_SPE) += sha1-ppc-spe.o
 obj-$(CONFIG_CRYPTO_SHA256_PPC_SPE) += sha256-ppc-spe.o
-obj-$(CONFIG_CRYPT_CRC32C_VPMSUM) += crc32c-vpmsum.o
+obj-$(CONFIG_CRYPTO_CRC32C_VPMSUM) += crc32c-vpmsum.o
 
 aes-ppc-spe-y := aes-spe-core.o aes-spe-keys.o aes-tab-4k.o aes-spe-modes.o aes-spe-glue.o
 md5-ppc-y := md5-asm.o md5-glue.o
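
The one-character symbol fix above is functional, not cosmetic: with the misspelled CONFIG_CRYPT_CRC32C_VPMSUM the make variable is always empty, the line expands to the inert "obj- += crc32c-vpmsum.o", and kbuild silently dropped the accelerated CRC32C object even with CRYPTO_CRC32C_VPMSUM=y in the config.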

+ 218 - 487
arch/x86/crypto/aesni-intel_glue.c

@@ -21,7 +21,6 @@
 
 #include <linux/hardirq.h>
 #include <linux/types.h>
-#include <linux/crypto.h>
 #include <linux/module.h>
 #include <linux/err.h>
 #include <crypto/algapi.h>
@@ -29,14 +28,14 @@
 #include <crypto/cryptd.h>
 #include <crypto/ctr.h>
 #include <crypto/b128ops.h>
-#include <crypto/lrw.h>
 #include <crypto/xts.h>
 #include <asm/cpu_device_id.h>
 #include <asm/fpu/api.h>
 #include <asm/crypto/aes.h>
-#include <crypto/ablk_helper.h>
 #include <crypto/scatterwalk.h>
 #include <crypto/internal/aead.h>
+#include <crypto/internal/simd.h>
+#include <crypto/internal/skcipher.h>
 #include <linux/workqueue.h>
 #include <linux/spinlock.h>
 #ifdef CONFIG_X86_64
@@ -45,28 +44,26 @@
 
 
 #define AESNI_ALIGN	16
+#define AESNI_ALIGN_ATTR __attribute__ ((__aligned__(AESNI_ALIGN)))
 #define AES_BLOCK_MASK	(~(AES_BLOCK_SIZE - 1))
 #define RFC4106_HASH_SUBKEY_SIZE 16
+#define AESNI_ALIGN_EXTRA ((AESNI_ALIGN - 1) & ~(CRYPTO_MINALIGN - 1))
+#define CRYPTO_AES_CTX_SIZE (sizeof(struct crypto_aes_ctx) + AESNI_ALIGN_EXTRA)
+#define XTS_AES_CTX_SIZE (sizeof(struct aesni_xts_ctx) + AESNI_ALIGN_EXTRA)
 
 /* This data is stored at the end of the crypto_tfm struct.
  * It's a type of per "session" data storage location.
  * This needs to be 16 byte aligned.
  */
 struct aesni_rfc4106_gcm_ctx {
-	u8 hash_subkey[16] __attribute__ ((__aligned__(AESNI_ALIGN)));
-	struct crypto_aes_ctx aes_key_expanded
-		__attribute__ ((__aligned__(AESNI_ALIGN)));
+	u8 hash_subkey[16] AESNI_ALIGN_ATTR;
+	struct crypto_aes_ctx aes_key_expanded AESNI_ALIGN_ATTR;
 	u8 nonce[4];
 };
 
-struct aesni_lrw_ctx {
-	struct lrw_table_ctx lrw_table;
-	u8 raw_aes_ctx[sizeof(struct crypto_aes_ctx) + AESNI_ALIGN - 1];
-};
-
 struct aesni_xts_ctx {
-	u8 raw_tweak_ctx[sizeof(struct crypto_aes_ctx) + AESNI_ALIGN - 1];
-	u8 raw_crypt_ctx[sizeof(struct crypto_aes_ctx) + AESNI_ALIGN - 1];
+	u8 raw_tweak_ctx[sizeof(struct crypto_aes_ctx)] AESNI_ALIGN_ATTR;
+	u8 raw_crypt_ctx[sizeof(struct crypto_aes_ctx)] AESNI_ALIGN_ATTR;
 };
 
 asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
@@ -360,96 +357,95 @@ static void __aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 	aesni_dec(ctx, dst, src);
 }
 
-static int ecb_encrypt(struct blkcipher_desc *desc,
-		       struct scatterlist *dst, struct scatterlist *src,
-		       unsigned int nbytes)
+static int aesni_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			         unsigned int len)
+{
+	return aes_set_key_common(crypto_skcipher_tfm(tfm),
+				  crypto_skcipher_ctx(tfm), key, len);
+}
+
+static int ecb_encrypt(struct skcipher_request *req)
 {
-	struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm));
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm));
+	struct skcipher_walk walk;
+	unsigned int nbytes;
 	int err;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	err = skcipher_walk_virt(&walk, req, true);
 
 	kernel_fpu_begin();
 	while ((nbytes = walk.nbytes)) {
 		aesni_ecb_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
 			      nbytes & AES_BLOCK_MASK);
 		nbytes &= AES_BLOCK_SIZE - 1;
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 	kernel_fpu_end();
 
 	return err;
 }
 
-static int ecb_decrypt(struct blkcipher_desc *desc,
-		       struct scatterlist *dst, struct scatterlist *src,
-		       unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
 {
-	struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm));
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm));
+	struct skcipher_walk walk;
+	unsigned int nbytes;
 	int err;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	err = skcipher_walk_virt(&walk, req, true);
 
 	kernel_fpu_begin();
 	while ((nbytes = walk.nbytes)) {
 		aesni_ecb_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
 			      nbytes & AES_BLOCK_MASK);
 		nbytes &= AES_BLOCK_SIZE - 1;
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 	kernel_fpu_end();
 
 	return err;
 }
 
-static int cbc_encrypt(struct blkcipher_desc *desc,
-		       struct scatterlist *dst, struct scatterlist *src,
-		       unsigned int nbytes)
+static int cbc_encrypt(struct skcipher_request *req)
 {
-	struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm));
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm));
+	struct skcipher_walk walk;
+	unsigned int nbytes;
 	int err;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	err = skcipher_walk_virt(&walk, req, true);
 
 	kernel_fpu_begin();
 	while ((nbytes = walk.nbytes)) {
 		aesni_cbc_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
 			      nbytes & AES_BLOCK_MASK, walk.iv);
 		nbytes &= AES_BLOCK_SIZE - 1;
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 	kernel_fpu_end();
 
 	return err;
 }
 
-static int cbc_decrypt(struct blkcipher_desc *desc,
-		       struct scatterlist *dst, struct scatterlist *src,
-		       unsigned int nbytes)
+static int cbc_decrypt(struct skcipher_request *req)
 {
-	struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm));
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm));
+	struct skcipher_walk walk;
+	unsigned int nbytes;
 	int err;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	err = skcipher_walk_virt(&walk, req, true);
 
 	kernel_fpu_begin();
 	while ((nbytes = walk.nbytes)) {
 		aesni_cbc_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
 			      nbytes & AES_BLOCK_MASK, walk.iv);
 		nbytes &= AES_BLOCK_SIZE - 1;
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 	kernel_fpu_end();
 
@@ -458,7 +454,7 @@ static int cbc_decrypt(struct blkcipher_desc *desc,
 
 #ifdef CONFIG_X86_64
 static void ctr_crypt_final(struct crypto_aes_ctx *ctx,
-			    struct blkcipher_walk *walk)
+			    struct skcipher_walk *walk)
 {
 	u8 *ctrblk = walk->iv;
 	u8 keystream[AES_BLOCK_SIZE];
@@ -491,157 +487,53 @@ static void aesni_ctr_enc_avx_tfm(struct crypto_aes_ctx *ctx, u8 *out,
 }
 #endif
 
-static int ctr_crypt(struct blkcipher_desc *desc,
-		     struct scatterlist *dst, struct scatterlist *src,
-		     unsigned int nbytes)
+static int ctr_crypt(struct skcipher_request *req)
 {
-	struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm));
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm));
+	struct skcipher_walk walk;
+	unsigned int nbytes;
 	int err;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	err = skcipher_walk_virt(&walk, req, true);
 
 	kernel_fpu_begin();
 	while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
 		aesni_ctr_enc_tfm(ctx, walk.dst.virt.addr, walk.src.virt.addr,
 			              nbytes & AES_BLOCK_MASK, walk.iv);
 		nbytes &= AES_BLOCK_SIZE - 1;
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 	if (walk.nbytes) {
 		ctr_crypt_final(ctx, &walk);
-		err = blkcipher_walk_done(desc, &walk, 0);
+		err = skcipher_walk_done(&walk, 0);
 	}
 	kernel_fpu_end();
 
 	return err;
 }
-#endif
-
-static int ablk_ecb_init(struct crypto_tfm *tfm)
-{
-	return ablk_init_common(tfm, "__driver-ecb-aes-aesni");
-}
-
-static int ablk_cbc_init(struct crypto_tfm *tfm)
-{
-	return ablk_init_common(tfm, "__driver-cbc-aes-aesni");
-}
-
-#ifdef CONFIG_X86_64
-static int ablk_ctr_init(struct crypto_tfm *tfm)
-{
-	return ablk_init_common(tfm, "__driver-ctr-aes-aesni");
-}
-
-#endif
-
-#if IS_ENABLED(CONFIG_CRYPTO_PCBC)
-static int ablk_pcbc_init(struct crypto_tfm *tfm)
-{
-	return ablk_init_common(tfm, "fpu(pcbc(__driver-aes-aesni))");
-}
-#endif
-
-static void lrw_xts_encrypt_callback(void *ctx, u8 *blks, unsigned int nbytes)
-{
-	aesni_ecb_enc(ctx, blks, blks, nbytes);
-}
 
-static void lrw_xts_decrypt_callback(void *ctx, u8 *blks, unsigned int nbytes)
-{
-	aesni_ecb_dec(ctx, blks, blks, nbytes);
-}
-
-static int lrw_aesni_setkey(struct crypto_tfm *tfm, const u8 *key,
+static int xts_aesni_setkey(struct crypto_skcipher *tfm, const u8 *key,
 			    unsigned int keylen)
 {
-	struct aesni_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct aesni_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 	int err;
 
-	err = aes_set_key_common(tfm, ctx->raw_aes_ctx, key,
-				 keylen - AES_BLOCK_SIZE);
+	err = xts_verify_key(tfm, key, keylen);
 	if (err)
 		return err;
 
-	return lrw_init_table(&ctx->lrw_table, key + keylen - AES_BLOCK_SIZE);
-}
-
-static void lrw_aesni_exit_tfm(struct crypto_tfm *tfm)
-{
-	struct aesni_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
-
-	lrw_free_table(&ctx->lrw_table);
-}
-
-static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct aesni_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[8];
-	struct lrw_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.table_ctx = &ctx->lrw_table,
-		.crypt_ctx = aes_ctx(ctx->raw_aes_ctx),
-		.crypt_fn = lrw_xts_encrypt_callback,
-	};
-	int ret;
-
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-
-	kernel_fpu_begin();
-	ret = lrw_crypt(desc, dst, src, nbytes, &req);
-	kernel_fpu_end();
-
-	return ret;
-}
-
-static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct aesni_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[8];
-	struct lrw_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.table_ctx = &ctx->lrw_table,
-		.crypt_ctx = aes_ctx(ctx->raw_aes_ctx),
-		.crypt_fn = lrw_xts_decrypt_callback,
-	};
-	int ret;
-
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-
-	kernel_fpu_begin();
-	ret = lrw_crypt(desc, dst, src, nbytes, &req);
-	kernel_fpu_end();
-
-	return ret;
-}
-
-static int xts_aesni_setkey(struct crypto_tfm *tfm, const u8 *key,
-			    unsigned int keylen)
-{
-	struct aesni_xts_ctx *ctx = crypto_tfm_ctx(tfm);
-	int err;
-
-	err = xts_check_key(tfm, key, keylen);
-	if (err)
-		return err;
+	keylen /= 2;
 
 	/* first half of xts-key is for crypt */
-	err = aes_set_key_common(tfm, ctx->raw_crypt_ctx, key, keylen / 2);
+	err = aes_set_key_common(crypto_skcipher_tfm(tfm), ctx->raw_crypt_ctx,
+				 key, keylen);
 	if (err)
 		return err;
 
 	/* second half of xts-key is for tweak */
-	return aes_set_key_common(tfm, ctx->raw_tweak_ctx, key + keylen / 2,
-				  keylen / 2);
+	return aes_set_key_common(crypto_skcipher_tfm(tfm), ctx->raw_tweak_ctx,
+				  key + keylen, keylen);
 }
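
A worked example of the reworked key split above: xts_verify_key() validates the combined key up front (even length, and distinct halves when running in FIPS mode); for a 64-byte xts(aes) key, keylen becomes 32 after the halving, so the data key is key[0..31] and the tweak key starts at key + keylen, i.e. key + 32. Both expanded keys now land in the 16-byte-aligned raw_crypt_ctx/raw_tweak_ctx buffers declared with AESNI_ALIGN_ATTR earlier in this patch.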
 
 
@@ -650,8 +542,6 @@ static void aesni_xts_tweak(void *ctx, u8 *out, const u8 *in)
 	aesni_enc(ctx, out, in);
 }
 
-#ifdef CONFIG_X86_64
-
 static void aesni_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
 {
 	glue_xts_crypt_128bit_one(ctx, dst, src, iv, GLUE_FUNC_CAST(aesni_enc));
@@ -698,83 +588,28 @@ static const struct common_glue_ctx aesni_dec_xts = {
 	} }
 };
 
-static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int xts_encrypt(struct skcipher_request *req)
 {
-	struct aesni_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct aesni_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	return glue_xts_crypt_128bit(&aesni_enc_xts, desc, dst, src, nbytes,
-				     XTS_TWEAK_CAST(aesni_xts_tweak),
-				     aes_ctx(ctx->raw_tweak_ctx),
-				     aes_ctx(ctx->raw_crypt_ctx));
+	return glue_xts_req_128bit(&aesni_enc_xts, req,
+				   XTS_TWEAK_CAST(aesni_xts_tweak),
+				   aes_ctx(ctx->raw_tweak_ctx),
+				   aes_ctx(ctx->raw_crypt_ctx));
 }
 
-static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int xts_decrypt(struct skcipher_request *req)
 {
-	struct aesni_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-
-	return glue_xts_crypt_128bit(&aesni_dec_xts, desc, dst, src, nbytes,
-				     XTS_TWEAK_CAST(aesni_xts_tweak),
-				     aes_ctx(ctx->raw_tweak_ctx),
-				     aes_ctx(ctx->raw_crypt_ctx));
-}
-
-#else
-
-static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct aesni_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[8];
-	struct xts_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.tweak_ctx = aes_ctx(ctx->raw_tweak_ctx),
-		.tweak_fn = aesni_xts_tweak,
-		.crypt_ctx = aes_ctx(ctx->raw_crypt_ctx),
-		.crypt_fn = lrw_xts_encrypt_callback,
-	};
-	int ret;
-
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-
-	kernel_fpu_begin();
-	ret = xts_crypt(desc, dst, src, nbytes, &req);
-	kernel_fpu_end();
-
-	return ret;
-}
-
-static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct aesni_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[8];
-	struct xts_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.tweak_ctx = aes_ctx(ctx->raw_tweak_ctx),
-		.tweak_fn = aesni_xts_tweak,
-		.crypt_ctx = aes_ctx(ctx->raw_crypt_ctx),
-		.crypt_fn = lrw_xts_decrypt_callback,
-	};
-	int ret;
-
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-
-	kernel_fpu_begin();
-	ret = xts_crypt(desc, dst, src, nbytes, &req);
-	kernel_fpu_end();
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct aesni_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	return ret;
+	return glue_xts_req_128bit(&aesni_dec_xts, req,
+				   XTS_TWEAK_CAST(aesni_xts_tweak),
+				   aes_ctx(ctx->raw_tweak_ctx),
+				   aes_ctx(ctx->raw_crypt_ctx));
 }
 
-#endif
-
-#ifdef CONFIG_X86_64
 static int rfc4106_init(struct crypto_aead *aead)
 {
 	struct cryptd_aead *cryptd_tfm;
@@ -1077,9 +912,7 @@ static struct crypto_alg aesni_algs[] = { {
 	.cra_priority		= 300,
 	.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
 	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct crypto_aes_ctx) +
-				  AESNI_ALIGN - 1,
-	.cra_alignmask		= 0,
+	.cra_ctxsize		= CRYPTO_AES_CTX_SIZE,
 	.cra_module		= THIS_MODULE,
 	.cra_u	= {
 		.cipher	= {
@@ -1091,14 +924,12 @@ static struct crypto_alg aesni_algs[] = { {
 		}
 	}
 }, {
-	.cra_name		= "__aes-aesni",
-	.cra_driver_name	= "__driver-aes-aesni",
-	.cra_priority		= 0,
+	.cra_name		= "__aes",
+	.cra_driver_name	= "__aes-aesni",
+	.cra_priority		= 300,
 	.cra_flags		= CRYPTO_ALG_TYPE_CIPHER | CRYPTO_ALG_INTERNAL,
 	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct crypto_aes_ctx) +
-				  AESNI_ALIGN - 1,
-	.cra_alignmask		= 0,
+	.cra_ctxsize		= CRYPTO_AES_CTX_SIZE,
 	.cra_module		= THIS_MODULE,
 	.cra_u	= {
 		.cipher	= {
@@ -1109,250 +940,94 @@ static struct crypto_alg aesni_algs[] = { {
 			.cia_decrypt		= __aes_decrypt
 		}
 	}
-}, {
-	.cra_name		= "__ecb-aes-aesni",
-	.cra_driver_name	= "__driver-ecb-aes-aesni",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct crypto_aes_ctx) +
-				  AESNI_ALIGN - 1,
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= AES_MIN_KEY_SIZE,
-			.max_keysize	= AES_MAX_KEY_SIZE,
-			.setkey		= aes_set_key,
-			.encrypt	= ecb_encrypt,
-			.decrypt	= ecb_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__cbc-aes-aesni",
-	.cra_driver_name	= "__driver-cbc-aes-aesni",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct crypto_aes_ctx) +
-				  AESNI_ALIGN - 1,
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= AES_MIN_KEY_SIZE,
-			.max_keysize	= AES_MAX_KEY_SIZE,
-			.setkey		= aes_set_key,
-			.encrypt	= cbc_encrypt,
-			.decrypt	= cbc_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "ecb(aes)",
-	.cra_driver_name	= "ecb-aes-aesni",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_ecb_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= AES_MIN_KEY_SIZE,
-			.max_keysize	= AES_MAX_KEY_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
+} };
+
+static struct skcipher_alg aesni_skciphers[] = {
+	{
+		.base = {
+			.cra_name		= "__ecb(aes)",
+			.cra_driver_name	= "__ecb-aes-aesni",
+			.cra_priority		= 400,
+			.cra_flags		= CRYPTO_ALG_INTERNAL,
+			.cra_blocksize		= AES_BLOCK_SIZE,
+			.cra_ctxsize		= CRYPTO_AES_CTX_SIZE,
+			.cra_module		= THIS_MODULE,
 		},
-	},
-}, {
-	.cra_name		= "cbc(aes)",
-	.cra_driver_name	= "cbc-aes-aesni",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_cbc_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= AES_MIN_KEY_SIZE,
-			.max_keysize	= AES_MAX_KEY_SIZE,
-			.ivsize		= AES_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
+		.min_keysize	= AES_MIN_KEY_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE,
+		.setkey		= aesni_skcipher_setkey,
+		.encrypt	= ecb_encrypt,
+		.decrypt	= ecb_decrypt,
+	}, {
+		.base = {
+			.cra_name		= "__cbc(aes)",
+			.cra_driver_name	= "__cbc-aes-aesni",
+			.cra_priority		= 400,
+			.cra_flags		= CRYPTO_ALG_INTERNAL,
+			.cra_blocksize		= AES_BLOCK_SIZE,
+			.cra_ctxsize		= CRYPTO_AES_CTX_SIZE,
+			.cra_module		= THIS_MODULE,
 		},
-	},
+		.min_keysize	= AES_MIN_KEY_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE,
+		.ivsize		= AES_BLOCK_SIZE,
+		.setkey		= aesni_skcipher_setkey,
+		.encrypt	= cbc_encrypt,
+		.decrypt	= cbc_decrypt,
 #ifdef CONFIG_X86_64
-}, {
-	.cra_name		= "__ctr-aes-aesni",
-	.cra_driver_name	= "__driver-ctr-aes-aesni",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct crypto_aes_ctx) +
-				  AESNI_ALIGN - 1,
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= AES_MIN_KEY_SIZE,
-			.max_keysize	= AES_MAX_KEY_SIZE,
-			.ivsize		= AES_BLOCK_SIZE,
-			.setkey		= aes_set_key,
-			.encrypt	= ctr_crypt,
-			.decrypt	= ctr_crypt,
+	}, {
+		.base = {
+			.cra_name		= "__ctr(aes)",
+			.cra_driver_name	= "__ctr-aes-aesni",
+			.cra_priority		= 400,
+			.cra_flags		= CRYPTO_ALG_INTERNAL,
+			.cra_blocksize		= 1,
+			.cra_ctxsize		= CRYPTO_AES_CTX_SIZE,
+			.cra_module		= THIS_MODULE,
 		},
-	},
-}, {
-	.cra_name		= "ctr(aes)",
-	.cra_driver_name	= "ctr-aes-aesni",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_ctr_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= AES_MIN_KEY_SIZE,
-			.max_keysize	= AES_MAX_KEY_SIZE,
-			.ivsize		= AES_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_encrypt,
-			.geniv		= "chainiv",
+		.min_keysize	= AES_MIN_KEY_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE,
+		.ivsize		= AES_BLOCK_SIZE,
+		.chunksize	= AES_BLOCK_SIZE,
+		.setkey		= aesni_skcipher_setkey,
+		.encrypt	= ctr_crypt,
+		.decrypt	= ctr_crypt,
+	}, {
+		.base = {
+			.cra_name		= "__xts(aes)",
+			.cra_driver_name	= "__xts-aes-aesni",
+			.cra_priority		= 401,
+			.cra_flags		= CRYPTO_ALG_INTERNAL,
+			.cra_blocksize		= AES_BLOCK_SIZE,
+			.cra_ctxsize		= XTS_AES_CTX_SIZE,
+			.cra_module		= THIS_MODULE,
 		},
-	},
+		.min_keysize	= 2 * AES_MIN_KEY_SIZE,
+		.max_keysize	= 2 * AES_MAX_KEY_SIZE,
+		.ivsize		= AES_BLOCK_SIZE,
+		.setkey		= xts_aesni_setkey,
+		.encrypt	= xts_encrypt,
+		.decrypt	= xts_decrypt,
 #endif
+	}
+};
+
+struct simd_skcipher_alg *aesni_simd_skciphers[ARRAY_SIZE(aesni_skciphers)];
+
+struct {
+	const char *algname;
+	const char *drvname;
+	const char *basename;
+	struct simd_skcipher_alg *simd;
+} aesni_simd_skciphers2[] = {
 #if IS_ENABLED(CONFIG_CRYPTO_PCBC)
-}, {
-	.cra_name		= "pcbc(aes)",
-	.cra_driver_name	= "pcbc-aes-aesni",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_pcbc_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= AES_MIN_KEY_SIZE,
-			.max_keysize	= AES_MAX_KEY_SIZE,
-			.ivsize		= AES_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
+	{
+		.algname	= "pcbc(aes)",
+		.drvname	= "pcbc-aes-aesni",
+		.basename	= "fpu(pcbc(__aes-aesni))",
 	},
 #endif
-}, {
-	.cra_name		= "__lrw-aes-aesni",
-	.cra_driver_name	= "__driver-lrw-aes-aesni",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct aesni_lrw_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_exit		= lrw_aesni_exit_tfm,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= AES_MIN_KEY_SIZE + AES_BLOCK_SIZE,
-			.max_keysize	= AES_MAX_KEY_SIZE + AES_BLOCK_SIZE,
-			.ivsize		= AES_BLOCK_SIZE,
-			.setkey		= lrw_aesni_setkey,
-			.encrypt	= lrw_encrypt,
-			.decrypt	= lrw_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__xts-aes-aesni",
-	.cra_driver_name	= "__driver-xts-aes-aesni",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct aesni_xts_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= 2 * AES_MIN_KEY_SIZE,
-			.max_keysize	= 2 * AES_MAX_KEY_SIZE,
-			.ivsize		= AES_BLOCK_SIZE,
-			.setkey		= xts_aesni_setkey,
-			.encrypt	= xts_encrypt,
-			.decrypt	= xts_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "lrw(aes)",
-	.cra_driver_name	= "lrw-aes-aesni",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= AES_MIN_KEY_SIZE + AES_BLOCK_SIZE,
-			.max_keysize	= AES_MAX_KEY_SIZE + AES_BLOCK_SIZE,
-			.ivsize		= AES_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "xts(aes)",
-	.cra_driver_name	= "xts-aes-aesni",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= 2 * AES_MIN_KEY_SIZE,
-			.max_keysize	= 2 * AES_MAX_KEY_SIZE,
-			.ivsize		= AES_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-} };
+};
 
 #ifdef CONFIG_X86_64
 static struct aead_alg aesni_aead_algs[] = { {
@@ -1401,9 +1076,27 @@ static const struct x86_cpu_id aesni_cpu_id[] = {
 };
 MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id);
 
+static void aesni_free_simds(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(aesni_simd_skciphers) &&
+		    aesni_simd_skciphers[i]; i++)
+		simd_skcipher_free(aesni_simd_skciphers[i]);
+
+	for (i = 0; i < ARRAY_SIZE(aesni_simd_skciphers2) &&
+		    aesni_simd_skciphers2[i].simd; i++)
+		simd_skcipher_free(aesni_simd_skciphers2[i].simd);
+}
+
 static int __init aesni_init(void)
 {
+	struct simd_skcipher_alg *simd;
+	const char *basename;
+	const char *algname;
+	const char *drvname;
 	int err;
+	int i;
 
 	if (!x86_match_cpu(aesni_cpu_id))
 		return -ENODEV;
@@ -1445,13 +1138,48 @@ static int __init aesni_init(void)
 	if (err)
 		goto fpu_exit;
 
+	err = crypto_register_skciphers(aesni_skciphers,
+					ARRAY_SIZE(aesni_skciphers));
+	if (err)
+		goto unregister_algs;
+
 	err = crypto_register_aeads(aesni_aead_algs,
 				    ARRAY_SIZE(aesni_aead_algs));
 	if (err)
-		goto unregister_algs;
+		goto unregister_skciphers;
+
+	for (i = 0; i < ARRAY_SIZE(aesni_skciphers); i++) {
+		algname = aesni_skciphers[i].base.cra_name + 2;
+		drvname = aesni_skciphers[i].base.cra_driver_name + 2;
+		basename = aesni_skciphers[i].base.cra_driver_name;
+		simd = simd_skcipher_create_compat(algname, drvname, basename);
+		err = PTR_ERR(simd);
+		if (IS_ERR(simd))
+			goto unregister_simds;
+
+		aesni_simd_skciphers[i] = simd;
+	}
 
-	return err;
+	for (i = 0; i < ARRAY_SIZE(aesni_simd_skciphers2); i++) {
+		algname = aesni_simd_skciphers2[i].algname;
+		drvname = aesni_simd_skciphers2[i].drvname;
+		basename = aesni_simd_skciphers2[i].basename;
+		simd = simd_skcipher_create_compat(algname, drvname, basename);
+		err = PTR_ERR(simd);
+		if (IS_ERR(simd))
+			goto unregister_simds;
 
+		aesni_simd_skciphers2[i].simd = simd;
+	}
+
+	return 0;
+
+unregister_simds:
+	aesni_free_simds();
+	crypto_unregister_aeads(aesni_aead_algs, ARRAY_SIZE(aesni_aead_algs));
+unregister_skciphers:
+	crypto_unregister_skciphers(aesni_skciphers,
+				    ARRAY_SIZE(aesni_skciphers));
 unregister_algs:
 	crypto_unregister_algs(aesni_algs, ARRAY_SIZE(aesni_algs));
 fpu_exit:
@@ -1461,7 +1189,10 @@ fpu_exit:
 
 static void __exit aesni_exit(void)
 {
+	aesni_free_simds();
 	crypto_unregister_aeads(aesni_aead_algs, ARRAY_SIZE(aesni_aead_algs));
+	crypto_unregister_skciphers(aesni_skciphers,
+				    ARRAY_SIZE(aesni_skciphers));
 	crypto_unregister_algs(aesni_algs, ARRAY_SIZE(aesni_algs));
 
 	crypto_fpu_exit();
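
The registration logic added to aesni_init() leans on a naming convention rather than new data: every internal skcipher in the table carries a "__" prefix on both cra_name and cra_driver_name together with CRYPTO_ALG_INTERNAL, and simd_skcipher_create_compat() creates the user-visible async wrapper under the unprefixed names. Restated for the first table entry: algname = cra_name + 2 gives "ecb(aes)", drvname = cra_driver_name + 2 gives "ecb-aes-aesni", and basename keeps the full "__ecb-aes-aesni" so the wrapper can locate its inner algorithm. A request for "ecb(aes)" therefore resolves to the wrapper, which defers to the FPU-only inner implementation from a context where kernel_fpu_begin() is safe; on failure the new unregister_simds path unwinds everything via aesni_free_simds().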

+ 125 - 82
arch/x86/crypto/fpu.c

@@ -11,143 +11,186 @@
  *
  */
 
-#include <crypto/algapi.h>
+#include <crypto/internal/skcipher.h>
 #include <linux/err.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/slab.h>
-#include <linux/crypto.h>
 #include <asm/fpu/api.h>
 
 struct crypto_fpu_ctx {
-	struct crypto_blkcipher *child;
+	struct crypto_skcipher *child;
 };
 
-static int crypto_fpu_setkey(struct crypto_tfm *parent, const u8 *key,
+static int crypto_fpu_setkey(struct crypto_skcipher *parent, const u8 *key,
 			     unsigned int keylen)
 {
-	struct crypto_fpu_ctx *ctx = crypto_tfm_ctx(parent);
-	struct crypto_blkcipher *child = ctx->child;
+	struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(parent);
+	struct crypto_skcipher *child = ctx->child;
 	int err;
 
-	crypto_blkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
-	crypto_blkcipher_set_flags(child, crypto_tfm_get_flags(parent) &
-				   CRYPTO_TFM_REQ_MASK);
-	err = crypto_blkcipher_setkey(child, key, keylen);
-	crypto_tfm_set_flags(parent, crypto_blkcipher_get_flags(child) &
-				     CRYPTO_TFM_RES_MASK);
+	crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
+	crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(parent) &
+					 CRYPTO_TFM_REQ_MASK);
+	err = crypto_skcipher_setkey(child, key, keylen);
+	crypto_skcipher_set_flags(parent, crypto_skcipher_get_flags(child) &
+					  CRYPTO_TFM_RES_MASK);
 	return err;
 }
 
-static int crypto_fpu_encrypt(struct blkcipher_desc *desc_in,
-			      struct scatterlist *dst, struct scatterlist *src,
-			      unsigned int nbytes)
+static int crypto_fpu_encrypt(struct skcipher_request *req)
 {
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct crypto_skcipher *child = ctx->child;
+	SKCIPHER_REQUEST_ON_STACK(subreq, child);
 	int err;
-	struct crypto_fpu_ctx *ctx = crypto_blkcipher_ctx(desc_in->tfm);
-	struct crypto_blkcipher *child = ctx->child;
-	struct blkcipher_desc desc = {
-		.tfm = child,
-		.info = desc_in->info,
-		.flags = desc_in->flags & ~CRYPTO_TFM_REQ_MAY_SLEEP,
-	};
+
+	skcipher_request_set_tfm(subreq, child);
+	skcipher_request_set_callback(subreq, 0, NULL, NULL);
+	skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen,
+				   req->iv);
 
 	kernel_fpu_begin();
-	err = crypto_blkcipher_crt(desc.tfm)->encrypt(&desc, dst, src, nbytes);
+	err = crypto_skcipher_encrypt(subreq);
 	kernel_fpu_end();
+
+	skcipher_request_zero(subreq);
 	return err;
 }
 
-static int crypto_fpu_decrypt(struct blkcipher_desc *desc_in,
-			      struct scatterlist *dst, struct scatterlist *src,
-			      unsigned int nbytes)
+static int crypto_fpu_decrypt(struct skcipher_request *req)
 {
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct crypto_skcipher *child = ctx->child;
+	SKCIPHER_REQUEST_ON_STACK(subreq, child);
 	int err;
-	struct crypto_fpu_ctx *ctx = crypto_blkcipher_ctx(desc_in->tfm);
-	struct crypto_blkcipher *child = ctx->child;
-	struct blkcipher_desc desc = {
-		.tfm = child,
-		.info = desc_in->info,
-		.flags = desc_in->flags & ~CRYPTO_TFM_REQ_MAY_SLEEP,
-	};
+
+	skcipher_request_set_tfm(subreq, child);
+	skcipher_request_set_callback(subreq, 0, NULL, NULL);
+	skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen,
+				   req->iv);
 
 	kernel_fpu_begin();
-	err = crypto_blkcipher_crt(desc.tfm)->decrypt(&desc, dst, src, nbytes);
+	err = crypto_skcipher_decrypt(subreq);
 	kernel_fpu_end();
+
+	skcipher_request_zero(subreq);
 	return err;
 }
 
-static int crypto_fpu_init_tfm(struct crypto_tfm *tfm)
+static int crypto_fpu_init_tfm(struct crypto_skcipher *tfm)
 {
-	struct crypto_instance *inst = crypto_tfm_alg_instance(tfm);
-	struct crypto_spawn *spawn = crypto_instance_ctx(inst);
-	struct crypto_fpu_ctx *ctx = crypto_tfm_ctx(tfm);
-	struct crypto_blkcipher *cipher;
+	struct skcipher_instance *inst = skcipher_alg_instance(tfm);
+	struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct crypto_skcipher_spawn *spawn;
+	struct crypto_skcipher *cipher;
 
-	cipher = crypto_spawn_blkcipher(spawn);
+	spawn = skcipher_instance_ctx(inst);
+	cipher = crypto_spawn_skcipher(spawn);
 	if (IS_ERR(cipher))
 		return PTR_ERR(cipher);
 
 	ctx->child = cipher;
+
 	return 0;
 }
 
-static void crypto_fpu_exit_tfm(struct crypto_tfm *tfm)
+static void crypto_fpu_exit_tfm(struct crypto_skcipher *tfm)
+{
+	struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	crypto_free_skcipher(ctx->child);
+}
+
+static void crypto_fpu_free(struct skcipher_instance *inst)
 {
-	struct crypto_fpu_ctx *ctx = crypto_tfm_ctx(tfm);
-	crypto_free_blkcipher(ctx->child);
+	crypto_drop_skcipher(skcipher_instance_ctx(inst));
+	kfree(inst);
 }
 
-static struct crypto_instance *crypto_fpu_alloc(struct rtattr **tb)
+static int crypto_fpu_create(struct crypto_template *tmpl, struct rtattr **tb)
 {
-	struct crypto_instance *inst;
-	struct crypto_alg *alg;
+	struct crypto_skcipher_spawn *spawn;
+	struct skcipher_instance *inst;
+	struct crypto_attr_type *algt;
+	struct skcipher_alg *alg;
+	const char *cipher_name;
 	int err;
 
-	err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_BLKCIPHER);
+	algt = crypto_get_attr_type(tb);
+	if (IS_ERR(algt))
+		return PTR_ERR(algt);
+
+	if ((algt->type ^ (CRYPTO_ALG_INTERNAL | CRYPTO_ALG_TYPE_SKCIPHER)) &
+	    algt->mask)
+		return -EINVAL;
+
+	if (!(algt->mask & CRYPTO_ALG_INTERNAL))
+		return -EINVAL;
+
+	cipher_name = crypto_attr_alg_name(tb[1]);
+	if (IS_ERR(cipher_name))
+		return PTR_ERR(cipher_name);
+
+	inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
+	if (!inst)
+		return -ENOMEM;
+
+	spawn = skcipher_instance_ctx(inst);
+
+	crypto_set_skcipher_spawn(spawn, skcipher_crypto_instance(inst));
+	err = crypto_grab_skcipher(spawn, cipher_name, CRYPTO_ALG_INTERNAL,
+				   CRYPTO_ALG_INTERNAL | CRYPTO_ALG_ASYNC);
 	if (err)
-		return ERR_PTR(err);
-
-	alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_BLKCIPHER,
-				  CRYPTO_ALG_TYPE_MASK);
-	if (IS_ERR(alg))
-		return ERR_CAST(alg);
-
-	inst = crypto_alloc_instance("fpu", alg);
-	if (IS_ERR(inst))
-		goto out_put_alg;
-
-	inst->alg.cra_flags = alg->cra_flags;
-	inst->alg.cra_priority = alg->cra_priority;
-	inst->alg.cra_blocksize = alg->cra_blocksize;
-	inst->alg.cra_alignmask = alg->cra_alignmask;
-	inst->alg.cra_type = alg->cra_type;
-	inst->alg.cra_blkcipher.ivsize = alg->cra_blkcipher.ivsize;
-	inst->alg.cra_blkcipher.min_keysize = alg->cra_blkcipher.min_keysize;
-	inst->alg.cra_blkcipher.max_keysize = alg->cra_blkcipher.max_keysize;
-	inst->alg.cra_ctxsize = sizeof(struct crypto_fpu_ctx);
-	inst->alg.cra_init = crypto_fpu_init_tfm;
-	inst->alg.cra_exit = crypto_fpu_exit_tfm;
-	inst->alg.cra_blkcipher.setkey = crypto_fpu_setkey;
-	inst->alg.cra_blkcipher.encrypt = crypto_fpu_encrypt;
-	inst->alg.cra_blkcipher.decrypt = crypto_fpu_decrypt;
-
-out_put_alg:
-	crypto_mod_put(alg);
-	return inst;
-}
+		goto out_free_inst;
 
-static void crypto_fpu_free(struct crypto_instance *inst)
-{
-	crypto_drop_spawn(crypto_instance_ctx(inst));
+	alg = crypto_skcipher_spawn_alg(spawn);
+
+	err = crypto_inst_setname(skcipher_crypto_instance(inst), "fpu",
+				  &alg->base);
+	if (err)
+		goto out_drop_skcipher;
+
+	inst->alg.base.cra_flags = CRYPTO_ALG_INTERNAL;
+	inst->alg.base.cra_priority = alg->base.cra_priority;
+	inst->alg.base.cra_blocksize = alg->base.cra_blocksize;
+	inst->alg.base.cra_alignmask = alg->base.cra_alignmask;
+
+	inst->alg.ivsize = crypto_skcipher_alg_ivsize(alg);
+	inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(alg);
+	inst->alg.max_keysize = crypto_skcipher_alg_max_keysize(alg);
+
+	inst->alg.base.cra_ctxsize = sizeof(struct crypto_fpu_ctx);
+
+	inst->alg.init = crypto_fpu_init_tfm;
+	inst->alg.exit = crypto_fpu_exit_tfm;
+
+	inst->alg.setkey = crypto_fpu_setkey;
+	inst->alg.encrypt = crypto_fpu_encrypt;
+	inst->alg.decrypt = crypto_fpu_decrypt;
+
+	inst->free = crypto_fpu_free;
+
+	err = skcipher_register_instance(tmpl, inst);
+	if (err)
+		goto out_drop_skcipher;
+
+out:
+	return err;
+
+out_drop_skcipher:
+	crypto_drop_skcipher(spawn);
+out_free_inst:
 	kfree(inst);
+	goto out;
 }
 
 static struct crypto_template crypto_fpu_tmpl = {
 	.name = "fpu",
-	.alloc = crypto_fpu_alloc,
-	.free = crypto_fpu_free,
+	.create = crypto_fpu_create,
 	.module = THIS_MODULE,
 };
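
The conversion above turns the "fpu" template from the old blkcipher interface into a plain skcipher wrapper: setkey forwards key and flags to the child, while encrypt/decrypt build an on-stack sub-request and run the child between kernel_fpu_begin() and kernel_fpu_end(). Because that section must not sleep, the sub-request is set up with callback flags of 0 (no CRYPTO_TFM_REQ_MAY_SLEEP), and the child grabbed by crypto_fpu_create() has to be a synchronous, internal skcipher. The forwarding pattern, reduced to a minimal illustrative helper rather than taken verbatim from the patch, is:

/* illustrative only: run a synchronous child skcipher under the FPU */
static int fpu_forward_encrypt(struct crypto_skcipher *child,
			       struct skcipher_request *req)
{
	SKCIPHER_REQUEST_ON_STACK(subreq, child);
	int err;

	skcipher_request_set_tfm(subreq, child);
	/* no MAY_SLEEP: preemption is about to be disabled */
	skcipher_request_set_callback(subreq, 0, NULL, NULL);
	skcipher_request_set_crypt(subreq, req->src, req->dst,
				   req->cryptlen, req->iv);

	kernel_fpu_begin();
	err = crypto_skcipher_encrypt(subreq);
	kernel_fpu_end();

	skcipher_request_zero(subreq);
	return err;
}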
 

+ 73 - 1
arch/x86/crypto/glue_helper.c

@@ -27,10 +27,10 @@
 
 #include <linux/module.h>
 #include <crypto/b128ops.h>
+#include <crypto/internal/skcipher.h>
 #include <crypto/lrw.h>
 #include <crypto/xts.h>
 #include <asm/crypto/glue_helper.h>
-#include <crypto/scatterwalk.h>
 
 static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx,
 				   struct blkcipher_desc *desc,
@@ -339,6 +339,41 @@ done:
 	return nbytes;
 }
 
+static unsigned int __glue_xts_req_128bit(const struct common_glue_ctx *gctx,
+					  void *ctx,
+					  struct skcipher_walk *walk)
+{
+	const unsigned int bsize = 128 / 8;
+	unsigned int nbytes = walk->nbytes;
+	u128 *src = walk->src.virt.addr;
+	u128 *dst = walk->dst.virt.addr;
+	unsigned int num_blocks, func_bytes;
+	unsigned int i;
+
+	/* Process multi-block batch */
+	for (i = 0; i < gctx->num_funcs; i++) {
+		num_blocks = gctx->funcs[i].num_blocks;
+		func_bytes = bsize * num_blocks;
+
+		if (nbytes >= func_bytes) {
+			do {
+				gctx->funcs[i].fn_u.xts(ctx, dst, src,
+							walk->iv);
+
+				src += num_blocks;
+				dst += num_blocks;
+				nbytes -= func_bytes;
+			} while (nbytes >= func_bytes);
+
+			if (nbytes < bsize)
+				goto done;
+		}
+	}
+
+done:
+	return nbytes;
+}
+
 /* for implementations implementing faster XTS IV generator */
 int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx,
 			  struct blkcipher_desc *desc, struct scatterlist *dst,
@@ -379,6 +414,43 @@ int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx,
 }
 EXPORT_SYMBOL_GPL(glue_xts_crypt_128bit);
 
+int glue_xts_req_128bit(const struct common_glue_ctx *gctx,
+			struct skcipher_request *req,
+			common_glue_func_t tweak_fn, void *tweak_ctx,
+			void *crypt_ctx)
+{
+	const unsigned int bsize = 128 / 8;
+	struct skcipher_walk walk;
+	bool fpu_enabled = false;
+	unsigned int nbytes;
+	int err;
+
+	err = skcipher_walk_virt(&walk, req, false);
+	nbytes = walk.nbytes;
+	if (!nbytes)
+		return err;
+
+	/* set minimum length to bsize, for tweak_fn */
+	fpu_enabled = glue_skwalk_fpu_begin(bsize, gctx->fpu_blocks_limit,
+					    &walk, fpu_enabled,
+					    nbytes < bsize ? bsize : nbytes);
+
+	/* calculate first value of T */
+	tweak_fn(tweak_ctx, walk.iv, walk.iv);
+
+	while (nbytes) {
+		nbytes = __glue_xts_req_128bit(gctx, crypt_ctx, &walk);
+
+		err = skcipher_walk_done(&walk, nbytes);
+		nbytes = walk.nbytes;
+	}
+
+	glue_fpu_end(fpu_enabled);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(glue_xts_req_128bit);
+
 void glue_xts_crypt_128bit_one(void *ctx, u128 *dst, const u128 *src, le128 *iv,
 			       common_glue_func_t fn)
 {
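
glue_xts_req_128bit() is the skcipher_request counterpart of glue_xts_crypt_128bit(): it walks the request virtually, computes the initial tweak with tweak_fn, batches kernel_fpu_begin()/end() through glue_skwalk_fpu_begin(), and hands 16-byte blocks to the widest available XTS routine. A hypothetical caller (every foo_* name below is an assumption, not code from this patch) would wire it into its skcipher encrypt handler roughly like this:

#include <crypto/internal/skcipher.h>
#include <asm/crypto/glue_helper.h>

/* assumed single-block primitives provided elsewhere by the driver */
static void foo_encrypt_block(void *ctx, u8 *dst, const u8 *src);
static void foo_xts_encrypt_block(void *ctx, u128 *dst, const u128 *src,
				  le128 *iv);

static const struct common_glue_ctx foo_enc_xts = {
	.num_funcs = 1,
	.fpu_blocks_limit = 1,
	.funcs = { {
		.num_blocks = 1,
		.fn_u = { .xts = foo_xts_encrypt_block }
	} }
};

struct foo_xts_ctx {
	u8 raw_tweak_ctx[64];	/* keyed tweak state, size is an assumption */
	u8 raw_crypt_ctx[64];	/* keyed data state, size is an assumption */
};

static int foo_xts_encrypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct foo_xts_ctx *ctx = crypto_skcipher_ctx(tfm);

	return glue_xts_req_128bit(&foo_enc_xts, req, foo_encrypt_block,
				   ctx->raw_tweak_ctx, ctx->raw_crypt_ctx);
}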

+ 1 - 1
arch/x86/crypto/sha1-mb/sha1_mb.c

@@ -114,7 +114,7 @@ static inline void sha1_init_digest(uint32_t *digest)
 }
 
 static inline uint32_t sha1_pad(uint8_t padblock[SHA1_BLOCK_SIZE * 2],
-			 uint32_t total_len)
+			 uint64_t total_len)
 {
 	uint32_t i = total_len & (SHA1_BLOCK_SIZE - 1);
 

+ 1 - 1
arch/x86/crypto/sha1-mb/sha1_mb_ctx.h

@@ -125,7 +125,7 @@ struct sha1_hash_ctx {
 	/* error flag */
 	int error;
 
-	uint32_t	total_length;
+	uint64_t	total_length;
 	const void	*incoming_buffer;
 	uint32_t	incoming_buffer_length;
 	uint8_t		partial_block_buffer[SHA1_BLOCK_SIZE * 2];

+ 1 - 1
arch/x86/crypto/sha256-mb/sha256_mb.c

@@ -115,7 +115,7 @@ inline void sha256_init_digest(uint32_t *digest)
 }
 
 inline uint32_t sha256_pad(uint8_t padblock[SHA256_BLOCK_SIZE * 2],
-			 uint32_t total_len)
+			 uint64_t total_len)
 {
 	uint32_t i = total_len & (SHA256_BLOCK_SIZE - 1);
 

+ 1 - 1
arch/x86/crypto/sha256-mb/sha256_mb_ctx.h

@@ -125,7 +125,7 @@ struct sha256_hash_ctx {
 	/* error flag */
 	int error;
 
-	uint32_t	total_length;
+	uint64_t	total_length;
 	const void	*incoming_buffer;
 	uint32_t	incoming_buffer_length;
 	uint8_t		partial_block_buffer[SHA256_BLOCK_SIZE * 2];

+ 1 - 1
arch/x86/crypto/sha512-mb/sha512_mb.c

@@ -117,7 +117,7 @@ inline void sha512_init_digest(uint64_t *digest)
 }
 
 inline uint32_t sha512_pad(uint8_t padblock[SHA512_BLOCK_SIZE * 2],
-			 uint32_t total_len)
+			 uint64_t total_len)
 {
 	uint32_t i = total_len & (SHA512_BLOCK_SIZE - 1);
 

+ 1 - 1
arch/x86/crypto/sha512-mb/sha512_mb_ctx.h

@@ -119,7 +119,7 @@ struct sha512_hash_ctx {
 	/* error flag */
 	int error;
 
-	uint32_t        total_length;
+	uint64_t        total_length;
 	const void      *incoming_buffer;
 	uint32_t        incoming_buffer_length;
 	uint8_t         partial_block_buffer[SHA512_BLOCK_SIZE * 2];
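
The four sha*-mb hunks above all make the same change: the running byte count kept in the multibuffer hash context, and the total_len argument the pad helpers derive the trailing length field from, grow from uint32_t to uint64_t. The final block stores the message length in bits, so 32-bit arithmetic either wraps outright for long inputs or overflows as soon as the byte count is shifted left by three; 64-bit arithmetic keeps the value exact. A small stand-alone illustration of the difference (ordinary user-space C, not kernel code):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t len32 = 512U * 1024 * 1024;	/* 512 MiB of input */
	uint64_t len64 = len32;

	uint32_t bits32 = len32 << 3;		/* wraps to 0 */
	uint64_t bits64 = len64 << 3;		/* 4294967296 */

	printf("32-bit bit count: %u\n", bits32);
	printf("64-bit bit count: %llu\n", (unsigned long long)bits64);
	return 0;
}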

+ 38 - 1
arch/x86/include/asm/crypto/glue_helper.h

@@ -5,8 +5,8 @@
 #ifndef _CRYPTO_GLUE_HELPER_H
 #define _CRYPTO_GLUE_HELPER_H
 
+#include <crypto/internal/skcipher.h>
 #include <linux/kernel.h>
-#include <linux/crypto.h>
 #include <asm/fpu/api.h>
 #include <crypto/b128ops.h>
 
@@ -69,6 +69,31 @@ static inline bool glue_fpu_begin(unsigned int bsize, int fpu_blocks_limit,
 	return true;
 }
 
+static inline bool glue_skwalk_fpu_begin(unsigned int bsize,
+					 int fpu_blocks_limit,
+					 struct skcipher_walk *walk,
+					 bool fpu_enabled, unsigned int nbytes)
+{
+	if (likely(fpu_blocks_limit < 0))
+		return false;
+
+	if (fpu_enabled)
+		return true;
+
+	/*
+	 * Vector-registers are only used when chunk to be processed is large
+	 * enough, so do not enable FPU until it is necessary.
+	 */
+	if (nbytes < bsize * (unsigned int)fpu_blocks_limit)
+		return false;
+
+	/* prevent sleeping if FPU is in use */
+	skcipher_walk_atomise(walk);
+
+	kernel_fpu_begin();
+	return true;
+}
+
 static inline void glue_fpu_end(bool fpu_enabled)
 {
 	if (fpu_enabled)
@@ -139,6 +164,18 @@ extern int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx,
 				 common_glue_func_t tweak_fn, void *tweak_ctx,
 				 void *crypt_ctx);
 
+extern int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx,
+				 struct blkcipher_desc *desc,
+				 struct scatterlist *dst,
+				 struct scatterlist *src, unsigned int nbytes,
+				 common_glue_func_t tweak_fn, void *tweak_ctx,
+				 void *crypt_ctx);
+
+extern int glue_xts_req_128bit(const struct common_glue_ctx *gctx,
+			       struct skcipher_request *req,
+			       common_glue_func_t tweak_fn, void *tweak_ctx,
+			       void *crypt_ctx);
+
 extern void glue_xts_crypt_128bit_one(void *ctx, u128 *dst, const u128 *src,
 				      le128 *iv, common_glue_func_t fn);
 

+ 79 - 2
crypto/842.c

@@ -31,11 +31,46 @@
 #include <linux/module.h>
 #include <linux/crypto.h>
 #include <linux/sw842.h>
+#include <crypto/internal/scompress.h>
 
 struct crypto842_ctx {
-	char wmem[SW842_MEM_COMPRESS];	/* working memory for compress */
+	void *wmem;	/* working memory for compress */
 };
 
+static void *crypto842_alloc_ctx(struct crypto_scomp *tfm)
+{
+	void *ctx;
+
+	ctx = kmalloc(SW842_MEM_COMPRESS, GFP_KERNEL);
+	if (!ctx)
+		return ERR_PTR(-ENOMEM);
+
+	return ctx;
+}
+
+static int crypto842_init(struct crypto_tfm *tfm)
+{
+	struct crypto842_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	ctx->wmem = crypto842_alloc_ctx(NULL);
+	if (IS_ERR(ctx->wmem))
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void crypto842_free_ctx(struct crypto_scomp *tfm, void *ctx)
+{
+	kfree(ctx);
+}
+
+static void crypto842_exit(struct crypto_tfm *tfm)
+{
+	struct crypto842_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	crypto842_free_ctx(NULL, ctx->wmem);
+}
+
 static int crypto842_compress(struct crypto_tfm *tfm,
 			      const u8 *src, unsigned int slen,
 			      u8 *dst, unsigned int *dlen)
@@ -45,6 +80,13 @@ static int crypto842_compress(struct crypto_tfm *tfm,
 	return sw842_compress(src, slen, dst, dlen, ctx->wmem);
 }
 
+static int crypto842_scompress(struct crypto_scomp *tfm,
+			       const u8 *src, unsigned int slen,
+			       u8 *dst, unsigned int *dlen, void *ctx)
+{
+	return sw842_compress(src, slen, dst, dlen, ctx);
+}
+
 static int crypto842_decompress(struct crypto_tfm *tfm,
 				const u8 *src, unsigned int slen,
 				u8 *dst, unsigned int *dlen)
@@ -52,6 +94,13 @@ static int crypto842_decompress(struct crypto_tfm *tfm,
 	return sw842_decompress(src, slen, dst, dlen);
 }
 
+static int crypto842_sdecompress(struct crypto_scomp *tfm,
+				 const u8 *src, unsigned int slen,
+				 u8 *dst, unsigned int *dlen, void *ctx)
+{
+	return sw842_decompress(src, slen, dst, dlen);
+}
+
 static struct crypto_alg alg = {
 	.cra_name		= "842",
 	.cra_driver_name	= "842-generic",
@@ -59,20 +108,48 @@ static struct crypto_alg alg = {
 	.cra_flags		= CRYPTO_ALG_TYPE_COMPRESS,
 	.cra_ctxsize		= sizeof(struct crypto842_ctx),
 	.cra_module		= THIS_MODULE,
+	.cra_init		= crypto842_init,
+	.cra_exit		= crypto842_exit,
 	.cra_u			= { .compress = {
 	.coa_compress		= crypto842_compress,
 	.coa_decompress		= crypto842_decompress } }
 };
 
+static struct scomp_alg scomp = {
+	.alloc_ctx		= crypto842_alloc_ctx,
+	.free_ctx		= crypto842_free_ctx,
+	.compress		= crypto842_scompress,
+	.decompress		= crypto842_sdecompress,
+	.base			= {
+		.cra_name	= "842",
+		.cra_driver_name = "842-scomp",
+		.cra_priority	 = 100,
+		.cra_module	 = THIS_MODULE,
+	}
+};
+
 static int __init crypto842_mod_init(void)
 {
-	return crypto_register_alg(&alg);
+	int ret;
+
+	ret = crypto_register_alg(&alg);
+	if (ret)
+		return ret;
+
+	ret = crypto_register_scomp(&scomp);
+	if (ret) {
+		crypto_unregister_alg(&alg);
+		return ret;
+	}
+
+	return ret;
 }
 module_init(crypto842_mod_init);
 
 static void __exit crypto842_mod_exit(void)
 {
 	crypto_unregister_alg(&alg);
+	crypto_unregister_scomp(&scomp);
 }
 module_exit(crypto842_mod_exit);
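
Besides keeping the legacy crypto_alg entry, the driver now also exposes the same sw842 backend through the new synchronous-compression (scomp) interface. The key difference is that working memory is no longer part of the tfm context: alloc_ctx() hands back a per-request buffer that is later passed as the ctx argument of compress()/decompress() and released by free_ctx(), so a single tfm can serve many requests in flight. The same shape, sketched for a trivial hypothetical "copy" compressor (illustrative only, not a real algorithm):

#include <linux/errno.h>
#include <linux/module.h>
#include <linux/string.h>
#include <crypto/internal/scompress.h>

static void *copy_alloc_ctx(struct crypto_scomp *tfm)
{
	return NULL;	/* this backend needs no scratch memory */
}

static void copy_free_ctx(struct crypto_scomp *tfm, void *ctx)
{
}

static int copy_compress(struct crypto_scomp *tfm, const u8 *src,
			 unsigned int slen, u8 *dst, unsigned int *dlen,
			 void *ctx)
{
	if (*dlen < slen)
		return -ENOSPC;
	memcpy(dst, src, slen);
	*dlen = slen;
	return 0;
}

static struct scomp_alg copy_scomp = {
	.alloc_ctx	= copy_alloc_ctx,
	.free_ctx	= copy_free_ctx,
	.compress	= copy_compress,
	.decompress	= copy_compress,	/* copying both ways */
	.base		= {
		.cra_name	 = "copy",
		.cra_driver_name = "copy-scomp",
		.cra_module	 = THIS_MODULE,
	}
};

/* registered with crypto_register_scomp(&copy_scomp) from module init */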
 

+ 25 - 7
crypto/Kconfig

@@ -24,7 +24,7 @@ comment "Crypto core or helper"
 config CRYPTO_FIPS
 	bool "FIPS 200 compliance"
 	depends on (CRYPTO_ANSI_CPRNG || CRYPTO_DRBG) && !CRYPTO_MANAGER_DISABLE_TESTS
-	depends on MODULE_SIG
+	depends on (MODULE_SIG || !MODULES)
 	help
 	  This options enables the fips boot option which is
 	  required if you want to system to operate in a FIPS 200
@@ -102,6 +102,15 @@ config CRYPTO_KPP
 	select CRYPTO_ALGAPI
 	select CRYPTO_KPP2
 
+config CRYPTO_ACOMP2
+	tristate
+	select CRYPTO_ALGAPI2
+
+config CRYPTO_ACOMP
+	tristate
+	select CRYPTO_ALGAPI
+	select CRYPTO_ACOMP2
+
 config CRYPTO_RSA
 	tristate "RSA algorithm"
 	select CRYPTO_AKCIPHER
@@ -138,6 +147,7 @@ config CRYPTO_MANAGER2
 	select CRYPTO_BLKCIPHER2
 	select CRYPTO_AKCIPHER2
 	select CRYPTO_KPP2
+	select CRYPTO_ACOMP2
 
 config CRYPTO_USER
 	tristate "Userspace cryptographic algorithm configuration"
@@ -236,10 +246,14 @@ config CRYPTO_ABLK_HELPER
 	tristate
 	select CRYPTO_CRYPTD
 
+config CRYPTO_SIMD
+	tristate
+	select CRYPTO_CRYPTD
+
 config CRYPTO_GLUE_HELPER_X86
 	tristate
 	depends on X86
-	select CRYPTO_ALGAPI
+	select CRYPTO_BLKCIPHER
 
 config CRYPTO_ENGINE
 	tristate
@@ -437,7 +451,7 @@ config CRYPTO_CRC32C_INTEL
 	  gain performance compared with software implementation.
 	  Module will be crc32c-intel.
 
-config CRYPT_CRC32C_VPMSUM
+config CRYPTO_CRC32C_VPMSUM
 	tristate "CRC32c CRC algorithm (powerpc64)"
 	depends on PPC64 && ALTIVEC
 	select CRYPTO_HASH
@@ -928,14 +942,13 @@ config CRYPTO_AES_X86_64
 config CRYPTO_AES_NI_INTEL
 	tristate "AES cipher algorithms (AES-NI)"
 	depends on X86
+	select CRYPTO_AEAD
 	select CRYPTO_AES_X86_64 if 64BIT
 	select CRYPTO_AES_586 if !64BIT
-	select CRYPTO_CRYPTD
-	select CRYPTO_ABLK_HELPER
 	select CRYPTO_ALGAPI
+	select CRYPTO_BLKCIPHER
 	select CRYPTO_GLUE_HELPER_X86 if 64BIT
-	select CRYPTO_LRW
-	select CRYPTO_XTS
+	select CRYPTO_SIMD
 	help
 	  Use Intel AES-NI instructions for AES algorithm.
 
@@ -1568,6 +1581,7 @@ comment "Compression"
 config CRYPTO_DEFLATE
 	tristate "Deflate compression algorithm"
 	select CRYPTO_ALGAPI
+	select CRYPTO_ACOMP2
 	select ZLIB_INFLATE
 	select ZLIB_DEFLATE
 	help
@@ -1579,6 +1593,7 @@ config CRYPTO_DEFLATE
 config CRYPTO_LZO
 	tristate "LZO compression algorithm"
 	select CRYPTO_ALGAPI
+	select CRYPTO_ACOMP2
 	select LZO_COMPRESS
 	select LZO_DECOMPRESS
 	help
@@ -1587,6 +1602,7 @@ config CRYPTO_LZO
 config CRYPTO_842
 	tristate "842 compression algorithm"
 	select CRYPTO_ALGAPI
+	select CRYPTO_ACOMP2
 	select 842_COMPRESS
 	select 842_DECOMPRESS
 	help
@@ -1595,6 +1611,7 @@ config CRYPTO_842
 config CRYPTO_LZ4
 	tristate "LZ4 compression algorithm"
 	select CRYPTO_ALGAPI
+	select CRYPTO_ACOMP2
 	select LZ4_COMPRESS
 	select LZ4_DECOMPRESS
 	help
@@ -1603,6 +1620,7 @@ config CRYPTO_LZ4
 config CRYPTO_LZ4HC
 	tristate "LZ4HC compression algorithm"
 	select CRYPTO_ALGAPI
+	select CRYPTO_ACOMP2
 	select LZ4HC_COMPRESS
 	select LZ4_DECOMPRESS
 	help

+ 6 - 0
crypto/Makefile

@@ -51,6 +51,10 @@ rsa_generic-y += rsa_helper.o
 rsa_generic-y += rsa-pkcs1pad.o
 obj-$(CONFIG_CRYPTO_RSA) += rsa_generic.o
 
+crypto_acompress-y := acompress.o
+crypto_acompress-y += scompress.o
+obj-$(CONFIG_CRYPTO_ACOMP2) += crypto_acompress.o
+
 cryptomgr-y := algboss.o testmgr.o
 
 obj-$(CONFIG_CRYPTO_MANAGER2) += cryptomgr.o
@@ -139,3 +143,5 @@ obj-$(CONFIG_ASYNC_CORE) += async_tx/
 obj-$(CONFIG_ASYMMETRIC_KEY_TYPE) += asymmetric_keys/
 obj-$(CONFIG_CRYPTO_HASH_INFO) += hash_info.o
 obj-$(CONFIG_CRYPTO_ABLK_HELPER) += ablk_helper.o
+crypto_simd-y := simd.o
+obj-$(CONFIG_CRYPTO_SIMD) += crypto_simd.o

+ 169 - 0
crypto/acompress.c

@@ -0,0 +1,169 @@
+/*
+ * Asynchronous Compression operations
+ *
+ * Copyright (c) 2016, Intel Corporation
+ * Authors: Weigang Li <weigang.li@intel.com>
+ *          Giovanni Cabiddu <giovanni.cabiddu@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/crypto.h>
+#include <crypto/algapi.h>
+#include <linux/cryptouser.h>
+#include <net/netlink.h>
+#include <crypto/internal/acompress.h>
+#include <crypto/internal/scompress.h>
+#include "internal.h"
+
+static const struct crypto_type crypto_acomp_type;
+
+#ifdef CONFIG_NET
+static int crypto_acomp_report(struct sk_buff *skb, struct crypto_alg *alg)
+{
+	struct crypto_report_acomp racomp;
+
+	strncpy(racomp.type, "acomp", sizeof(racomp.type));
+
+	if (nla_put(skb, CRYPTOCFGA_REPORT_ACOMP,
+		    sizeof(struct crypto_report_acomp), &racomp))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+#else
+static int crypto_acomp_report(struct sk_buff *skb, struct crypto_alg *alg)
+{
+	return -ENOSYS;
+}
+#endif
+
+static void crypto_acomp_show(struct seq_file *m, struct crypto_alg *alg)
+	__attribute__ ((unused));
+
+static void crypto_acomp_show(struct seq_file *m, struct crypto_alg *alg)
+{
+	seq_puts(m, "type         : acomp\n");
+}
+
+static void crypto_acomp_exit_tfm(struct crypto_tfm *tfm)
+{
+	struct crypto_acomp *acomp = __crypto_acomp_tfm(tfm);
+	struct acomp_alg *alg = crypto_acomp_alg(acomp);
+
+	alg->exit(acomp);
+}
+
+static int crypto_acomp_init_tfm(struct crypto_tfm *tfm)
+{
+	struct crypto_acomp *acomp = __crypto_acomp_tfm(tfm);
+	struct acomp_alg *alg = crypto_acomp_alg(acomp);
+
+	if (tfm->__crt_alg->cra_type != &crypto_acomp_type)
+		return crypto_init_scomp_ops_async(tfm);
+
+	acomp->compress = alg->compress;
+	acomp->decompress = alg->decompress;
+	acomp->dst_free = alg->dst_free;
+	acomp->reqsize = alg->reqsize;
+
+	if (alg->exit)
+		acomp->base.exit = crypto_acomp_exit_tfm;
+
+	if (alg->init)
+		return alg->init(acomp);
+
+	return 0;
+}
+
+static unsigned int crypto_acomp_extsize(struct crypto_alg *alg)
+{
+	int extsize = crypto_alg_extsize(alg);
+
+	if (alg->cra_type != &crypto_acomp_type)
+		extsize += sizeof(struct crypto_scomp *);
+
+	return extsize;
+}
+
+static const struct crypto_type crypto_acomp_type = {
+	.extsize = crypto_acomp_extsize,
+	.init_tfm = crypto_acomp_init_tfm,
+#ifdef CONFIG_PROC_FS
+	.show = crypto_acomp_show,
+#endif
+	.report = crypto_acomp_report,
+	.maskclear = ~CRYPTO_ALG_TYPE_MASK,
+	.maskset = CRYPTO_ALG_TYPE_ACOMPRESS_MASK,
+	.type = CRYPTO_ALG_TYPE_ACOMPRESS,
+	.tfmsize = offsetof(struct crypto_acomp, base),
+};
+
+struct crypto_acomp *crypto_alloc_acomp(const char *alg_name, u32 type,
+					u32 mask)
+{
+	return crypto_alloc_tfm(alg_name, &crypto_acomp_type, type, mask);
+}
+EXPORT_SYMBOL_GPL(crypto_alloc_acomp);
+
+struct acomp_req *acomp_request_alloc(struct crypto_acomp *acomp)
+{
+	struct crypto_tfm *tfm = crypto_acomp_tfm(acomp);
+	struct acomp_req *req;
+
+	req = __acomp_request_alloc(acomp);
+	if (req && (tfm->__crt_alg->cra_type != &crypto_acomp_type))
+		return crypto_acomp_scomp_alloc_ctx(req);
+
+	return req;
+}
+EXPORT_SYMBOL_GPL(acomp_request_alloc);
+
+void acomp_request_free(struct acomp_req *req)
+{
+	struct crypto_acomp *acomp = crypto_acomp_reqtfm(req);
+	struct crypto_tfm *tfm = crypto_acomp_tfm(acomp);
+
+	if (tfm->__crt_alg->cra_type != &crypto_acomp_type)
+		crypto_acomp_scomp_free_ctx(req);
+
+	if (req->flags & CRYPTO_ACOMP_ALLOC_OUTPUT) {
+		acomp->dst_free(req->dst);
+		req->dst = NULL;
+	}
+
+	__acomp_request_free(req);
+}
+EXPORT_SYMBOL_GPL(acomp_request_free);
+
+int crypto_register_acomp(struct acomp_alg *alg)
+{
+	struct crypto_alg *base = &alg->base;
+
+	base->cra_type = &crypto_acomp_type;
+	base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK;
+	base->cra_flags |= CRYPTO_ALG_TYPE_ACOMPRESS;
+
+	return crypto_register_alg(base);
+}
+EXPORT_SYMBOL_GPL(crypto_register_acomp);
+
+int crypto_unregister_acomp(struct acomp_alg *alg)
+{
+	return crypto_unregister_alg(&alg->base);
+}
+EXPORT_SYMBOL_GPL(crypto_unregister_acomp);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Asynchronous compression type");
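
From the user side the new interface mirrors the other asynchronous request APIs: allocate a transform with crypto_alloc_acomp(), attach source and destination scatterlists plus a completion callback to an acomp_req, and call crypto_acomp_compress() or crypto_acomp_decompress(); -EINPROGRESS or -EBUSY means the callback will deliver the result. A hedged sketch of a synchronous caller built on a completion (buffers are assumed to be kmalloc'ed lowmem, error handling is trimmed, and "deflate" is only an example name):

#include <crypto/acompress.h>
#include <linux/completion.h>
#include <linux/err.h>
#include <linux/scatterlist.h>

struct acomp_wait {
	struct completion done;
	int err;
};

static void acomp_wait_done(struct crypto_async_request *req, int err)
{
	struct acomp_wait *w = req->data;

	w->err = err;
	complete(&w->done);
}

static int compress_buf(const void *src, unsigned int slen,
			void *dst, unsigned int *dlen)
{
	struct scatterlist sg_src, sg_dst;
	struct crypto_acomp *tfm;
	struct acomp_req *req;
	struct acomp_wait wait;
	int err;

	tfm = crypto_alloc_acomp("deflate", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	req = acomp_request_alloc(tfm);
	if (!req) {
		crypto_free_acomp(tfm);
		return -ENOMEM;
	}

	sg_init_one(&sg_src, src, slen);
	sg_init_one(&sg_dst, dst, *dlen);

	init_completion(&wait.done);
	acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
				   acomp_wait_done, &wait);
	acomp_request_set_params(req, &sg_src, &sg_dst, slen, *dlen);

	err = crypto_acomp_compress(req);
	if (err == -EINPROGRESS || err == -EBUSY) {
		wait_for_completion(&wait.done);
		err = wait.err;
	}
	if (!err)
		*dlen = req->dlen;	/* bytes actually produced */

	acomp_request_free(req);
	crypto_free_acomp(tfm);
	return err;
}

Because crypto_acomp_init_tfm() falls back to crypto_init_scomp_ops_async() when the provider is not a native acomp algorithm, the same caller works unchanged whether the name resolves to an acomp driver or to an scomp implementation wrapped by the core.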

+ 2 - 6
crypto/algboss.c

@@ -247,12 +247,8 @@ static int cryptomgr_schedule_test(struct crypto_alg *alg)
 	memcpy(param->alg, alg->cra_name, sizeof(param->alg));
 	type = alg->cra_flags;
 
-	/* This piece of crap needs to disappear into per-type test hooks. */
-	if (!((type ^ CRYPTO_ALG_TYPE_BLKCIPHER) &
-	      CRYPTO_ALG_TYPE_BLKCIPHER_MASK) && !(type & CRYPTO_ALG_GENIV) &&
-	    ((alg->cra_flags & CRYPTO_ALG_TYPE_MASK) ==
-	     CRYPTO_ALG_TYPE_BLKCIPHER ? alg->cra_blkcipher.ivsize :
-					 alg->cra_ablkcipher.ivsize))
+	/* Do not test internal algorithms. */
+	if (type & CRYPTO_ALG_INTERNAL)
 		type |= CRYPTO_ALG_TESTED;
 
 	param->type = type;

+ 3 - 2
crypto/algif_aead.c

@@ -454,12 +454,13 @@ static int aead_recvmsg_async(struct socket *sock, struct msghdr *msg,
 	used -= ctx->aead_assoclen;
 
 	/* take over all tx sgls from ctx */
-	areq->tsgl = sock_kmalloc(sk, sizeof(*areq->tsgl) * sgl->cur,
+	areq->tsgl = sock_kmalloc(sk,
+				  sizeof(*areq->tsgl) * max_t(u32, sgl->cur, 1),
 				  GFP_KERNEL);
 	if (unlikely(!areq->tsgl))
 		goto free;
 
-	sg_init_table(areq->tsgl, sgl->cur);
+	sg_init_table(areq->tsgl, max_t(u32, sgl->cur, 1));
 	for (i = 0; i < sgl->cur; i++)
 		sg_set_page(&areq->tsgl[i], sg_page(&sgl->sg[i]),
 			    sgl->sg[i].length, sgl->sg[i].offset);

+ 3 - 1
crypto/algif_skcipher.c

@@ -566,8 +566,10 @@ static int skcipher_recvmsg_async(struct socket *sock, struct msghdr *msg,
 			 * need to expand */
 			tmp = kcalloc(tx_nents * 2, sizeof(*tmp),
 				      GFP_KERNEL);
-			if (!tmp)
+			if (!tmp) {
+				err = -ENOMEM;
 				goto free;
+			}
 
 			sg_init_table(tmp, tx_nents * 2);
 			for (x = 0; x < tx_nents; x++)

+ 3 - 19
crypto/api.c

@@ -211,8 +211,8 @@ struct crypto_alg *crypto_larval_lookup(const char *name, u32 type, u32 mask)
 	if (!name)
 		return ERR_PTR(-ENOENT);
 
+	type &= ~(CRYPTO_ALG_LARVAL | CRYPTO_ALG_DEAD);
 	mask &= ~(CRYPTO_ALG_LARVAL | CRYPTO_ALG_DEAD);
-	type &= mask;
 
 	alg = crypto_alg_lookup(name, type, mask);
 	if (!alg) {
@@ -310,24 +310,8 @@ static void crypto_exit_ops(struct crypto_tfm *tfm)
 {
 	const struct crypto_type *type = tfm->__crt_alg->cra_type;
 
-	if (type) {
-		if (tfm->exit)
-			tfm->exit(tfm);
-		return;
-	}
-
-	switch (crypto_tfm_alg_type(tfm)) {
-	case CRYPTO_ALG_TYPE_CIPHER:
-		crypto_exit_cipher_ops(tfm);
-		break;
-
-	case CRYPTO_ALG_TYPE_COMPRESS:
-		crypto_exit_compress_ops(tfm);
-		break;
-
-	default:
-		BUG();
-	}
+	if (type && tfm->exit)
+		tfm->exit(tfm);
 }
 
 static unsigned int crypto_ctxsize(struct crypto_alg *alg, u32 type, u32 mask)

+ 4 - 4
crypto/authenc.c

@@ -324,7 +324,7 @@ static int crypto_authenc_init_tfm(struct crypto_aead *tfm)
 	if (IS_ERR(auth))
 		return PTR_ERR(auth);
 
-	enc = crypto_spawn_skcipher2(&ictx->enc);
+	enc = crypto_spawn_skcipher(&ictx->enc);
 	err = PTR_ERR(enc);
 	if (IS_ERR(enc))
 		goto err_free_ahash;
@@ -420,9 +420,9 @@ static int crypto_authenc_create(struct crypto_template *tmpl,
 		goto err_free_inst;
 
 	crypto_set_skcipher_spawn(&ctx->enc, aead_crypto_instance(inst));
-	err = crypto_grab_skcipher2(&ctx->enc, enc_name, 0,
-				    crypto_requires_sync(algt->type,
-							 algt->mask));
+	err = crypto_grab_skcipher(&ctx->enc, enc_name, 0,
+				   crypto_requires_sync(algt->type,
+							algt->mask));
 	if (err)
 		goto err_drop_auth;
 

+ 4 - 4
crypto/authencesn.c

@@ -342,7 +342,7 @@ static int crypto_authenc_esn_init_tfm(struct crypto_aead *tfm)
 	if (IS_ERR(auth))
 		return PTR_ERR(auth);
 
-	enc = crypto_spawn_skcipher2(&ictx->enc);
+	enc = crypto_spawn_skcipher(&ictx->enc);
 	err = PTR_ERR(enc);
 	if (IS_ERR(enc))
 		goto err_free_ahash;
@@ -441,9 +441,9 @@ static int crypto_authenc_esn_create(struct crypto_template *tmpl,
 		goto err_free_inst;
 
 	crypto_set_skcipher_spawn(&ctx->enc, aead_crypto_instance(inst));
-	err = crypto_grab_skcipher2(&ctx->enc, enc_name, 0,
-				    crypto_requires_sync(algt->type,
-							 algt->mask));
+	err = crypto_grab_skcipher(&ctx->enc, enc_name, 0,
+				   crypto_requires_sync(algt->type,
+							algt->mask));
 	if (err)
 		goto err_drop_auth;
 

+ 89 - 180
crypto/cbc.c

@@ -1,7 +1,7 @@
 /*
  * CBC: Cipher Block Chaining mode
  *
- * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
+ * Copyright (c) 2006-2016 Herbert Xu <herbert@gondor.apana.org.au>
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the Free
@@ -10,191 +10,78 @@
  *
  */
 
-#include <crypto/algapi.h>
+#include <crypto/cbc.h>
+#include <crypto/internal/skcipher.h>
 #include <linux/err.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/log2.h>
 #include <linux/module.h>
-#include <linux/scatterlist.h>
 #include <linux/slab.h>
 
 struct crypto_cbc_ctx {
 	struct crypto_cipher *child;
 };
 
-static int crypto_cbc_setkey(struct crypto_tfm *parent, const u8 *key,
+static int crypto_cbc_setkey(struct crypto_skcipher *parent, const u8 *key,
 			     unsigned int keylen)
 {
-	struct crypto_cbc_ctx *ctx = crypto_tfm_ctx(parent);
+	struct crypto_cbc_ctx *ctx = crypto_skcipher_ctx(parent);
 	struct crypto_cipher *child = ctx->child;
 	int err;
 
 	crypto_cipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
-	crypto_cipher_set_flags(child, crypto_tfm_get_flags(parent) &
+	crypto_cipher_set_flags(child, crypto_skcipher_get_flags(parent) &
 				       CRYPTO_TFM_REQ_MASK);
 	err = crypto_cipher_setkey(child, key, keylen);
-	crypto_tfm_set_flags(parent, crypto_cipher_get_flags(child) &
-				     CRYPTO_TFM_RES_MASK);
+	crypto_skcipher_set_flags(parent, crypto_cipher_get_flags(child) &
+					  CRYPTO_TFM_RES_MASK);
 	return err;
 }
 
-static int crypto_cbc_encrypt_segment(struct blkcipher_desc *desc,
-				      struct blkcipher_walk *walk,
-				      struct crypto_cipher *tfm)
+static inline void crypto_cbc_encrypt_one(struct crypto_skcipher *tfm,
+					  const u8 *src, u8 *dst)
 {
-	void (*fn)(struct crypto_tfm *, u8 *, const u8 *) =
-		crypto_cipher_alg(tfm)->cia_encrypt;
-	int bsize = crypto_cipher_blocksize(tfm);
-	unsigned int nbytes = walk->nbytes;
-	u8 *src = walk->src.virt.addr;
-	u8 *dst = walk->dst.virt.addr;
-	u8 *iv = walk->iv;
-
-	do {
-		crypto_xor(iv, src, bsize);
-		fn(crypto_cipher_tfm(tfm), dst, iv);
-		memcpy(iv, dst, bsize);
-
-		src += bsize;
-		dst += bsize;
-	} while ((nbytes -= bsize) >= bsize);
-
-	return nbytes;
-}
-
-static int crypto_cbc_encrypt_inplace(struct blkcipher_desc *desc,
-				      struct blkcipher_walk *walk,
-				      struct crypto_cipher *tfm)
-{
-	void (*fn)(struct crypto_tfm *, u8 *, const u8 *) =
-		crypto_cipher_alg(tfm)->cia_encrypt;
-	int bsize = crypto_cipher_blocksize(tfm);
-	unsigned int nbytes = walk->nbytes;
-	u8 *src = walk->src.virt.addr;
-	u8 *iv = walk->iv;
-
-	do {
-		crypto_xor(src, iv, bsize);
-		fn(crypto_cipher_tfm(tfm), src, src);
-		iv = src;
-
-		src += bsize;
-	} while ((nbytes -= bsize) >= bsize);
+	struct crypto_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	memcpy(walk->iv, iv, bsize);
-
-	return nbytes;
+	crypto_cipher_encrypt_one(ctx->child, dst, src);
 }
 
-static int crypto_cbc_encrypt(struct blkcipher_desc *desc,
-			      struct scatterlist *dst, struct scatterlist *src,
-			      unsigned int nbytes)
+static int crypto_cbc_encrypt(struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
-	struct crypto_blkcipher *tfm = desc->tfm;
-	struct crypto_cbc_ctx *ctx = crypto_blkcipher_ctx(tfm);
-	struct crypto_cipher *child = ctx->child;
-	int err;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
-
-	while ((nbytes = walk.nbytes)) {
-		if (walk.src.virt.addr == walk.dst.virt.addr)
-			nbytes = crypto_cbc_encrypt_inplace(desc, &walk, child);
-		else
-			nbytes = crypto_cbc_encrypt_segment(desc, &walk, child);
-		err = blkcipher_walk_done(desc, &walk, nbytes);
-	}
-
-	return err;
+	return crypto_cbc_encrypt_walk(req, crypto_cbc_encrypt_one);
 }
 
-static int crypto_cbc_decrypt_segment(struct blkcipher_desc *desc,
-				      struct blkcipher_walk *walk,
-				      struct crypto_cipher *tfm)
+static inline void crypto_cbc_decrypt_one(struct crypto_skcipher *tfm,
+					  const u8 *src, u8 *dst)
 {
-	void (*fn)(struct crypto_tfm *, u8 *, const u8 *) =
-		crypto_cipher_alg(tfm)->cia_decrypt;
-	int bsize = crypto_cipher_blocksize(tfm);
-	unsigned int nbytes = walk->nbytes;
-	u8 *src = walk->src.virt.addr;
-	u8 *dst = walk->dst.virt.addr;
-	u8 *iv = walk->iv;
-
-	do {
-		fn(crypto_cipher_tfm(tfm), dst, src);
-		crypto_xor(dst, iv, bsize);
-		iv = src;
-
-		src += bsize;
-		dst += bsize;
-	} while ((nbytes -= bsize) >= bsize);
-
-	memcpy(walk->iv, iv, bsize);
-
-	return nbytes;
-}
+	struct crypto_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-static int crypto_cbc_decrypt_inplace(struct blkcipher_desc *desc,
-				      struct blkcipher_walk *walk,
-				      struct crypto_cipher *tfm)
-{
-	void (*fn)(struct crypto_tfm *, u8 *, const u8 *) =
-		crypto_cipher_alg(tfm)->cia_decrypt;
-	int bsize = crypto_cipher_blocksize(tfm);
-	unsigned int nbytes = walk->nbytes;
-	u8 *src = walk->src.virt.addr;
-	u8 last_iv[bsize];
-
-	/* Start of the last block. */
-	src += nbytes - (nbytes & (bsize - 1)) - bsize;
-	memcpy(last_iv, src, bsize);
-
-	for (;;) {
-		fn(crypto_cipher_tfm(tfm), src, src);
-		if ((nbytes -= bsize) < bsize)
-			break;
-		crypto_xor(src, src - bsize, bsize);
-		src -= bsize;
-	}
-
-	crypto_xor(src, walk->iv, bsize);
-	memcpy(walk->iv, last_iv, bsize);
-
-	return nbytes;
+	crypto_cipher_decrypt_one(ctx->child, dst, src);
 }
 
-static int crypto_cbc_decrypt(struct blkcipher_desc *desc,
-			      struct scatterlist *dst, struct scatterlist *src,
-			      unsigned int nbytes)
+static int crypto_cbc_decrypt(struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
-	struct crypto_blkcipher *tfm = desc->tfm;
-	struct crypto_cbc_ctx *ctx = crypto_blkcipher_ctx(tfm);
-	struct crypto_cipher *child = ctx->child;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct skcipher_walk walk;
 	int err;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	err = skcipher_walk_virt(&walk, req, false);
 
-	while ((nbytes = walk.nbytes)) {
-		if (walk.src.virt.addr == walk.dst.virt.addr)
-			nbytes = crypto_cbc_decrypt_inplace(desc, &walk, child);
-		else
-			nbytes = crypto_cbc_decrypt_segment(desc, &walk, child);
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+	while (walk.nbytes) {
+		err = crypto_cbc_decrypt_blocks(&walk, tfm,
+						crypto_cbc_decrypt_one);
+		err = skcipher_walk_done(&walk, err);
 	}
 
 	return err;
 }
 
-static int crypto_cbc_init_tfm(struct crypto_tfm *tfm)
+static int crypto_cbc_init_tfm(struct crypto_skcipher *tfm)
 {
-	struct crypto_instance *inst = (void *)tfm->__crt_alg;
-	struct crypto_spawn *spawn = crypto_instance_ctx(inst);
-	struct crypto_cbc_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct skcipher_instance *inst = skcipher_alg_instance(tfm);
+	struct crypto_spawn *spawn = skcipher_instance_ctx(inst);
+	struct crypto_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
 	struct crypto_cipher *cipher;
 
 	cipher = crypto_spawn_cipher(spawn);
@@ -205,72 +92,94 @@ static int crypto_cbc_init_tfm(struct crypto_tfm *tfm)
 	return 0;
 }
 
-static void crypto_cbc_exit_tfm(struct crypto_tfm *tfm)
+static void crypto_cbc_exit_tfm(struct crypto_skcipher *tfm)
 {
-	struct crypto_cbc_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct crypto_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
+
 	crypto_free_cipher(ctx->child);
 }
 
-static struct crypto_instance *crypto_cbc_alloc(struct rtattr **tb)
+static void crypto_cbc_free(struct skcipher_instance *inst)
+{
+	crypto_drop_skcipher(skcipher_instance_ctx(inst));
+	kfree(inst);
+}
+
+static int crypto_cbc_create(struct crypto_template *tmpl, struct rtattr **tb)
 {
-	struct crypto_instance *inst;
+	struct skcipher_instance *inst;
+	struct crypto_spawn *spawn;
 	struct crypto_alg *alg;
 	int err;
 
-	err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_BLKCIPHER);
+	err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_SKCIPHER);
 	if (err)
-		return ERR_PTR(err);
+		return err;
+
+	inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
+	if (!inst)
+		return -ENOMEM;
 
 	alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_CIPHER,
 				  CRYPTO_ALG_TYPE_MASK);
+	err = PTR_ERR(alg);
 	if (IS_ERR(alg))
-		return ERR_CAST(alg);
+		goto err_free_inst;
 
-	inst = ERR_PTR(-EINVAL);
-	if (!is_power_of_2(alg->cra_blocksize))
-		goto out_put_alg;
+	spawn = skcipher_instance_ctx(inst);
+	err = crypto_init_spawn(spawn, alg, skcipher_crypto_instance(inst),
+				CRYPTO_ALG_TYPE_MASK);
+	crypto_mod_put(alg);
+	if (err)
+		goto err_free_inst;
 
-	inst = crypto_alloc_instance("cbc", alg);
-	if (IS_ERR(inst))
-		goto out_put_alg;
+	err = crypto_inst_setname(skcipher_crypto_instance(inst), "cbc", alg);
+	if (err)
+		goto err_drop_spawn;
 
-	inst->alg.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER;
-	inst->alg.cra_priority = alg->cra_priority;
-	inst->alg.cra_blocksize = alg->cra_blocksize;
-	inst->alg.cra_alignmask = alg->cra_alignmask;
-	inst->alg.cra_type = &crypto_blkcipher_type;
+	err = -EINVAL;
+	if (!is_power_of_2(alg->cra_blocksize))
+		goto err_drop_spawn;
+
+	inst->alg.base.cra_priority = alg->cra_priority;
+	inst->alg.base.cra_blocksize = alg->cra_blocksize;
+	inst->alg.base.cra_alignmask = alg->cra_alignmask;
 
 	/* We access the data as u32s when xoring. */
-	inst->alg.cra_alignmask |= __alignof__(u32) - 1;
+	inst->alg.base.cra_alignmask |= __alignof__(u32) - 1;
 
-	inst->alg.cra_blkcipher.ivsize = alg->cra_blocksize;
-	inst->alg.cra_blkcipher.min_keysize = alg->cra_cipher.cia_min_keysize;
-	inst->alg.cra_blkcipher.max_keysize = alg->cra_cipher.cia_max_keysize;
+	inst->alg.ivsize = alg->cra_blocksize;
+	inst->alg.min_keysize = alg->cra_cipher.cia_min_keysize;
+	inst->alg.max_keysize = alg->cra_cipher.cia_max_keysize;
 
-	inst->alg.cra_ctxsize = sizeof(struct crypto_cbc_ctx);
+	inst->alg.base.cra_ctxsize = sizeof(struct crypto_cbc_ctx);
 
-	inst->alg.cra_init = crypto_cbc_init_tfm;
-	inst->alg.cra_exit = crypto_cbc_exit_tfm;
+	inst->alg.init = crypto_cbc_init_tfm;
+	inst->alg.exit = crypto_cbc_exit_tfm;
 
-	inst->alg.cra_blkcipher.setkey = crypto_cbc_setkey;
-	inst->alg.cra_blkcipher.encrypt = crypto_cbc_encrypt;
-	inst->alg.cra_blkcipher.decrypt = crypto_cbc_decrypt;
+	inst->alg.setkey = crypto_cbc_setkey;
+	inst->alg.encrypt = crypto_cbc_encrypt;
+	inst->alg.decrypt = crypto_cbc_decrypt;
 
-out_put_alg:
-	crypto_mod_put(alg);
-	return inst;
-}
+	inst->free = crypto_cbc_free;
 
-static void crypto_cbc_free(struct crypto_instance *inst)
-{
-	crypto_drop_spawn(crypto_instance_ctx(inst));
+	err = skcipher_register_instance(tmpl, inst);
+	if (err)
+		goto err_drop_spawn;
+
+out:
+	return err;
+
+err_drop_spawn:
+	crypto_drop_spawn(spawn);
+err_free_inst:
 	kfree(inst);
+	goto out;
 }
 
 static struct crypto_template crypto_cbc_tmpl = {
 	.name = "cbc",
-	.alloc = crypto_cbc_alloc,
-	.free = crypto_cbc_free,
+	.create = crypto_cbc_create,
 	.module = THIS_MODULE,
 };
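
The per-mode walkers that used to live in this file (the *_segment and *_inplace helpers) move into inline helpers in the new <crypto/cbc.h>; the template now only supplies a one-block encrypt/decrypt callback around its child cipher. Another driver with its own single-block primitive could reuse the same helpers roughly as follows (foo_encrypt_one is an assumed primitive, not part of the patch); decryption works the same way through crypto_cbc_decrypt_blocks() inside a skcipher_walk loop, as the diff above shows:

#include <crypto/cbc.h>
#include <crypto/internal/skcipher.h>

/* assumed driver primitive: encrypt exactly one block with its own state */
static void foo_encrypt_one(void *state, u8 *dst, const u8 *src);

static void foo_cbc_encrypt_one(struct crypto_skcipher *tfm,
				const u8 *src, u8 *dst)
{
	foo_encrypt_one(crypto_skcipher_ctx(tfm), dst, src);
}

static int foo_cbc_encrypt(struct skcipher_request *req)
{
	/* xor-with-IV, encrypt and chaining are done by the generic helper */
	return crypto_cbc_encrypt_walk(req, foo_cbc_encrypt_one);
}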
 

+ 4 - 4
crypto/ccm.c

@@ -462,7 +462,7 @@ static int crypto_ccm_init_tfm(struct crypto_aead *tfm)
 	if (IS_ERR(cipher))
 		return PTR_ERR(cipher);
 
-	ctr = crypto_spawn_skcipher2(&ictx->ctr);
+	ctr = crypto_spawn_skcipher(&ictx->ctr);
 	err = PTR_ERR(ctr);
 	if (IS_ERR(ctr))
 		goto err_free_cipher;
@@ -544,9 +544,9 @@ static int crypto_ccm_create_common(struct crypto_template *tmpl,
 		goto err_free_inst;
 
 	crypto_set_skcipher_spawn(&ictx->ctr, aead_crypto_instance(inst));
-	err = crypto_grab_skcipher2(&ictx->ctr, ctr_name, 0,
-				    crypto_requires_sync(algt->type,
-							 algt->mask));
+	err = crypto_grab_skcipher(&ictx->ctr, ctr_name, 0,
+				   crypto_requires_sync(algt->type,
+							algt->mask));
 	if (err)
 		goto err_drop_cipher;
 

+ 4 - 4
crypto/chacha20poly1305.c

@@ -532,7 +532,7 @@ static int chachapoly_init(struct crypto_aead *tfm)
 	if (IS_ERR(poly))
 		return PTR_ERR(poly);
 
-	chacha = crypto_spawn_skcipher2(&ictx->chacha);
+	chacha = crypto_spawn_skcipher(&ictx->chacha);
 	if (IS_ERR(chacha)) {
 		crypto_free_ahash(poly);
 		return PTR_ERR(chacha);
@@ -625,9 +625,9 @@ static int chachapoly_create(struct crypto_template *tmpl, struct rtattr **tb,
 		goto err_free_inst;
 
 	crypto_set_skcipher_spawn(&ctx->chacha, aead_crypto_instance(inst));
-	err = crypto_grab_skcipher2(&ctx->chacha, chacha_name, 0,
-				    crypto_requires_sync(algt->type,
-							 algt->mask));
+	err = crypto_grab_skcipher(&ctx->chacha, chacha_name, 0,
+				   crypto_requires_sync(algt->type,
+							algt->mask));
 	if (err)
 		goto err_drop_poly;
 

+ 0 - 4
crypto/cipher.c

@@ -116,7 +116,3 @@ int crypto_init_cipher_ops(struct crypto_tfm *tfm)
 
 	return 0;
 }
-
-void crypto_exit_cipher_ops(struct crypto_tfm *tfm)
-{
-}

+ 10 - 4
crypto/cmac.c

@@ -57,7 +57,8 @@ static int crypto_cmac_digest_setkey(struct crypto_shash *parent,
 	unsigned long alignmask = crypto_shash_alignmask(parent);
 	struct cmac_tfm_ctx *ctx = crypto_shash_ctx(parent);
 	unsigned int bs = crypto_shash_blocksize(parent);
-	__be64 *consts = PTR_ALIGN((void *)ctx->ctx, alignmask + 1);
+	__be64 *consts = PTR_ALIGN((void *)ctx->ctx,
+				   (alignmask | (__alignof__(__be64) - 1)) + 1);
 	u64 _const[2];
 	int i, err = 0;
 	u8 msb_mask, gfmask;
@@ -173,7 +174,8 @@ static int crypto_cmac_digest_final(struct shash_desc *pdesc, u8 *out)
 	struct cmac_desc_ctx *ctx = shash_desc_ctx(pdesc);
 	struct crypto_cipher *tfm = tctx->child;
 	int bs = crypto_shash_blocksize(parent);
-	u8 *consts = PTR_ALIGN((void *)tctx->ctx, alignmask + 1);
+	u8 *consts = PTR_ALIGN((void *)tctx->ctx,
+			       (alignmask | (__alignof__(__be64) - 1)) + 1);
 	u8 *odds = PTR_ALIGN((void *)ctx->ctx, alignmask + 1);
 	u8 *prev = odds + bs;
 	unsigned int offset = 0;
@@ -243,6 +245,7 @@ static int cmac_create(struct crypto_template *tmpl, struct rtattr **tb)
 	case 8:
 		break;
 	default:
+		err = -EINVAL;
 		goto out_put_alg;
 	}
 
@@ -257,7 +260,8 @@ static int cmac_create(struct crypto_template *tmpl, struct rtattr **tb)
 	if (err)
 		goto out_free_inst;
 
-	alignmask = alg->cra_alignmask | (sizeof(long) - 1);
+	/* We access the data as u32s when xoring. */
+	alignmask = alg->cra_alignmask | (__alignof__(u32) - 1);
 	inst->alg.base.cra_alignmask = alignmask;
 	inst->alg.base.cra_priority = alg->cra_priority;
 	inst->alg.base.cra_blocksize = alg->cra_blocksize;
@@ -269,7 +273,9 @@ static int cmac_create(struct crypto_template *tmpl, struct rtattr **tb)
 		+ alg->cra_blocksize * 2;
 
 	inst->alg.base.cra_ctxsize =
-		ALIGN(sizeof(struct cmac_tfm_ctx), alignmask + 1)
+		ALIGN(sizeof(struct cmac_tfm_ctx), crypto_tfm_ctx_alignment())
+		+ ((alignmask | (__alignof__(__be64) - 1)) &
+		   ~(crypto_tfm_ctx_alignment() - 1))
 		+ alg->cra_blocksize * 2;
 
 	inst->alg.base.cra_init = cmac_init_tfm;

+ 0 - 4
crypto/compress.c

@@ -42,7 +42,3 @@ int crypto_init_compress_ops(struct crypto_tfm *tfm)
 
 	return 0;
 }
-
-void crypto_exit_compress_ops(struct crypto_tfm *tfm)
-{
-}

+ 282 - 4
crypto/cryptd.c

@@ -17,9 +17,9 @@
  *
  */
 
-#include <crypto/algapi.h>
 #include <crypto/internal/hash.h>
 #include <crypto/internal/aead.h>
+#include <crypto/internal/skcipher.h>
 #include <crypto/cryptd.h>
 #include <crypto/crypto_wq.h>
 #include <linux/atomic.h>
@@ -48,6 +48,11 @@ struct cryptd_instance_ctx {
 	struct cryptd_queue *queue;
 };
 
+struct skcipherd_instance_ctx {
+	struct crypto_skcipher_spawn spawn;
+	struct cryptd_queue *queue;
+};
+
 struct hashd_instance_ctx {
 	struct crypto_shash_spawn spawn;
 	struct cryptd_queue *queue;
@@ -67,6 +72,15 @@ struct cryptd_blkcipher_request_ctx {
 	crypto_completion_t complete;
 };
 
+struct cryptd_skcipher_ctx {
+	atomic_t refcnt;
+	struct crypto_skcipher *child;
+};
+
+struct cryptd_skcipher_request_ctx {
+	crypto_completion_t complete;
+};
+
 struct cryptd_hash_ctx {
 	atomic_t refcnt;
 	struct crypto_shash *child;
@@ -122,7 +136,6 @@ static int cryptd_enqueue_request(struct cryptd_queue *queue,
 {
 	int cpu, err;
 	struct cryptd_cpu_queue *cpu_queue;
-	struct crypto_tfm *tfm;
 	atomic_t *refcnt;
 	bool may_backlog;
 
@@ -141,7 +154,6 @@ static int cryptd_enqueue_request(struct cryptd_queue *queue,
 	if (!atomic_read(refcnt))
 		goto out_put_cpu;
 
-	tfm = request->tfm;
 	atomic_inc(refcnt);
 
 out_put_cpu:
@@ -432,6 +444,216 @@ out_put_alg:
 	return err;
 }
 
+static int cryptd_skcipher_setkey(struct crypto_skcipher *parent,
+				  const u8 *key, unsigned int keylen)
+{
+	struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(parent);
+	struct crypto_skcipher *child = ctx->child;
+	int err;
+
+	crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
+	crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(parent) &
+					 CRYPTO_TFM_REQ_MASK);
+	err = crypto_skcipher_setkey(child, key, keylen);
+	crypto_skcipher_set_flags(parent, crypto_skcipher_get_flags(child) &
+					  CRYPTO_TFM_RES_MASK);
+	return err;
+}
+
+static void cryptd_skcipher_complete(struct skcipher_request *req, int err)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req);
+	int refcnt = atomic_read(&ctx->refcnt);
+
+	local_bh_disable();
+	rctx->complete(&req->base, err);
+	local_bh_enable();
+
+	if (err != -EINPROGRESS && refcnt && atomic_dec_and_test(&ctx->refcnt))
+		crypto_free_skcipher(tfm);
+}
+
+static void cryptd_skcipher_encrypt(struct crypto_async_request *base,
+				    int err)
+{
+	struct skcipher_request *req = skcipher_request_cast(base);
+	struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct crypto_skcipher *child = ctx->child;
+	SKCIPHER_REQUEST_ON_STACK(subreq, child);
+
+	if (unlikely(err == -EINPROGRESS))
+		goto out;
+
+	skcipher_request_set_tfm(subreq, child);
+	skcipher_request_set_callback(subreq, CRYPTO_TFM_REQ_MAY_SLEEP,
+				      NULL, NULL);
+	skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen,
+				   req->iv);
+
+	err = crypto_skcipher_encrypt(subreq);
+	skcipher_request_zero(subreq);
+
+	req->base.complete = rctx->complete;
+
+out:
+	cryptd_skcipher_complete(req, err);
+}
+
+static void cryptd_skcipher_decrypt(struct crypto_async_request *base,
+				    int err)
+{
+	struct skcipher_request *req = skcipher_request_cast(base);
+	struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct crypto_skcipher *child = ctx->child;
+	SKCIPHER_REQUEST_ON_STACK(subreq, child);
+
+	if (unlikely(err == -EINPROGRESS))
+		goto out;
+
+	skcipher_request_set_tfm(subreq, child);
+	skcipher_request_set_callback(subreq, CRYPTO_TFM_REQ_MAY_SLEEP,
+				      NULL, NULL);
+	skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen,
+				   req->iv);
+
+	err = crypto_skcipher_decrypt(subreq);
+	skcipher_request_zero(subreq);
+
+	req->base.complete = rctx->complete;
+
+out:
+	cryptd_skcipher_complete(req, err);
+}
+
+static int cryptd_skcipher_enqueue(struct skcipher_request *req,
+				   crypto_completion_t compl)
+{
+	struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct cryptd_queue *queue;
+
+	queue = cryptd_get_queue(crypto_skcipher_tfm(tfm));
+	rctx->complete = req->base.complete;
+	req->base.complete = compl;
+
+	return cryptd_enqueue_request(queue, &req->base);
+}
+
+static int cryptd_skcipher_encrypt_enqueue(struct skcipher_request *req)
+{
+	return cryptd_skcipher_enqueue(req, cryptd_skcipher_encrypt);
+}
+
+static int cryptd_skcipher_decrypt_enqueue(struct skcipher_request *req)
+{
+	return cryptd_skcipher_enqueue(req, cryptd_skcipher_decrypt);
+}
+
+static int cryptd_skcipher_init_tfm(struct crypto_skcipher *tfm)
+{
+	struct skcipher_instance *inst = skcipher_alg_instance(tfm);
+	struct skcipherd_instance_ctx *ictx = skcipher_instance_ctx(inst);
+	struct crypto_skcipher_spawn *spawn = &ictx->spawn;
+	struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct crypto_skcipher *cipher;
+
+	cipher = crypto_spawn_skcipher(spawn);
+	if (IS_ERR(cipher))
+		return PTR_ERR(cipher);
+
+	ctx->child = cipher;
+	crypto_skcipher_set_reqsize(
+		tfm, sizeof(struct cryptd_skcipher_request_ctx));
+	return 0;
+}
+
+static void cryptd_skcipher_exit_tfm(struct crypto_skcipher *tfm)
+{
+	struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	crypto_free_skcipher(ctx->child);
+}
+
+static void cryptd_skcipher_free(struct skcipher_instance *inst)
+{
+	struct skcipherd_instance_ctx *ctx = skcipher_instance_ctx(inst);
+
+	crypto_drop_skcipher(&ctx->spawn);
+}
+
+static int cryptd_create_skcipher(struct crypto_template *tmpl,
+				  struct rtattr **tb,
+				  struct cryptd_queue *queue)
+{
+	struct skcipherd_instance_ctx *ctx;
+	struct skcipher_instance *inst;
+	struct skcipher_alg *alg;
+	const char *name;
+	u32 type;
+	u32 mask;
+	int err;
+
+	type = 0;
+	mask = CRYPTO_ALG_ASYNC;
+
+	cryptd_check_internal(tb, &type, &mask);
+
+	name = crypto_attr_alg_name(tb[1]);
+	if (IS_ERR(name))
+		return PTR_ERR(name);
+
+	inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL);
+	if (!inst)
+		return -ENOMEM;
+
+	ctx = skcipher_instance_ctx(inst);
+	ctx->queue = queue;
+
+	crypto_set_skcipher_spawn(&ctx->spawn, skcipher_crypto_instance(inst));
+	err = crypto_grab_skcipher(&ctx->spawn, name, type, mask);
+	if (err)
+		goto out_free_inst;
+
+	alg = crypto_spawn_skcipher_alg(&ctx->spawn);
+	err = cryptd_init_instance(skcipher_crypto_instance(inst), &alg->base);
+	if (err)
+		goto out_drop_skcipher;
+
+	inst->alg.base.cra_flags = CRYPTO_ALG_ASYNC |
+				   (alg->base.cra_flags & CRYPTO_ALG_INTERNAL);
+
+	inst->alg.ivsize = crypto_skcipher_alg_ivsize(alg);
+	inst->alg.chunksize = crypto_skcipher_alg_chunksize(alg);
+	inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(alg);
+	inst->alg.max_keysize = crypto_skcipher_alg_max_keysize(alg);
+
+	inst->alg.base.cra_ctxsize = sizeof(struct cryptd_skcipher_ctx);
+
+	inst->alg.init = cryptd_skcipher_init_tfm;
+	inst->alg.exit = cryptd_skcipher_exit_tfm;
+
+	inst->alg.setkey = cryptd_skcipher_setkey;
+	inst->alg.encrypt = cryptd_skcipher_encrypt_enqueue;
+	inst->alg.decrypt = cryptd_skcipher_decrypt_enqueue;
+
+	inst->free = cryptd_skcipher_free;
+
+	err = skcipher_register_instance(tmpl, inst);
+	if (err) {
+out_drop_skcipher:
+		crypto_drop_skcipher(&ctx->spawn);
+out_free_inst:
+		kfree(inst);
+	}
+	return err;
+}
+
 static int cryptd_hash_init_tfm(struct crypto_tfm *tfm)
 {
 	struct crypto_instance *inst = crypto_tfm_alg_instance(tfm);
@@ -895,7 +1117,11 @@ static int cryptd_create(struct crypto_template *tmpl, struct rtattr **tb)
 
 	switch (algt->type & algt->mask & CRYPTO_ALG_TYPE_MASK) {
 	case CRYPTO_ALG_TYPE_BLKCIPHER:
-		return cryptd_create_blkcipher(tmpl, tb, &queue);
+		if ((algt->type & CRYPTO_ALG_TYPE_MASK) ==
+		    CRYPTO_ALG_TYPE_BLKCIPHER)
+			return cryptd_create_blkcipher(tmpl, tb, &queue);
+
+		return cryptd_create_skcipher(tmpl, tb, &queue);
 	case CRYPTO_ALG_TYPE_DIGEST:
 		return cryptd_create_hash(tmpl, tb, &queue);
 	case CRYPTO_ALG_TYPE_AEAD:
@@ -985,6 +1211,58 @@ void cryptd_free_ablkcipher(struct cryptd_ablkcipher *tfm)
 }
 EXPORT_SYMBOL_GPL(cryptd_free_ablkcipher);
 
+struct cryptd_skcipher *cryptd_alloc_skcipher(const char *alg_name,
+					      u32 type, u32 mask)
+{
+	char cryptd_alg_name[CRYPTO_MAX_ALG_NAME];
+	struct cryptd_skcipher_ctx *ctx;
+	struct crypto_skcipher *tfm;
+
+	if (snprintf(cryptd_alg_name, CRYPTO_MAX_ALG_NAME,
+		     "cryptd(%s)", alg_name) >= CRYPTO_MAX_ALG_NAME)
+		return ERR_PTR(-EINVAL);
+
+	tfm = crypto_alloc_skcipher(cryptd_alg_name, type, mask);
+	if (IS_ERR(tfm))
+		return ERR_CAST(tfm);
+
+	if (tfm->base.__crt_alg->cra_module != THIS_MODULE) {
+		crypto_free_skcipher(tfm);
+		return ERR_PTR(-EINVAL);
+	}
+
+	ctx = crypto_skcipher_ctx(tfm);
+	atomic_set(&ctx->refcnt, 1);
+
+	return container_of(tfm, struct cryptd_skcipher, base);
+}
+EXPORT_SYMBOL_GPL(cryptd_alloc_skcipher);
+
+struct crypto_skcipher *cryptd_skcipher_child(struct cryptd_skcipher *tfm)
+{
+	struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(&tfm->base);
+
+	return ctx->child;
+}
+EXPORT_SYMBOL_GPL(cryptd_skcipher_child);
+
+bool cryptd_skcipher_queued(struct cryptd_skcipher *tfm)
+{
+	struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(&tfm->base);
+
+	return atomic_read(&ctx->refcnt) - 1;
+}
+EXPORT_SYMBOL_GPL(cryptd_skcipher_queued);
+
+void cryptd_free_skcipher(struct cryptd_skcipher *tfm)
+{
+	struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(&tfm->base);
+
+	if (atomic_dec_and_test(&ctx->refcnt))
+		crypto_free_skcipher(&tfm->base);
+}
+EXPORT_SYMBOL_GPL(cryptd_free_skcipher);
+
 struct cryptd_ahash *cryptd_alloc_ahash(const char *alg_name,
 					u32 type, u32 mask)
 {
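
cryptd now offers a native skcipher wrapper: a "cryptd(<alg>)" instance queues encrypt/decrypt onto the cryptd queue and runs the child there with an on-stack sub-request, and cryptd_alloc_skcipher()/cryptd_skcipher_child()/cryptd_free_skcipher() give callers (such as the SIMD wrapper) handles on both the asynchronous outer tfm and the synchronous child. A hedged usage sketch; the algorithm name and the INTERNAL type/mask are assumptions modelled on how internal implementations are normally reached:

#include <linux/err.h>
#include <crypto/cryptd.h>

static int foo_try_cryptd(void)
{
	struct cryptd_skcipher *ctfm;
	struct crypto_skcipher *child;

	ctfm = cryptd_alloc_skcipher("__cbc-aes-foo",
				     CRYPTO_ALG_INTERNAL,
				     CRYPTO_ALG_INTERNAL);
	if (IS_ERR(ctfm))
		return PTR_ERR(ctfm);

	/* &ctfm->base: asynchronous path, work runs off the cryptd queue */
	/* child: synchronous path for callers that may use SIMD directly */
	child = cryptd_skcipher_child(ctfm);
	(void)child;

	cryptd_free_skcipher(ctfm);
	return 0;
}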

+ 11 - 15
crypto/crypto_engine.c

@@ -47,7 +47,7 @@ static void crypto_pump_requests(struct crypto_engine *engine,
 
 	/* If another context is idling then defer */
 	if (engine->idling) {
-		kthread_queue_work(&engine->kworker, &engine->pump_requests);
+		kthread_queue_work(engine->kworker, &engine->pump_requests);
 		goto out;
 	}
 
@@ -58,7 +58,7 @@ static void crypto_pump_requests(struct crypto_engine *engine,
 
 		/* Only do teardown in the thread */
 		if (!in_kthread) {
-			kthread_queue_work(&engine->kworker,
+			kthread_queue_work(engine->kworker,
 					   &engine->pump_requests);
 			goto out;
 		}
@@ -189,7 +189,7 @@ int crypto_transfer_cipher_request(struct crypto_engine *engine,
 	ret = ablkcipher_enqueue_request(&engine->queue, req);
 
 	if (!engine->busy && need_pump)
-		kthread_queue_work(&engine->kworker, &engine->pump_requests);
+		kthread_queue_work(engine->kworker, &engine->pump_requests);
 
 	spin_unlock_irqrestore(&engine->queue_lock, flags);
 	return ret;
@@ -231,7 +231,7 @@ int crypto_transfer_hash_request(struct crypto_engine *engine,
 	ret = ahash_enqueue_request(&engine->queue, req);
 
 	if (!engine->busy && need_pump)
-		kthread_queue_work(&engine->kworker, &engine->pump_requests);
+		kthread_queue_work(engine->kworker, &engine->pump_requests);
 
 	spin_unlock_irqrestore(&engine->queue_lock, flags);
 	return ret;
@@ -284,7 +284,7 @@ void crypto_finalize_cipher_request(struct crypto_engine *engine,
 
 	req->base.complete(&req->base, err);
 
-	kthread_queue_work(&engine->kworker, &engine->pump_requests);
+	kthread_queue_work(engine->kworker, &engine->pump_requests);
 }
 EXPORT_SYMBOL_GPL(crypto_finalize_cipher_request);
 
@@ -321,7 +321,7 @@ void crypto_finalize_hash_request(struct crypto_engine *engine,
 
 	req->base.complete(&req->base, err);
 
-	kthread_queue_work(&engine->kworker, &engine->pump_requests);
+	kthread_queue_work(engine->kworker, &engine->pump_requests);
 }
 EXPORT_SYMBOL_GPL(crypto_finalize_hash_request);
 
@@ -345,7 +345,7 @@ int crypto_engine_start(struct crypto_engine *engine)
 	engine->running = true;
 	spin_unlock_irqrestore(&engine->queue_lock, flags);
 
-	kthread_queue_work(&engine->kworker, &engine->pump_requests);
+	kthread_queue_work(engine->kworker, &engine->pump_requests);
 
 	return 0;
 }
@@ -422,11 +422,8 @@ struct crypto_engine *crypto_engine_alloc_init(struct device *dev, bool rt)
 	crypto_init_queue(&engine->queue, CRYPTO_ENGINE_MAX_QLEN);
 	spin_lock_init(&engine->queue_lock);
 
-	kthread_init_worker(&engine->kworker);
-	engine->kworker_task = kthread_run(kthread_worker_fn,
-					   &engine->kworker, "%s",
-					   engine->name);
-	if (IS_ERR(engine->kworker_task)) {
+	engine->kworker = kthread_create_worker(0, "%s", engine->name);
+	if (IS_ERR(engine->kworker)) {
 		dev_err(dev, "failed to create crypto request pump task\n");
 		return NULL;
 	}
@@ -434,7 +431,7 @@ struct crypto_engine *crypto_engine_alloc_init(struct device *dev, bool rt)
 
 	if (engine->rt) {
 		dev_info(dev, "will run requests pump with realtime priority\n");
-		sched_setscheduler(engine->kworker_task, SCHED_FIFO, &param);
+		sched_setscheduler(engine->kworker->task, SCHED_FIFO, &param);
 	}
 
 	return engine;
@@ -455,8 +452,7 @@ int crypto_engine_exit(struct crypto_engine *engine)
 	if (ret)
 		return ret;
 
-	kthread_flush_worker(&engine->kworker);
-	kthread_stop(engine->kworker_task);
+	kthread_destroy_worker(engine->kworker);
 
 	return 0;
 }
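
Note: switching to kthread_create_worker()/kthread_destroy_worker() implies a matching change to the engine structure itself, which is not part of this hunk. A sketch of the assumed shape of that change in include/crypto/engine.h, shown only for context:

	/* assumed, not shown in this diff */
	struct crypto_engine {
		/* name, queue, lock, callbacks elided */
		struct kthread_worker	*kworker;	/* was an embedded struct kthread_worker
							 * plus a separate kworker_task pointer */
		struct kthread_work	pump_requests;
	};

With the worker owned by the kthread API, kthread_destroy_worker() both flushes and stops it, which is why the explicit flush/stop pair disappears from crypto_engine_exit() above.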

+ 19 - 0
crypto/crypto_user.c

@@ -112,6 +112,21 @@ nla_put_failure:
 	return -EMSGSIZE;
 }
 
+static int crypto_report_acomp(struct sk_buff *skb, struct crypto_alg *alg)
+{
+	struct crypto_report_acomp racomp;
+
+	strncpy(racomp.type, "acomp", sizeof(racomp.type));
+
+	if (nla_put(skb, CRYPTOCFGA_REPORT_ACOMP,
+		    sizeof(struct crypto_report_acomp), &racomp))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
 static int crypto_report_akcipher(struct sk_buff *skb, struct crypto_alg *alg)
 {
 	struct crypto_report_akcipher rakcipher;
@@ -186,7 +201,11 @@ static int crypto_report_one(struct crypto_alg *alg,
 			goto nla_put_failure;
 
 		break;
+	case CRYPTO_ALG_TYPE_ACOMPRESS:
+		if (crypto_report_acomp(skb, alg))
+			goto nla_put_failure;
 
+		break;
 	case CRYPTO_ALG_TYPE_AKCIPHER:
 		if (crypto_report_akcipher(skb, alg))
 			goto nla_put_failure;
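
Note: the new report path relies on a CRYPTOCFGA_REPORT_ACOMP attribute and a struct crypto_report_acomp in the cryptouser uapi header, neither of which appears in this hunk. The sketch below is only the assumed minimal shape needed for the strncpy()/nla_put() pair above to work:

	/* assumed uapi addition (linux/cryptouser.h), shown for context only */
	struct crypto_report_acomp {
		char type[CRYPTO_MAX_NAME];
	};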

+ 4 - 4
crypto/ctr.c

@@ -312,7 +312,7 @@ static int crypto_rfc3686_init_tfm(struct crypto_skcipher *tfm)
 	unsigned long align;
 	unsigned int reqsize;
 
-	cipher = crypto_spawn_skcipher2(spawn);
+	cipher = crypto_spawn_skcipher(spawn);
 	if (IS_ERR(cipher))
 		return PTR_ERR(cipher);
 
@@ -370,9 +370,9 @@ static int crypto_rfc3686_create(struct crypto_template *tmpl,
 	spawn = skcipher_instance_ctx(inst);
 
 	crypto_set_skcipher_spawn(spawn, skcipher_crypto_instance(inst));
-	err = crypto_grab_skcipher2(spawn, cipher_name, 0,
-				    crypto_requires_sync(algt->type,
-							 algt->mask));
+	err = crypto_grab_skcipher(spawn, cipher_name, 0,
+				   crypto_requires_sync(algt->type,
+							algt->mask));
 	if (err)
 		goto err_free_inst;
 

+ 4 - 4
crypto/cts.c

@@ -290,7 +290,7 @@ static int crypto_cts_init_tfm(struct crypto_skcipher *tfm)
 	unsigned bsize;
 	unsigned align;
 
-	cipher = crypto_spawn_skcipher2(spawn);
+	cipher = crypto_spawn_skcipher(spawn);
 	if (IS_ERR(cipher))
 		return PTR_ERR(cipher);
 
@@ -348,9 +348,9 @@ static int crypto_cts_create(struct crypto_template *tmpl, struct rtattr **tb)
 	spawn = skcipher_instance_ctx(inst);
 
 	crypto_set_skcipher_spawn(spawn, skcipher_crypto_instance(inst));
-	err = crypto_grab_skcipher2(spawn, cipher_name, 0,
-				    crypto_requires_sync(algt->type,
-							 algt->mask));
+	err = crypto_grab_skcipher(spawn, cipher_name, 0,
+				   crypto_requires_sync(algt->type,
+							algt->mask));
 	if (err)
 		goto err_free_inst;
 

+ 101 - 10
crypto/deflate.c

@@ -32,6 +32,7 @@
 #include <linux/interrupt.h>
 #include <linux/mm.h>
 #include <linux/net.h>
+#include <crypto/internal/scompress.h>
 
 #define DEFLATE_DEF_LEVEL		Z_DEFAULT_COMPRESSION
 #define DEFLATE_DEF_WINBITS		11
@@ -101,9 +102,8 @@ static void deflate_decomp_exit(struct deflate_ctx *ctx)
 	vfree(ctx->decomp_stream.workspace);
 }
 
-static int deflate_init(struct crypto_tfm *tfm)
+static int __deflate_init(void *ctx)
 {
-	struct deflate_ctx *ctx = crypto_tfm_ctx(tfm);
 	int ret;
 
 	ret = deflate_comp_init(ctx);
@@ -116,19 +116,55 @@ out:
 	return ret;
 }
 
-static void deflate_exit(struct crypto_tfm *tfm)
+static void *deflate_alloc_ctx(struct crypto_scomp *tfm)
+{
+	struct deflate_ctx *ctx;
+	int ret;
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return ERR_PTR(-ENOMEM);
+
+	ret = __deflate_init(ctx);
+	if (ret) {
+		kfree(ctx);
+		return ERR_PTR(ret);
+	}
+
+	return ctx;
+}
+
+static int deflate_init(struct crypto_tfm *tfm)
 {
 	struct deflate_ctx *ctx = crypto_tfm_ctx(tfm);
 
+	return __deflate_init(ctx);
+}
+
+static void __deflate_exit(void *ctx)
+{
 	deflate_comp_exit(ctx);
 	deflate_decomp_exit(ctx);
 }
 
-static int deflate_compress(struct crypto_tfm *tfm, const u8 *src,
-			    unsigned int slen, u8 *dst, unsigned int *dlen)
+static void deflate_free_ctx(struct crypto_scomp *tfm, void *ctx)
+{
+	__deflate_exit(ctx);
+	kzfree(ctx);
+}
+
+static void deflate_exit(struct crypto_tfm *tfm)
+{
+	struct deflate_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	__deflate_exit(ctx);
+}
+
+static int __deflate_compress(const u8 *src, unsigned int slen,
+			      u8 *dst, unsigned int *dlen, void *ctx)
 {
 	int ret = 0;
-	struct deflate_ctx *dctx = crypto_tfm_ctx(tfm);
+	struct deflate_ctx *dctx = ctx;
 	struct z_stream_s *stream = &dctx->comp_stream;
 
 	ret = zlib_deflateReset(stream);
@@ -153,12 +189,27 @@ out:
 	return ret;
 }
 
-static int deflate_decompress(struct crypto_tfm *tfm, const u8 *src,
-			      unsigned int slen, u8 *dst, unsigned int *dlen)
+static int deflate_compress(struct crypto_tfm *tfm, const u8 *src,
+			    unsigned int slen, u8 *dst, unsigned int *dlen)
+{
+	struct deflate_ctx *dctx = crypto_tfm_ctx(tfm);
+
+	return __deflate_compress(src, slen, dst, dlen, dctx);
+}
+
+static int deflate_scompress(struct crypto_scomp *tfm, const u8 *src,
+			     unsigned int slen, u8 *dst, unsigned int *dlen,
+			     void *ctx)
+{
+	return __deflate_compress(src, slen, dst, dlen, ctx);
+}
+
+static int __deflate_decompress(const u8 *src, unsigned int slen,
+				u8 *dst, unsigned int *dlen, void *ctx)
 {
 
 	int ret = 0;
-	struct deflate_ctx *dctx = crypto_tfm_ctx(tfm);
+	struct deflate_ctx *dctx = ctx;
 	struct z_stream_s *stream = &dctx->decomp_stream;
 
 	ret = zlib_inflateReset(stream);
@@ -194,6 +245,21 @@ out:
 	return ret;
 }
 
+static int deflate_decompress(struct crypto_tfm *tfm, const u8 *src,
+			      unsigned int slen, u8 *dst, unsigned int *dlen)
+{
+	struct deflate_ctx *dctx = crypto_tfm_ctx(tfm);
+
+	return __deflate_decompress(src, slen, dst, dlen, dctx);
+}
+
+static int deflate_sdecompress(struct crypto_scomp *tfm, const u8 *src,
+			       unsigned int slen, u8 *dst, unsigned int *dlen,
+			       void *ctx)
+{
+	return __deflate_decompress(src, slen, dst, dlen, ctx);
+}
+
 static struct crypto_alg alg = {
 	.cra_name		= "deflate",
 	.cra_flags		= CRYPTO_ALG_TYPE_COMPRESS,
@@ -206,14 +272,39 @@ static struct crypto_alg alg = {
 	.coa_decompress  	= deflate_decompress } }
 };
 
+static struct scomp_alg scomp = {
+	.alloc_ctx		= deflate_alloc_ctx,
+	.free_ctx		= deflate_free_ctx,
+	.compress		= deflate_scompress,
+	.decompress		= deflate_sdecompress,
+	.base			= {
+		.cra_name	= "deflate",
+		.cra_driver_name = "deflate-scomp",
+		.cra_module	 = THIS_MODULE,
+	}
+};
+
 static int __init deflate_mod_init(void)
 {
-	return crypto_register_alg(&alg);
+	int ret;
+
+	ret = crypto_register_alg(&alg);
+	if (ret)
+		return ret;
+
+	ret = crypto_register_scomp(&scomp);
+	if (ret) {
+		crypto_unregister_alg(&alg);
+		return ret;
+	}
+
+	return ret;
 }
 
 static void __exit deflate_mod_fini(void)
 {
 	crypto_unregister_alg(&alg);
+	crypto_unregister_scomp(&scomp);
 }
 
 module_init(deflate_mod_init);
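
Note: registering the same algorithm as both a legacy compress alg and an scomp_alg keeps existing tfm-based users working while acomp callers reach "deflate" through the scomp-to-acomp bridge. A rough usage sketch from the acomp side, assuming the allocation and request helpers of the new acompress API (needs <crypto/acompress.h>; error handling abbreviated, scatterlists and lengths supplied by the caller):

	static int deflate_acomp_example(struct scatterlist *src, struct scatterlist *dst,
					 unsigned int slen, unsigned int dlen)
	{
		struct crypto_acomp *tfm;
		struct acomp_req *req;
		int err;

		tfm = crypto_alloc_acomp("deflate", 0, 0);
		if (IS_ERR(tfm))
			return PTR_ERR(tfm);

		req = acomp_request_alloc(tfm);
		acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
		acomp_request_set_params(req, src, dst, slen, dlen);

		err = crypto_acomp_compress(req);	/* or crypto_acomp_decompress() */

		acomp_request_free(req);
		crypto_free_acomp(tfm);
		return err;
	}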

+ 1 - 1
crypto/dh.c

@@ -118,7 +118,7 @@ static int dh_compute_value(struct kpp_request *req)
 	if (req->src) {
 		base = mpi_read_raw_from_sgl(req->src, req->src_len);
 		if (!base) {
-			ret = EINVAL;
+			ret = -EINVAL;
 			goto err_free_val;
 		}
 	} else {

+ 1 - 0
crypto/drbg.c

@@ -1782,6 +1782,7 @@ static int drbg_kcapi_sym_ctr(struct drbg_state *drbg,
 		memcpy(outbuf, drbg->outscratchpad, cryptlen);
 
 		outlen -= cryptlen;
+		outbuf += cryptlen;
 	}
 	ret = 0;
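
Note: the added line matters when the requested output is larger than the scratchpad. The surrounding loop copies one chunk per pass, and without advancing outbuf every later chunk would overwrite the start of the caller's buffer. Paraphrased shape of the loop (chunk bound name assumed, not the literal kernel code):

	while (outlen) {
		cryptlen = min_t(u32, outlen, DRBG_OUTSCRATCHLEN);	/* bound name assumed */
		/* ... CTR-encrypt into drbg->outscratchpad ... */
		memcpy(outbuf, drbg->outscratchpad, cryptlen);
		outlen -= cryptlen;
		outbuf += cryptlen;	/* the added line */
	}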
 

+ 5 - 5
crypto/gcm.c

@@ -575,7 +575,7 @@ static int crypto_gcm_init_tfm(struct crypto_aead *tfm)
 	if (IS_ERR(ghash))
 		return PTR_ERR(ghash);
 
-	ctr = crypto_spawn_skcipher2(&ictx->ctr);
+	ctr = crypto_spawn_skcipher(&ictx->ctr);
 	err = PTR_ERR(ctr);
 	if (IS_ERR(ctr))
 		goto err_free_hash;
@@ -663,20 +663,20 @@ static int crypto_gcm_create_common(struct crypto_template *tmpl,
 		goto err_drop_ghash;
 
 	crypto_set_skcipher_spawn(&ctx->ctr, aead_crypto_instance(inst));
-	err = crypto_grab_skcipher2(&ctx->ctr, ctr_name, 0,
-				    crypto_requires_sync(algt->type,
-							 algt->mask));
+	err = crypto_grab_skcipher(&ctx->ctr, ctr_name, 0,
+				   crypto_requires_sync(algt->type,
+							algt->mask));
 	if (err)
 		goto err_drop_ghash;
 
 	ctr = crypto_spawn_skcipher_alg(&ctx->ctr);
 
 	/* We only support 16-byte blocks. */
+	err = -EINVAL;
 	if (crypto_skcipher_alg_ivsize(ctr) != 16)
 		goto out_put_ctr;
 
 	/* Not a stream cipher? */
-	err = -EINVAL;
 	if (ctr->base.cra_blocksize != 1)
 		goto out_put_ctr;
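
Note: moving err = -EINVAL above the IV-size test closes a gap where a CTR implementation with the wrong ivsize would take the out_put_ctr path while err still held 0 from the successful crypto_grab_skcipher(), so instance creation could clean up yet report success. With the move, both sanity checks now fail with -EINVAL.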
 

+ 2 - 57
crypto/gf128mul.c

@@ -263,48 +263,6 @@ EXPORT_SYMBOL(gf128mul_bbe);
  * t[1][BYTE] contains g*x^8*BYTE
  *  ..
  * t[15][BYTE] contains g*x^120*BYTE */
-struct gf128mul_64k *gf128mul_init_64k_lle(const be128 *g)
-{
-	struct gf128mul_64k *t;
-	int i, j, k;
-
-	t = kzalloc(sizeof(*t), GFP_KERNEL);
-	if (!t)
-		goto out;
-
-	for (i = 0; i < 16; i++) {
-		t->t[i] = kzalloc(sizeof(*t->t[i]), GFP_KERNEL);
-		if (!t->t[i]) {
-			gf128mul_free_64k(t);
-			t = NULL;
-			goto out;
-		}
-	}
-
-	t->t[0]->t[128] = *g;
-	for (j = 64; j > 0; j >>= 1)
-		gf128mul_x_lle(&t->t[0]->t[j], &t->t[0]->t[j + j]);
-
-	for (i = 0;;) {
-		for (j = 2; j < 256; j += j)
-			for (k = 1; k < j; ++k)
-				be128_xor(&t->t[i]->t[j + k],
-					  &t->t[i]->t[j], &t->t[i]->t[k]);
-
-		if (++i >= 16)
-			break;
-
-		for (j = 128; j > 0; j >>= 1) {
-			t->t[i]->t[j] = t->t[i - 1]->t[j];
-			gf128mul_x8_lle(&t->t[i]->t[j]);
-		}
-	}
-
-out:
-	return t;
-}
-EXPORT_SYMBOL(gf128mul_init_64k_lle);
-
 struct gf128mul_64k *gf128mul_init_64k_bbe(const be128 *g)
 {
 	struct gf128mul_64k *t;
@@ -352,24 +310,11 @@ void gf128mul_free_64k(struct gf128mul_64k *t)
 	int i;
 
 	for (i = 0; i < 16; i++)
-		kfree(t->t[i]);
-	kfree(t);
+		kzfree(t->t[i]);
+	kzfree(t);
 }
 EXPORT_SYMBOL(gf128mul_free_64k);
 
-void gf128mul_64k_lle(be128 *a, struct gf128mul_64k *t)
-{
-	u8 *ap = (u8 *)a;
-	be128 r[1];
-	int i;
-
-	*r = t->t[0]->t[ap[0]];
-	for (i = 1; i < 16; ++i)
-		be128_xor(r, r, &t->t[i]->t[ap[i]]);
-	*a = *r;
-}
-EXPORT_SYMBOL(gf128mul_64k_lle);
-
 void gf128mul_64k_bbe(be128 *a, struct gf128mul_64k *t)
 {
 	u8 *ap = (u8 *)a;

+ 0 - 3
crypto/internal.h

@@ -76,9 +76,6 @@ struct crypto_alg *crypto_alg_mod_lookup(const char *name, u32 type, u32 mask);
 int crypto_init_cipher_ops(struct crypto_tfm *tfm);
 int crypto_init_compress_ops(struct crypto_tfm *tfm);
 
-void crypto_exit_cipher_ops(struct crypto_tfm *tfm);
-void crypto_exit_compress_ops(struct crypto_tfm *tfm);
-
 struct crypto_larval *crypto_larval_alloc(const char *name, u32 type, u32 mask);
 void crypto_larval_kill(struct crypto_alg *alg);
 struct crypto_alg *crypto_larval_lookup(const char *name, u32 type, u32 mask);

+ 0 - 1
crypto/jitterentropy-kcapi.c

@@ -39,7 +39,6 @@
 
 #include <linux/module.h>
 #include <linux/slab.h>
-#include <linux/module.h>
 #include <linux/fips.h>
 #include <linux/time.h>
 #include <linux/crypto.h>

+ 380 - 127
crypto/lrw.c

@@ -17,7 +17,8 @@
  *
  * The test vectors are included in the testing module tcrypt.[ch] */
 
-#include <crypto/algapi.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
 #include <linux/err.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
@@ -29,11 +30,30 @@
 #include <crypto/gf128mul.h>
 #include <crypto/lrw.h>
 
+#define LRW_BUFFER_SIZE 128u
+
 struct priv {
-	struct crypto_cipher *child;
+	struct crypto_skcipher *child;
 	struct lrw_table_ctx table;
 };
 
+struct rctx {
+	be128 buf[LRW_BUFFER_SIZE / sizeof(be128)];
+
+	be128 t;
+
+	be128 *ext;
+
+	struct scatterlist srcbuf[2];
+	struct scatterlist dstbuf[2];
+	struct scatterlist *src;
+	struct scatterlist *dst;
+
+	unsigned int left;
+
+	struct skcipher_request subreq;
+};
+
 static inline void setbit128_bbe(void *b, int bit)
 {
 	__set_bit(bit ^ (0x80 -
@@ -76,32 +96,26 @@ void lrw_free_table(struct lrw_table_ctx *ctx)
 }
 EXPORT_SYMBOL_GPL(lrw_free_table);
 
-static int setkey(struct crypto_tfm *parent, const u8 *key,
+static int setkey(struct crypto_skcipher *parent, const u8 *key,
 		  unsigned int keylen)
 {
-	struct priv *ctx = crypto_tfm_ctx(parent);
-	struct crypto_cipher *child = ctx->child;
+	struct priv *ctx = crypto_skcipher_ctx(parent);
+	struct crypto_skcipher *child = ctx->child;
 	int err, bsize = LRW_BLOCK_SIZE;
 	const u8 *tweak = key + keylen - bsize;
 
-	crypto_cipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
-	crypto_cipher_set_flags(child, crypto_tfm_get_flags(parent) &
-				       CRYPTO_TFM_REQ_MASK);
-	err = crypto_cipher_setkey(child, key, keylen - bsize);
+	crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
+	crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(parent) &
+					 CRYPTO_TFM_REQ_MASK);
+	err = crypto_skcipher_setkey(child, key, keylen - bsize);
+	crypto_skcipher_set_flags(parent, crypto_skcipher_get_flags(child) &
+					  CRYPTO_TFM_RES_MASK);
 	if (err)
 		return err;
-	crypto_tfm_set_flags(parent, crypto_cipher_get_flags(child) &
-				     CRYPTO_TFM_RES_MASK);
 
 	return lrw_init_table(&ctx->table, tweak);
 }
 
-struct sinfo {
-	be128 t;
-	struct crypto_tfm *tfm;
-	void (*fn)(struct crypto_tfm *, u8 *, const u8 *);
-};
-
 static inline void inc(be128 *iv)
 {
 	be64_add_cpu(&iv->b, 1);
@@ -109,13 +123,6 @@ static inline void inc(be128 *iv)
 		be64_add_cpu(&iv->a, 1);
 }
 
-static inline void lrw_round(struct sinfo *s, void *dst, const void *src)
-{
-	be128_xor(dst, &s->t, src);		/* PP <- T xor P */
-	s->fn(s->tfm, dst, dst);		/* CC <- E(Key2,PP) */
-	be128_xor(dst, dst, &s->t);		/* C <- T xor CC */
-}
-
 /* this returns the number of consequative 1 bits starting
  * from the right, get_index128(00 00 00 00 00 00 ... 00 00 10 FB) = 2 */
 static inline int get_index128(be128 *block)
@@ -135,83 +142,263 @@ static inline int get_index128(be128 *block)
 	return x;
 }
 
-static int crypt(struct blkcipher_desc *d,
-		 struct blkcipher_walk *w, struct priv *ctx,
-		 void (*fn)(struct crypto_tfm *, u8 *, const u8 *))
+static int post_crypt(struct skcipher_request *req)
 {
+	struct rctx *rctx = skcipher_request_ctx(req);
+	be128 *buf = rctx->ext ?: rctx->buf;
+	struct skcipher_request *subreq;
+	const int bs = LRW_BLOCK_SIZE;
+	struct skcipher_walk w;
+	struct scatterlist *sg;
+	unsigned offset;
 	int err;
-	unsigned int avail;
+
+	subreq = &rctx->subreq;
+	err = skcipher_walk_virt(&w, subreq, false);
+
+	while (w.nbytes) {
+		unsigned int avail = w.nbytes;
+		be128 *wdst;
+
+		wdst = w.dst.virt.addr;
+
+		do {
+			be128_xor(wdst, buf++, wdst);
+			wdst++;
+		} while ((avail -= bs) >= bs);
+
+		err = skcipher_walk_done(&w, avail);
+	}
+
+	rctx->left -= subreq->cryptlen;
+
+	if (err || !rctx->left)
+		goto out;
+
+	rctx->dst = rctx->dstbuf;
+
+	scatterwalk_done(&w.out, 0, 1);
+	sg = w.out.sg;
+	offset = w.out.offset;
+
+	if (rctx->dst != sg) {
+		rctx->dst[0] = *sg;
+		sg_unmark_end(rctx->dst);
+		scatterwalk_crypto_chain(rctx->dst, sg_next(sg), 0, 2);
+	}
+	rctx->dst[0].length -= offset - sg->offset;
+	rctx->dst[0].offset = offset;
+
+out:
+	return err;
+}
+
+static int pre_crypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct rctx *rctx = skcipher_request_ctx(req);
+	struct priv *ctx = crypto_skcipher_ctx(tfm);
+	be128 *buf = rctx->ext ?: rctx->buf;
+	struct skcipher_request *subreq;
 	const int bs = LRW_BLOCK_SIZE;
-	struct sinfo s = {
-		.tfm = crypto_cipher_tfm(ctx->child),
-		.fn = fn
-	};
+	struct skcipher_walk w;
+	struct scatterlist *sg;
+	unsigned cryptlen;
+	unsigned offset;
 	be128 *iv;
-	u8 *wsrc;
-	u8 *wdst;
+	bool more;
+	int err;
 
-	err = blkcipher_walk_virt(d, w);
-	if (!(avail = w->nbytes))
-		return err;
+	subreq = &rctx->subreq;
+	skcipher_request_set_tfm(subreq, tfm);
 
-	wsrc = w->src.virt.addr;
-	wdst = w->dst.virt.addr;
+	cryptlen = subreq->cryptlen;
+	more = rctx->left > cryptlen;
+	if (!more)
+		cryptlen = rctx->left;
 
-	/* calculate first value of T */
-	iv = (be128 *)w->iv;
-	s.t = *iv;
+	skcipher_request_set_crypt(subreq, rctx->src, rctx->dst,
+				   cryptlen, req->iv);
 
-	/* T <- I*Key2 */
-	gf128mul_64k_bbe(&s.t, ctx->table.table);
+	err = skcipher_walk_virt(&w, subreq, false);
+	iv = w.iv;
 
-	goto first;
+	while (w.nbytes) {
+		unsigned int avail = w.nbytes;
+		be128 *wsrc;
+		be128 *wdst;
+
+		wsrc = w.src.virt.addr;
+		wdst = w.dst.virt.addr;
 
-	for (;;) {
 		do {
+			*buf++ = rctx->t;
+			be128_xor(wdst++, &rctx->t, wsrc++);
+
 			/* T <- I*Key2, using the optimization
 			 * discussed in the specification */
-			be128_xor(&s.t, &s.t,
+			be128_xor(&rctx->t, &rctx->t,
 				  &ctx->table.mulinc[get_index128(iv)]);
 			inc(iv);
+		} while ((avail -= bs) >= bs);
 
-first:
-			lrw_round(&s, wdst, wsrc);
+		err = skcipher_walk_done(&w, avail);
+	}
 
-			wsrc += bs;
-			wdst += bs;
-		} while ((avail -= bs) >= bs);
+	skcipher_request_set_tfm(subreq, ctx->child);
+	skcipher_request_set_crypt(subreq, rctx->dst, rctx->dst,
+				   cryptlen, NULL);
 
-		err = blkcipher_walk_done(d, w, avail);
-		if (!(avail = w->nbytes))
-			break;
+	if (err || !more)
+		goto out;
+
+	rctx->src = rctx->srcbuf;
+
+	scatterwalk_done(&w.in, 0, 1);
+	sg = w.in.sg;
+	offset = w.in.offset;
+
+	if (rctx->src != sg) {
+		rctx->src[0] = *sg;
+		sg_unmark_end(rctx->src);
+		scatterwalk_crypto_chain(rctx->src, sg_next(sg), 0, 2);
+	}
+	rctx->src[0].length -= offset - sg->offset;
+	rctx->src[0].offset = offset;
+
+out:
+	return err;
+}
+
+static int init_crypt(struct skcipher_request *req, crypto_completion_t done)
+{
+	struct priv *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
+	struct rctx *rctx = skcipher_request_ctx(req);
+	struct skcipher_request *subreq;
+	gfp_t gfp;
+
+	subreq = &rctx->subreq;
+	skcipher_request_set_callback(subreq, req->base.flags, done, req);
+
+	gfp = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL :
+							   GFP_ATOMIC;
+	rctx->ext = NULL;
+
+	subreq->cryptlen = LRW_BUFFER_SIZE;
+	if (req->cryptlen > LRW_BUFFER_SIZE) {
+		subreq->cryptlen = min(req->cryptlen, (unsigned)PAGE_SIZE);
+		rctx->ext = kmalloc(subreq->cryptlen, gfp);
+	}
+
+	rctx->src = req->src;
+	rctx->dst = req->dst;
+	rctx->left = req->cryptlen;
+
+	/* calculate first value of T */
+	memcpy(&rctx->t, req->iv, sizeof(rctx->t));
+
+	/* T <- I*Key2 */
+	gf128mul_64k_bbe(&rctx->t, ctx->table.table);
 
-		wsrc = w->src.virt.addr;
-		wdst = w->dst.virt.addr;
+	return 0;
+}
+
+static void exit_crypt(struct skcipher_request *req)
+{
+	struct rctx *rctx = skcipher_request_ctx(req);
+
+	rctx->left = 0;
+
+	if (rctx->ext)
+		kfree(rctx->ext);
+}
+
+static int do_encrypt(struct skcipher_request *req, int err)
+{
+	struct rctx *rctx = skcipher_request_ctx(req);
+	struct skcipher_request *subreq;
+
+	subreq = &rctx->subreq;
+
+	while (!err && rctx->left) {
+		err = pre_crypt(req) ?:
+		      crypto_skcipher_encrypt(subreq) ?:
+		      post_crypt(req);
+
+		if (err == -EINPROGRESS ||
+		    (err == -EBUSY &&
+		     req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG))
+			return err;
 	}
 
+	exit_crypt(req);
 	return err;
 }
 
-static int encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		   struct scatterlist *src, unsigned int nbytes)
+static void encrypt_done(struct crypto_async_request *areq, int err)
+{
+	struct skcipher_request *req = areq->data;
+	struct skcipher_request *subreq;
+	struct rctx *rctx;
+
+	rctx = skcipher_request_ctx(req);
+	subreq = &rctx->subreq;
+	subreq->base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG;
+
+	err = do_encrypt(req, err ?: post_crypt(req));
+	if (rctx->left)
+		return;
+
+	skcipher_request_complete(req, err);
+}
+
+static int encrypt(struct skcipher_request *req)
+{
+	return do_encrypt(req, init_crypt(req, encrypt_done));
+}
+
+static int do_decrypt(struct skcipher_request *req, int err)
 {
-	struct priv *ctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk w;
+	struct rctx *rctx = skcipher_request_ctx(req);
+	struct skcipher_request *subreq;
+
+	subreq = &rctx->subreq;
+
+	while (!err && rctx->left) {
+		err = pre_crypt(req) ?:
+		      crypto_skcipher_decrypt(subreq) ?:
+		      post_crypt(req);
+
+		if (err == -EINPROGRESS ||
+		    (err == -EBUSY &&
+		     req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG))
+			return err;
+	}
 
-	blkcipher_walk_init(&w, dst, src, nbytes);
-	return crypt(desc, &w, ctx,
-		     crypto_cipher_alg(ctx->child)->cia_encrypt);
+	exit_crypt(req);
+	return err;
 }
 
-static int decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		   struct scatterlist *src, unsigned int nbytes)
+static void decrypt_done(struct crypto_async_request *areq, int err)
 {
-	struct priv *ctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk w;
+	struct skcipher_request *req = areq->data;
+	struct skcipher_request *subreq;
+	struct rctx *rctx;
+
+	rctx = skcipher_request_ctx(req);
+	subreq = &rctx->subreq;
+	subreq->base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG;
+
+	err = do_decrypt(req, err ?: post_crypt(req));
+	if (rctx->left)
+		return;
 
-	blkcipher_walk_init(&w, dst, src, nbytes);
-	return crypt(desc, &w, ctx,
-		     crypto_cipher_alg(ctx->child)->cia_decrypt);
+	skcipher_request_complete(req, err);
+}
+
+static int decrypt(struct skcipher_request *req)
+{
+	return do_decrypt(req, init_crypt(req, decrypt_done));
 }
 
 int lrw_crypt(struct blkcipher_desc *desc, struct scatterlist *sdst,
@@ -293,95 +480,161 @@ first:
 }
 EXPORT_SYMBOL_GPL(lrw_crypt);
 
-static int init_tfm(struct crypto_tfm *tfm)
+static int init_tfm(struct crypto_skcipher *tfm)
 {
-	struct crypto_cipher *cipher;
-	struct crypto_instance *inst = (void *)tfm->__crt_alg;
-	struct crypto_spawn *spawn = crypto_instance_ctx(inst);
-	struct priv *ctx = crypto_tfm_ctx(tfm);
-	u32 *flags = &tfm->crt_flags;
+	struct skcipher_instance *inst = skcipher_alg_instance(tfm);
+	struct crypto_skcipher_spawn *spawn = skcipher_instance_ctx(inst);
+	struct priv *ctx = crypto_skcipher_ctx(tfm);
+	struct crypto_skcipher *cipher;
 
-	cipher = crypto_spawn_cipher(spawn);
+	cipher = crypto_spawn_skcipher(spawn);
 	if (IS_ERR(cipher))
 		return PTR_ERR(cipher);
 
-	if (crypto_cipher_blocksize(cipher) != LRW_BLOCK_SIZE) {
-		*flags |= CRYPTO_TFM_RES_BAD_BLOCK_LEN;
-		crypto_free_cipher(cipher);
-		return -EINVAL;
-	}
-
 	ctx->child = cipher;
+
+	crypto_skcipher_set_reqsize(tfm, crypto_skcipher_reqsize(cipher) +
+					 sizeof(struct rctx));
+
 	return 0;
 }
 
-static void exit_tfm(struct crypto_tfm *tfm)
+static void exit_tfm(struct crypto_skcipher *tfm)
 {
-	struct priv *ctx = crypto_tfm_ctx(tfm);
+	struct priv *ctx = crypto_skcipher_ctx(tfm);
 
 	lrw_free_table(&ctx->table);
-	crypto_free_cipher(ctx->child);
+	crypto_free_skcipher(ctx->child);
+}
+
+static void free(struct skcipher_instance *inst)
+{
+	crypto_drop_skcipher(skcipher_instance_ctx(inst));
+	kfree(inst);
 }
 
-static struct crypto_instance *alloc(struct rtattr **tb)
+static int create(struct crypto_template *tmpl, struct rtattr **tb)
 {
-	struct crypto_instance *inst;
-	struct crypto_alg *alg;
+	struct crypto_skcipher_spawn *spawn;
+	struct skcipher_instance *inst;
+	struct crypto_attr_type *algt;
+	struct skcipher_alg *alg;
+	const char *cipher_name;
+	char ecb_name[CRYPTO_MAX_ALG_NAME];
 	int err;
 
-	err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_BLKCIPHER);
+	algt = crypto_get_attr_type(tb);
+	if (IS_ERR(algt))
+		return PTR_ERR(algt);
+
+	if ((algt->type ^ CRYPTO_ALG_TYPE_SKCIPHER) & algt->mask)
+		return -EINVAL;
+
+	cipher_name = crypto_attr_alg_name(tb[1]);
+	if (IS_ERR(cipher_name))
+		return PTR_ERR(cipher_name);
+
+	inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
+	if (!inst)
+		return -ENOMEM;
+
+	spawn = skcipher_instance_ctx(inst);
+
+	crypto_set_skcipher_spawn(spawn, skcipher_crypto_instance(inst));
+	err = crypto_grab_skcipher(spawn, cipher_name, 0,
+				   crypto_requires_sync(algt->type,
+							algt->mask));
+	if (err == -ENOENT) {
+		err = -ENAMETOOLONG;
+		if (snprintf(ecb_name, CRYPTO_MAX_ALG_NAME, "ecb(%s)",
+			     cipher_name) >= CRYPTO_MAX_ALG_NAME)
+			goto err_free_inst;
+
+		err = crypto_grab_skcipher(spawn, ecb_name, 0,
+					   crypto_requires_sync(algt->type,
+								algt->mask));
+	}
+
 	if (err)
-		return ERR_PTR(err);
+		goto err_free_inst;
 
-	alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_CIPHER,
-				  CRYPTO_ALG_TYPE_MASK);
-	if (IS_ERR(alg))
-		return ERR_CAST(alg);
+	alg = crypto_skcipher_spawn_alg(spawn);
 
-	inst = crypto_alloc_instance("lrw", alg);
-	if (IS_ERR(inst))
-		goto out_put_alg;
+	err = -EINVAL;
+	if (alg->base.cra_blocksize != LRW_BLOCK_SIZE)
+		goto err_drop_spawn;
 
-	inst->alg.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER;
-	inst->alg.cra_priority = alg->cra_priority;
-	inst->alg.cra_blocksize = alg->cra_blocksize;
+	if (crypto_skcipher_alg_ivsize(alg))
+		goto err_drop_spawn;
 
-	if (alg->cra_alignmask < 7) inst->alg.cra_alignmask = 7;
-	else inst->alg.cra_alignmask = alg->cra_alignmask;
-	inst->alg.cra_type = &crypto_blkcipher_type;
+	err = crypto_inst_setname(skcipher_crypto_instance(inst), "lrw",
+				  &alg->base);
+	if (err)
+		goto err_drop_spawn;
 
-	if (!(alg->cra_blocksize % 4))
-		inst->alg.cra_alignmask |= 3;
-	inst->alg.cra_blkcipher.ivsize = alg->cra_blocksize;
-	inst->alg.cra_blkcipher.min_keysize =
-		alg->cra_cipher.cia_min_keysize + alg->cra_blocksize;
-	inst->alg.cra_blkcipher.max_keysize =
-		alg->cra_cipher.cia_max_keysize + alg->cra_blocksize;
+	err = -EINVAL;
+	cipher_name = alg->base.cra_name;
 
-	inst->alg.cra_ctxsize = sizeof(struct priv);
+	/* Alas we screwed up the naming so we have to mangle the
+	 * cipher name.
+	 */
+	if (!strncmp(cipher_name, "ecb(", 4)) {
+		unsigned len;
 
-	inst->alg.cra_init = init_tfm;
-	inst->alg.cra_exit = exit_tfm;
+		len = strlcpy(ecb_name, cipher_name + 4, sizeof(ecb_name));
+		if (len < 2 || len >= sizeof(ecb_name))
+			goto err_drop_spawn;
 
-	inst->alg.cra_blkcipher.setkey = setkey;
-	inst->alg.cra_blkcipher.encrypt = encrypt;
-	inst->alg.cra_blkcipher.decrypt = decrypt;
+		if (ecb_name[len - 1] != ')')
+			goto err_drop_spawn;
 
-out_put_alg:
-	crypto_mod_put(alg);
-	return inst;
-}
+		ecb_name[len - 1] = 0;
 
-static void free(struct crypto_instance *inst)
-{
-	crypto_drop_spawn(crypto_instance_ctx(inst));
+		if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME,
+			     "lrw(%s)", ecb_name) >= CRYPTO_MAX_ALG_NAME)
+			return -ENAMETOOLONG;
+	}
+
+	inst->alg.base.cra_flags = alg->base.cra_flags & CRYPTO_ALG_ASYNC;
+	inst->alg.base.cra_priority = alg->base.cra_priority;
+	inst->alg.base.cra_blocksize = LRW_BLOCK_SIZE;
+	inst->alg.base.cra_alignmask = alg->base.cra_alignmask |
+				       (__alignof__(u64) - 1);
+
+	inst->alg.ivsize = LRW_BLOCK_SIZE;
+	inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(alg) +
+				LRW_BLOCK_SIZE;
+	inst->alg.max_keysize = crypto_skcipher_alg_max_keysize(alg) +
+				LRW_BLOCK_SIZE;
+
+	inst->alg.base.cra_ctxsize = sizeof(struct priv);
+
+	inst->alg.init = init_tfm;
+	inst->alg.exit = exit_tfm;
+
+	inst->alg.setkey = setkey;
+	inst->alg.encrypt = encrypt;
+	inst->alg.decrypt = decrypt;
+
+	inst->free = free;
+
+	err = skcipher_register_instance(tmpl, inst);
+	if (err)
+		goto err_drop_spawn;
+
+out:
+	return err;
+
+err_drop_spawn:
+	crypto_drop_skcipher(spawn);
+err_free_inst:
 	kfree(inst);
+	goto out;
 }
 
 static struct crypto_template crypto_tmpl = {
 	.name = "lrw",
-	.alloc = alloc,
-	.free = free,
+	.create = create,
 	.module = THIS_MODULE,
 };
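
Note: with the template converted to the skcipher interface, lrw instances are allocated and keyed like any other skcipher; per setkey() above, the last LRW_BLOCK_SIZE (16) bytes of the key are the tweak key and the rest goes to the underlying cipher. Illustrative call sequence only (generic skcipher API names; scatterlists, IV handling and error paths abbreviated):

	struct crypto_skcipher *tfm;
	struct skcipher_request *req;
	u8 key[32 + 16];	/* AES-256 key followed by the 16-byte tweak key */

	tfm = crypto_alloc_skcipher("lrw(aes)", 0, 0);
	crypto_skcipher_setkey(tfm, key, sizeof(key));

	req = skcipher_request_alloc(tfm, GFP_KERNEL);
	skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
	skcipher_request_set_crypt(req, sg, sg, len, iv);	/* 16-byte IV = tweak counter */
	err = crypto_skcipher_encrypt(req);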
 

+ 81 - 10
crypto/lz4.c

@@ -23,36 +23,53 @@
 #include <linux/crypto.h>
 #include <linux/vmalloc.h>
 #include <linux/lz4.h>
+#include <crypto/internal/scompress.h>
 
 struct lz4_ctx {
 	void *lz4_comp_mem;
 };
 
+static void *lz4_alloc_ctx(struct crypto_scomp *tfm)
+{
+	void *ctx;
+
+	ctx = vmalloc(LZ4_MEM_COMPRESS);
+	if (!ctx)
+		return ERR_PTR(-ENOMEM);
+
+	return ctx;
+}
+
 static int lz4_init(struct crypto_tfm *tfm)
 {
 	struct lz4_ctx *ctx = crypto_tfm_ctx(tfm);
 
-	ctx->lz4_comp_mem = vmalloc(LZ4_MEM_COMPRESS);
-	if (!ctx->lz4_comp_mem)
+	ctx->lz4_comp_mem = lz4_alloc_ctx(NULL);
+	if (IS_ERR(ctx->lz4_comp_mem))
 		return -ENOMEM;
 
 	return 0;
 }
 
+static void lz4_free_ctx(struct crypto_scomp *tfm, void *ctx)
+{
+	vfree(ctx);
+}
+
 static void lz4_exit(struct crypto_tfm *tfm)
 {
 	struct lz4_ctx *ctx = crypto_tfm_ctx(tfm);
-	vfree(ctx->lz4_comp_mem);
+
+	lz4_free_ctx(NULL, ctx->lz4_comp_mem);
 }
 
-static int lz4_compress_crypto(struct crypto_tfm *tfm, const u8 *src,
-			    unsigned int slen, u8 *dst, unsigned int *dlen)
+static int __lz4_compress_crypto(const u8 *src, unsigned int slen,
+				 u8 *dst, unsigned int *dlen, void *ctx)
 {
-	struct lz4_ctx *ctx = crypto_tfm_ctx(tfm);
 	size_t tmp_len = *dlen;
 	int err;
 
-	err = lz4_compress(src, slen, dst, &tmp_len, ctx->lz4_comp_mem);
+	err = lz4_compress(src, slen, dst, &tmp_len, ctx);
 
 	if (err < 0)
 		return -EINVAL;
@@ -61,8 +78,23 @@ static int lz4_compress_crypto(struct crypto_tfm *tfm, const u8 *src,
 	return 0;
 }
 
-static int lz4_decompress_crypto(struct crypto_tfm *tfm, const u8 *src,
-			      unsigned int slen, u8 *dst, unsigned int *dlen)
+static int lz4_scompress(struct crypto_scomp *tfm, const u8 *src,
+			 unsigned int slen, u8 *dst, unsigned int *dlen,
+			 void *ctx)
+{
+	return __lz4_compress_crypto(src, slen, dst, dlen, ctx);
+}
+
+static int lz4_compress_crypto(struct crypto_tfm *tfm, const u8 *src,
+			       unsigned int slen, u8 *dst, unsigned int *dlen)
+{
+	struct lz4_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	return __lz4_compress_crypto(src, slen, dst, dlen, ctx->lz4_comp_mem);
+}
+
+static int __lz4_decompress_crypto(const u8 *src, unsigned int slen,
+				   u8 *dst, unsigned int *dlen, void *ctx)
 {
 	int err;
 	size_t tmp_len = *dlen;
@@ -76,6 +108,20 @@ static int lz4_decompress_crypto(struct crypto_tfm *tfm, const u8 *src,
 	return err;
 }
 
+static int lz4_sdecompress(struct crypto_scomp *tfm, const u8 *src,
+			   unsigned int slen, u8 *dst, unsigned int *dlen,
+			   void *ctx)
+{
+	return __lz4_decompress_crypto(src, slen, dst, dlen, NULL);
+}
+
+static int lz4_decompress_crypto(struct crypto_tfm *tfm, const u8 *src,
+				 unsigned int slen, u8 *dst,
+				 unsigned int *dlen)
+{
+	return __lz4_decompress_crypto(src, slen, dst, dlen, NULL);
+}
+
 static struct crypto_alg alg_lz4 = {
 	.cra_name		= "lz4",
 	.cra_flags		= CRYPTO_ALG_TYPE_COMPRESS,
@@ -89,14 +135,39 @@ static struct crypto_alg alg_lz4 = {
 	.coa_decompress		= lz4_decompress_crypto } }
 };
 
+static struct scomp_alg scomp = {
+	.alloc_ctx		= lz4_alloc_ctx,
+	.free_ctx		= lz4_free_ctx,
+	.compress		= lz4_scompress,
+	.decompress		= lz4_sdecompress,
+	.base			= {
+		.cra_name	= "lz4",
+		.cra_driver_name = "lz4-scomp",
+		.cra_module	 = THIS_MODULE,
+	}
+};
+
 static int __init lz4_mod_init(void)
 {
-	return crypto_register_alg(&alg_lz4);
+	int ret;
+
+	ret = crypto_register_alg(&alg_lz4);
+	if (ret)
+		return ret;
+
+	ret = crypto_register_scomp(&scomp);
+	if (ret) {
+		crypto_unregister_alg(&alg_lz4);
+		return ret;
+	}
+
+	return ret;
 }
 
 static void __exit lz4_mod_fini(void)
 {
 	crypto_unregister_alg(&alg_lz4);
+	crypto_unregister_scomp(&scomp);
 }
 
 module_init(lz4_mod_init);

+ 82 - 10
crypto/lz4hc.c

@@ -22,37 +22,53 @@
 #include <linux/crypto.h>
 #include <linux/vmalloc.h>
 #include <linux/lz4.h>
+#include <crypto/internal/scompress.h>
 
 struct lz4hc_ctx {
 	void *lz4hc_comp_mem;
 };
 
+static void *lz4hc_alloc_ctx(struct crypto_scomp *tfm)
+{
+	void *ctx;
+
+	ctx = vmalloc(LZ4HC_MEM_COMPRESS);
+	if (!ctx)
+		return ERR_PTR(-ENOMEM);
+
+	return ctx;
+}
+
 static int lz4hc_init(struct crypto_tfm *tfm)
 {
 	struct lz4hc_ctx *ctx = crypto_tfm_ctx(tfm);
 
-	ctx->lz4hc_comp_mem = vmalloc(LZ4HC_MEM_COMPRESS);
-	if (!ctx->lz4hc_comp_mem)
+	ctx->lz4hc_comp_mem = lz4hc_alloc_ctx(NULL);
+	if (IS_ERR(ctx->lz4hc_comp_mem))
 		return -ENOMEM;
 
 	return 0;
 }
 
+static void lz4hc_free_ctx(struct crypto_scomp *tfm, void *ctx)
+{
+	vfree(ctx);
+}
+
 static void lz4hc_exit(struct crypto_tfm *tfm)
 {
 	struct lz4hc_ctx *ctx = crypto_tfm_ctx(tfm);
 
-	vfree(ctx->lz4hc_comp_mem);
+	lz4hc_free_ctx(NULL, ctx->lz4hc_comp_mem);
 }
 
-static int lz4hc_compress_crypto(struct crypto_tfm *tfm, const u8 *src,
-			    unsigned int slen, u8 *dst, unsigned int *dlen)
+static int __lz4hc_compress_crypto(const u8 *src, unsigned int slen,
+				   u8 *dst, unsigned int *dlen, void *ctx)
 {
-	struct lz4hc_ctx *ctx = crypto_tfm_ctx(tfm);
 	size_t tmp_len = *dlen;
 	int err;
 
-	err = lz4hc_compress(src, slen, dst, &tmp_len, ctx->lz4hc_comp_mem);
+	err = lz4hc_compress(src, slen, dst, &tmp_len, ctx);
 
 	if (err < 0)
 		return -EINVAL;
@@ -61,8 +77,25 @@ static int lz4hc_compress_crypto(struct crypto_tfm *tfm, const u8 *src,
 	return 0;
 }
 
-static int lz4hc_decompress_crypto(struct crypto_tfm *tfm, const u8 *src,
-			      unsigned int slen, u8 *dst, unsigned int *dlen)
+static int lz4hc_scompress(struct crypto_scomp *tfm, const u8 *src,
+			   unsigned int slen, u8 *dst, unsigned int *dlen,
+			   void *ctx)
+{
+	return __lz4hc_compress_crypto(src, slen, dst, dlen, ctx);
+}
+
+static int lz4hc_compress_crypto(struct crypto_tfm *tfm, const u8 *src,
+				 unsigned int slen, u8 *dst,
+				 unsigned int *dlen)
+{
+	struct lz4hc_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	return __lz4hc_compress_crypto(src, slen, dst, dlen,
+					ctx->lz4hc_comp_mem);
+}
+
+static int __lz4hc_decompress_crypto(const u8 *src, unsigned int slen,
+				     u8 *dst, unsigned int *dlen, void *ctx)
 {
 	int err;
 	size_t tmp_len = *dlen;
@@ -76,6 +109,20 @@ static int lz4hc_decompress_crypto(struct crypto_tfm *tfm, const u8 *src,
 	return err;
 }
 
+static int lz4hc_sdecompress(struct crypto_scomp *tfm, const u8 *src,
+			     unsigned int slen, u8 *dst, unsigned int *dlen,
+			     void *ctx)
+{
+	return __lz4hc_decompress_crypto(src, slen, dst, dlen, NULL);
+}
+
+static int lz4hc_decompress_crypto(struct crypto_tfm *tfm, const u8 *src,
+				   unsigned int slen, u8 *dst,
+				   unsigned int *dlen)
+{
+	return __lz4hc_decompress_crypto(src, slen, dst, dlen, NULL);
+}
+
 static struct crypto_alg alg_lz4hc = {
 	.cra_name		= "lz4hc",
 	.cra_flags		= CRYPTO_ALG_TYPE_COMPRESS,
@@ -89,14 +136,39 @@ static struct crypto_alg alg_lz4hc = {
 	.coa_decompress		= lz4hc_decompress_crypto } }
 };
 
+static struct scomp_alg scomp = {
+	.alloc_ctx		= lz4hc_alloc_ctx,
+	.free_ctx		= lz4hc_free_ctx,
+	.compress		= lz4hc_scompress,
+	.decompress		= lz4hc_sdecompress,
+	.base			= {
+		.cra_name	= "lz4hc",
+		.cra_driver_name = "lz4hc-scomp",
+		.cra_module	 = THIS_MODULE,
+	}
+};
+
 static int __init lz4hc_mod_init(void)
 {
-	return crypto_register_alg(&alg_lz4hc);
+	int ret;
+
+	ret = crypto_register_alg(&alg_lz4hc);
+	if (ret)
+		return ret;
+
+	ret = crypto_register_scomp(&scomp);
+	if (ret) {
+		crypto_unregister_alg(&alg_lz4hc);
+		return ret;
+	}
+
+	return ret;
 }
 
 static void __exit lz4hc_mod_fini(void)
 {
 	crypto_unregister_alg(&alg_lz4hc);
+	crypto_unregister_scomp(&scomp);
 }
 
 module_init(lz4hc_mod_init);

+ 82 - 15
crypto/lzo.c

@@ -22,40 +22,55 @@
 #include <linux/vmalloc.h>
 #include <linux/mm.h>
 #include <linux/lzo.h>
+#include <crypto/internal/scompress.h>
 
 struct lzo_ctx {
 	void *lzo_comp_mem;
 };
 
+static void *lzo_alloc_ctx(struct crypto_scomp *tfm)
+{
+	void *ctx;
+
+	ctx = kmalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL | __GFP_NOWARN);
+	if (!ctx)
+		ctx = vmalloc(LZO1X_MEM_COMPRESS);
+	if (!ctx)
+		return ERR_PTR(-ENOMEM);
+
+	return ctx;
+}
+
 static int lzo_init(struct crypto_tfm *tfm)
 {
 	struct lzo_ctx *ctx = crypto_tfm_ctx(tfm);
 
-	ctx->lzo_comp_mem = kmalloc(LZO1X_MEM_COMPRESS,
-				    GFP_KERNEL | __GFP_NOWARN);
-	if (!ctx->lzo_comp_mem)
-		ctx->lzo_comp_mem = vmalloc(LZO1X_MEM_COMPRESS);
-	if (!ctx->lzo_comp_mem)
+	ctx->lzo_comp_mem = lzo_alloc_ctx(NULL);
+	if (IS_ERR(ctx->lzo_comp_mem))
 		return -ENOMEM;
 
 	return 0;
 }
 
+static void lzo_free_ctx(struct crypto_scomp *tfm, void *ctx)
+{
+	kvfree(ctx);
+}
+
 static void lzo_exit(struct crypto_tfm *tfm)
 {
 	struct lzo_ctx *ctx = crypto_tfm_ctx(tfm);
 
-	kvfree(ctx->lzo_comp_mem);
+	lzo_free_ctx(NULL, ctx->lzo_comp_mem);
 }
 
-static int lzo_compress(struct crypto_tfm *tfm, const u8 *src,
-			    unsigned int slen, u8 *dst, unsigned int *dlen)
+static int __lzo_compress(const u8 *src, unsigned int slen,
+			  u8 *dst, unsigned int *dlen, void *ctx)
 {
-	struct lzo_ctx *ctx = crypto_tfm_ctx(tfm);
 	size_t tmp_len = *dlen; /* size_t(ulong) <-> uint on 64 bit */
 	int err;
 
-	err = lzo1x_1_compress(src, slen, dst, &tmp_len, ctx->lzo_comp_mem);
+	err = lzo1x_1_compress(src, slen, dst, &tmp_len, ctx);
 
 	if (err != LZO_E_OK)
 		return -EINVAL;
@@ -64,8 +79,23 @@ static int lzo_compress(struct crypto_tfm *tfm, const u8 *src,
 	return 0;
 }
 
-static int lzo_decompress(struct crypto_tfm *tfm, const u8 *src,
-			      unsigned int slen, u8 *dst, unsigned int *dlen)
+static int lzo_compress(struct crypto_tfm *tfm, const u8 *src,
+			unsigned int slen, u8 *dst, unsigned int *dlen)
+{
+	struct lzo_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	return __lzo_compress(src, slen, dst, dlen, ctx->lzo_comp_mem);
+}
+
+static int lzo_scompress(struct crypto_scomp *tfm, const u8 *src,
+			 unsigned int slen, u8 *dst, unsigned int *dlen,
+			 void *ctx)
+{
+	return __lzo_compress(src, slen, dst, dlen, ctx);
+}
+
+static int __lzo_decompress(const u8 *src, unsigned int slen,
+			    u8 *dst, unsigned int *dlen)
 {
 	int err;
 	size_t tmp_len = *dlen; /* size_t(ulong) <-> uint on 64 bit */
@@ -77,7 +107,19 @@ static int lzo_decompress(struct crypto_tfm *tfm, const u8 *src,
 
 	*dlen = tmp_len;
 	return 0;
+}
 
+static int lzo_decompress(struct crypto_tfm *tfm, const u8 *src,
+			  unsigned int slen, u8 *dst, unsigned int *dlen)
+{
+	return __lzo_decompress(src, slen, dst, dlen);
+}
+
+static int lzo_sdecompress(struct crypto_scomp *tfm, const u8 *src,
+			   unsigned int slen, u8 *dst, unsigned int *dlen,
+			   void *ctx)
+{
+	return __lzo_decompress(src, slen, dst, dlen);
 }
 
 static struct crypto_alg alg = {
@@ -88,18 +130,43 @@ static struct crypto_alg alg = {
 	.cra_init		= lzo_init,
 	.cra_exit		= lzo_exit,
 	.cra_u			= { .compress = {
-	.coa_compress 		= lzo_compress,
-	.coa_decompress  	= lzo_decompress } }
+	.coa_compress		= lzo_compress,
+	.coa_decompress		= lzo_decompress } }
+};
+
+static struct scomp_alg scomp = {
+	.alloc_ctx		= lzo_alloc_ctx,
+	.free_ctx		= lzo_free_ctx,
+	.compress		= lzo_scompress,
+	.decompress		= lzo_sdecompress,
+	.base			= {
+		.cra_name	= "lzo",
+		.cra_driver_name = "lzo-scomp",
+		.cra_module	 = THIS_MODULE,
+	}
 };
 
 static int __init lzo_mod_init(void)
 {
-	return crypto_register_alg(&alg);
+	int ret;
+
+	ret = crypto_register_alg(&alg);
+	if (ret)
+		return ret;
+
+	ret = crypto_register_scomp(&scomp);
+	if (ret) {
+		crypto_unregister_alg(&alg);
+		return ret;
+	}
+
+	return ret;
 }
 
 static void __exit lzo_mod_fini(void)
 {
 	crypto_unregister_alg(&alg);
+	crypto_unregister_scomp(&scomp);
 }
 
 module_init(lzo_mod_init);

+ 109 - 92
crypto/pcbc.c

@@ -14,40 +14,37 @@
  *
  */
 
-#include <crypto/algapi.h>
+#include <crypto/internal/skcipher.h>
 #include <linux/err.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/scatterlist.h>
 #include <linux/slab.h>
 
 struct crypto_pcbc_ctx {
 	struct crypto_cipher *child;
 };
 
-static int crypto_pcbc_setkey(struct crypto_tfm *parent, const u8 *key,
+static int crypto_pcbc_setkey(struct crypto_skcipher *parent, const u8 *key,
 			      unsigned int keylen)
 {
-	struct crypto_pcbc_ctx *ctx = crypto_tfm_ctx(parent);
+	struct crypto_pcbc_ctx *ctx = crypto_skcipher_ctx(parent);
 	struct crypto_cipher *child = ctx->child;
 	int err;
 
 	crypto_cipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
-	crypto_cipher_set_flags(child, crypto_tfm_get_flags(parent) &
-				CRYPTO_TFM_REQ_MASK);
+	crypto_cipher_set_flags(child, crypto_skcipher_get_flags(parent) &
+				       CRYPTO_TFM_REQ_MASK);
 	err = crypto_cipher_setkey(child, key, keylen);
-	crypto_tfm_set_flags(parent, crypto_cipher_get_flags(child) &
-			     CRYPTO_TFM_RES_MASK);
+	crypto_skcipher_set_flags(parent, crypto_cipher_get_flags(child) &
+					  CRYPTO_TFM_RES_MASK);
 	return err;
 }
 
-static int crypto_pcbc_encrypt_segment(struct blkcipher_desc *desc,
-				       struct blkcipher_walk *walk,
+static int crypto_pcbc_encrypt_segment(struct skcipher_request *req,
+				       struct skcipher_walk *walk,
 				       struct crypto_cipher *tfm)
 {
-	void (*fn)(struct crypto_tfm *, u8 *, const u8 *) =
-		crypto_cipher_alg(tfm)->cia_encrypt;
 	int bsize = crypto_cipher_blocksize(tfm);
 	unsigned int nbytes = walk->nbytes;
 	u8 *src = walk->src.virt.addr;
@@ -56,7 +53,7 @@ static int crypto_pcbc_encrypt_segment(struct blkcipher_desc *desc,
 
 	do {
 		crypto_xor(iv, src, bsize);
-		fn(crypto_cipher_tfm(tfm), dst, iv);
+		crypto_cipher_encrypt_one(tfm, dst, iv);
 		memcpy(iv, dst, bsize);
 		crypto_xor(iv, src, bsize);
 
@@ -67,12 +64,10 @@ static int crypto_pcbc_encrypt_segment(struct blkcipher_desc *desc,
 	return nbytes;
 }
 
-static int crypto_pcbc_encrypt_inplace(struct blkcipher_desc *desc,
-				       struct blkcipher_walk *walk,
+static int crypto_pcbc_encrypt_inplace(struct skcipher_request *req,
+				       struct skcipher_walk *walk,
 				       struct crypto_cipher *tfm)
 {
-	void (*fn)(struct crypto_tfm *, u8 *, const u8 *) =
-		crypto_cipher_alg(tfm)->cia_encrypt;
 	int bsize = crypto_cipher_blocksize(tfm);
 	unsigned int nbytes = walk->nbytes;
 	u8 *src = walk->src.virt.addr;
@@ -82,7 +77,7 @@ static int crypto_pcbc_encrypt_inplace(struct blkcipher_desc *desc,
 	do {
 		memcpy(tmpbuf, src, bsize);
 		crypto_xor(iv, src, bsize);
-		fn(crypto_cipher_tfm(tfm), src, iv);
+		crypto_cipher_encrypt_one(tfm, src, iv);
 		memcpy(iv, tmpbuf, bsize);
 		crypto_xor(iv, src, bsize);
 
@@ -94,38 +89,34 @@ static int crypto_pcbc_encrypt_inplace(struct blkcipher_desc *desc,
 	return nbytes;
 }
 
-static int crypto_pcbc_encrypt(struct blkcipher_desc *desc,
-			       struct scatterlist *dst, struct scatterlist *src,
-			       unsigned int nbytes)
+static int crypto_pcbc_encrypt(struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
-	struct crypto_blkcipher *tfm = desc->tfm;
-	struct crypto_pcbc_ctx *ctx = crypto_blkcipher_ctx(tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_pcbc_ctx *ctx = crypto_skcipher_ctx(tfm);
 	struct crypto_cipher *child = ctx->child;
+	struct skcipher_walk walk;
+	unsigned int nbytes;
 	int err;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	err = skcipher_walk_virt(&walk, req, false);
 
 	while ((nbytes = walk.nbytes)) {
 		if (walk.src.virt.addr == walk.dst.virt.addr)
-			nbytes = crypto_pcbc_encrypt_inplace(desc, &walk,
+			nbytes = crypto_pcbc_encrypt_inplace(req, &walk,
 							     child);
 		else
-			nbytes = crypto_pcbc_encrypt_segment(desc, &walk,
+			nbytes = crypto_pcbc_encrypt_segment(req, &walk,
 							     child);
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 
 	return err;
 }
 
-static int crypto_pcbc_decrypt_segment(struct blkcipher_desc *desc,
-				       struct blkcipher_walk *walk,
+static int crypto_pcbc_decrypt_segment(struct skcipher_request *req,
+				       struct skcipher_walk *walk,
 				       struct crypto_cipher *tfm)
 {
-	void (*fn)(struct crypto_tfm *, u8 *, const u8 *) =
-		crypto_cipher_alg(tfm)->cia_decrypt;
 	int bsize = crypto_cipher_blocksize(tfm);
 	unsigned int nbytes = walk->nbytes;
 	u8 *src = walk->src.virt.addr;
@@ -133,7 +124,7 @@ static int crypto_pcbc_decrypt_segment(struct blkcipher_desc *desc,
 	u8 *iv = walk->iv;
 
 	do {
-		fn(crypto_cipher_tfm(tfm), dst, src);
+		crypto_cipher_decrypt_one(tfm, dst, src);
 		crypto_xor(dst, iv, bsize);
 		memcpy(iv, src, bsize);
 		crypto_xor(iv, dst, bsize);
@@ -147,21 +138,19 @@ static int crypto_pcbc_decrypt_segment(struct blkcipher_desc *desc,
 	return nbytes;
 }
 
-static int crypto_pcbc_decrypt_inplace(struct blkcipher_desc *desc,
-				       struct blkcipher_walk *walk,
+static int crypto_pcbc_decrypt_inplace(struct skcipher_request *req,
+				       struct skcipher_walk *walk,
 				       struct crypto_cipher *tfm)
 {
-	void (*fn)(struct crypto_tfm *, u8 *, const u8 *) =
-		crypto_cipher_alg(tfm)->cia_decrypt;
 	int bsize = crypto_cipher_blocksize(tfm);
 	unsigned int nbytes = walk->nbytes;
 	u8 *src = walk->src.virt.addr;
 	u8 *iv = walk->iv;
-	u8 tmpbuf[bsize];
+	u8 tmpbuf[bsize] __attribute__ ((aligned(__alignof__(u32))));
 
 	do {
 		memcpy(tmpbuf, src, bsize);
-		fn(crypto_cipher_tfm(tfm), src, src);
+		crypto_cipher_decrypt_one(tfm, src, src);
 		crypto_xor(src, iv, bsize);
 		memcpy(iv, tmpbuf, bsize);
 		crypto_xor(iv, src, bsize);
@@ -174,37 +163,35 @@ static int crypto_pcbc_decrypt_inplace(struct blkcipher_desc *desc,
 	return nbytes;
 }
 
-static int crypto_pcbc_decrypt(struct blkcipher_desc *desc,
-			       struct scatterlist *dst, struct scatterlist *src,
-			       unsigned int nbytes)
+static int crypto_pcbc_decrypt(struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
-	struct crypto_blkcipher *tfm = desc->tfm;
-	struct crypto_pcbc_ctx *ctx = crypto_blkcipher_ctx(tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_pcbc_ctx *ctx = crypto_skcipher_ctx(tfm);
 	struct crypto_cipher *child = ctx->child;
+	struct skcipher_walk walk;
+	unsigned int nbytes;
 	int err;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	err = skcipher_walk_virt(&walk, req, false);
 
 	while ((nbytes = walk.nbytes)) {
 		if (walk.src.virt.addr == walk.dst.virt.addr)
-			nbytes = crypto_pcbc_decrypt_inplace(desc, &walk,
+			nbytes = crypto_pcbc_decrypt_inplace(req, &walk,
 							     child);
 		else
-			nbytes = crypto_pcbc_decrypt_segment(desc, &walk,
+			nbytes = crypto_pcbc_decrypt_segment(req, &walk,
 							     child);
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 
 	return err;
 }
 
-static int crypto_pcbc_init_tfm(struct crypto_tfm *tfm)
+static int crypto_pcbc_init_tfm(struct crypto_skcipher *tfm)
 {
-	struct crypto_instance *inst = (void *)tfm->__crt_alg;
-	struct crypto_spawn *spawn = crypto_instance_ctx(inst);
-	struct crypto_pcbc_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct skcipher_instance *inst = skcipher_alg_instance(tfm);
+	struct crypto_spawn *spawn = skcipher_instance_ctx(inst);
+	struct crypto_pcbc_ctx *ctx = crypto_skcipher_ctx(tfm);
 	struct crypto_cipher *cipher;
 
 	cipher = crypto_spawn_cipher(spawn);
@@ -215,68 +202,98 @@ static int crypto_pcbc_init_tfm(struct crypto_tfm *tfm)
 	return 0;
 }
 
-static void crypto_pcbc_exit_tfm(struct crypto_tfm *tfm)
+static void crypto_pcbc_exit_tfm(struct crypto_skcipher *tfm)
 {
-	struct crypto_pcbc_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct crypto_pcbc_ctx *ctx = crypto_skcipher_ctx(tfm);
+
 	crypto_free_cipher(ctx->child);
 }
 
-static struct crypto_instance *crypto_pcbc_alloc(struct rtattr **tb)
+static void crypto_pcbc_free(struct skcipher_instance *inst)
+{
+	crypto_drop_skcipher(skcipher_instance_ctx(inst));
+	kfree(inst);
+}
+
+static int crypto_pcbc_create(struct crypto_template *tmpl, struct rtattr **tb)
 {
-	struct crypto_instance *inst;
+	struct skcipher_instance *inst;
+	struct crypto_attr_type *algt;
+	struct crypto_spawn *spawn;
 	struct crypto_alg *alg;
 	int err;
 
-	err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_BLKCIPHER);
-	if (err)
-		return ERR_PTR(err);
+	algt = crypto_get_attr_type(tb);
+	if (IS_ERR(algt))
+		return PTR_ERR(algt);
+
+	if (((algt->type ^ CRYPTO_ALG_TYPE_SKCIPHER) & algt->mask) &
+	    ~CRYPTO_ALG_INTERNAL)
+		return -EINVAL;
 
-	alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_CIPHER,
-				  CRYPTO_ALG_TYPE_MASK);
+	inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
+	if (!inst)
+		return -ENOMEM;
+
+	alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_CIPHER |
+				      (algt->type & CRYPTO_ALG_INTERNAL),
+				  CRYPTO_ALG_TYPE_MASK |
+				  (algt->mask & CRYPTO_ALG_INTERNAL));
+	err = PTR_ERR(alg);
 	if (IS_ERR(alg))
-		return ERR_CAST(alg);
+		goto err_free_inst;
+
+	spawn = skcipher_instance_ctx(inst);
+	err = crypto_init_spawn(spawn, alg, skcipher_crypto_instance(inst),
+				CRYPTO_ALG_TYPE_MASK);
+	crypto_mod_put(alg);
+	if (err)
+		goto err_free_inst;
 
-	inst = crypto_alloc_instance("pcbc", alg);
-	if (IS_ERR(inst))
-		goto out_put_alg;
+	err = crypto_inst_setname(skcipher_crypto_instance(inst), "pcbc", alg);
+	if (err)
+		goto err_drop_spawn;
 
-	inst->alg.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER;
-	inst->alg.cra_priority = alg->cra_priority;
-	inst->alg.cra_blocksize = alg->cra_blocksize;
-	inst->alg.cra_alignmask = alg->cra_alignmask;
-	inst->alg.cra_type = &crypto_blkcipher_type;
+	inst->alg.base.cra_flags = alg->cra_flags & CRYPTO_ALG_INTERNAL;
+	inst->alg.base.cra_priority = alg->cra_priority;
+	inst->alg.base.cra_blocksize = alg->cra_blocksize;
+	inst->alg.base.cra_alignmask = alg->cra_alignmask;
 
 	/* We access the data as u32s when xoring. */
-	inst->alg.cra_alignmask |= __alignof__(u32) - 1;
+	inst->alg.base.cra_alignmask |= __alignof__(u32) - 1;
 
-	inst->alg.cra_blkcipher.ivsize = alg->cra_blocksize;
-	inst->alg.cra_blkcipher.min_keysize = alg->cra_cipher.cia_min_keysize;
-	inst->alg.cra_blkcipher.max_keysize = alg->cra_cipher.cia_max_keysize;
+	inst->alg.ivsize = alg->cra_blocksize;
+	inst->alg.min_keysize = alg->cra_cipher.cia_min_keysize;
+	inst->alg.max_keysize = alg->cra_cipher.cia_max_keysize;
 
-	inst->alg.cra_ctxsize = sizeof(struct crypto_pcbc_ctx);
+	inst->alg.base.cra_ctxsize = sizeof(struct crypto_pcbc_ctx);
 
-	inst->alg.cra_init = crypto_pcbc_init_tfm;
-	inst->alg.cra_exit = crypto_pcbc_exit_tfm;
+	inst->alg.init = crypto_pcbc_init_tfm;
+	inst->alg.exit = crypto_pcbc_exit_tfm;
 
-	inst->alg.cra_blkcipher.setkey = crypto_pcbc_setkey;
-	inst->alg.cra_blkcipher.encrypt = crypto_pcbc_encrypt;
-	inst->alg.cra_blkcipher.decrypt = crypto_pcbc_decrypt;
+	inst->alg.setkey = crypto_pcbc_setkey;
+	inst->alg.encrypt = crypto_pcbc_encrypt;
+	inst->alg.decrypt = crypto_pcbc_decrypt;
 
-out_put_alg:
-	crypto_mod_put(alg);
-	return inst;
-}
+	inst->free = crypto_pcbc_free;
 
-static void crypto_pcbc_free(struct crypto_instance *inst)
-{
-	crypto_drop_spawn(crypto_instance_ctx(inst));
+	err = skcipher_register_instance(tmpl, inst);
+	if (err)
+		goto err_drop_spawn;
+
+out:
+	return err;
+
+err_drop_spawn:
+	crypto_drop_spawn(spawn);
+err_free_inst:
 	kfree(inst);
+	goto out;
 }
 
 static struct crypto_template crypto_pcbc_tmpl = {
 	.name = "pcbc",
-	.alloc = crypto_pcbc_alloc,
-	.free = crypto_pcbc_free,
+	.create = crypto_pcbc_create,
 	.module = THIS_MODULE,
 };
 

+ 15 - 19
crypto/poly1305_generic.c

@@ -17,6 +17,7 @@
 #include <linux/crypto.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <asm/unaligned.h>
 
 static inline u64 mlt(u64 a, u64 b)
 {
@@ -33,11 +34,6 @@ static inline u32 and(u32 v, u32 mask)
 	return v & mask;
 }
 
-static inline u32 le32_to_cpuvp(const void *p)
-{
-	return le32_to_cpup(p);
-}
-
 int crypto_poly1305_init(struct shash_desc *desc)
 {
 	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
@@ -65,19 +61,19 @@ EXPORT_SYMBOL_GPL(crypto_poly1305_setkey);
 static void poly1305_setrkey(struct poly1305_desc_ctx *dctx, const u8 *key)
 {
 	/* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
-	dctx->r[0] = (le32_to_cpuvp(key +  0) >> 0) & 0x3ffffff;
-	dctx->r[1] = (le32_to_cpuvp(key +  3) >> 2) & 0x3ffff03;
-	dctx->r[2] = (le32_to_cpuvp(key +  6) >> 4) & 0x3ffc0ff;
-	dctx->r[3] = (le32_to_cpuvp(key +  9) >> 6) & 0x3f03fff;
-	dctx->r[4] = (le32_to_cpuvp(key + 12) >> 8) & 0x00fffff;
+	dctx->r[0] = (get_unaligned_le32(key +  0) >> 0) & 0x3ffffff;
+	dctx->r[1] = (get_unaligned_le32(key +  3) >> 2) & 0x3ffff03;
+	dctx->r[2] = (get_unaligned_le32(key +  6) >> 4) & 0x3ffc0ff;
+	dctx->r[3] = (get_unaligned_le32(key +  9) >> 6) & 0x3f03fff;
+	dctx->r[4] = (get_unaligned_le32(key + 12) >> 8) & 0x00fffff;
 }
 
 static void poly1305_setskey(struct poly1305_desc_ctx *dctx, const u8 *key)
 {
-	dctx->s[0] = le32_to_cpuvp(key +  0);
-	dctx->s[1] = le32_to_cpuvp(key +  4);
-	dctx->s[2] = le32_to_cpuvp(key +  8);
-	dctx->s[3] = le32_to_cpuvp(key + 12);
+	dctx->s[0] = get_unaligned_le32(key +  0);
+	dctx->s[1] = get_unaligned_le32(key +  4);
+	dctx->s[2] = get_unaligned_le32(key +  8);
+	dctx->s[3] = get_unaligned_le32(key + 12);
 }
 
 unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx,
@@ -137,11 +133,11 @@ static unsigned int poly1305_blocks(struct poly1305_desc_ctx *dctx,
 	while (likely(srclen >= POLY1305_BLOCK_SIZE)) {
 
 		/* h += m[i] */
-		h0 += (le32_to_cpuvp(src +  0) >> 0) & 0x3ffffff;
-		h1 += (le32_to_cpuvp(src +  3) >> 2) & 0x3ffffff;
-		h2 += (le32_to_cpuvp(src +  6) >> 4) & 0x3ffffff;
-		h3 += (le32_to_cpuvp(src +  9) >> 6) & 0x3ffffff;
-		h4 += (le32_to_cpuvp(src + 12) >> 8) | hibit;
+		h0 += (get_unaligned_le32(src +  0) >> 0) & 0x3ffffff;
+		h1 += (get_unaligned_le32(src +  3) >> 2) & 0x3ffffff;
+		h2 += (get_unaligned_le32(src +  6) >> 4) & 0x3ffffff;
+		h3 += (get_unaligned_le32(src +  9) >> 6) & 0x3ffffff;
+		h4 += (get_unaligned_le32(src + 12) >> 8) | hibit;
 
 		/* h *= r */
 		d0 = mlt(h0, r0) + mlt(h1, s4) + mlt(h2, s3) +
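
Note: the switch to get_unaligned_le32() matters because the key and message pointers come straight from the caller and the loads land at offsets of 3, 6, 9 and 12 bytes, so they are misaligned by construction; a cast-and-dereference le32_to_cpup() is only safe on architectures that tolerate unaligned loads. Minimal sketch of the difference:

	#include <asm/unaligned.h>

	/* may fault on strict-alignment CPUs when src + 3 is not 4-byte aligned */
	u32 risky = le32_to_cpup((const __le32 *)(src + 3));

	/* byte-wise (or hardware-assisted) load, safe at any offset */
	u32 safe = get_unaligned_le32(src + 3);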

+ 356 - 0
crypto/scompress.c

@@ -0,0 +1,356 @@
+/*
+ * Synchronous Compression operations
+ *
+ * Copyright 2015 LG Electronics Inc.
+ * Copyright (c) 2016, Intel Corporation
+ * Author: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/crypto.h>
+#include <linux/vmalloc.h>
+#include <crypto/algapi.h>
+#include <linux/cryptouser.h>
+#include <net/netlink.h>
+#include <linux/scatterlist.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/internal/acompress.h>
+#include <crypto/internal/scompress.h>
+#include "internal.h"
+
+static const struct crypto_type crypto_scomp_type;
+static void * __percpu *scomp_src_scratches;
+static void * __percpu *scomp_dst_scratches;
+static int scomp_scratch_users;
+static DEFINE_MUTEX(scomp_lock);
+
+#ifdef CONFIG_NET
+static int crypto_scomp_report(struct sk_buff *skb, struct crypto_alg *alg)
+{
+	struct crypto_report_comp rscomp;
+
+	strncpy(rscomp.type, "scomp", sizeof(rscomp.type));
+
+	if (nla_put(skb, CRYPTOCFGA_REPORT_COMPRESS,
+		    sizeof(struct crypto_report_comp), &rscomp))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+#else
+static int crypto_scomp_report(struct sk_buff *skb, struct crypto_alg *alg)
+{
+	return -ENOSYS;
+}
+#endif
+
+static void crypto_scomp_show(struct seq_file *m, struct crypto_alg *alg)
+	__attribute__ ((unused));
+
+static void crypto_scomp_show(struct seq_file *m, struct crypto_alg *alg)
+{
+	seq_puts(m, "type         : scomp\n");
+}
+
+static int crypto_scomp_init_tfm(struct crypto_tfm *tfm)
+{
+	return 0;
+}
+
+static void crypto_scomp_free_scratches(void * __percpu *scratches)
+{
+	int i;
+
+	if (!scratches)
+		return;
+
+	for_each_possible_cpu(i)
+		vfree(*per_cpu_ptr(scratches, i));
+
+	free_percpu(scratches);
+}
+
+static void * __percpu *crypto_scomp_alloc_scratches(void)
+{
+	void * __percpu *scratches;
+	int i;
+
+	scratches = alloc_percpu(void *);
+	if (!scratches)
+		return NULL;
+
+	for_each_possible_cpu(i) {
+		void *scratch;
+
+		scratch = vmalloc_node(SCOMP_SCRATCH_SIZE, cpu_to_node(i));
+		if (!scratch)
+			goto error;
+		*per_cpu_ptr(scratches, i) = scratch;
+	}
+
+	return scratches;
+
+error:
+	crypto_scomp_free_scratches(scratches);
+	return NULL;
+}
+
+static void crypto_scomp_free_all_scratches(void)
+{
+	if (!--scomp_scratch_users) {
+		crypto_scomp_free_scratches(scomp_src_scratches);
+		crypto_scomp_free_scratches(scomp_dst_scratches);
+		scomp_src_scratches = NULL;
+		scomp_dst_scratches = NULL;
+	}
+}
+
+static int crypto_scomp_alloc_all_scratches(void)
+{
+	if (!scomp_scratch_users++) {
+		scomp_src_scratches = crypto_scomp_alloc_scratches();
+		if (!scomp_src_scratches)
+			return -ENOMEM;
+		scomp_dst_scratches = crypto_scomp_alloc_scratches();
+		if (!scomp_dst_scratches)
+			return -ENOMEM;
+	}
+	return 0;
+}
+
+static void crypto_scomp_sg_free(struct scatterlist *sgl)
+{
+	int i, n;
+	struct page *page;
+
+	if (!sgl)
+		return;
+
+	n = sg_nents(sgl);
+	for_each_sg(sgl, sgl, n, i) {
+		page = sg_page(sgl);
+		if (page)
+			__free_page(page);
+	}
+
+	kfree(sgl);
+}
+
+static struct scatterlist *crypto_scomp_sg_alloc(size_t size, gfp_t gfp)
+{
+	struct scatterlist *sgl;
+	struct page *page;
+	int i, n;
+
+	n = ((size - 1) >> PAGE_SHIFT) + 1;
+
+	sgl = kmalloc_array(n, sizeof(struct scatterlist), gfp);
+	if (!sgl)
+		return NULL;
+
+	sg_init_table(sgl, n);
+
+	for (i = 0; i < n; i++) {
+		page = alloc_page(gfp);
+		if (!page)
+			goto err;
+		sg_set_page(sgl + i, page, PAGE_SIZE, 0);
+	}
+
+	return sgl;
+
+err:
+	sg_mark_end(sgl + i);
+	crypto_scomp_sg_free(sgl);
+	return NULL;
+}
+
+static int scomp_acomp_comp_decomp(struct acomp_req *req, int dir)
+{
+	struct crypto_acomp *tfm = crypto_acomp_reqtfm(req);
+	void **tfm_ctx = acomp_tfm_ctx(tfm);
+	struct crypto_scomp *scomp = *tfm_ctx;
+	void **ctx = acomp_request_ctx(req);
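+	/* pin this CPU: the scratch pointers fetched below are per-CPU */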
+	const int cpu = get_cpu();
+	u8 *scratch_src = *per_cpu_ptr(scomp_src_scratches, cpu);
+	u8 *scratch_dst = *per_cpu_ptr(scomp_dst_scratches, cpu);
+	int ret;
+
+	if (!req->src || !req->slen || req->slen > SCOMP_SCRATCH_SIZE) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (req->dst && !req->dlen) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (!req->dlen || req->dlen > SCOMP_SCRATCH_SIZE)
+		req->dlen = SCOMP_SCRATCH_SIZE;
+
+	scatterwalk_map_and_copy(scratch_src, req->src, 0, req->slen, 0);
+	if (dir)
+		ret = crypto_scomp_compress(scomp, scratch_src, req->slen,
+					    scratch_dst, &req->dlen, *ctx);
+	else
+		ret = crypto_scomp_decompress(scomp, scratch_src, req->slen,
+					      scratch_dst, &req->dlen, *ctx);
+	if (!ret) {
+		if (!req->dst) {
+			req->dst = crypto_scomp_sg_alloc(req->dlen,
+				   req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
+				   GFP_KERNEL : GFP_ATOMIC);
+			if (!req->dst) {
+				ret = -ENOMEM;
+				goto out;
+			}
+		}
+		scatterwalk_map_and_copy(scratch_dst, req->dst, 0, req->dlen,
+					 1);
+	}
+out:
+	put_cpu();
+	return ret;
+}
+
+static int scomp_acomp_compress(struct acomp_req *req)
+{
+	return scomp_acomp_comp_decomp(req, 1);
+}
+
+static int scomp_acomp_decompress(struct acomp_req *req)
+{
+	return scomp_acomp_comp_decomp(req, 0);
+}
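+
+/*
+ * Caller-side sketch of the acomp interface serviced by these wrappers
+ * (illustrative only; "deflate" stands in for any scomp-backed name):
+ *
+ *	tfm = crypto_alloc_acomp("deflate", 0, 0);
+ *	req = acomp_request_alloc(tfm);
+ *	acomp_request_set_params(req, src, NULL, slen, 0);
+ *	err = crypto_acomp_compress(req);
+ *
+ * With a NULL dst, the output scatterlist is allocated above and is
+ * later released through the transform's dst_free hook, i.e.
+ * crypto_scomp_sg_free().
+ */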
+
+static void crypto_exit_scomp_ops_async(struct crypto_tfm *tfm)
+{
+	struct crypto_scomp **ctx = crypto_tfm_ctx(tfm);
+
+	crypto_free_scomp(*ctx);
+}
+
+int crypto_init_scomp_ops_async(struct crypto_tfm *tfm)
+{
+	struct crypto_alg *calg = tfm->__crt_alg;
+	struct crypto_acomp *crt = __crypto_acomp_tfm(tfm);
+	struct crypto_scomp **ctx = crypto_tfm_ctx(tfm);
+	struct crypto_scomp *scomp;
+
+	if (!crypto_mod_get(calg))
+		return -EAGAIN;
+
+	scomp = crypto_create_tfm(calg, &crypto_scomp_type);
+	if (IS_ERR(scomp)) {
+		crypto_mod_put(calg);
+		return PTR_ERR(scomp);
+	}
+
+	*ctx = scomp;
+	tfm->exit = crypto_exit_scomp_ops_async;
+
+	crt->compress = scomp_acomp_compress;
+	crt->decompress = scomp_acomp_decompress;
+	crt->dst_free = crypto_scomp_sg_free;
+	crt->reqsize = sizeof(void *);
+
+	return 0;
+}
+
+struct acomp_req *crypto_acomp_scomp_alloc_ctx(struct acomp_req *req)
+{
+	struct crypto_acomp *acomp = crypto_acomp_reqtfm(req);
+	struct crypto_tfm *tfm = crypto_acomp_tfm(acomp);
+	struct crypto_scomp **tfm_ctx = crypto_tfm_ctx(tfm);
+	struct crypto_scomp *scomp = *tfm_ctx;
+	void *ctx;
+
+	ctx = crypto_scomp_alloc_ctx(scomp);
+	if (IS_ERR(ctx)) {
+		kfree(req);
+		return NULL;
+	}
+
+	*req->__ctx = ctx;
+
+	return req;
+}
+
+void crypto_acomp_scomp_free_ctx(struct acomp_req *req)
+{
+	struct crypto_acomp *acomp = crypto_acomp_reqtfm(req);
+	struct crypto_tfm *tfm = crypto_acomp_tfm(acomp);
+	struct crypto_scomp **tfm_ctx = crypto_tfm_ctx(tfm);
+	struct crypto_scomp *scomp = *tfm_ctx;
+	void *ctx = *req->__ctx;
+
+	if (ctx)
+		crypto_scomp_free_ctx(scomp, ctx);
+}
+
+static const struct crypto_type crypto_scomp_type = {
+	.extsize = crypto_alg_extsize,
+	.init_tfm = crypto_scomp_init_tfm,
+#ifdef CONFIG_PROC_FS
+	.show = crypto_scomp_show,
+#endif
+	.report = crypto_scomp_report,
+	.maskclear = ~CRYPTO_ALG_TYPE_MASK,
+	.maskset = CRYPTO_ALG_TYPE_MASK,
+	.type = CRYPTO_ALG_TYPE_SCOMPRESS,
+	.tfmsize = offsetof(struct crypto_scomp, base),
+};
+
+int crypto_register_scomp(struct scomp_alg *alg)
+{
+	struct crypto_alg *base = &alg->base;
+	int ret = -ENOMEM;
+
+	mutex_lock(&scomp_lock);
+	if (crypto_scomp_alloc_all_scratches())
+		goto error;
+
+	base->cra_type = &crypto_scomp_type;
+	base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK;
+	base->cra_flags |= CRYPTO_ALG_TYPE_SCOMPRESS;
+
+	ret = crypto_register_alg(base);
+	if (ret)
+		goto error;
+
+	mutex_unlock(&scomp_lock);
+	return ret;
+
+error:
+	crypto_scomp_free_all_scratches();
+	mutex_unlock(&scomp_lock);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(crypto_register_scomp);
+
+int crypto_unregister_scomp(struct scomp_alg *alg)
+{
+	int ret;
+
+	mutex_lock(&scomp_lock);
+	ret = crypto_unregister_alg(&alg->base);
+	crypto_scomp_free_all_scratches();
+	mutex_unlock(&scomp_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(crypto_unregister_scomp);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Synchronous compression type");

+ 226 - 0
crypto/simd.c

@@ -0,0 +1,226 @@
+/*
+ * Shared crypto simd helpers
+ *
+ * Copyright (c) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
+ * Copyright (c) 2016 Herbert Xu <herbert@gondor.apana.org.au>
+ *
+ * Based on aesni-intel_glue.c by:
+ *  Copyright (C) 2008, Intel Corp.
+ *    Author: Huang Ying <ying.huang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
+ * USA
+ *
+ */
+
+#include <crypto/cryptd.h>
+#include <crypto/internal/simd.h>
+#include <crypto/internal/skcipher.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/preempt.h>
+#include <asm/simd.h>
+
+struct simd_skcipher_alg {
+	const char *ialg_name;
+	struct skcipher_alg alg;
+};
+
+struct simd_skcipher_ctx {
+	struct cryptd_skcipher *cryptd_tfm;
+};
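+
+/*
+ * The wrapper publishes an async skcipher whose encrypt/decrypt either
+ * invoke the internal SIMD algorithm directly, when the FPU/NEON may be
+ * used in the current context, or defer to cryptd, which runs the same
+ * child algorithm from process context where SIMD is always usable.
+ */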
+
+static int simd_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key,
+				unsigned int key_len)
+{
+	struct simd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct crypto_skcipher *child = &ctx->cryptd_tfm->base;
+	int err;
+
+	crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
+	crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(tfm) &
+					 CRYPTO_TFM_REQ_MASK);
+	err = crypto_skcipher_setkey(child, key, key_len);
+	crypto_skcipher_set_flags(tfm, crypto_skcipher_get_flags(child) &
+				       CRYPTO_TFM_RES_MASK);
+	return err;
+}
+
+static int simd_skcipher_encrypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct simd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_request *subreq;
+	struct crypto_skcipher *child;
+
+	subreq = skcipher_request_ctx(req);
+	*subreq = *req;
+
+	if (!may_use_simd() ||
+	    (in_atomic() && cryptd_skcipher_queued(ctx->cryptd_tfm)))
+		child = &ctx->cryptd_tfm->base;
+	else
+		child = cryptd_skcipher_child(ctx->cryptd_tfm);
+
+	skcipher_request_set_tfm(subreq, child);
+
+	return crypto_skcipher_encrypt(subreq);
+}
+
+static int simd_skcipher_decrypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct simd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_request *subreq;
+	struct crypto_skcipher *child;
+
+	subreq = skcipher_request_ctx(req);
+	*subreq = *req;
+
+	if (!may_use_simd() ||
+	    (in_atomic() && cryptd_skcipher_queued(ctx->cryptd_tfm)))
+		child = &ctx->cryptd_tfm->base;
+	else
+		child = cryptd_skcipher_child(ctx->cryptd_tfm);
+
+	skcipher_request_set_tfm(subreq, child);
+
+	return crypto_skcipher_decrypt(subreq);
+}
+
+static void simd_skcipher_exit(struct crypto_skcipher *tfm)
+{
+	struct simd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	cryptd_free_skcipher(ctx->cryptd_tfm);
+}
+
+static int simd_skcipher_init(struct crypto_skcipher *tfm)
+{
+	struct simd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct cryptd_skcipher *cryptd_tfm;
+	struct simd_skcipher_alg *salg;
+	struct skcipher_alg *alg;
+	unsigned reqsize;
+
+	alg = crypto_skcipher_alg(tfm);
+	salg = container_of(alg, struct simd_skcipher_alg, alg);
+
+	cryptd_tfm = cryptd_alloc_skcipher(salg->ialg_name,
+					   CRYPTO_ALG_INTERNAL,
+					   CRYPTO_ALG_INTERNAL);
+	if (IS_ERR(cryptd_tfm))
+		return PTR_ERR(cryptd_tfm);
+
+	ctx->cryptd_tfm = cryptd_tfm;
+
+	reqsize = sizeof(struct skcipher_request);
+	reqsize += crypto_skcipher_reqsize(&cryptd_tfm->base);
+
+	crypto_skcipher_set_reqsize(tfm, reqsize);
+
+	return 0;
+}
+
+struct simd_skcipher_alg *simd_skcipher_create_compat(const char *algname,
+						      const char *drvname,
+						      const char *basename)
+{
+	struct simd_skcipher_alg *salg;
+	struct crypto_skcipher *tfm;
+	struct skcipher_alg *ialg;
+	struct skcipher_alg *alg;
+	int err;
+
+	tfm = crypto_alloc_skcipher(basename, CRYPTO_ALG_INTERNAL,
+				    CRYPTO_ALG_INTERNAL | CRYPTO_ALG_ASYNC);
+	if (IS_ERR(tfm))
+		return ERR_CAST(tfm);
+
+	ialg = crypto_skcipher_alg(tfm);
+
+	salg = kzalloc(sizeof(*salg), GFP_KERNEL);
+	if (!salg) {
+		salg = ERR_PTR(-ENOMEM);
+		goto out_put_tfm;
+	}
+
+	salg->ialg_name = basename;
+	alg = &salg->alg;
+
+	err = -ENAMETOOLONG;
+	if (snprintf(alg->base.cra_name, CRYPTO_MAX_ALG_NAME, "%s", algname) >=
+	    CRYPTO_MAX_ALG_NAME)
+		goto out_free_salg;
+
+	if (snprintf(alg->base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s",
+		     drvname) >= CRYPTO_MAX_ALG_NAME)
+		goto out_free_salg;
+
+	alg->base.cra_flags = CRYPTO_ALG_ASYNC;
+	alg->base.cra_priority = ialg->base.cra_priority;
+	alg->base.cra_blocksize = ialg->base.cra_blocksize;
+	alg->base.cra_alignmask = ialg->base.cra_alignmask;
+	alg->base.cra_module = ialg->base.cra_module;
+	alg->base.cra_ctxsize = sizeof(struct simd_skcipher_ctx);
+
+	alg->ivsize = ialg->ivsize;
+	alg->chunksize = ialg->chunksize;
+	alg->min_keysize = ialg->min_keysize;
+	alg->max_keysize = ialg->max_keysize;
+
+	alg->init = simd_skcipher_init;
+	alg->exit = simd_skcipher_exit;
+
+	alg->setkey = simd_skcipher_setkey;
+	alg->encrypt = simd_skcipher_encrypt;
+	alg->decrypt = simd_skcipher_decrypt;
+
+	err = crypto_register_skcipher(alg);
+	if (err)
+		goto out_free_salg;
+
+out_put_tfm:
+	crypto_free_skcipher(tfm);
+	return salg;
+
+out_free_salg:
+	kfree(salg);
+	salg = ERR_PTR(err);
+	goto out_put_tfm;
+}
+EXPORT_SYMBOL_GPL(simd_skcipher_create_compat);
+
+struct simd_skcipher_alg *simd_skcipher_create(const char *algname,
+					       const char *basename)
+{
+	char drvname[CRYPTO_MAX_ALG_NAME];
+
+	if (snprintf(drvname, CRYPTO_MAX_ALG_NAME, "simd-%s", basename) >=
+	    CRYPTO_MAX_ALG_NAME)
+		return ERR_PTR(-ENAMETOOLONG);
+
+	return simd_skcipher_create_compat(algname, drvname, basename);
+}
+EXPORT_SYMBOL_GPL(simd_skcipher_create);
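+
+/*
+ * Typical arch-glue usage (sketch; the algorithm names are examples):
+ *
+ *	simd = simd_skcipher_create("xts(aes)", "__xts-aes-ce");
+ *	if (IS_ERR(simd))
+ *		return PTR_ERR(simd);
+ *	...
+ *	simd_skcipher_free(simd);
+ *
+ * The internal "__" algorithm must already be registered; the wrapper
+ * is then published under the "simd-<basename>" driver name.
+ */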
+
+void simd_skcipher_free(struct simd_skcipher_alg *salg)
+{
+	crypto_unregister_skcipher(&salg->alg);
+	kfree(salg);
+}
+EXPORT_SYMBOL_GPL(simd_skcipher_free);
+
+MODULE_LICENSE("GPL");

+ 540 - 0
crypto/skcipher.c

@@ -14,9 +14,12 @@
  *
  */
 
+#include <crypto/internal/aead.h>
 #include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
 #include <linux/bug.h>
 #include <linux/cryptouser.h>
+#include <linux/list.h>
 #include <linux/module.h>
 #include <linux/rtnetlink.h>
 #include <linux/seq_file.h>
@@ -24,6 +27,543 @@
 
 #include "internal.h"
 
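+/*
+ * Walk-state flags: PHYS walks yield page/offset pairs for async users,
+ * SLOW bounces a single block through an aligned buffer, COPY bounces a
+ * chunk through walk->page, DIFF records that src and dst are mapped
+ * separately, and SLEEP permits GFP_KERNEL allocations.
+ */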
+enum {
+	SKCIPHER_WALK_PHYS = 1 << 0,
+	SKCIPHER_WALK_SLOW = 1 << 1,
+	SKCIPHER_WALK_COPY = 1 << 2,
+	SKCIPHER_WALK_DIFF = 1 << 3,
+	SKCIPHER_WALK_SLEEP = 1 << 4,
+};
+
+struct skcipher_walk_buffer {
+	struct list_head entry;
+	struct scatter_walk dst;
+	unsigned int len;
+	u8 *data;
+	u8 buffer[];
+};
+
+static int skcipher_walk_next(struct skcipher_walk *walk);
+
+static inline void skcipher_unmap(struct scatter_walk *walk, void *vaddr)
+{
+	if (PageHighMem(scatterwalk_page(walk)))
+		kunmap_atomic(vaddr);
+}
+
+static inline void *skcipher_map(struct scatter_walk *walk)
+{
+	struct page *page = scatterwalk_page(walk);
+
+	return (PageHighMem(page) ? kmap_atomic(page) : page_address(page)) +
+	       offset_in_page(walk->offset);
+}
+
+static inline void skcipher_map_src(struct skcipher_walk *walk)
+{
+	walk->src.virt.addr = skcipher_map(&walk->in);
+}
+
+static inline void skcipher_map_dst(struct skcipher_walk *walk)
+{
+	walk->dst.virt.addr = skcipher_map(&walk->out);
+}
+
+static inline void skcipher_unmap_src(struct skcipher_walk *walk)
+{
+	skcipher_unmap(&walk->in, walk->src.virt.addr);
+}
+
+static inline void skcipher_unmap_dst(struct skcipher_walk *walk)
+{
+	skcipher_unmap(&walk->out, walk->dst.virt.addr);
+}
+
+static inline gfp_t skcipher_walk_gfp(struct skcipher_walk *walk)
+{
+	return walk->flags & SKCIPHER_WALK_SLEEP ? GFP_KERNEL : GFP_ATOMIC;
+}
+
+/* Get a spot of the specified length that does not straddle a page.
+ * The caller needs to ensure that there is enough space for this operation.
+ */
+static inline u8 *skcipher_get_spot(u8 *start, unsigned int len)
+{
+	u8 *end_page = (u8 *)(((unsigned long)(start + len - 1)) & PAGE_MASK);
+
+	return max(start, end_page);
+}
+
+static int skcipher_done_slow(struct skcipher_walk *walk, unsigned int bsize)
+{
+	u8 *addr;
+
+	addr = (u8 *)ALIGN((unsigned long)walk->buffer, walk->alignmask + 1);
+	addr = skcipher_get_spot(addr, bsize);
+	scatterwalk_copychunks(addr, &walk->out, bsize,
+			       (walk->flags & SKCIPHER_WALK_PHYS) ? 2 : 1);
+	return 0;
+}
+
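+/*
+ * err < 0 aborts the walk; err >= 0 gives the number of bytes of the
+ * previous walk->nbytes that the caller left unprocessed.
+ */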
+int skcipher_walk_done(struct skcipher_walk *walk, int err)
+{
+	unsigned int n = walk->nbytes - err;
+	unsigned int nbytes;
+
+	nbytes = walk->total - n;
+
+	if (unlikely(err < 0)) {
+		nbytes = 0;
+		n = 0;
+	} else if (likely(!(walk->flags & (SKCIPHER_WALK_PHYS |
+					   SKCIPHER_WALK_SLOW |
+					   SKCIPHER_WALK_COPY |
+					   SKCIPHER_WALK_DIFF)))) {
+unmap_src:
+		skcipher_unmap_src(walk);
+	} else if (walk->flags & SKCIPHER_WALK_DIFF) {
+		skcipher_unmap_dst(walk);
+		goto unmap_src;
+	} else if (walk->flags & SKCIPHER_WALK_COPY) {
+		skcipher_map_dst(walk);
+		memcpy(walk->dst.virt.addr, walk->page, n);
+		skcipher_unmap_dst(walk);
+	} else if (unlikely(walk->flags & SKCIPHER_WALK_SLOW)) {
+		if (WARN_ON(err)) {
+			err = -EINVAL;
+			nbytes = 0;
+		} else
+			n = skcipher_done_slow(walk, n);
+	}
+
+	if (err > 0)
+		err = 0;
+
+	walk->total = nbytes;
+	walk->nbytes = nbytes;
+
+	scatterwalk_advance(&walk->in, n);
+	scatterwalk_advance(&walk->out, n);
+	scatterwalk_done(&walk->in, 0, nbytes);
+	scatterwalk_done(&walk->out, 1, nbytes);
+
+	if (nbytes) {
+		crypto_yield(walk->flags & SKCIPHER_WALK_SLEEP ?
+			     CRYPTO_TFM_REQ_MAY_SLEEP : 0);
+		return skcipher_walk_next(walk);
+	}
+
+	/* Short-circuit for the common/fast path. */
+	if (!((unsigned long)walk->buffer | (unsigned long)walk->page))
+		goto out;
+
+	if (walk->flags & SKCIPHER_WALK_PHYS)
+		goto out;
+
+	if (walk->iv != walk->oiv)
+		memcpy(walk->oiv, walk->iv, walk->ivsize);
+	if (walk->buffer != walk->page)
+		kfree(walk->buffer);
+	if (walk->page)
+		free_page((unsigned long)walk->page);
+
+out:
+	return err;
+}
+EXPORT_SYMBOL_GPL(skcipher_walk_done);
+
+void skcipher_walk_complete(struct skcipher_walk *walk, int err)
+{
+	struct skcipher_walk_buffer *p, *tmp;
+
+	list_for_each_entry_safe(p, tmp, &walk->buffers, entry) {
+		u8 *data;
+
+		if (err)
+			goto done;
+
+		data = p->data;
+		if (!data) {
+			data = PTR_ALIGN(&p->buffer[0], walk->alignmask + 1);
+			data = skcipher_get_spot(data, walk->chunksize);
+		}
+
+		scatterwalk_copychunks(data, &p->dst, p->len, 1);
+
+		if (offset_in_page(p->data) + p->len + walk->chunksize >
+		    PAGE_SIZE)
+			free_page((unsigned long)p->data);
+
+done:
+		list_del(&p->entry);
+		kfree(p);
+	}
+
+	if (!err && walk->iv != walk->oiv)
+		memcpy(walk->oiv, walk->iv, walk->ivsize);
+	if (walk->buffer != walk->page)
+		kfree(walk->buffer);
+	if (walk->page)
+		free_page((unsigned long)walk->page);
+}
+EXPORT_SYMBOL_GPL(skcipher_walk_complete);
+
+static void skcipher_queue_write(struct skcipher_walk *walk,
+				 struct skcipher_walk_buffer *p)
+{
+	p->dst = walk->out;
+	list_add_tail(&p->entry, &walk->buffers);
+}
+
+static int skcipher_next_slow(struct skcipher_walk *walk, unsigned int bsize)
+{
+	bool phys = walk->flags & SKCIPHER_WALK_PHYS;
+	unsigned alignmask = walk->alignmask;
+	struct skcipher_walk_buffer *p;
+	unsigned a;
+	unsigned n;
+	u8 *buffer;
+	void *v;
+
+	if (!phys) {
+		buffer = walk->buffer ?: walk->page;
+		if (buffer)
+			goto ok;
+	}
+
+	/* Start with the minimum alignment of kmalloc. */
+	a = crypto_tfm_ctx_alignment() - 1;
+	n = bsize;
+
+	if (phys) {
+		/* Calculate the minimum alignment of p->buffer. */
+		a &= (sizeof(*p) ^ (sizeof(*p) - 1)) >> 1;
+		n += sizeof(*p);
+	}
+
+	/* Minimum size to align p->buffer by alignmask. */
+	n += alignmask & ~a;
+
+	/* Minimum size to ensure p->buffer does not straddle a page. */
+	n += (bsize - 1) & ~(alignmask | a);
+
+	v = kzalloc(n, skcipher_walk_gfp(walk));
+	if (!v)
+		return skcipher_walk_done(walk, -ENOMEM);
+
+	if (phys) {
+		p = v;
+		p->len = bsize;
+		skcipher_queue_write(walk, p);
+		buffer = p->buffer;
+	} else {
+		walk->buffer = v;
+		buffer = v;
+	}
+
+ok:
+	walk->dst.virt.addr = PTR_ALIGN(buffer, alignmask + 1);
+	walk->dst.virt.addr = skcipher_get_spot(walk->dst.virt.addr, bsize);
+	walk->src.virt.addr = walk->dst.virt.addr;
+
+	scatterwalk_copychunks(walk->src.virt.addr, &walk->in, bsize, 0);
+
+	walk->nbytes = bsize;
+	walk->flags |= SKCIPHER_WALK_SLOW;
+
+	return 0;
+}
+
+static int skcipher_next_copy(struct skcipher_walk *walk)
+{
+	struct skcipher_walk_buffer *p;
+	u8 *tmp = walk->page;
+
+	skcipher_map_src(walk);
+	memcpy(tmp, walk->src.virt.addr, walk->nbytes);
+	skcipher_unmap_src(walk);
+
+	walk->src.virt.addr = tmp;
+	walk->dst.virt.addr = tmp;
+
+	if (!(walk->flags & SKCIPHER_WALK_PHYS))
+		return 0;
+
+	p = kmalloc(sizeof(*p), skcipher_walk_gfp(walk));
+	if (!p)
+		return -ENOMEM;
+
+	p->data = walk->page;
+	p->len = walk->nbytes;
+	skcipher_queue_write(walk, p);
+
+	if (offset_in_page(walk->page) + walk->nbytes + walk->chunksize >
+	    PAGE_SIZE)
+		walk->page = NULL;
+	else
+		walk->page += walk->nbytes;
+
+	return 0;
+}
+
+static int skcipher_next_fast(struct skcipher_walk *walk)
+{
+	unsigned long diff;
+
+	walk->src.phys.page = scatterwalk_page(&walk->in);
+	walk->src.phys.offset = offset_in_page(walk->in.offset);
+	walk->dst.phys.page = scatterwalk_page(&walk->out);
+	walk->dst.phys.offset = offset_in_page(walk->out.offset);
+
+	if (walk->flags & SKCIPHER_WALK_PHYS)
+		return 0;
+
+	diff = walk->src.phys.offset - walk->dst.phys.offset;
+	diff |= walk->src.virt.page - walk->dst.virt.page;
+
+	skcipher_map_src(walk);
+	walk->dst.virt.addr = walk->src.virt.addr;
+
+	if (diff) {
+		walk->flags |= SKCIPHER_WALK_DIFF;
+		skcipher_map_dst(walk);
+	}
+
+	return 0;
+}
+
+static int skcipher_walk_next(struct skcipher_walk *walk)
+{
+	unsigned int bsize;
+	unsigned int n;
+	int err;
+
+	walk->flags &= ~(SKCIPHER_WALK_SLOW | SKCIPHER_WALK_COPY |
+			 SKCIPHER_WALK_DIFF);
+
+	n = walk->total;
+	bsize = min(walk->chunksize, max(n, walk->blocksize));
+	n = scatterwalk_clamp(&walk->in, n);
+	n = scatterwalk_clamp(&walk->out, n);
+
+	if (unlikely(n < bsize)) {
+		if (unlikely(walk->total < walk->blocksize))
+			return skcipher_walk_done(walk, -EINVAL);
+
+slow_path:
+		err = skcipher_next_slow(walk, bsize);
+		goto set_phys_lowmem;
+	}
+
+	if (unlikely((walk->in.offset | walk->out.offset) & walk->alignmask)) {
+		if (!walk->page) {
+			gfp_t gfp = skcipher_walk_gfp(walk);
+
+			walk->page = (void *)__get_free_page(gfp);
+			if (!walk->page)
+				goto slow_path;
+		}
+
+		walk->nbytes = min_t(unsigned, n,
+				     PAGE_SIZE - offset_in_page(walk->page));
+		walk->flags |= SKCIPHER_WALK_COPY;
+		err = skcipher_next_copy(walk);
+		goto set_phys_lowmem;
+	}
+
+	walk->nbytes = n;
+
+	return skcipher_next_fast(walk);
+
+set_phys_lowmem:
+	if (!err && (walk->flags & SKCIPHER_WALK_PHYS)) {
+		walk->src.phys.page = virt_to_page(walk->src.virt.addr);
+		walk->dst.phys.page = virt_to_page(walk->dst.virt.addr);
+		walk->src.phys.offset &= PAGE_SIZE - 1;
+		walk->dst.phys.offset &= PAGE_SIZE - 1;
+	}
+	return err;
+}
+
+static int skcipher_copy_iv(struct skcipher_walk *walk)
+{
+	unsigned a = crypto_tfm_ctx_alignment() - 1;
+	unsigned alignmask = walk->alignmask;
+	unsigned ivsize = walk->ivsize;
+	unsigned bs = walk->chunksize;
+	unsigned aligned_bs;
+	unsigned size;
+	u8 *iv;
+
+	/* ALIGN() expects an alignment, not a mask */
+	aligned_bs = ALIGN(bs, alignmask + 1);
+
+	/* Minimum size to align buffer by alignmask. */
+	size = alignmask & ~a;
+
+	if (walk->flags & SKCIPHER_WALK_PHYS)
+		size += ivsize;
+	else {
+		size += aligned_bs + ivsize;
+
+		/* Minimum size to ensure buffer does not straddle a page. */
+		size += (bs - 1) & ~(alignmask | a);
+	}
+
+	walk->buffer = kmalloc(size, skcipher_walk_gfp(walk));
+	if (!walk->buffer)
+		return -ENOMEM;
+
+	iv = PTR_ALIGN(walk->buffer, alignmask + 1);
+	iv = skcipher_get_spot(iv, bs) + aligned_bs;
+
+	walk->iv = memcpy(iv, walk->iv, walk->ivsize);
+	return 0;
+}
+
+static int skcipher_walk_first(struct skcipher_walk *walk)
+{
+	walk->nbytes = 0;
+
+	if (WARN_ON_ONCE(in_irq()))
+		return -EDEADLK;
+
+	if (unlikely(!walk->total))
+		return 0;
+
+	walk->buffer = NULL;
+	if (unlikely(((unsigned long)walk->iv & walk->alignmask))) {
+		int err = skcipher_copy_iv(walk);
+		if (err)
+			return err;
+	}
+
+	walk->page = NULL;
+	walk->nbytes = walk->total;
+
+	return skcipher_walk_next(walk);
+}
+
+static int skcipher_walk_skcipher(struct skcipher_walk *walk,
+				  struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+
+	scatterwalk_start(&walk->in, req->src);
+	scatterwalk_start(&walk->out, req->dst);
+
+	walk->total = req->cryptlen;
+	walk->iv = req->iv;
+	walk->oiv = req->iv;
+
+	walk->flags &= ~SKCIPHER_WALK_SLEEP;
+	walk->flags |= req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
+		       SKCIPHER_WALK_SLEEP : 0;
+
+	walk->blocksize = crypto_skcipher_blocksize(tfm);
+	walk->chunksize = crypto_skcipher_chunksize(tfm);
+	walk->ivsize = crypto_skcipher_ivsize(tfm);
+	walk->alignmask = crypto_skcipher_alignmask(tfm);
+
+	return skcipher_walk_first(walk);
+}
+
+int skcipher_walk_virt(struct skcipher_walk *walk,
+		       struct skcipher_request *req, bool atomic)
+{
+	int err;
+
+	walk->flags &= ~SKCIPHER_WALK_PHYS;
+
+	err = skcipher_walk_skcipher(walk, req);
+
+	walk->flags &= atomic ? ~SKCIPHER_WALK_SLEEP : ~0;
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(skcipher_walk_virt);
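+
+/*
+ * Canonical synchronous driver loop (sketch; bsize stands for the
+ * cipher's block size):
+ *
+ *	err = skcipher_walk_virt(&walk, req, false);
+ *	while (walk.nbytes) {
+ *		... transform walk.src.virt.addr into walk.dst.virt.addr ...
+ *		err = skcipher_walk_done(&walk, walk.nbytes % bsize);
+ *	}
+ */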
+
+void skcipher_walk_atomise(struct skcipher_walk *walk)
+{
+	walk->flags &= ~SKCIPHER_WALK_SLEEP;
+}
+EXPORT_SYMBOL_GPL(skcipher_walk_atomise);
+
+int skcipher_walk_async(struct skcipher_walk *walk,
+			struct skcipher_request *req)
+{
+	walk->flags |= SKCIPHER_WALK_PHYS;
+
+	INIT_LIST_HEAD(&walk->buffers);
+
+	return skcipher_walk_skcipher(walk, req);
+}
+EXPORT_SYMBOL_GPL(skcipher_walk_async);
+
+static int skcipher_walk_aead_common(struct skcipher_walk *walk,
+				     struct aead_request *req, bool atomic)
+{
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	int err;
+
+	walk->flags &= ~SKCIPHER_WALK_PHYS;
+
+	scatterwalk_start(&walk->in, req->src);
+	scatterwalk_start(&walk->out, req->dst);
+
+	scatterwalk_copychunks(NULL, &walk->in, req->assoclen, 2);
+	scatterwalk_copychunks(NULL, &walk->out, req->assoclen, 2);
+
+	walk->iv = req->iv;
+	walk->oiv = req->iv;
+
+	if (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP)
+		walk->flags |= SKCIPHER_WALK_SLEEP;
+	else
+		walk->flags &= ~SKCIPHER_WALK_SLEEP;
+
+	walk->blocksize = crypto_aead_blocksize(tfm);
+	walk->chunksize = crypto_aead_chunksize(tfm);
+	walk->ivsize = crypto_aead_ivsize(tfm);
+	walk->alignmask = crypto_aead_alignmask(tfm);
+
+	err = skcipher_walk_first(walk);
+
+	if (atomic)
+		walk->flags &= ~SKCIPHER_WALK_SLEEP;
+
+	return err;
+}
+
+int skcipher_walk_aead(struct skcipher_walk *walk, struct aead_request *req,
+		       bool atomic)
+{
+	walk->total = req->cryptlen;
+
+	return skcipher_walk_aead_common(walk, req, atomic);
+}
+EXPORT_SYMBOL_GPL(skcipher_walk_aead);
+
+int skcipher_walk_aead_encrypt(struct skcipher_walk *walk,
+			       struct aead_request *req, bool atomic)
+{
+	walk->total = req->cryptlen;
+
+	return skcipher_walk_aead_common(walk, req, atomic);
+}
+EXPORT_SYMBOL_GPL(skcipher_walk_aead_encrypt);
+
+int skcipher_walk_aead_decrypt(struct skcipher_walk *walk,
+			       struct aead_request *req, bool atomic)
+{
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+
+	walk->total = req->cryptlen - crypto_aead_authsize(tfm);
+
+	return skcipher_walk_aead_common(walk, req, atomic);
+}
+EXPORT_SYMBOL_GPL(skcipher_walk_aead_decrypt);
+
 static unsigned int crypto_skcipher_extsize(struct crypto_alg *alg)
 {
 	if (alg->cra_type == &crypto_blkcipher_type)

+ 160 - 158
crypto/testmgr.c

@@ -33,6 +33,7 @@
 #include <crypto/drbg.h>
 #include <crypto/akcipher.h>
 #include <crypto/kpp.h>
+#include <crypto/acompress.h>
 
 #include "internal.h"
 
@@ -62,7 +63,7 @@ int alg_test(const char *driver, const char *alg, u32 type, u32 mask)
  */
 #define IDX1		32
 #define IDX2		32400
-#define IDX3		1
+#define IDX3		1511
 #define IDX4		8193
 #define IDX5		22222
 #define IDX6		17101
@@ -1442,6 +1443,126 @@ out:
 	return ret;
 }
 
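+/*
+ * Round-trip the acomp test vectors: compress every ctemplate entry and
+ * decompress every dtemplate entry, checking both the output length and
+ * the payload against the expected values.
+ */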
+static int test_acomp(struct crypto_acomp *tfm, struct comp_testvec *ctemplate,
+		      struct comp_testvec *dtemplate, int ctcount, int dtcount)
+{
+	const char *algo = crypto_tfm_alg_driver_name(crypto_acomp_tfm(tfm));
+	unsigned int i;
+	char *output;
+	int ret;
+	struct scatterlist src, dst;
+	struct acomp_req *req;
+	struct tcrypt_result result;
+
+	output = kmalloc(COMP_BUF_SIZE, GFP_KERNEL);
+	if (!output)
+		return -ENOMEM;
+
+	for (i = 0; i < ctcount; i++) {
+		unsigned int dlen = COMP_BUF_SIZE;
+		int ilen = ctemplate[i].inlen;
+
+		memset(output, 0, dlen);
+		init_completion(&result.completion);
+		sg_init_one(&src, ctemplate[i].input, ilen);
+		sg_init_one(&dst, output, dlen);
+
+		req = acomp_request_alloc(tfm);
+		if (!req) {
+			pr_err("alg: acomp: request alloc failed for %s\n",
+			       algo);
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		acomp_request_set_params(req, &src, &dst, ilen, dlen);
+		acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+					   tcrypt_complete, &result);
+
+		ret = wait_async_op(&result, crypto_acomp_compress(req));
+		if (ret) {
+			pr_err("alg: acomp: compression failed on test %d for %s: ret=%d\n",
+			       i + 1, algo, -ret);
+			acomp_request_free(req);
+			goto out;
+		}
+
+		if (req->dlen != ctemplate[i].outlen) {
+			pr_err("alg: acomp: Compression test %d failed for %s: output len = %d\n",
+			       i + 1, algo, req->dlen);
+			ret = -EINVAL;
+			acomp_request_free(req);
+			goto out;
+		}
+
+		if (memcmp(output, ctemplate[i].output, req->dlen)) {
+			pr_err("alg: acomp: Compression test %d failed for %s\n",
+			       i + 1, algo);
+			hexdump(output, req->dlen);
+			ret = -EINVAL;
+			acomp_request_free(req);
+			goto out;
+		}
+
+		acomp_request_free(req);
+	}
+
+	for (i = 0; i < dtcount; i++) {
+		unsigned int dlen = COMP_BUF_SIZE;
+		int ilen = dtemplate[i].inlen;
+
+		memset(output, 0, dlen);
+		init_completion(&result.completion);
+		sg_init_one(&src, dtemplate[i].input, ilen);
+		sg_init_one(&dst, output, dlen);
+
+		req = acomp_request_alloc(tfm);
+		if (!req) {
+			pr_err("alg: acomp: request alloc failed for %s\n",
+			       algo);
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		acomp_request_set_params(req, &src, &dst, ilen, dlen);
+		acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+					   tcrypt_complete, &result);
+
+		ret = wait_async_op(&result, crypto_acomp_decompress(req));
+		if (ret) {
+			pr_err("alg: acomp: decompression failed on test %d for %s: ret=%d\n",
+			       i + 1, algo, -ret);
+			acomp_request_free(req);
+			goto out;
+		}
+
+		if (req->dlen != dtemplate[i].outlen) {
+			pr_err("alg: acomp: Decompression test %d failed for %s: output len = %d\n",
+			       i + 1, algo, req->dlen);
+			ret = -EINVAL;
+			acomp_request_free(req);
+			goto out;
+		}
+
+		if (memcmp(output, dtemplate[i].output, req->dlen)) {
+			pr_err("alg: acomp: Decompression test %d failed for %s\n",
+			       i + 1, algo);
+			hexdump(output, req->dlen);
+			ret = -EINVAL;
+			acomp_request_free(req);
+			goto out;
+		}
+
+		acomp_request_free(req);
+	}
+
+	ret = 0;
+
+out:
+	kfree(output);
+	return ret;
+}
+
 static int test_cprng(struct crypto_rng *tfm, struct cprng_testvec *template,
 		      unsigned int tcount)
 {
@@ -1509,7 +1630,7 @@ static int alg_test_aead(const struct alg_test_desc *desc, const char *driver,
 	struct crypto_aead *tfm;
 	int err = 0;
 
-	tfm = crypto_alloc_aead(driver, type | CRYPTO_ALG_INTERNAL, mask);
+	tfm = crypto_alloc_aead(driver, type, mask);
 	if (IS_ERR(tfm)) {
 		printk(KERN_ERR "alg: aead: Failed to load transform for %s: "
 		       "%ld\n", driver, PTR_ERR(tfm));
@@ -1538,7 +1659,7 @@ static int alg_test_cipher(const struct alg_test_desc *desc,
 	struct crypto_cipher *tfm;
 	int err = 0;
 
-	tfm = crypto_alloc_cipher(driver, type | CRYPTO_ALG_INTERNAL, mask);
+	tfm = crypto_alloc_cipher(driver, type, mask);
 	if (IS_ERR(tfm)) {
 		printk(KERN_ERR "alg: cipher: Failed to load transform for "
 		       "%s: %ld\n", driver, PTR_ERR(tfm));
@@ -1567,7 +1688,7 @@ static int alg_test_skcipher(const struct alg_test_desc *desc,
 	struct crypto_skcipher *tfm;
 	int err = 0;
 
-	tfm = crypto_alloc_skcipher(driver, type | CRYPTO_ALG_INTERNAL, mask);
+	tfm = crypto_alloc_skcipher(driver, type, mask);
 	if (IS_ERR(tfm)) {
 		printk(KERN_ERR "alg: skcipher: Failed to load transform for "
 		       "%s: %ld\n", driver, PTR_ERR(tfm));
@@ -1593,22 +1714,38 @@ out:
 static int alg_test_comp(const struct alg_test_desc *desc, const char *driver,
 			 u32 type, u32 mask)
 {
-	struct crypto_comp *tfm;
+	struct crypto_comp *comp;
+	struct crypto_acomp *acomp;
 	int err;
+	u32 algo_type = type & CRYPTO_ALG_TYPE_ACOMPRESS_MASK;
+
+	if (algo_type == CRYPTO_ALG_TYPE_ACOMPRESS) {
+		acomp = crypto_alloc_acomp(driver, type, mask);
+		if (IS_ERR(acomp)) {
+			pr_err("alg: acomp: Failed to load transform for %s: %ld\n",
+			       driver, PTR_ERR(acomp));
+			return PTR_ERR(acomp);
+		}
+		err = test_acomp(acomp, desc->suite.comp.comp.vecs,
+				 desc->suite.comp.decomp.vecs,
+				 desc->suite.comp.comp.count,
+				 desc->suite.comp.decomp.count);
+		crypto_free_acomp(acomp);
+	} else {
+		comp = crypto_alloc_comp(driver, type, mask);
+		if (IS_ERR(comp)) {
+			pr_err("alg: comp: Failed to load transform for %s: %ld\n",
+			       driver, PTR_ERR(comp));
+			return PTR_ERR(comp);
+		}
 
-	tfm = crypto_alloc_comp(driver, type, mask);
-	if (IS_ERR(tfm)) {
-		printk(KERN_ERR "alg: comp: Failed to load transform for %s: "
-		       "%ld\n", driver, PTR_ERR(tfm));
-		return PTR_ERR(tfm);
-	}
-
-	err = test_comp(tfm, desc->suite.comp.comp.vecs,
-			desc->suite.comp.decomp.vecs,
-			desc->suite.comp.comp.count,
-			desc->suite.comp.decomp.count);
+		err = test_comp(comp, desc->suite.comp.comp.vecs,
+				desc->suite.comp.decomp.vecs,
+				desc->suite.comp.comp.count,
+				desc->suite.comp.decomp.count);
 
-	crypto_free_comp(tfm);
+		crypto_free_comp(comp);
+	}
 	return err;
 }
 
@@ -1618,7 +1755,7 @@ static int alg_test_hash(const struct alg_test_desc *desc, const char *driver,
 	struct crypto_ahash *tfm;
 	int err;
 
-	tfm = crypto_alloc_ahash(driver, type | CRYPTO_ALG_INTERNAL, mask);
+	tfm = crypto_alloc_ahash(driver, type, mask);
 	if (IS_ERR(tfm)) {
 		printk(KERN_ERR "alg: hash: Failed to load transform for %s: "
 		       "%ld\n", driver, PTR_ERR(tfm));
@@ -1646,7 +1783,7 @@ static int alg_test_crc32c(const struct alg_test_desc *desc,
 	if (err)
 		goto out;
 
-	tfm = crypto_alloc_shash(driver, type | CRYPTO_ALG_INTERNAL, mask);
+	tfm = crypto_alloc_shash(driver, type, mask);
 	if (IS_ERR(tfm)) {
 		printk(KERN_ERR "alg: crc32c: Failed to load transform for %s: "
 		       "%ld\n", driver, PTR_ERR(tfm));
@@ -1688,7 +1825,7 @@ static int alg_test_cprng(const struct alg_test_desc *desc, const char *driver,
 	struct crypto_rng *rng;
 	int err;
 
-	rng = crypto_alloc_rng(driver, type | CRYPTO_ALG_INTERNAL, mask);
+	rng = crypto_alloc_rng(driver, type, mask);
 	if (IS_ERR(rng)) {
 		printk(KERN_ERR "alg: cprng: Failed to load transform for %s: "
 		       "%ld\n", driver, PTR_ERR(rng));
@@ -1715,7 +1852,7 @@ static int drbg_cavs_test(struct drbg_testvec *test, int pr,
 	if (!buf)
 		return -ENOMEM;
 
-	drng = crypto_alloc_rng(driver, type | CRYPTO_ALG_INTERNAL, mask);
+	drng = crypto_alloc_rng(driver, type, mask);
 	if (IS_ERR(drng)) {
 		printk(KERN_ERR "alg: drbg: could not allocate DRNG handle for "
 		       "%s\n", driver);
@@ -1909,7 +2046,7 @@ static int alg_test_kpp(const struct alg_test_desc *desc, const char *driver,
 	struct crypto_kpp *tfm;
 	int err = 0;
 
-	tfm = crypto_alloc_kpp(driver, type | CRYPTO_ALG_INTERNAL, mask);
+	tfm = crypto_alloc_kpp(driver, type, mask);
 	if (IS_ERR(tfm)) {
 		pr_err("alg: kpp: Failed to load tfm for %s: %ld\n",
 		       driver, PTR_ERR(tfm));
@@ -2068,7 +2205,7 @@ static int alg_test_akcipher(const struct alg_test_desc *desc,
 	struct crypto_akcipher *tfm;
 	int err = 0;
 
-	tfm = crypto_alloc_akcipher(driver, type | CRYPTO_ALG_INTERNAL, mask);
+	tfm = crypto_alloc_akcipher(driver, type, mask);
 	if (IS_ERR(tfm)) {
 		pr_err("alg: akcipher: Failed to load tfm for %s: %ld\n",
 		       driver, PTR_ERR(tfm));
@@ -2091,88 +2228,6 @@ static int alg_test_null(const struct alg_test_desc *desc,
 /* Please keep this list sorted by algorithm name. */
 static const struct alg_test_desc alg_test_descs[] = {
 	{
-		.alg = "__cbc-cast5-avx",
-		.test = alg_test_null,
-	}, {
-		.alg = "__cbc-cast6-avx",
-		.test = alg_test_null,
-	}, {
-		.alg = "__cbc-serpent-avx",
-		.test = alg_test_null,
-	}, {
-		.alg = "__cbc-serpent-avx2",
-		.test = alg_test_null,
-	}, {
-		.alg = "__cbc-serpent-sse2",
-		.test = alg_test_null,
-	}, {
-		.alg = "__cbc-twofish-avx",
-		.test = alg_test_null,
-	}, {
-		.alg = "__driver-cbc-aes-aesni",
-		.test = alg_test_null,
-		.fips_allowed = 1,
-	}, {
-		.alg = "__driver-cbc-camellia-aesni",
-		.test = alg_test_null,
-	}, {
-		.alg = "__driver-cbc-camellia-aesni-avx2",
-		.test = alg_test_null,
-	}, {
-		.alg = "__driver-cbc-cast5-avx",
-		.test = alg_test_null,
-	}, {
-		.alg = "__driver-cbc-cast6-avx",
-		.test = alg_test_null,
-	}, {
-		.alg = "__driver-cbc-serpent-avx",
-		.test = alg_test_null,
-	}, {
-		.alg = "__driver-cbc-serpent-avx2",
-		.test = alg_test_null,
-	}, {
-		.alg = "__driver-cbc-serpent-sse2",
-		.test = alg_test_null,
-	}, {
-		.alg = "__driver-cbc-twofish-avx",
-		.test = alg_test_null,
-	}, {
-		.alg = "__driver-ecb-aes-aesni",
-		.test = alg_test_null,
-		.fips_allowed = 1,
-	}, {
-		.alg = "__driver-ecb-camellia-aesni",
-		.test = alg_test_null,
-	}, {
-		.alg = "__driver-ecb-camellia-aesni-avx2",
-		.test = alg_test_null,
-	}, {
-		.alg = "__driver-ecb-cast5-avx",
-		.test = alg_test_null,
-	}, {
-		.alg = "__driver-ecb-cast6-avx",
-		.test = alg_test_null,
-	}, {
-		.alg = "__driver-ecb-serpent-avx",
-		.test = alg_test_null,
-	}, {
-		.alg = "__driver-ecb-serpent-avx2",
-		.test = alg_test_null,
-	}, {
-		.alg = "__driver-ecb-serpent-sse2",
-		.test = alg_test_null,
-	}, {
-		.alg = "__driver-ecb-twofish-avx",
-		.test = alg_test_null,
-	}, {
-		.alg = "__driver-gcm-aes-aesni",
-		.test = alg_test_null,
-		.fips_allowed = 1,
-	}, {
-		.alg = "__ghash-pclmulqdqni",
-		.test = alg_test_null,
-		.fips_allowed = 1,
-	}, {
 		.alg = "ansi_cprng",
 		.test = alg_test_cprng,
 		.suite = {
@@ -2658,55 +2713,6 @@ static const struct alg_test_desc alg_test_descs[] = {
 				.count = CRCT10DIF_TEST_VECTORS
 			}
 		}
-	}, {
-		.alg = "cryptd(__driver-cbc-aes-aesni)",
-		.test = alg_test_null,
-		.fips_allowed = 1,
-	}, {
-		.alg = "cryptd(__driver-cbc-camellia-aesni)",
-		.test = alg_test_null,
-	}, {
-		.alg = "cryptd(__driver-cbc-camellia-aesni-avx2)",
-		.test = alg_test_null,
-	}, {
-		.alg = "cryptd(__driver-cbc-serpent-avx2)",
-		.test = alg_test_null,
-	}, {
-		.alg = "cryptd(__driver-ecb-aes-aesni)",
-		.test = alg_test_null,
-		.fips_allowed = 1,
-	}, {
-		.alg = "cryptd(__driver-ecb-camellia-aesni)",
-		.test = alg_test_null,
-	}, {
-		.alg = "cryptd(__driver-ecb-camellia-aesni-avx2)",
-		.test = alg_test_null,
-	}, {
-		.alg = "cryptd(__driver-ecb-cast5-avx)",
-		.test = alg_test_null,
-	}, {
-		.alg = "cryptd(__driver-ecb-cast6-avx)",
-		.test = alg_test_null,
-	}, {
-		.alg = "cryptd(__driver-ecb-serpent-avx)",
-		.test = alg_test_null,
-	}, {
-		.alg = "cryptd(__driver-ecb-serpent-avx2)",
-		.test = alg_test_null,
-	}, {
-		.alg = "cryptd(__driver-ecb-serpent-sse2)",
-		.test = alg_test_null,
-	}, {
-		.alg = "cryptd(__driver-ecb-twofish-avx)",
-		.test = alg_test_null,
-	}, {
-		.alg = "cryptd(__driver-gcm-aes-aesni)",
-		.test = alg_test_null,
-		.fips_allowed = 1,
-	}, {
-		.alg = "cryptd(__ghash-pclmulqdqni)",
-		.test = alg_test_null,
-		.fips_allowed = 1,
 	}, {
 		.alg = "ctr(aes)",
 		.test = alg_test_skcipher,
@@ -3033,10 +3039,6 @@ static const struct alg_test_desc alg_test_descs[] = {
 		.alg = "drbg_pr_sha512",
 		.fips_allowed = 1,
 		.test = alg_test_null,
-	}, {
-		.alg = "ecb(__aes-aesni)",
-		.test = alg_test_null,
-		.fips_allowed = 1,
 	}, {
 		.alg = "ecb(aes)",
 		.test = alg_test_skcipher,

+ 42 - 28
crypto/testmgr.h

@@ -1334,36 +1334,50 @@ static struct hash_testvec rmd320_tv_template[] = {
 	}
 };
 
-#define CRCT10DIF_TEST_VECTORS	3
+#define CRCT10DIF_TEST_VECTORS	ARRAY_SIZE(crct10dif_tv_template)
 static struct hash_testvec crct10dif_tv_template[] = {
 	{
-		.plaintext = "abc",
-		.psize  = 3,
-#ifdef __LITTLE_ENDIAN
-		.digest = "\x3b\x44",
-#else
-		.digest = "\x44\x3b",
-#endif
-	}, {
-		.plaintext = "1234567890123456789012345678901234567890"
-			     "123456789012345678901234567890123456789",
-		.psize	= 79,
-#ifdef __LITTLE_ENDIAN
-		.digest	= "\x70\x4b",
-#else
-		.digest	= "\x4b\x70",
-#endif
-	}, {
-		.plaintext =
-		"abcddddddddddddddddddddddddddddddddddddddddddddddddddddd",
-		.psize  = 56,
-#ifdef __LITTLE_ENDIAN
-		.digest = "\xe3\x9c",
-#else
-		.digest = "\x9c\xe3",
-#endif
-		.np     = 2,
-		.tap    = { 28, 28 }
+		.plaintext	= "abc",
+		.psize		= 3,
+		.digest		= (u8 *)(u16 []){ 0x443b },
+	}, {
+		.plaintext	= "1234567890123456789012345678901234567890"
+				  "123456789012345678901234567890123456789",
+		.psize		= 79,
+		.digest		= (u8 *)(u16 []){ 0x4b70 },
+		.np		= 2,
+		.tap		= { 63, 16 },
+	}, {
+		.plaintext	= "abcdddddddddddddddddddddddddddddddddddddddd"
+				  "ddddddddddddd",
+		.psize		= 56,
+		.digest		= (u8 *)(u16 []){ 0x9ce3 },
+		.np		= 8,
+		.tap		= { 1, 2, 28, 7, 6, 5, 4, 3 },
+	}, {
+		.plaintext	= "1234567890123456789012345678901234567890"
+				  "1234567890123456789012345678901234567890"
+				  "1234567890123456789012345678901234567890"
+				  "1234567890123456789012345678901234567890"
+				  "1234567890123456789012345678901234567890"
+				  "1234567890123456789012345678901234567890"
+				  "1234567890123456789012345678901234567890"
+				  "123456789012345678901234567890123456789",
+		.psize		= 319,
+		.digest		= (u8 *)(u16 []){ 0x44c6 },
+	}, {
+		.plaintext	= "1234567890123456789012345678901234567890"
+				  "1234567890123456789012345678901234567890"
+				  "1234567890123456789012345678901234567890"
+				  "1234567890123456789012345678901234567890"
+				  "1234567890123456789012345678901234567890"
+				  "1234567890123456789012345678901234567890"
+				  "1234567890123456789012345678901234567890"
+				  "123456789012345678901234567890123456789",
+		.psize		= 319,
+		.digest		= (u8 *)(u16 []){ 0x44c6 },
+		.np		= 4,
+		.tap		= { 1, 255, 57, 6 },
 	}
 };
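+
+/*
+ * When .np is set, .plaintext is fed in .np scatterlist segments of the
+ * .tap sizes, exercising the drivers' partial-block update paths.
+ */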
 

+ 392 - 155
crypto/xts.c

@@ -13,7 +13,8 @@
  * Software Foundation; either version 2 of the License, or (at your option)
  * any later version.
  */
-#include <crypto/algapi.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
 #include <linux/err.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
@@ -25,140 +26,320 @@
 #include <crypto/b128ops.h>
 #include <crypto/gf128mul.h>
 
+#define XTS_BUFFER_SIZE 128u
+
 struct priv {
-	struct crypto_cipher *child;
+	struct crypto_skcipher *child;
 	struct crypto_cipher *tweak;
 };
 
-static int setkey(struct crypto_tfm *parent, const u8 *key,
+struct xts_instance_ctx {
+	struct crypto_skcipher_spawn spawn;
+	char name[CRYPTO_MAX_ALG_NAME];
+};
+
+struct rctx {
+	be128 buf[XTS_BUFFER_SIZE / sizeof(be128)];
+
+	be128 t;
+
+	be128 *ext;
+
+	struct scatterlist srcbuf[2];
+	struct scatterlist dstbuf[2];
+	struct scatterlist *src;
+	struct scatterlist *dst;
+
+	unsigned int left;
+
+	struct skcipher_request subreq;
+};
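+
+/*
+ * Requests are processed in chunks of XTS_BUFFER_SIZE bytes (or up to a
+ * page via rctx->ext when it can be allocated): pre_crypt() records the
+ * tweak values in the buffer and XORs them into the data, the child
+ * ecb() request transforms in place, and post_crypt() XORs the saved
+ * tweaks back into the result.
+ */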
+
+static int setkey(struct crypto_skcipher *parent, const u8 *key,
 		  unsigned int keylen)
 {
-	struct priv *ctx = crypto_tfm_ctx(parent);
-	struct crypto_cipher *child = ctx->tweak;
+	struct priv *ctx = crypto_skcipher_ctx(parent);
+	struct crypto_skcipher *child;
+	struct crypto_cipher *tweak;
 	int err;
 
-	err = xts_check_key(parent, key, keylen);
+	err = xts_verify_key(parent, key, keylen);
 	if (err)
 		return err;
 
+	keylen /= 2;
+
 	/* we need two cipher instances: one to compute the initial 'tweak'
 	 * by encrypting the IV (usually the 'plain' iv) and the other
 	 * one to encrypt and decrypt the data */
 
 	/* tweak cipher, uses Key2 i.e. the second half of *key */
-	crypto_cipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
-	crypto_cipher_set_flags(child, crypto_tfm_get_flags(parent) &
+	tweak = ctx->tweak;
+	crypto_cipher_clear_flags(tweak, CRYPTO_TFM_REQ_MASK);
+	crypto_cipher_set_flags(tweak, crypto_skcipher_get_flags(parent) &
 				       CRYPTO_TFM_REQ_MASK);
-	err = crypto_cipher_setkey(child, key + keylen/2, keylen/2);
+	err = crypto_cipher_setkey(tweak, key + keylen, keylen);
+	crypto_skcipher_set_flags(parent, crypto_cipher_get_flags(tweak) &
+					  CRYPTO_TFM_RES_MASK);
 	if (err)
 		return err;
 
-	crypto_tfm_set_flags(parent, crypto_cipher_get_flags(child) &
-				     CRYPTO_TFM_RES_MASK);
-
+	/* data cipher, uses Key1 i.e. the first half of *key */
 	child = ctx->child;
+	crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
+	crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(parent) &
+					 CRYPTO_TFM_REQ_MASK);
+	err = crypto_skcipher_setkey(child, key, keylen);
+	crypto_skcipher_set_flags(parent, crypto_skcipher_get_flags(child) &
+					  CRYPTO_TFM_RES_MASK);
 
-	/* data cipher, uses Key1 i.e. the first half of *key */
-	crypto_cipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
-	crypto_cipher_set_flags(child, crypto_tfm_get_flags(parent) &
-				       CRYPTO_TFM_REQ_MASK);
-	err = crypto_cipher_setkey(child, key, keylen/2);
-	if (err)
-		return err;
+	return err;
+}
 
-	crypto_tfm_set_flags(parent, crypto_cipher_get_flags(child) &
-				     CRYPTO_TFM_RES_MASK);
+static int post_crypt(struct skcipher_request *req)
+{
+	struct rctx *rctx = skcipher_request_ctx(req);
+	be128 *buf = rctx->ext ?: rctx->buf;
+	struct skcipher_request *subreq;
+	const int bs = XTS_BLOCK_SIZE;
+	struct skcipher_walk w;
+	struct scatterlist *sg;
+	unsigned offset;
+	int err;
 
-	return 0;
-}
+	subreq = &rctx->subreq;
+	err = skcipher_walk_virt(&w, subreq, false);
 
-struct sinfo {
-	be128 *t;
-	struct crypto_tfm *tfm;
-	void (*fn)(struct crypto_tfm *, u8 *, const u8 *);
-};
+	while (w.nbytes) {
+		unsigned int avail = w.nbytes;
+		be128 *wdst;
 
-static inline void xts_round(struct sinfo *s, void *dst, const void *src)
-{
-	be128_xor(dst, s->t, src);		/* PP <- T xor P */
-	s->fn(s->tfm, dst, dst);		/* CC <- E(Key1,PP) */
-	be128_xor(dst, dst, s->t);		/* C <- T xor CC */
+		wdst = w.dst.virt.addr;
+
+		do {
+			be128_xor(wdst, buf++, wdst);
+			wdst++;
+		} while ((avail -= bs) >= bs);
+
+		err = skcipher_walk_done(&w, avail);
+	}
+
+	rctx->left -= subreq->cryptlen;
+
+	if (err || !rctx->left)
+		goto out;
+
+	rctx->dst = rctx->dstbuf;
+
+	scatterwalk_done(&w.out, 0, 1);
+	sg = w.out.sg;
+	offset = w.out.offset;
+
+	if (rctx->dst != sg) {
+		rctx->dst[0] = *sg;
+		sg_unmark_end(rctx->dst);
+		scatterwalk_crypto_chain(rctx->dst, sg_next(sg), 0, 2);
+	}
+	rctx->dst[0].length -= offset - sg->offset;
+	rctx->dst[0].offset = offset;
+
+out:
+	return err;
 }
 
-static int crypt(struct blkcipher_desc *d,
-		 struct blkcipher_walk *w, struct priv *ctx,
-		 void (*tw)(struct crypto_tfm *, u8 *, const u8 *),
-		 void (*fn)(struct crypto_tfm *, u8 *, const u8 *))
+static int pre_crypt(struct skcipher_request *req)
 {
-	int err;
-	unsigned int avail;
+	struct rctx *rctx = skcipher_request_ctx(req);
+	be128 *buf = rctx->ext ?: rctx->buf;
+	struct skcipher_request *subreq;
 	const int bs = XTS_BLOCK_SIZE;
-	struct sinfo s = {
-		.tfm = crypto_cipher_tfm(ctx->child),
-		.fn = fn
-	};
-	u8 *wsrc;
-	u8 *wdst;
-
-	err = blkcipher_walk_virt(d, w);
-	if (!w->nbytes)
-		return err;
+	struct skcipher_walk w;
+	struct scatterlist *sg;
+	unsigned cryptlen;
+	unsigned offset;
+	bool more;
+	int err;
 
-	s.t = (be128 *)w->iv;
-	avail = w->nbytes;
+	subreq = &rctx->subreq;
+	cryptlen = subreq->cryptlen;
 
-	wsrc = w->src.virt.addr;
-	wdst = w->dst.virt.addr;
+	more = rctx->left > cryptlen;
+	if (!more)
+		cryptlen = rctx->left;
 
-	/* calculate first value of T */
-	tw(crypto_cipher_tfm(ctx->tweak), w->iv, w->iv);
+	skcipher_request_set_crypt(subreq, rctx->src, rctx->dst,
+				   cryptlen, NULL);
 
-	goto first;
+	err = skcipher_walk_virt(&w, subreq, false);
 
-	for (;;) {
-		do {
-			gf128mul_x_ble(s.t, s.t);
+	while (w.nbytes) {
+		unsigned int avail = w.nbytes;
+		be128 *wsrc;
+		be128 *wdst;
 
-first:
-			xts_round(&s, wdst, wsrc);
+		wsrc = w.src.virt.addr;
+		wdst = w.dst.virt.addr;
 
-			wsrc += bs;
-			wdst += bs;
+		do {
+			*buf++ = rctx->t;
+			be128_xor(wdst++, &rctx->t, wsrc++);
+			gf128mul_x_ble(&rctx->t, &rctx->t);
 		} while ((avail -= bs) >= bs);
 
-		err = blkcipher_walk_done(d, w, avail);
-		if (!w->nbytes)
-			break;
+		err = skcipher_walk_done(&w, avail);
+	}
+
+	skcipher_request_set_crypt(subreq, rctx->dst, rctx->dst,
+				   cryptlen, NULL);
 
-		avail = w->nbytes;
+	if (err || !more)
+		goto out;
 
-		wsrc = w->src.virt.addr;
-		wdst = w->dst.virt.addr;
+	rctx->src = rctx->srcbuf;
+
+	scatterwalk_done(&w.in, 0, 1);
+	sg = w.in.sg;
+	offset = w.in.offset;
+
+	if (rctx->src != sg) {
+		rctx->src[0] = *sg;
+		sg_unmark_end(rctx->src);
+		scatterwalk_crypto_chain(rctx->src, sg_next(sg), 0, 2);
 	}
+	rctx->src[0].length -= offset - sg->offset;
+	rctx->src[0].offset = offset;
 
+out:
 	return err;
 }
 
-static int encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		   struct scatterlist *src, unsigned int nbytes)
+static int init_crypt(struct skcipher_request *req, crypto_completion_t done)
 {
-	struct priv *ctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk w;
+	struct priv *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
+	struct rctx *rctx = skcipher_request_ctx(req);
+	struct skcipher_request *subreq;
+	gfp_t gfp;
+
+	subreq = &rctx->subreq;
+	skcipher_request_set_tfm(subreq, ctx->child);
+	skcipher_request_set_callback(subreq, req->base.flags, done, req);
+
+	gfp = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL :
+							   GFP_ATOMIC;
+	rctx->ext = NULL;
+
+	subreq->cryptlen = XTS_BUFFER_SIZE;
+	if (req->cryptlen > XTS_BUFFER_SIZE) {
+		subreq->cryptlen = min(req->cryptlen, (unsigned)PAGE_SIZE);
+		rctx->ext = kmalloc(subreq->cryptlen, gfp);
+	}
+
+	rctx->src = req->src;
+	rctx->dst = req->dst;
+	rctx->left = req->cryptlen;
 
-	blkcipher_walk_init(&w, dst, src, nbytes);
-	return crypt(desc, &w, ctx, crypto_cipher_alg(ctx->tweak)->cia_encrypt,
-		     crypto_cipher_alg(ctx->child)->cia_encrypt);
+	/* calculate first value of T */
+	crypto_cipher_encrypt_one(ctx->tweak, (u8 *)&rctx->t, req->iv);
+
+	return 0;
 }
 
-static int decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		   struct scatterlist *src, unsigned int nbytes)
+static void exit_crypt(struct skcipher_request *req)
 {
-	struct priv *ctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk w;
+	struct rctx *rctx = skcipher_request_ctx(req);
+
+	rctx->left = 0;
 
-	blkcipher_walk_init(&w, dst, src, nbytes);
-	return crypt(desc, &w, ctx, crypto_cipher_alg(ctx->tweak)->cia_encrypt,
-		     crypto_cipher_alg(ctx->child)->cia_decrypt);
+	if (rctx->ext)
+		kzfree(rctx->ext);
+}
+
+static int do_encrypt(struct skcipher_request *req, int err)
+{
+	struct rctx *rctx = skcipher_request_ctx(req);
+	struct skcipher_request *subreq;
+
+	subreq = &rctx->subreq;
+
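+	/*
+	 * -EINPROGRESS/-EBUSY mean the child request completes
+	 * asynchronously; encrypt_done() then re-enters this loop via
+	 * do_encrypt().
+	 */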
+	while (!err && rctx->left) {
+		err = pre_crypt(req) ?:
+		      crypto_skcipher_encrypt(subreq) ?:
+		      post_crypt(req);
+
+		if (err == -EINPROGRESS ||
+		    (err == -EBUSY &&
+		     req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG))
+			return err;
+	}
+
+	exit_crypt(req);
+	return err;
+}
+
+static void encrypt_done(struct crypto_async_request *areq, int err)
+{
+	struct skcipher_request *req = areq->data;
+	struct skcipher_request *subreq;
+	struct rctx *rctx;
+
+	rctx = skcipher_request_ctx(req);
+	subreq = &rctx->subreq;
+	subreq->base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG;
+
+	err = do_encrypt(req, err ?: post_crypt(req));
+	if (rctx->left)
+		return;
+
+	skcipher_request_complete(req, err);
+}
+
+static int encrypt(struct skcipher_request *req)
+{
+	return do_encrypt(req, init_crypt(req, encrypt_done));
+}
+
+static int do_decrypt(struct skcipher_request *req, int err)
+{
+	struct rctx *rctx = skcipher_request_ctx(req);
+	struct skcipher_request *subreq;
+
+	subreq = &rctx->subreq;
+
+	while (!err && rctx->left) {
+		err = pre_crypt(req) ?:
+		      crypto_skcipher_decrypt(subreq) ?:
+		      post_crypt(req);
+
+		if (err == -EINPROGRESS ||
+		    (err == -EBUSY &&
+		     req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG))
+			return err;
+	}
+
+	exit_crypt(req);
+	return err;
+}
+
+static void decrypt_done(struct crypto_async_request *areq, int err)
+{
+	struct skcipher_request *req = areq->data;
+	struct skcipher_request *subreq;
+	struct rctx *rctx;
+
+	rctx = skcipher_request_ctx(req);
+	subreq = &rctx->subreq;
+	subreq->base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG;
+
+	err = do_decrypt(req, err ?: post_crypt(req));
+	if (rctx->left)
+		return;
+
+	skcipher_request_complete(req, err);
+}
+
+static int decrypt(struct skcipher_request *req)
+{
+	return do_decrypt(req, init_crypt(req, decrypt_done));
 }
 
 int xts_crypt(struct blkcipher_desc *desc, struct scatterlist *sdst,
@@ -233,112 +414,168 @@ first:
 }
 EXPORT_SYMBOL_GPL(xts_crypt);
 
-static int init_tfm(struct crypto_tfm *tfm)
+static int init_tfm(struct crypto_skcipher *tfm)
 {
-	struct crypto_cipher *cipher;
-	struct crypto_instance *inst = (void *)tfm->__crt_alg;
-	struct crypto_spawn *spawn = crypto_instance_ctx(inst);
-	struct priv *ctx = crypto_tfm_ctx(tfm);
-	u32 *flags = &tfm->crt_flags;
-
-	cipher = crypto_spawn_cipher(spawn);
-	if (IS_ERR(cipher))
-		return PTR_ERR(cipher);
-
-	if (crypto_cipher_blocksize(cipher) != XTS_BLOCK_SIZE) {
-		*flags |= CRYPTO_TFM_RES_BAD_BLOCK_LEN;
-		crypto_free_cipher(cipher);
-		return -EINVAL;
-	}
+	struct skcipher_instance *inst = skcipher_alg_instance(tfm);
+	struct xts_instance_ctx *ictx = skcipher_instance_ctx(inst);
+	struct priv *ctx = crypto_skcipher_ctx(tfm);
+	struct crypto_skcipher *child;
+	struct crypto_cipher *tweak;
 
-	ctx->child = cipher;
+	child = crypto_spawn_skcipher(&ictx->spawn);
+	if (IS_ERR(child))
+		return PTR_ERR(child);
 
-	cipher = crypto_spawn_cipher(spawn);
-	if (IS_ERR(cipher)) {
-		crypto_free_cipher(ctx->child);
-		return PTR_ERR(cipher);
-	}
+	ctx->child = child;
 
-	/* this check isn't really needed, leave it here just in case */
-	if (crypto_cipher_blocksize(cipher) != XTS_BLOCK_SIZE) {
-		crypto_free_cipher(cipher);
-		crypto_free_cipher(ctx->child);
-		*flags |= CRYPTO_TFM_RES_BAD_BLOCK_LEN;
-		return -EINVAL;
+	tweak = crypto_alloc_cipher(ictx->name, 0, 0);
+	if (IS_ERR(tweak)) {
+		crypto_free_skcipher(ctx->child);
+		return PTR_ERR(tweak);
 	}
 
-	ctx->tweak = cipher;
+	ctx->tweak = tweak;
+
+	crypto_skcipher_set_reqsize(tfm, crypto_skcipher_reqsize(child) +
+					 sizeof(struct rctx));
 
 	return 0;
 }
 
-static void exit_tfm(struct crypto_tfm *tfm)
+static void exit_tfm(struct crypto_skcipher *tfm)
 {
-	struct priv *ctx = crypto_tfm_ctx(tfm);
-	crypto_free_cipher(ctx->child);
+	struct priv *ctx = crypto_skcipher_ctx(tfm);
+
+	crypto_free_skcipher(ctx->child);
 	crypto_free_cipher(ctx->tweak);
 }
 
-static struct crypto_instance *alloc(struct rtattr **tb)
+static void free(struct skcipher_instance *inst)
+{
+	crypto_drop_skcipher(skcipher_instance_ctx(inst));
+	kfree(inst);
+}
+
+static int create(struct crypto_template *tmpl, struct rtattr **tb)
 {
-	struct crypto_instance *inst;
-	struct crypto_alg *alg;
+	struct skcipher_instance *inst;
+	struct crypto_attr_type *algt;
+	struct xts_instance_ctx *ctx;
+	struct skcipher_alg *alg;
+	const char *cipher_name;
 	int err;
 
-	err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_BLKCIPHER);
+	algt = crypto_get_attr_type(tb);
+	if (IS_ERR(algt))
+		return PTR_ERR(algt);
+
+	if ((algt->type ^ CRYPTO_ALG_TYPE_SKCIPHER) & algt->mask)
+		return -EINVAL;
+
+	cipher_name = crypto_attr_alg_name(tb[1]);
+	if (IS_ERR(cipher_name))
+		return PTR_ERR(cipher_name);
+
+	inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL);
+	if (!inst)
+		return -ENOMEM;
+
+	ctx = skcipher_instance_ctx(inst);
+
+	crypto_set_skcipher_spawn(&ctx->spawn, skcipher_crypto_instance(inst));
+	err = crypto_grab_skcipher(&ctx->spawn, cipher_name, 0,
+				   crypto_requires_sync(algt->type,
+							algt->mask));
+	if (err == -ENOENT) {
+		err = -ENAMETOOLONG;
+		if (snprintf(ctx->name, CRYPTO_MAX_ALG_NAME, "ecb(%s)",
+			     cipher_name) >= CRYPTO_MAX_ALG_NAME)
+			goto err_free_inst;
+
+		err = crypto_grab_skcipher(&ctx->spawn, ctx->name, 0,
+					   crypto_requires_sync(algt->type,
+								algt->mask));
+	}
+
 	if (err)
-		return ERR_PTR(err);
+		goto err_free_inst;
 
-	alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_CIPHER,
-				  CRYPTO_ALG_TYPE_MASK);
-	if (IS_ERR(alg))
-		return ERR_CAST(alg);
+	alg = crypto_skcipher_spawn_alg(&ctx->spawn);
 
-	inst = crypto_alloc_instance("xts", alg);
-	if (IS_ERR(inst))
-		goto out_put_alg;
+	err = -EINVAL;
+	if (alg->base.cra_blocksize != XTS_BLOCK_SIZE)
+		goto err_drop_spawn;
 
-	inst->alg.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER;
-	inst->alg.cra_priority = alg->cra_priority;
-	inst->alg.cra_blocksize = alg->cra_blocksize;
+	if (crypto_skcipher_alg_ivsize(alg))
+		goto err_drop_spawn;
 
-	if (alg->cra_alignmask < 7)
-		inst->alg.cra_alignmask = 7;
-	else
-		inst->alg.cra_alignmask = alg->cra_alignmask;
+	err = crypto_inst_setname(skcipher_crypto_instance(inst), "xts",
+				  &alg->base);
+	if (err)
+		goto err_drop_spawn;
 
-	inst->alg.cra_type = &crypto_blkcipher_type;
+	err = -EINVAL;
+	cipher_name = alg->base.cra_name;
 
-	inst->alg.cra_blkcipher.ivsize = alg->cra_blocksize;
-	inst->alg.cra_blkcipher.min_keysize =
-		2 * alg->cra_cipher.cia_min_keysize;
-	inst->alg.cra_blkcipher.max_keysize =
-		2 * alg->cra_cipher.cia_max_keysize;
+	/* Alas we screwed up the naming so we have to mangle the
+	 * cipher name.
+	 */
+	if (!strncmp(cipher_name, "ecb(", 4)) {
+		unsigned len;
 
-	inst->alg.cra_ctxsize = sizeof(struct priv);
+		len = strlcpy(ctx->name, cipher_name + 4, sizeof(ctx->name));
+		if (len < 2 || len >= sizeof(ctx->name))
+			goto err_drop_spawn;
 
-	inst->alg.cra_init = init_tfm;
-	inst->alg.cra_exit = exit_tfm;
+		if (ctx->name[len - 1] != ')')
+			goto err_drop_spawn;
 
-	inst->alg.cra_blkcipher.setkey = setkey;
-	inst->alg.cra_blkcipher.encrypt = encrypt;
-	inst->alg.cra_blkcipher.decrypt = decrypt;
+		ctx->name[len - 1] = 0;
 
-out_put_alg:
-	crypto_mod_put(alg);
-	return inst;
-}
+		err = -ENAMETOOLONG;
+		if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME,
+			     "xts(%s)", ctx->name) >= CRYPTO_MAX_ALG_NAME)
+			goto err_drop_spawn;
+	} else
+		goto err_drop_spawn;
 
-static void free(struct crypto_instance *inst)
-{
-	crypto_drop_spawn(crypto_instance_ctx(inst));
+	inst->alg.base.cra_flags = alg->base.cra_flags & CRYPTO_ALG_ASYNC;
+	inst->alg.base.cra_priority = alg->base.cra_priority;
+	inst->alg.base.cra_blocksize = XTS_BLOCK_SIZE;
+	inst->alg.base.cra_alignmask = alg->base.cra_alignmask |
+				       (__alignof__(u64) - 1);
+
+	inst->alg.ivsize = XTS_BLOCK_SIZE;
+	inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(alg) * 2;
+	inst->alg.max_keysize = crypto_skcipher_alg_max_keysize(alg) * 2;
+
+	inst->alg.base.cra_ctxsize = sizeof(struct priv);
+
+	inst->alg.init = init_tfm;
+	inst->alg.exit = exit_tfm;
+
+	inst->alg.setkey = setkey;
+	inst->alg.encrypt = encrypt;
+	inst->alg.decrypt = decrypt;
+
+	inst->free = free;
+
+	err = skcipher_register_instance(tmpl, inst);
+	if (err)
+		goto err_drop_spawn;
+
+out:
+	return err;
+
+err_drop_spawn:
+	crypto_drop_skcipher(&ctx->spawn);
+err_free_inst:
 	kfree(inst);
+	goto out;
 }
 
 static struct crypto_template crypto_tmpl = {
 	.name = "xts",
-	.alloc = alloc,
-	.free = free,
+	.create = create,
 	.module = THIS_MODULE,
 };
 

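The rewritten template now grabs the underlying algorithm as a full skcipher: it first tries the supplied name as-is and, on -ENOENT, retries with the name wrapped in "ecb(...)"; later it strips that wrapper again so the instance still registers as "xts(<cipher>)". Below is a minimal, self-contained sketch of just that name handling; the helper names, buffer size and error convention are illustrative and not the kernel's crypto API.

/*
 * Sketch: wrap a raw cipher name in "ecb(...)" for the lookup, then
 * strip the wrapper again when building the instance's "xts(...)" name.
 */
#include <stdio.h>
#include <string.h>

#define CRYPTO_MAX_ALG_NAME 128

static int wrap_ecb(char *dst, const char *cipher_name)
{
	if (snprintf(dst, CRYPTO_MAX_ALG_NAME, "ecb(%s)",
		     cipher_name) >= CRYPTO_MAX_ALG_NAME)
		return -1;	/* -ENAMETOOLONG in the kernel */
	return 0;
}

static int unwrap_ecb(char *dst, size_t dst_len, const char *full_name)
{
	size_t len = strlen(full_name);

	if (strncmp(full_name, "ecb(", 4) != 0 || full_name[len - 1] != ')')
		return -1;	/* not an "ecb(...)" name */

	len -= 5;		/* drop "ecb(" and the trailing ')' */
	if (len < 1 || len >= dst_len)
		return -1;

	memcpy(dst, full_name + 4, len);
	dst[len] = '\0';
	return 0;
}

int main(void)
{
	char wrapped[CRYPTO_MAX_ALG_NAME], raw[CRYPTO_MAX_ALG_NAME];

	if (wrap_ecb(wrapped, "aes") == 0)
		printf("lookup name: %s\n", wrapped);	/* ecb(aes) */
	if (unwrap_ecb(raw, sizeof(raw), wrapped) == 0)
		printf("cra_name   : xts(%s)\n", raw);	/* xts(aes) */
	return 0;
}
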
+ 1 - 1
drivers/char/hw_random/Kconfig

@@ -168,7 +168,7 @@ config HW_RANDOM_IXP4XX
 
 config HW_RANDOM_OMAP
 	tristate "OMAP Random Number Generator support"
-	depends on ARCH_OMAP16XX || ARCH_OMAP2PLUS
+	depends on ARCH_OMAP16XX || ARCH_OMAP2PLUS || ARCH_MVEBU
 	default HW_RANDOM
  	---help---
  	  This driver provides kernel-side support for the Random Number

+ 22 - 4
drivers/char/hw_random/atmel-rng.c

@@ -48,6 +48,16 @@ static int atmel_trng_read(struct hwrng *rng, void *buf, size_t max,
 		return 0;
 }
 
+static void atmel_trng_enable(struct atmel_trng *trng)
+{
+	writel(TRNG_KEY | 1, trng->base + TRNG_CR);
+}
+
+static void atmel_trng_disable(struct atmel_trng *trng)
+{
+	writel(TRNG_KEY, trng->base + TRNG_CR);
+}
+
 static int atmel_trng_probe(struct platform_device *pdev)
 {
 	struct atmel_trng *trng;
@@ -71,7 +81,7 @@ static int atmel_trng_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
-	writel(TRNG_KEY | 1, trng->base + TRNG_CR);
+	atmel_trng_enable(trng);
 	trng->rng.name = pdev->name;
 	trng->rng.read = atmel_trng_read;
 
@@ -84,7 +94,7 @@ static int atmel_trng_probe(struct platform_device *pdev)
 	return 0;
 
 err_register:
-	clk_disable(trng->clk);
+	clk_disable_unprepare(trng->clk);
 	return ret;
 }
 
@@ -94,7 +104,7 @@ static int atmel_trng_remove(struct platform_device *pdev)
 
 	hwrng_unregister(&trng->rng);
 
-	writel(TRNG_KEY, trng->base + TRNG_CR);
+	atmel_trng_disable(trng);
 	clk_disable_unprepare(trng->clk);
 
 	return 0;
@@ -105,6 +115,7 @@ static int atmel_trng_suspend(struct device *dev)
 {
 	struct atmel_trng *trng = dev_get_drvdata(dev);
 
+	atmel_trng_disable(trng);
 	clk_disable_unprepare(trng->clk);
 
 	return 0;
@@ -113,8 +124,15 @@ static int atmel_trng_suspend(struct device *dev)
 static int atmel_trng_resume(struct device *dev)
 {
 	struct atmel_trng *trng = dev_get_drvdata(dev);
+	int ret;
 
-	return clk_prepare_enable(trng->clk);
+	ret = clk_prepare_enable(trng->clk);
+	if (ret)
+		return ret;
+
+	atmel_trng_enable(trng);
+
+	return 0;
 }
 
 static const struct dev_pm_ops atmel_trng_pm_ops = {

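The atmel-rng changes factor the TRNG enable/disable writes into helpers and make suspend/resume symmetric: suspend disables the TRNG before gating its clock, and resume re-enables the TRNG only if the clock actually came back up. A compact sketch of that resume ordering follows, with the clock and register helpers stubbed out; none of these names are the driver's.

/*
 * Sketch of the resume ordering: bring the clock back first, bail out
 * on failure, then re-enable the TRNG block.
 */
struct trng_stub { int clk_on; int enabled; };

static int clk_prepare_enable_stub(struct trng_stub *t)
{
	t->clk_on = 1;
	return 0;		/* the real clk_prepare_enable() can fail */
}

static void trng_enable_stub(struct trng_stub *t)
{
	t->enabled = 1;		/* writel(TRNG_KEY | 1, base + TRNG_CR) upstream */
}

static int trng_resume_stub(struct trng_stub *t)
{
	int ret = clk_prepare_enable_stub(t);

	if (ret)
		return ret;	/* do not touch the block with its clock off */

	trng_enable_stub(t);
	return 0;
}

int main(void)
{
	struct trng_stub t = { 0, 0 };

	return trng_resume_stub(&t);
}
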
+ 3 - 0
drivers/char/hw_random/core.c

@@ -92,6 +92,7 @@ static void add_early_randomness(struct hwrng *rng)
 	mutex_unlock(&reading_mutex);
 	if (bytes_read > 0)
 		add_device_randomness(rng_buffer, bytes_read);
+	memset(rng_buffer, 0, size);
 }
 
 static inline void cleanup_rng(struct kref *kref)
@@ -287,6 +288,7 @@ static ssize_t rng_dev_read(struct file *filp, char __user *buf,
 		}
 	}
 out:
+	memset(rng_buffer, 0, rng_buffer_size());
 	return ret ? : err;
 
 out_unlock_reading:
@@ -425,6 +427,7 @@ static int hwrng_fillfn(void *unused)
 		/* Outside lock, sure, but y'know: randomness. */
 		add_hwgenerator_randomness((void *)rng_fillbuf, rc,
 					   rc * current_quality * 8 >> 10);
+		memset(rng_fillbuf, 0, rng_buffer_size());
 	}
 	hwrng_fill = NULL;
 	return 0;

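The core.c hunks add memset() calls so the shared staging buffers are wiped as soon as their contents have been pushed to the entropy pool or copied to user space. A small stand-alone sketch of that pattern, assuming a hypothetical consume_entropy() in place of the real consumers and an illustrative buffer size:

/*
 * Sketch: hand the bytes off, then clear the whole staging buffer so
 * stale RNG output does not linger in memory.
 */
#include <stddef.h>
#include <string.h>

#define RNG_BUFFER_SIZE 64

static unsigned char rng_buffer[RNG_BUFFER_SIZE];

static void consume_entropy(const void *buf, size_t len)
{
	(void)buf;
	(void)len;		/* placeholder for add_device_randomness()/copy_to_user() */
}

static void push_and_wipe(size_t bytes_read)
{
	if (bytes_read > 0)
		consume_entropy(rng_buffer, bytes_read);

	/* Clear the whole staging buffer, not just the bytes consumed. */
	memset(rng_buffer, 0, sizeof(rng_buffer));
}

int main(void)
{
	push_and_wipe(32);	/* pretend a driver just filled 32 bytes */
	return 0;
}
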
+ 1 - 1
drivers/char/hw_random/meson-rng.c

@@ -110,6 +110,7 @@ static const struct of_device_id meson_rng_of_match[] = {
 	{ .compatible = "amlogic,meson-rng", },
 	{},
 };
+MODULE_DEVICE_TABLE(of, meson_rng_of_match);
 
 static struct platform_driver meson_rng_driver = {
 	.probe	= meson_rng_probe,
@@ -121,7 +122,6 @@ static struct platform_driver meson_rng_driver = {
 
 module_platform_driver(meson_rng_driver);
 
-MODULE_ALIAS("platform:meson-rng");
 MODULE_DESCRIPTION("Meson H/W Random Number Generator driver");
 MODULE_AUTHOR("Lawrence Mok <lawrence.mok@amlogic.com>");
 MODULE_AUTHOR("Neil Armstrong <narmstrong@baylibre.com>");

+ 0 - 4
drivers/char/hw_random/msm-rng.c

@@ -90,10 +90,6 @@ static int msm_rng_read(struct hwrng *hwrng, void *data, size_t max, bool wait)
 	/* calculate max size bytes to transfer back to caller */
 	maxsize = min_t(size_t, MAX_HW_FIFO_SIZE, max);
 
-	/* no room for word data */
-	if (maxsize < WORD_SZ)
-		return 0;
-
 	ret = clk_prepare_enable(rng->clk);
 	if (ret)
 		return ret;

+ 122 - 40
drivers/char/hw_random/omap-rng.c

@@ -28,6 +28,7 @@
 #include <linux/of_device.h>
 #include <linux/of_address.h>
 #include <linux/interrupt.h>
+#include <linux/clk.h>
 
 #include <asm/io.h>
 
@@ -63,10 +64,13 @@
 
 #define OMAP2_RNG_OUTPUT_SIZE			0x4
 #define OMAP4_RNG_OUTPUT_SIZE			0x8
+#define EIP76_RNG_OUTPUT_SIZE			0x10
 
 enum {
-	RNG_OUTPUT_L_REG = 0,
-	RNG_OUTPUT_H_REG,
+	RNG_OUTPUT_0_REG = 0,
+	RNG_OUTPUT_1_REG,
+	RNG_OUTPUT_2_REG,
+	RNG_OUTPUT_3_REG,
 	RNG_STATUS_REG,
 	RNG_INTMASK_REG,
 	RNG_INTACK_REG,
@@ -82,7 +86,7 @@ enum {
 };
 
 static const u16 reg_map_omap2[] = {
-	[RNG_OUTPUT_L_REG]	= 0x0,
+	[RNG_OUTPUT_0_REG]	= 0x0,
 	[RNG_STATUS_REG]	= 0x4,
 	[RNG_CONFIG_REG]	= 0x28,
 	[RNG_REV_REG]		= 0x3c,
@@ -90,8 +94,8 @@ static const u16 reg_map_omap2[] = {
 };
 
 static const u16 reg_map_omap4[] = {
-	[RNG_OUTPUT_L_REG]	= 0x0,
-	[RNG_OUTPUT_H_REG]	= 0x4,
+	[RNG_OUTPUT_0_REG]	= 0x0,
+	[RNG_OUTPUT_1_REG]	= 0x4,
 	[RNG_STATUS_REG]	= 0x8,
 	[RNG_INTMASK_REG]	= 0xc,
 	[RNG_INTACK_REG]	= 0x10,
@@ -106,6 +110,23 @@ static const u16 reg_map_omap4[] = {
 	[RNG_SYSCONFIG_REG]	= 0x1FE4,
 };
 
+static const u16 reg_map_eip76[] = {
+	[RNG_OUTPUT_0_REG]	= 0x0,
+	[RNG_OUTPUT_1_REG]	= 0x4,
+	[RNG_OUTPUT_2_REG]	= 0x8,
+	[RNG_OUTPUT_3_REG]	= 0xc,
+	[RNG_STATUS_REG]	= 0x10,
+	[RNG_INTACK_REG]	= 0x10,
+	[RNG_CONTROL_REG]	= 0x14,
+	[RNG_CONFIG_REG]	= 0x18,
+	[RNG_ALARMCNT_REG]	= 0x1c,
+	[RNG_FROENABLE_REG]	= 0x20,
+	[RNG_FRODETUNE_REG]	= 0x24,
+	[RNG_ALARMMASK_REG]	= 0x28,
+	[RNG_ALARMSTOP_REG]	= 0x2c,
+	[RNG_REV_REG]		= 0x7c,
+};
+
 struct omap_rng_dev;
 /**
  * struct omap_rng_pdata - RNG IP block-specific data
@@ -127,6 +148,8 @@ struct omap_rng_dev {
 	void __iomem			*base;
 	struct device			*dev;
 	const struct omap_rng_pdata	*pdata;
+	struct hwrng rng;
+	struct clk 			*clk;
 };
 
 static inline u32 omap_rng_read(struct omap_rng_dev *priv, u16 reg)
@@ -140,41 +163,35 @@ static inline void omap_rng_write(struct omap_rng_dev *priv, u16 reg,
 	__raw_writel(val, priv->base + priv->pdata->regs[reg]);
 }
 
-static int omap_rng_data_present(struct hwrng *rng, int wait)
+
+static int omap_rng_do_read(struct hwrng *rng, void *data, size_t max,
+			    bool wait)
 {
 	struct omap_rng_dev *priv;
-	int data, i;
+	int i, present;
 
 	priv = (struct omap_rng_dev *)rng->priv;
 
+	if (max < priv->pdata->data_size)
+		return 0;
+
 	for (i = 0; i < 20; i++) {
-		data = priv->pdata->data_present(priv);
-		if (data || !wait)
+		present = priv->pdata->data_present(priv);
+		if (present || !wait)
 			break;
-		/* RNG produces data fast enough (2+ MBit/sec, even
-		 * during "rngtest" loads, that these delays don't
-		 * seem to trigger.  We *could* use the RNG IRQ, but
-		 * that'd be higher overhead ... so why bother?
-		 */
+
 		udelay(10);
 	}
-	return data;
-}
-
-static int omap_rng_data_read(struct hwrng *rng, u32 *data)
-{
-	struct omap_rng_dev *priv;
-	u32 data_size, i;
-
-	priv = (struct omap_rng_dev *)rng->priv;
-	data_size = priv->pdata->data_size;
+	if (!present)
+		return 0;
 
-	for (i = 0; i < data_size / sizeof(u32); i++)
-		data[i] = omap_rng_read(priv, RNG_OUTPUT_L_REG + i);
+	memcpy_fromio(data, priv->base + priv->pdata->regs[RNG_OUTPUT_0_REG],
+		      priv->pdata->data_size);
 
 	if (priv->pdata->regs[RNG_INTACK_REG])
 		omap_rng_write(priv, RNG_INTACK_REG, RNG_REG_INTACK_RDY_MASK);
-	return data_size;
+
+	return priv->pdata->data_size;
 }
 
 static int omap_rng_init(struct hwrng *rng)
@@ -193,13 +210,6 @@ static void omap_rng_cleanup(struct hwrng *rng)
 	priv->pdata->cleanup(priv);
 }
 
-static struct hwrng omap_rng_ops = {
-	.name		= "omap",
-	.data_present	= omap_rng_data_present,
-	.data_read	= omap_rng_data_read,
-	.init		= omap_rng_init,
-	.cleanup	= omap_rng_cleanup,
-};
 
 static inline u32 omap2_rng_data_present(struct omap_rng_dev *priv)
 {
@@ -231,6 +241,38 @@ static inline u32 omap4_rng_data_present(struct omap_rng_dev *priv)
 	return omap_rng_read(priv, RNG_STATUS_REG) & RNG_REG_STATUS_RDY;
 }
 
+static int eip76_rng_init(struct omap_rng_dev *priv)
+{
+	u32 val;
+
+	/* Return if RNG is already running. */
+	if (omap_rng_read(priv, RNG_CONTROL_REG) & RNG_CONTROL_ENABLE_TRNG_MASK)
+		return 0;
+
+	/*  Number of 512 bit blocks of raw Noise Source output data that must
+	 *  be processed by either the Conditioning Function or the
+	 *  SP 800-90 DRBG ‘BC_DF’ functionality to yield a ‘full entropy’
+	 *  output value.
+	 */
+	val = 0x5 << RNG_CONFIG_MIN_REFIL_CYCLES_SHIFT;
+
+	/* Number of FRO samples that are XOR-ed together into one bit to be
+	 * shifted into the main shift register
+	 */
+	val |= RNG_CONFIG_MAX_REFIL_CYCLES << RNG_CONFIG_MAX_REFIL_CYCLES_SHIFT;
+	omap_rng_write(priv, RNG_CONFIG_REG, val);
+
+	/* Enable all available FROs */
+	omap_rng_write(priv, RNG_FRODETUNE_REG, 0x0);
+	omap_rng_write(priv, RNG_FROENABLE_REG, RNG_REG_FROENABLE_MASK);
+
+	/* Enable TRNG */
+	val = RNG_CONTROL_ENABLE_TRNG_MASK;
+	omap_rng_write(priv, RNG_CONTROL_REG, val);
+
+	return 0;
+}
+
 static int omap4_rng_init(struct omap_rng_dev *priv)
 {
 	u32 val;
@@ -300,6 +342,14 @@ static struct omap_rng_pdata omap4_rng_pdata = {
 	.cleanup	= omap4_rng_cleanup,
 };
 
+static struct omap_rng_pdata eip76_rng_pdata = {
+	.regs		= (u16 *)reg_map_eip76,
+	.data_size	= EIP76_RNG_OUTPUT_SIZE,
+	.data_present	= omap4_rng_data_present,
+	.init		= eip76_rng_init,
+	.cleanup	= omap4_rng_cleanup,
+};
+
 static const struct of_device_id omap_rng_of_match[] = {
 		{
 			.compatible	= "ti,omap2-rng",
@@ -309,6 +359,10 @@ static const struct of_device_id omap_rng_of_match[] = {
 			.compatible	= "ti,omap4-rng",
 			.data		= &omap4_rng_pdata,
 		},
+		{
+			.compatible	= "inside-secure,safexcel-eip76",
+			.data		= &eip76_rng_pdata,
+		},
 		{},
 };
 MODULE_DEVICE_TABLE(of, omap_rng_of_match);
@@ -327,7 +381,8 @@ static int of_get_omap_rng_device_details(struct omap_rng_dev *priv,
 	}
 	priv->pdata = match->data;
 
-	if (of_device_is_compatible(dev->of_node, "ti,omap4-rng")) {
+	if (of_device_is_compatible(dev->of_node, "ti,omap4-rng") ||
+	    of_device_is_compatible(dev->of_node, "inside-secure,safexcel-eip76")) {
 		irq = platform_get_irq(pdev, 0);
 		if (irq < 0) {
 			dev_err(dev, "%s: error getting IRQ resource - %d\n",
@@ -343,6 +398,16 @@ static int of_get_omap_rng_device_details(struct omap_rng_dev *priv,
 			return err;
 		}
 		omap_rng_write(priv, RNG_INTMASK_REG, RNG_SHUTDOWN_OFLO_MASK);
+
+		priv->clk = of_clk_get(pdev->dev.of_node, 0);
+		if (IS_ERR(priv->clk) && PTR_ERR(priv->clk) == -EPROBE_DEFER)
+			return -EPROBE_DEFER;
+		if (!IS_ERR(priv->clk)) {
+			err = clk_prepare_enable(priv->clk);
+			if (err)
+				dev_err(&pdev->dev, "unable to enable the clk, "
+						    "err = %d\n", err);
+		}
 	}
 	return 0;
 }
@@ -372,7 +437,11 @@ static int omap_rng_probe(struct platform_device *pdev)
 	if (!priv)
 		return -ENOMEM;
 
-	omap_rng_ops.priv = (unsigned long)priv;
+	priv->rng.read = omap_rng_do_read;
+	priv->rng.init = omap_rng_init;
+	priv->rng.cleanup = omap_rng_cleanup;
+
+	priv->rng.priv = (unsigned long)priv;
 	platform_set_drvdata(pdev, priv);
 	priv->dev = dev;
 
@@ -383,6 +452,12 @@ static int omap_rng_probe(struct platform_device *pdev)
 		goto err_ioremap;
 	}
 
+	priv->rng.name = devm_kstrdup(dev, dev_name(dev), GFP_KERNEL);
+	if (!priv->rng.name) {
+		ret = -ENOMEM;
+		goto err_ioremap;
+	}
+
 	pm_runtime_enable(&pdev->dev);
 	ret = pm_runtime_get_sync(&pdev->dev);
 	if (ret < 0) {
@@ -394,20 +469,24 @@ static int omap_rng_probe(struct platform_device *pdev)
 	ret = (dev->of_node) ? of_get_omap_rng_device_details(priv, pdev) :
 				get_omap_rng_device_details(priv);
 	if (ret)
-		goto err_ioremap;
+		goto err_register;
 
-	ret = hwrng_register(&omap_rng_ops);
+	ret = hwrng_register(&priv->rng);
 	if (ret)
 		goto err_register;
 
-	dev_info(&pdev->dev, "OMAP Random Number Generator ver. %02x\n",
+	dev_info(&pdev->dev, "Random Number Generator ver. %02x\n",
 		 omap_rng_read(priv, RNG_REV_REG));
 
 	return 0;
 
 err_register:
 	priv->base = NULL;
+	pm_runtime_put_sync(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
+
+	if (!IS_ERR(priv->clk))
+		clk_disable_unprepare(priv->clk);
 err_ioremap:
 	dev_err(dev, "initialization failed.\n");
 	return ret;
@@ -417,13 +496,16 @@ static int omap_rng_remove(struct platform_device *pdev)
 {
 	struct omap_rng_dev *priv = platform_get_drvdata(pdev);
 
-	hwrng_unregister(&omap_rng_ops);
+	hwrng_unregister(&priv->rng);
 
 	priv->pdata->cleanup(priv);
 
 	pm_runtime_put_sync(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
 
+	if (!IS_ERR(priv->clk))
+		clk_disable_unprepare(priv->clk);
+
 	return 0;
 }
 

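The omap-rng rework drops the old ->data_present()/->data_read() pair in favour of a single ->read() callback that polls for readiness and then copies a whole data_size block in one go. A rough, self-contained sketch of that callback shape follows; register access is mocked out and the names and sizes are illustrative, not the driver's.

/*
 * Sketch of a new-style hwrng read callback: check the caller's buffer,
 * poll a ready flag, then copy one full output block.
 */
#include <stdbool.h>
#include <stddef.h>
#include <string.h>

#define DATA_SIZE 16	/* e.g. the EIP76 emits four 32-bit words per read */

static unsigned char output_fifo[DATA_SIZE];	/* stands in for the OUTPUT registers */

static bool data_present(void)
{
	return true;	/* the real driver reads a STATUS register */
}

static int rng_do_read(void *data, size_t max, bool wait)
{
	int i, present = 0;

	/* Mirrors the "max < priv->pdata->data_size" check added upstream. */
	if (max < DATA_SIZE)
		return 0;

	for (i = 0; i < 20; i++) {
		present = data_present();
		if (present || !wait)
			break;
		/* udelay(10) in the real driver */
	}
	if (!present)
		return 0;

	/* memcpy_fromio() from the OUTPUT registers in the real driver. */
	memcpy(data, output_fifo, DATA_SIZE);
	return DATA_SIZE;
}

int main(void)
{
	unsigned char buf[DATA_SIZE];

	return rng_do_read(buf, sizeof(buf), true) == DATA_SIZE ? 0 : 1;
}
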
+ 0 - 3
drivers/char/hw_random/pic32-rng.c

@@ -62,9 +62,6 @@ static int pic32_rng_read(struct hwrng *rng, void *buf, size_t max,
 	u32 t;
 	unsigned int timeout = RNG_TIMEOUT;
 
-	if (max < 8)
-		return 0;
-
 	do {
 		t = readl(priv->base + RNGRCNT) & RCNT_MASK;
 		if (t == 64) {

+ 2 - 3
drivers/char/hw_random/pseries-rng.c

@@ -28,7 +28,6 @@
 static int pseries_rng_read(struct hwrng *rng, void *data, size_t max, bool wait)
 {
 	u64 buffer[PLPAR_HCALL_BUFSIZE];
-	size_t size = max < 8 ? max : 8;
 	int rc;
 
 	rc = plpar_hcall(H_RANDOM, (unsigned long *)buffer);
@@ -36,10 +35,10 @@ static int pseries_rng_read(struct hwrng *rng, void *data, size_t max, bool wait
 		pr_err_ratelimited("H_RANDOM call failed %d\n", rc);
 		return -EIO;
 	}
-	memcpy(data, buffer, size);
+	memcpy(data, buffer, 8);
 
 	/* The hypervisor interface returns 64 bits */
-	return size;
+	return 8;
 }
 
 /**

+ 1 - 2
drivers/crypto/amcc/crypto4xx_core.c

@@ -135,8 +135,7 @@ int crypto4xx_alloc_sa(struct crypto4xx_ctx *ctx, u32 size)
 	ctx->sa_out = dma_alloc_coherent(ctx->dev->core_dev->device, size * 4,
 					 &ctx->sa_out_dma_addr, GFP_ATOMIC);
 	if (ctx->sa_out == NULL) {
-		dma_free_coherent(ctx->dev->core_dev->device,
-				  ctx->sa_len * 4,
+		dma_free_coherent(ctx->dev->core_dev->device, size * 4,
 				  ctx->sa_in, ctx->sa_in_dma_addr);
 		return -ENOMEM;
 	}

+ 4 - 0
drivers/crypto/atmel-aes-regs.h

@@ -28,6 +28,7 @@
 #define AES_MR_OPMOD_CFB		(0x3 << 12)
 #define AES_MR_OPMOD_CTR		(0x4 << 12)
 #define AES_MR_OPMOD_GCM		(0x5 << 12)
+#define AES_MR_OPMOD_XTS		(0x6 << 12)
 #define AES_MR_LOD				(0x1 << 15)
 #define AES_MR_CFBS_MASK		(0x7 << 16)
 #define AES_MR_CFBS_128b		(0x0 << 16)
@@ -67,6 +68,9 @@
 #define AES_CTRR	0x98
 #define AES_GCMHR(x)	(0x9c + ((x) * 0x04))
 
+#define AES_TWR(x)	(0xc0 + ((x) * 0x04))
+#define AES_ALPHAR(x)	(0xd0 + ((x) * 0x04))
+
 #define AES_HW_VERSION	0xFC
 
 #endif /* __ATMEL_AES_REGS_H__ */

+ 181 - 8
drivers/crypto/atmel-aes.c

@@ -36,6 +36,7 @@
 #include <crypto/scatterwalk.h>
 #include <crypto/algapi.h>
 #include <crypto/aes.h>
+#include <crypto/xts.h>
 #include <crypto/internal/aead.h>
 #include <linux/platform_data/crypto-atmel.h>
 #include <dt-bindings/dma/at91.h>
@@ -68,6 +69,7 @@
 #define AES_FLAGS_CFB8		(AES_MR_OPMOD_CFB | AES_MR_CFBS_8b)
 #define AES_FLAGS_CTR		AES_MR_OPMOD_CTR
 #define AES_FLAGS_GCM		AES_MR_OPMOD_GCM
+#define AES_FLAGS_XTS		AES_MR_OPMOD_XTS
 
 #define AES_FLAGS_MODE_MASK	(AES_FLAGS_OPMODE_MASK |	\
 				 AES_FLAGS_ENCRYPT |		\
@@ -89,6 +91,7 @@ struct atmel_aes_caps {
 	bool			has_cfb64;
 	bool			has_ctr32;
 	bool			has_gcm;
+	bool			has_xts;
 	u32			max_burst_size;
 };
 
@@ -135,6 +138,12 @@ struct atmel_aes_gcm_ctx {
 	atmel_aes_fn_t		ghash_resume;
 };
 
+struct atmel_aes_xts_ctx {
+	struct atmel_aes_base_ctx	base;
+
+	u32			key2[AES_KEYSIZE_256 / sizeof(u32)];
+};
+
 struct atmel_aes_reqctx {
 	unsigned long		mode;
 };
@@ -282,6 +291,20 @@ static const char *atmel_aes_reg_name(u32 offset, char *tmp, size_t sz)
 		snprintf(tmp, sz, "GCMHR[%u]", (offset - AES_GCMHR(0)) >> 2);
 		break;
 
+	case AES_TWR(0):
+	case AES_TWR(1):
+	case AES_TWR(2):
+	case AES_TWR(3):
+		snprintf(tmp, sz, "TWR[%u]", (offset - AES_TWR(0)) >> 2);
+		break;
+
+	case AES_ALPHAR(0):
+	case AES_ALPHAR(1):
+	case AES_ALPHAR(2):
+	case AES_ALPHAR(3):
+		snprintf(tmp, sz, "ALPHAR[%u]", (offset - AES_ALPHAR(0)) >> 2);
+		break;
+
 	default:
 		snprintf(tmp, sz, "0x%02x", offset);
 		break;
@@ -317,7 +340,7 @@ static inline void atmel_aes_write(struct atmel_aes_dev *dd,
 		char tmp[16];
 
 		dev_vdbg(dd->dev, "write 0x%08x into %s\n", value,
-			 atmel_aes_reg_name(offset, tmp));
+			 atmel_aes_reg_name(offset, tmp, sizeof(tmp)));
 	}
 #endif /* VERBOSE_DEBUG */
 
@@ -453,15 +476,15 @@ static inline int atmel_aes_complete(struct atmel_aes_dev *dd, int err)
 	return err;
 }
 
-static void atmel_aes_write_ctrl(struct atmel_aes_dev *dd, bool use_dma,
-				 const u32 *iv)
+static void atmel_aes_write_ctrl_key(struct atmel_aes_dev *dd, bool use_dma,
+				     const u32 *iv, const u32 *key, int keylen)
 {
 	u32 valmr = 0;
 
 	/* MR register must be set before IV registers */
-	if (dd->ctx->keylen == AES_KEYSIZE_128)
+	if (keylen == AES_KEYSIZE_128)
 		valmr |= AES_MR_KEYSIZE_128;
-	else if (dd->ctx->keylen == AES_KEYSIZE_192)
+	else if (keylen == AES_KEYSIZE_192)
 		valmr |= AES_MR_KEYSIZE_192;
 	else
 		valmr |= AES_MR_KEYSIZE_256;
@@ -478,13 +501,19 @@ static void atmel_aes_write_ctrl(struct atmel_aes_dev *dd, bool use_dma,
 
 	atmel_aes_write(dd, AES_MR, valmr);
 
-	atmel_aes_write_n(dd, AES_KEYWR(0), dd->ctx->key,
-			  SIZE_IN_WORDS(dd->ctx->keylen));
+	atmel_aes_write_n(dd, AES_KEYWR(0), key, SIZE_IN_WORDS(keylen));
 
 	if (iv && (valmr & AES_MR_OPMOD_MASK) != AES_MR_OPMOD_ECB)
 		atmel_aes_write_block(dd, AES_IVR(0), iv);
 }
 
+static inline void atmel_aes_write_ctrl(struct atmel_aes_dev *dd, bool use_dma,
+					const u32 *iv)
+
+{
+	atmel_aes_write_ctrl_key(dd, use_dma, iv,
+				 dd->ctx->key, dd->ctx->keylen);
+}
 
 /* CPU transfer */
 
@@ -1769,6 +1798,137 @@ static struct aead_alg aes_gcm_alg = {
 };
 
 
+/* xts functions */
+
+static inline struct atmel_aes_xts_ctx *
+atmel_aes_xts_ctx_cast(struct atmel_aes_base_ctx *ctx)
+{
+	return container_of(ctx, struct atmel_aes_xts_ctx, base);
+}
+
+static int atmel_aes_xts_process_data(struct atmel_aes_dev *dd);
+
+static int atmel_aes_xts_start(struct atmel_aes_dev *dd)
+{
+	struct atmel_aes_xts_ctx *ctx = atmel_aes_xts_ctx_cast(dd->ctx);
+	struct ablkcipher_request *req = ablkcipher_request_cast(dd->areq);
+	struct atmel_aes_reqctx *rctx = ablkcipher_request_ctx(req);
+	unsigned long flags;
+	int err;
+
+	atmel_aes_set_mode(dd, rctx);
+
+	err = atmel_aes_hw_init(dd);
+	if (err)
+		return atmel_aes_complete(dd, err);
+
+	/* Compute the tweak value from req->info with ecb(aes). */
+	flags = dd->flags;
+	dd->flags &= ~AES_FLAGS_MODE_MASK;
+	dd->flags |= (AES_FLAGS_ECB | AES_FLAGS_ENCRYPT);
+	atmel_aes_write_ctrl_key(dd, false, NULL,
+				 ctx->key2, ctx->base.keylen);
+	dd->flags = flags;
+
+	atmel_aes_write_block(dd, AES_IDATAR(0), req->info);
+	return atmel_aes_wait_for_data_ready(dd, atmel_aes_xts_process_data);
+}
+
+static int atmel_aes_xts_process_data(struct atmel_aes_dev *dd)
+{
+	struct ablkcipher_request *req = ablkcipher_request_cast(dd->areq);
+	bool use_dma = (req->nbytes >= ATMEL_AES_DMA_THRESHOLD);
+	u32 tweak[AES_BLOCK_SIZE / sizeof(u32)];
+	static const u32 one[AES_BLOCK_SIZE / sizeof(u32)] = {cpu_to_le32(1), };
+	u8 *tweak_bytes = (u8 *)tweak;
+	int i;
+
+	/* Read the computed ciphered tweak value. */
+	atmel_aes_read_block(dd, AES_ODATAR(0), tweak);
+	/*
+	 * Hardware quirk:
+	 * the order of the ciphered tweak bytes needs to be reversed before
+	 * writing them into the ODATARx registers.
+	 */
+	for (i = 0; i < AES_BLOCK_SIZE/2; ++i) {
+		u8 tmp = tweak_bytes[AES_BLOCK_SIZE - 1 - i];
+
+		tweak_bytes[AES_BLOCK_SIZE - 1 - i] = tweak_bytes[i];
+		tweak_bytes[i] = tmp;
+	}
+
+	/* Process the data. */
+	atmel_aes_write_ctrl(dd, use_dma, NULL);
+	atmel_aes_write_block(dd, AES_TWR(0), tweak);
+	atmel_aes_write_block(dd, AES_ALPHAR(0), one);
+	if (use_dma)
+		return atmel_aes_dma_start(dd, req->src, req->dst, req->nbytes,
+					   atmel_aes_transfer_complete);
+
+	return atmel_aes_cpu_start(dd, req->src, req->dst, req->nbytes,
+				   atmel_aes_transfer_complete);
+}
+
+static int atmel_aes_xts_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+				unsigned int keylen)
+{
+	struct atmel_aes_xts_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+	int err;
+
+	err = xts_check_key(crypto_ablkcipher_tfm(tfm), key, keylen);
+	if (err)
+		return err;
+
+	memcpy(ctx->base.key, key, keylen/2);
+	memcpy(ctx->key2, key + keylen/2, keylen/2);
+	ctx->base.keylen = keylen/2;
+
+	return 0;
+}
+
+static int atmel_aes_xts_encrypt(struct ablkcipher_request *req)
+{
+	return atmel_aes_crypt(req, AES_FLAGS_XTS | AES_FLAGS_ENCRYPT);
+}
+
+static int atmel_aes_xts_decrypt(struct ablkcipher_request *req)
+{
+	return atmel_aes_crypt(req, AES_FLAGS_XTS);
+}
+
+static int atmel_aes_xts_cra_init(struct crypto_tfm *tfm)
+{
+	struct atmel_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	tfm->crt_ablkcipher.reqsize = sizeof(struct atmel_aes_reqctx);
+	ctx->base.start = atmel_aes_xts_start;
+
+	return 0;
+}
+
+static struct crypto_alg aes_xts_alg = {
+	.cra_name		= "xts(aes)",
+	.cra_driver_name	= "atmel-xts-aes",
+	.cra_priority		= ATMEL_AES_PRIORITY,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= AES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct atmel_aes_xts_ctx),
+	.cra_alignmask		= 0xf,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= atmel_aes_xts_cra_init,
+	.cra_exit		= atmel_aes_cra_exit,
+	.cra_u.ablkcipher = {
+		.min_keysize	= 2 * AES_MIN_KEY_SIZE,
+		.max_keysize	= 2 * AES_MAX_KEY_SIZE,
+		.ivsize		= AES_BLOCK_SIZE,
+		.setkey		= atmel_aes_xts_setkey,
+		.encrypt	= atmel_aes_xts_encrypt,
+		.decrypt	= atmel_aes_xts_decrypt,
+	}
+};
+
+
 /* Probe functions */
 
 static int atmel_aes_buff_init(struct atmel_aes_dev *dd)
@@ -1877,6 +2037,9 @@ static void atmel_aes_unregister_algs(struct atmel_aes_dev *dd)
 {
 	int i;
 
+	if (dd->caps.has_xts)
+		crypto_unregister_alg(&aes_xts_alg);
+
 	if (dd->caps.has_gcm)
 		crypto_unregister_aead(&aes_gcm_alg);
 
@@ -1909,8 +2072,16 @@ static int atmel_aes_register_algs(struct atmel_aes_dev *dd)
 			goto err_aes_gcm_alg;
 	}
 
+	if (dd->caps.has_xts) {
+		err = crypto_register_alg(&aes_xts_alg);
+		if (err)
+			goto err_aes_xts_alg;
+	}
+
 	return 0;
 
+err_aes_xts_alg:
+	crypto_unregister_aead(&aes_gcm_alg);
 err_aes_gcm_alg:
 	crypto_unregister_alg(&aes_cfb64_alg);
 err_aes_cfb64_alg:
@@ -1928,6 +2099,7 @@ static void atmel_aes_get_cap(struct atmel_aes_dev *dd)
 	dd->caps.has_cfb64 = 0;
 	dd->caps.has_ctr32 = 0;
 	dd->caps.has_gcm = 0;
+	dd->caps.has_xts = 0;
 	dd->caps.max_burst_size = 1;
 
 	/* keep only major version number */
@@ -1937,6 +2109,7 @@ static void atmel_aes_get_cap(struct atmel_aes_dev *dd)
 		dd->caps.has_cfb64 = 1;
 		dd->caps.has_ctr32 = 1;
 		dd->caps.has_gcm = 1;
+		dd->caps.has_xts = 1;
 		dd->caps.max_burst_size = 4;
 		break;
 	case 0x200:
@@ -2138,7 +2311,7 @@ aes_dd_err:
 
 static int atmel_aes_remove(struct platform_device *pdev)
 {
-	static struct atmel_aes_dev *aes_dd;
+	struct atmel_aes_dev *aes_dd;
 
 	aes_dd = platform_get_drvdata(pdev);
 	if (!aes_dd)

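The new atmel_aes_xts_setkey() follows the usual XTS convention: the caller supplies a double-length key, which after validation by xts_check_key() is split in half between the data cipher key and the tweak cipher key. A minimal sketch of that split is shown below; the struct and field names are illustrative and the xts_check_key() FIPS checks are omitted.

/*
 * Sketch: split a double-length XTS key into its two halves.
 */
#include <stddef.h>
#include <string.h>

#define AES_KEYSIZE_256 32

struct xts_keys {
	unsigned char key1[AES_KEYSIZE_256];	/* data encryption key */
	unsigned char key2[AES_KEYSIZE_256];	/* tweak encryption key */
	size_t keylen;				/* length of each half */
};

static int xts_split_key(struct xts_keys *ctx, const unsigned char *key,
			 size_t keylen)
{
	/* xts_check_key() additionally rejects identical halves in FIPS mode. */
	if ((keylen & 1) || keylen / 2 > sizeof(ctx->key1))
		return -1;

	memcpy(ctx->key1, key, keylen / 2);
	memcpy(ctx->key2, key + keylen / 2, keylen / 2);
	ctx->keylen = keylen / 2;
	return 0;
}

int main(void)
{
	static const unsigned char key[64] = { 0 };	/* 2 x AES-256, all zero */
	struct xts_keys ctx;

	return xts_split_key(&ctx, key, sizeof(key));
}
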
+ 7 - 4
drivers/crypto/caam/Kconfig

@@ -74,7 +74,7 @@ config CRYPTO_DEV_FSL_CAAM_INTC_TIME_THLD
 
 config CRYPTO_DEV_FSL_CAAM_CRYPTO_API
 	tristate "Register algorithm implementations with the Crypto API"
-	depends on CRYPTO_DEV_FSL_CAAM && CRYPTO_DEV_FSL_CAAM_JR
+	depends on CRYPTO_DEV_FSL_CAAM_JR
 	default y
 	select CRYPTO_AEAD
 	select CRYPTO_AUTHENC
@@ -89,7 +89,7 @@ config CRYPTO_DEV_FSL_CAAM_CRYPTO_API
 
 config CRYPTO_DEV_FSL_CAAM_AHASH_API
 	tristate "Register hash algorithm implementations with Crypto API"
-	depends on CRYPTO_DEV_FSL_CAAM && CRYPTO_DEV_FSL_CAAM_JR
+	depends on CRYPTO_DEV_FSL_CAAM_JR
 	default y
 	select CRYPTO_HASH
 	help
@@ -101,7 +101,7 @@ config CRYPTO_DEV_FSL_CAAM_AHASH_API
 
 config CRYPTO_DEV_FSL_CAAM_PKC_API
         tristate "Register public key cryptography implementations with Crypto API"
-        depends on CRYPTO_DEV_FSL_CAAM && CRYPTO_DEV_FSL_CAAM_JR
+        depends on CRYPTO_DEV_FSL_CAAM_JR
         default y
         select CRYPTO_RSA
         help
@@ -113,7 +113,7 @@ config CRYPTO_DEV_FSL_CAAM_PKC_API
 
 config CRYPTO_DEV_FSL_CAAM_RNG_API
 	tristate "Register caam device for hwrng API"
-	depends on CRYPTO_DEV_FSL_CAAM && CRYPTO_DEV_FSL_CAAM_JR
+	depends on CRYPTO_DEV_FSL_CAAM_JR
 	default y
 	select CRYPTO_RNG
 	select HW_RANDOM
@@ -134,3 +134,6 @@ config CRYPTO_DEV_FSL_CAAM_DEBUG
 	help
 	  Selecting this will enable printing of various debug
 	  information in the CAAM driver.
+
+config CRYPTO_DEV_FSL_CAAM_CRYPTO_API_DESC
+	def_tristate CRYPTO_DEV_FSL_CAAM_CRYPTO_API

Some files were not shown because too many files changed in this diff